diff options
Diffstat (limited to 'fs')
163 files changed, 8777 insertions, 4533 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index c509123bea49..028ae38ecc52 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
| @@ -444,6 +444,32 @@ config OCFS2_FS | |||
| 444 | For more information on OCFS2, see the file | 444 | For more information on OCFS2, see the file |
| 445 | <file:Documentation/filesystems/ocfs2.txt>. | 445 | <file:Documentation/filesystems/ocfs2.txt>. |
| 446 | 446 | ||
| 447 | config OCFS2_FS_O2CB | ||
| 448 | tristate "O2CB Kernelspace Clustering" | ||
| 449 | depends on OCFS2_FS | ||
| 450 | default y | ||
| 451 | help | ||
| 452 | OCFS2 includes a simple kernelspace clustering package, the OCFS2 | ||
| 453 | Cluster Base. It only requires a very small userspace component | ||
| 454 | to configure it. This comes with the standard ocfs2-tools package. | ||
| 455 | O2CB is limited to maintaining a cluster for OCFS2 file systems. | ||
| 456 | It cannot manage any other cluster applications. | ||
| 457 | |||
| 458 | It is always safe to say Y here, as the clustering method is | ||
| 459 | run-time selectable. | ||
| 460 | |||
| 461 | config OCFS2_FS_USERSPACE_CLUSTER | ||
| 462 | tristate "OCFS2 Userspace Clustering" | ||
| 463 | depends on OCFS2_FS && DLM | ||
| 464 | default y | ||
| 465 | help | ||
| 466 | This option will allow OCFS2 to use userspace clustering services | ||
| 467 | in conjunction with the DLM in fs/dlm. If you are using a | ||
| 468 | userspace cluster manager, say Y here. | ||
| 469 | |||
| 470 | It is safe to say Y, as the clustering method is run-time | ||
| 471 | selectable. | ||
| 472 | |||
| 447 | config OCFS2_DEBUG_MASKLOG | 473 | config OCFS2_DEBUG_MASKLOG |
| 448 | bool "OCFS2 logging support" | 474 | bool "OCFS2 logging support" |
| 449 | depends on OCFS2_FS | 475 | depends on OCFS2_FS |
diff --git a/fs/afs/main.c b/fs/afs/main.c index 0f60f6b35769..2d3e5d4fb9f7 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c | |||
| @@ -22,7 +22,7 @@ MODULE_LICENSE("GPL"); | |||
| 22 | 22 | ||
| 23 | unsigned afs_debug; | 23 | unsigned afs_debug; |
| 24 | module_param_named(debug, afs_debug, uint, S_IWUSR | S_IRUGO); | 24 | module_param_named(debug, afs_debug, uint, S_IWUSR | S_IRUGO); |
| 25 | MODULE_PARM_DESC(afs_debug, "AFS debugging mask"); | 25 | MODULE_PARM_DESC(debug, "AFS debugging mask"); |
| 26 | 26 | ||
| 27 | static char *rootcell; | 27 | static char *rootcell; |
| 28 | 28 | ||
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 3e8683dbb13f..a99d46f3b26e 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c | |||
| @@ -835,7 +835,7 @@ ext2_xattr_cache_insert(struct buffer_head *bh) | |||
| 835 | struct mb_cache_entry *ce; | 835 | struct mb_cache_entry *ce; |
| 836 | int error; | 836 | int error; |
| 837 | 837 | ||
| 838 | ce = mb_cache_entry_alloc(ext2_xattr_cache); | 838 | ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS); |
| 839 | if (!ce) | 839 | if (!ce) |
| 840 | return -ENOMEM; | 840 | return -ENOMEM; |
| 841 | error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); | 841 | error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); |
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index a6ea4d6a8bb2..42856541e9a5 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c | |||
| @@ -1126,7 +1126,7 @@ ext3_xattr_cache_insert(struct buffer_head *bh) | |||
| 1126 | struct mb_cache_entry *ce; | 1126 | struct mb_cache_entry *ce; |
| 1127 | int error; | 1127 | int error; |
| 1128 | 1128 | ||
| 1129 | ce = mb_cache_entry_alloc(ext3_xattr_cache); | 1129 | ce = mb_cache_entry_alloc(ext3_xattr_cache, GFP_NOFS); |
| 1130 | if (!ce) { | 1130 | if (!ce) { |
| 1131 | ea_bdebug(bh, "out of memory"); | 1131 | ea_bdebug(bh, "out of memory"); |
| 1132 | return; | 1132 | return; |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index d7962139c010..e9054c1c7d93 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
| @@ -1386,7 +1386,7 @@ ext4_xattr_cache_insert(struct buffer_head *bh) | |||
| 1386 | struct mb_cache_entry *ce; | 1386 | struct mb_cache_entry *ce; |
| 1387 | int error; | 1387 | int error; |
| 1388 | 1388 | ||
| 1389 | ce = mb_cache_entry_alloc(ext4_xattr_cache); | 1389 | ce = mb_cache_entry_alloc(ext4_xattr_cache, GFP_NOFS); |
| 1390 | if (!ce) { | 1390 | if (!ce) { |
| 1391 | ea_bdebug(bh, "out of memory"); | 1391 | ea_bdebug(bh, "out of memory"); |
| 1392 | return; | 1392 | return; |
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index de8e64c03f73..7f7947e3dfbb 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | config GFS2_FS | 1 | config GFS2_FS |
| 2 | tristate "GFS2 file system support" | 2 | tristate "GFS2 file system support" |
| 3 | depends on EXPERIMENTAL | 3 | depends on EXPERIMENTAL && (64BIT || (LSF && LBD)) |
| 4 | select FS_POSIX_ACL | 4 | select FS_POSIX_ACL |
| 5 | select CRC32 | 5 | select CRC32 |
| 6 | help | 6 | help |
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index 8fff11058cee..e2350df02a07 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | obj-$(CONFIG_GFS2_FS) += gfs2.o | 1 | obj-$(CONFIG_GFS2_FS) += gfs2.o |
| 2 | gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ | 2 | gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ |
| 3 | glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \ | 3 | glops.o inode.o log.o lops.o locking.o main.o meta_io.o \ |
| 4 | mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ | 4 | mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ |
| 5 | ops_fstype.o ops_inode.o ops_super.o quota.o \ | 5 | ops_fstype.o ops_inode.o ops_super.o quota.o \ |
| 6 | recovery.o rgrp.o super.o sys.o trans.o util.o | 6 | recovery.o rgrp.o super.o sys.o trans.o util.o |
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 1047a8c7226a..3e9bd46f27e3 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c | |||
| @@ -116,7 +116,7 @@ static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl, | |||
| 116 | goto out; | 116 | goto out; |
| 117 | 117 | ||
| 118 | er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea); | 118 | er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea); |
| 119 | er.er_data = kmalloc(er.er_data_len, GFP_KERNEL); | 119 | er.er_data = kmalloc(er.er_data_len, GFP_NOFS); |
| 120 | error = -ENOMEM; | 120 | error = -ENOMEM; |
| 121 | if (!er.er_data) | 121 | if (!er.er_data) |
| 122 | goto out; | 122 | goto out; |
| @@ -222,7 +222,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) | |||
| 222 | return error; | 222 | return error; |
| 223 | } | 223 | } |
| 224 | 224 | ||
| 225 | clone = posix_acl_clone(acl, GFP_KERNEL); | 225 | clone = posix_acl_clone(acl, GFP_NOFS); |
| 226 | error = -ENOMEM; | 226 | error = -ENOMEM; |
| 227 | if (!clone) | 227 | if (!clone) |
| 228 | goto out; | 228 | goto out; |
| @@ -272,7 +272,7 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) | |||
| 272 | if (!acl) | 272 | if (!acl) |
| 273 | return gfs2_setattr_simple(ip, attr); | 273 | return gfs2_setattr_simple(ip, attr); |
| 274 | 274 | ||
| 275 | clone = posix_acl_clone(acl, GFP_KERNEL); | 275 | clone = posix_acl_clone(acl, GFP_NOFS); |
| 276 | error = -ENOMEM; | 276 | error = -ENOMEM; |
| 277 | if (!clone) | 277 | if (!clone) |
| 278 | goto out; | 278 | goto out; |
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index e9456ebd3bb6..c19184f2e70e 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | * keep it small. | 33 | * keep it small. |
| 34 | */ | 34 | */ |
| 35 | struct metapath { | 35 | struct metapath { |
| 36 | struct buffer_head *mp_bh[GFS2_MAX_META_HEIGHT]; | ||
| 36 | __u16 mp_list[GFS2_MAX_META_HEIGHT]; | 37 | __u16 mp_list[GFS2_MAX_META_HEIGHT]; |
| 37 | }; | 38 | }; |
| 38 | 39 | ||
| @@ -135,9 +136,10 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) | |||
| 135 | /* Get a free block, fill it with the stuffed data, | 136 | /* Get a free block, fill it with the stuffed data, |
| 136 | and write it out to disk */ | 137 | and write it out to disk */ |
| 137 | 138 | ||
| 139 | unsigned int n = 1; | ||
| 140 | block = gfs2_alloc_block(ip, &n); | ||
| 138 | if (isdir) { | 141 | if (isdir) { |
| 139 | block = gfs2_alloc_meta(ip); | 142 | gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1); |
| 140 | |||
| 141 | error = gfs2_dir_get_new_buffer(ip, block, &bh); | 143 | error = gfs2_dir_get_new_buffer(ip, block, &bh); |
| 142 | if (error) | 144 | if (error) |
| 143 | goto out_brelse; | 145 | goto out_brelse; |
| @@ -145,8 +147,6 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) | |||
| 145 | dibh, sizeof(struct gfs2_dinode)); | 147 | dibh, sizeof(struct gfs2_dinode)); |
| 146 | brelse(bh); | 148 | brelse(bh); |
| 147 | } else { | 149 | } else { |
| 148 | block = gfs2_alloc_data(ip); | ||
| 149 | |||
| 150 | error = gfs2_unstuffer_page(ip, dibh, block, page); | 150 | error = gfs2_unstuffer_page(ip, dibh, block, page); |
| 151 | if (error) | 151 | if (error) |
| 152 | goto out_brelse; | 152 | goto out_brelse; |
| @@ -161,12 +161,11 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) | |||
| 161 | 161 | ||
| 162 | if (ip->i_di.di_size) { | 162 | if (ip->i_di.di_size) { |
| 163 | *(__be64 *)(di + 1) = cpu_to_be64(block); | 163 | *(__be64 *)(di + 1) = cpu_to_be64(block); |
| 164 | ip->i_di.di_blocks++; | 164 | gfs2_add_inode_blocks(&ip->i_inode, 1); |
| 165 | gfs2_set_inode_blocks(&ip->i_inode); | 165 | di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); |
| 166 | di->di_blocks = cpu_to_be64(ip->i_di.di_blocks); | ||
| 167 | } | 166 | } |
| 168 | 167 | ||
| 169 | ip->i_di.di_height = 1; | 168 | ip->i_height = 1; |
| 170 | di->di_height = cpu_to_be16(1); | 169 | di->di_height = cpu_to_be16(1); |
| 171 | 170 | ||
| 172 | out_brelse: | 171 | out_brelse: |
| @@ -176,114 +175,13 @@ out: | |||
| 176 | return error; | 175 | return error; |
| 177 | } | 176 | } |
| 178 | 177 | ||
| 179 | /** | ||
| 180 | * calc_tree_height - Calculate the height of a metadata tree | ||
| 181 | * @ip: The GFS2 inode | ||
| 182 | * @size: The proposed size of the file | ||
| 183 | * | ||
| 184 | * Work out how tall a metadata tree needs to be in order to accommodate a | ||
| 185 | * file of a particular size. If size is less than the current size of | ||
| 186 | * the inode, then the current size of the inode is used instead of the | ||
| 187 | * supplied one. | ||
| 188 | * | ||
| 189 | * Returns: the height the tree should be | ||
| 190 | */ | ||
| 191 | |||
| 192 | static unsigned int calc_tree_height(struct gfs2_inode *ip, u64 size) | ||
| 193 | { | ||
| 194 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 195 | u64 *arr; | ||
| 196 | unsigned int max, height; | ||
| 197 | |||
| 198 | if (ip->i_di.di_size > size) | ||
| 199 | size = ip->i_di.di_size; | ||
| 200 | |||
| 201 | if (gfs2_is_dir(ip)) { | ||
| 202 | arr = sdp->sd_jheightsize; | ||
| 203 | max = sdp->sd_max_jheight; | ||
| 204 | } else { | ||
| 205 | arr = sdp->sd_heightsize; | ||
| 206 | max = sdp->sd_max_height; | ||
| 207 | } | ||
| 208 | |||
| 209 | for (height = 0; height < max; height++) | ||
| 210 | if (arr[height] >= size) | ||
| 211 | break; | ||
| 212 | |||
| 213 | return height; | ||
| 214 | } | ||
| 215 | |||
| 216 | /** | ||
| 217 | * build_height - Build a metadata tree of the requested height | ||
| 218 | * @ip: The GFS2 inode | ||
| 219 | * @height: The height to build to | ||
| 220 | * | ||
| 221 | * | ||
| 222 | * Returns: errno | ||
| 223 | */ | ||
| 224 | |||
| 225 | static int build_height(struct inode *inode, unsigned height) | ||
| 226 | { | ||
| 227 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 228 | unsigned new_height = height - ip->i_di.di_height; | ||
| 229 | struct buffer_head *dibh; | ||
| 230 | struct buffer_head *blocks[GFS2_MAX_META_HEIGHT]; | ||
| 231 | struct gfs2_dinode *di; | ||
| 232 | int error; | ||
| 233 | __be64 *bp; | ||
| 234 | u64 bn; | ||
| 235 | unsigned n; | ||
| 236 | |||
| 237 | if (height <= ip->i_di.di_height) | ||
| 238 | return 0; | ||
| 239 | |||
| 240 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 241 | if (error) | ||
| 242 | return error; | ||
| 243 | |||
| 244 | for(n = 0; n < new_height; n++) { | ||
| 245 | bn = gfs2_alloc_meta(ip); | ||
| 246 | blocks[n] = gfs2_meta_new(ip->i_gl, bn); | ||
| 247 | gfs2_trans_add_bh(ip->i_gl, blocks[n], 1); | ||
| 248 | } | ||
| 249 | |||
| 250 | n = 0; | ||
| 251 | bn = blocks[0]->b_blocknr; | ||
| 252 | if (new_height > 1) { | ||
| 253 | for(; n < new_height-1; n++) { | ||
| 254 | gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, | ||
| 255 | GFS2_FORMAT_IN); | ||
| 256 | gfs2_buffer_clear_tail(blocks[n], | ||
| 257 | sizeof(struct gfs2_meta_header)); | ||
| 258 | bp = (__be64 *)(blocks[n]->b_data + | ||
| 259 | sizeof(struct gfs2_meta_header)); | ||
| 260 | *bp = cpu_to_be64(blocks[n+1]->b_blocknr); | ||
| 261 | brelse(blocks[n]); | ||
| 262 | blocks[n] = NULL; | ||
| 263 | } | ||
| 264 | } | ||
| 265 | gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, GFS2_FORMAT_IN); | ||
| 266 | gfs2_buffer_copy_tail(blocks[n], sizeof(struct gfs2_meta_header), | ||
| 267 | dibh, sizeof(struct gfs2_dinode)); | ||
| 268 | brelse(blocks[n]); | ||
| 269 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 270 | di = (struct gfs2_dinode *)dibh->b_data; | ||
| 271 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | ||
| 272 | *(__be64 *)(di + 1) = cpu_to_be64(bn); | ||
| 273 | ip->i_di.di_height += new_height; | ||
| 274 | ip->i_di.di_blocks += new_height; | ||
| 275 | gfs2_set_inode_blocks(&ip->i_inode); | ||
| 276 | di->di_height = cpu_to_be16(ip->i_di.di_height); | ||
| 277 | di->di_blocks = cpu_to_be64(ip->i_di.di_blocks); | ||
| 278 | brelse(dibh); | ||
| 279 | return error; | ||
| 280 | } | ||
| 281 | 178 | ||
| 282 | /** | 179 | /** |
| 283 | * find_metapath - Find path through the metadata tree | 180 | * find_metapath - Find path through the metadata tree |
| 284 | * @ip: The inode pointer | 181 | * @sdp: The superblock |
| 285 | * @mp: The metapath to return the result in | 182 | * @mp: The metapath to return the result in |
| 286 | * @block: The disk block to look up | 183 | * @block: The disk block to look up |
| 184 | * @height: The pre-calculated height of the metadata tree | ||
| 287 | * | 185 | * |
| 288 | * This routine returns a struct metapath structure that defines a path | 186 | * This routine returns a struct metapath structure that defines a path |
| 289 | * through the metadata of inode "ip" to get to block "block". | 187 | * through the metadata of inode "ip" to get to block "block". |
| @@ -338,21 +236,29 @@ static int build_height(struct inode *inode, unsigned height) | |||
| 338 | * | 236 | * |
| 339 | */ | 237 | */ |
| 340 | 238 | ||
| 341 | static void find_metapath(struct gfs2_inode *ip, u64 block, | 239 | static void find_metapath(const struct gfs2_sbd *sdp, u64 block, |
| 342 | struct metapath *mp) | 240 | struct metapath *mp, unsigned int height) |
| 343 | { | 241 | { |
| 344 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 345 | u64 b = block; | ||
| 346 | unsigned int i; | 242 | unsigned int i; |
| 347 | 243 | ||
| 348 | for (i = ip->i_di.di_height; i--;) | 244 | for (i = height; i--;) |
| 349 | mp->mp_list[i] = do_div(b, sdp->sd_inptrs); | 245 | mp->mp_list[i] = do_div(block, sdp->sd_inptrs); |
| 246 | |||
| 247 | } | ||
| 350 | 248 | ||
| 249 | static inline unsigned int zero_metapath_length(const struct metapath *mp, | ||
| 250 | unsigned height) | ||
| 251 | { | ||
| 252 | unsigned int i; | ||
| 253 | for (i = 0; i < height - 1; i++) { | ||
| 254 | if (mp->mp_list[i] != 0) | ||
| 255 | return i; | ||
| 256 | } | ||
| 257 | return height; | ||
| 351 | } | 258 | } |
| 352 | 259 | ||
| 353 | /** | 260 | /** |
| 354 | * metapointer - Return pointer to start of metadata in a buffer | 261 | * metapointer - Return pointer to start of metadata in a buffer |
| 355 | * @bh: The buffer | ||
| 356 | * @height: The metadata height (0 = dinode) | 262 | * @height: The metadata height (0 = dinode) |
| 357 | * @mp: The metapath | 263 | * @mp: The metapath |
| 358 | * | 264 | * |
| @@ -361,93 +267,302 @@ static void find_metapath(struct gfs2_inode *ip, u64 block, | |||
| 361 | * metadata tree. | 267 | * metadata tree. |
| 362 | */ | 268 | */ |
| 363 | 269 | ||
| 364 | static inline __be64 *metapointer(struct buffer_head *bh, int *boundary, | 270 | static inline __be64 *metapointer(unsigned int height, const struct metapath *mp) |
| 365 | unsigned int height, const struct metapath *mp) | ||
| 366 | { | 271 | { |
| 272 | struct buffer_head *bh = mp->mp_bh[height]; | ||
| 367 | unsigned int head_size = (height > 0) ? | 273 | unsigned int head_size = (height > 0) ? |
| 368 | sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode); | 274 | sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode); |
| 369 | __be64 *ptr; | 275 | return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height]; |
| 370 | *boundary = 0; | ||
| 371 | ptr = ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height]; | ||
| 372 | if (ptr + 1 == (__be64 *)(bh->b_data + bh->b_size)) | ||
| 373 | *boundary = 1; | ||
| 374 | return ptr; | ||
| 375 | } | 276 | } |
| 376 | 277 | ||
| 377 | /** | 278 | /** |
| 378 | * lookup_block - Get the next metadata block in metadata tree | 279 | * lookup_metapath - Walk the metadata tree to a specific point |
| 379 | * @ip: The GFS2 inode | 280 | * @ip: The inode |
| 380 | * @bh: Buffer containing the pointers to metadata blocks | ||
| 381 | * @height: The height of the tree (0 = dinode) | ||
| 382 | * @mp: The metapath | 281 | * @mp: The metapath |
| 383 | * @create: Non-zero if we may create a new meatdata block | ||
| 384 | * @new: Used to indicate if we did create a new metadata block | ||
| 385 | * @block: the returned disk block number | ||
| 386 | * | 282 | * |
| 387 | * Given a metatree, complete to a particular height, checks to see if the next | 283 | * Assumes that the inode's buffer has already been looked up and |
| 388 | * height of the tree exists. If not the next height of the tree is created. | 284 | * hooked onto mp->mp_bh[0] and that the metapath has been initialised |
| 389 | * The block number of the next height of the metadata tree is returned. | 285 | * by find_metapath(). |
| 286 | * | ||
| 287 | * If this function encounters part of the tree which has not been | ||
| 288 | * allocated, it returns the current height of the tree at the point | ||
| 289 | * at which it found the unallocated block. Blocks which are found are | ||
| 290 | * added to the mp->mp_bh[] list. | ||
| 390 | * | 291 | * |
| 292 | * Returns: error or height of metadata tree | ||
| 391 | */ | 293 | */ |
| 392 | 294 | ||
| 393 | static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh, | 295 | static int lookup_metapath(struct gfs2_inode *ip, struct metapath *mp) |
| 394 | unsigned int height, struct metapath *mp, int create, | ||
| 395 | int *new, u64 *block) | ||
| 396 | { | 296 | { |
| 397 | int boundary; | 297 | unsigned int end_of_metadata = ip->i_height - 1; |
| 398 | __be64 *ptr = metapointer(bh, &boundary, height, mp); | 298 | unsigned int x; |
| 299 | __be64 *ptr; | ||
| 300 | u64 dblock; | ||
| 301 | int ret; | ||
| 399 | 302 | ||
| 400 | if (*ptr) { | 303 | for (x = 0; x < end_of_metadata; x++) { |
| 401 | *block = be64_to_cpu(*ptr); | 304 | ptr = metapointer(x, mp); |
| 402 | return boundary; | 305 | dblock = be64_to_cpu(*ptr); |
| 403 | } | 306 | if (!dblock) |
| 307 | return x + 1; | ||
| 404 | 308 | ||
| 405 | *block = 0; | 309 | ret = gfs2_meta_indirect_buffer(ip, x+1, dblock, 0, &mp->mp_bh[x+1]); |
| 310 | if (ret) | ||
| 311 | return ret; | ||
| 312 | } | ||
| 406 | 313 | ||
| 407 | if (!create) | 314 | return ip->i_height; |
| 408 | return 0; | 315 | } |
| 409 | 316 | ||
| 410 | if (height == ip->i_di.di_height - 1 && !gfs2_is_dir(ip)) | 317 | static inline void release_metapath(struct metapath *mp) |
| 411 | *block = gfs2_alloc_data(ip); | 318 | { |
| 412 | else | 319 | int i; |
| 413 | *block = gfs2_alloc_meta(ip); | ||
| 414 | 320 | ||
| 415 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | 321 | for (i = 0; i < GFS2_MAX_META_HEIGHT; i++) { |
| 322 | if (mp->mp_bh[i] == NULL) | ||
| 323 | break; | ||
| 324 | brelse(mp->mp_bh[i]); | ||
| 325 | } | ||
| 326 | } | ||
| 416 | 327 | ||
| 417 | *ptr = cpu_to_be64(*block); | 328 | /** |
| 418 | ip->i_di.di_blocks++; | 329 | * gfs2_extent_length - Returns length of an extent of blocks |
| 419 | gfs2_set_inode_blocks(&ip->i_inode); | 330 | * @start: Start of the buffer |
| 331 | * @len: Length of the buffer in bytes | ||
| 332 | * @ptr: Current position in the buffer | ||
| 333 | * @limit: Max extent length to return (0 = unlimited) | ||
| 334 | * @eob: Set to 1 if we hit "end of block" | ||
| 335 | * | ||
| 336 | * If the first block is zero (unallocated) it will return the number of | ||
| 337 | * unallocated blocks in the extent, otherwise it will return the number | ||
| 338 | * of contiguous blocks in the extent. | ||
| 339 | * | ||
| 340 | * Returns: The length of the extent (minimum of one block) | ||
| 341 | */ | ||
| 420 | 342 | ||
| 421 | *new = 1; | 343 | static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, unsigned limit, int *eob) |
| 422 | return 0; | 344 | { |
| 345 | const __be64 *end = (start + len); | ||
| 346 | const __be64 *first = ptr; | ||
| 347 | u64 d = be64_to_cpu(*ptr); | ||
| 348 | |||
| 349 | *eob = 0; | ||
| 350 | do { | ||
| 351 | ptr++; | ||
| 352 | if (ptr >= end) | ||
| 353 | break; | ||
| 354 | if (limit && --limit == 0) | ||
| 355 | break; | ||
| 356 | if (d) | ||
| 357 | d++; | ||
| 358 | } while(be64_to_cpu(*ptr) == d); | ||
| 359 | if (ptr >= end) | ||
| 360 | *eob = 1; | ||
| 361 | return (ptr - first); | ||
| 423 | } | 362 | } |
| 424 | 363 | ||
| 425 | static inline void bmap_lock(struct inode *inode, int create) | 364 | static inline void bmap_lock(struct gfs2_inode *ip, int create) |
| 426 | { | 365 | { |
| 427 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 428 | if (create) | 366 | if (create) |
| 429 | down_write(&ip->i_rw_mutex); | 367 | down_write(&ip->i_rw_mutex); |
| 430 | else | 368 | else |
| 431 | down_read(&ip->i_rw_mutex); | 369 | down_read(&ip->i_rw_mutex); |
| 432 | } | 370 | } |
| 433 | 371 | ||
| 434 | static inline void bmap_unlock(struct inode *inode, int create) | 372 | static inline void bmap_unlock(struct gfs2_inode *ip, int create) |
| 435 | { | 373 | { |
| 436 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 437 | if (create) | 374 | if (create) |
| 438 | up_write(&ip->i_rw_mutex); | 375 | up_write(&ip->i_rw_mutex); |
| 439 | else | 376 | else |
| 440 | up_read(&ip->i_rw_mutex); | 377 | up_read(&ip->i_rw_mutex); |
| 441 | } | 378 | } |
| 442 | 379 | ||
| 380 | static inline __be64 *gfs2_indirect_init(struct metapath *mp, | ||
| 381 | struct gfs2_glock *gl, unsigned int i, | ||
| 382 | unsigned offset, u64 bn) | ||
| 383 | { | ||
| 384 | __be64 *ptr = (__be64 *)(mp->mp_bh[i - 1]->b_data + | ||
| 385 | ((i > 1) ? sizeof(struct gfs2_meta_header) : | ||
| 386 | sizeof(struct gfs2_dinode))); | ||
| 387 | BUG_ON(i < 1); | ||
| 388 | BUG_ON(mp->mp_bh[i] != NULL); | ||
| 389 | mp->mp_bh[i] = gfs2_meta_new(gl, bn); | ||
| 390 | gfs2_trans_add_bh(gl, mp->mp_bh[i], 1); | ||
| 391 | gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN); | ||
| 392 | gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); | ||
| 393 | ptr += offset; | ||
| 394 | *ptr = cpu_to_be64(bn); | ||
| 395 | return ptr; | ||
| 396 | } | ||
| 397 | |||
| 398 | enum alloc_state { | ||
| 399 | ALLOC_DATA = 0, | ||
| 400 | ALLOC_GROW_DEPTH = 1, | ||
| 401 | ALLOC_GROW_HEIGHT = 2, | ||
| 402 | /* ALLOC_UNSTUFF = 3, TBD and rather complicated */ | ||
| 403 | }; | ||
| 404 | |||
| 405 | /** | ||
| 406 | * gfs2_bmap_alloc - Build a metadata tree of the requested height | ||
| 407 | * @inode: The GFS2 inode | ||
| 408 | * @lblock: The logical starting block of the extent | ||
| 409 | * @bh_map: This is used to return the mapping details | ||
| 410 | * @mp: The metapath | ||
| 411 | * @sheight: The starting height (i.e. whats already mapped) | ||
| 412 | * @height: The height to build to | ||
| 413 | * @maxlen: The max number of data blocks to alloc | ||
| 414 | * | ||
| 415 | * In this routine we may have to alloc: | ||
| 416 | * i) Indirect blocks to grow the metadata tree height | ||
| 417 | * ii) Indirect blocks to fill in lower part of the metadata tree | ||
| 418 | * iii) Data blocks | ||
| 419 | * | ||
| 420 | * The function is in two parts. The first part works out the total | ||
| 421 | * number of blocks which we need. The second part does the actual | ||
| 422 | * allocation asking for an extent at a time (if enough contiguous free | ||
| 423 | * blocks are available, there will only be one request per bmap call) | ||
| 424 | * and uses the state machine to initialise the blocks in order. | ||
| 425 | * | ||
| 426 | * Returns: errno on error | ||
| 427 | */ | ||
| 428 | |||
| 429 | static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, | ||
| 430 | struct buffer_head *bh_map, struct metapath *mp, | ||
| 431 | const unsigned int sheight, | ||
| 432 | const unsigned int height, | ||
| 433 | const unsigned int maxlen) | ||
| 434 | { | ||
| 435 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 436 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
| 437 | struct buffer_head *dibh = mp->mp_bh[0]; | ||
| 438 | u64 bn, dblock = 0; | ||
| 439 | unsigned n, i, blks, alloced = 0, iblks = 0, zmpl = 0; | ||
| 440 | unsigned dblks = 0; | ||
| 441 | unsigned ptrs_per_blk; | ||
| 442 | const unsigned end_of_metadata = height - 1; | ||
| 443 | int eob = 0; | ||
| 444 | enum alloc_state state; | ||
| 445 | __be64 *ptr; | ||
| 446 | __be64 zero_bn = 0; | ||
| 447 | |||
| 448 | BUG_ON(sheight < 1); | ||
| 449 | BUG_ON(dibh == NULL); | ||
| 450 | |||
| 451 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 452 | |||
| 453 | if (height == sheight) { | ||
| 454 | struct buffer_head *bh; | ||
| 455 | /* Bottom indirect block exists, find unalloced extent size */ | ||
| 456 | ptr = metapointer(end_of_metadata, mp); | ||
| 457 | bh = mp->mp_bh[end_of_metadata]; | ||
| 458 | dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen, | ||
| 459 | &eob); | ||
| 460 | BUG_ON(dblks < 1); | ||
| 461 | state = ALLOC_DATA; | ||
| 462 | } else { | ||
| 463 | /* Need to allocate indirect blocks */ | ||
| 464 | ptrs_per_blk = height > 1 ? sdp->sd_inptrs : sdp->sd_diptrs; | ||
| 465 | dblks = min(maxlen, ptrs_per_blk - mp->mp_list[end_of_metadata]); | ||
| 466 | if (height == ip->i_height) { | ||
| 467 | /* Writing into existing tree, extend tree down */ | ||
| 468 | iblks = height - sheight; | ||
| 469 | state = ALLOC_GROW_DEPTH; | ||
| 470 | } else { | ||
| 471 | /* Building up tree height */ | ||
| 472 | state = ALLOC_GROW_HEIGHT; | ||
| 473 | iblks = height - ip->i_height; | ||
| 474 | zmpl = zero_metapath_length(mp, height); | ||
| 475 | iblks -= zmpl; | ||
| 476 | iblks += height; | ||
| 477 | } | ||
| 478 | } | ||
| 479 | |||
| 480 | /* start of the second part of the function (state machine) */ | ||
| 481 | |||
| 482 | blks = dblks + iblks; | ||
| 483 | i = sheight; | ||
| 484 | do { | ||
| 485 | n = blks - alloced; | ||
| 486 | bn = gfs2_alloc_block(ip, &n); | ||
| 487 | alloced += n; | ||
| 488 | if (state != ALLOC_DATA || gfs2_is_jdata(ip)) | ||
| 489 | gfs2_trans_add_unrevoke(sdp, bn, n); | ||
| 490 | switch (state) { | ||
| 491 | /* Growing height of tree */ | ||
| 492 | case ALLOC_GROW_HEIGHT: | ||
| 493 | if (i == 1) { | ||
| 494 | ptr = (__be64 *)(dibh->b_data + | ||
| 495 | sizeof(struct gfs2_dinode)); | ||
| 496 | zero_bn = *ptr; | ||
| 497 | } | ||
| 498 | for (; i - 1 < height - ip->i_height && n > 0; i++, n--) | ||
| 499 | gfs2_indirect_init(mp, ip->i_gl, i, 0, bn++); | ||
| 500 | if (i - 1 == height - ip->i_height) { | ||
| 501 | i--; | ||
| 502 | gfs2_buffer_copy_tail(mp->mp_bh[i], | ||
| 503 | sizeof(struct gfs2_meta_header), | ||
| 504 | dibh, sizeof(struct gfs2_dinode)); | ||
| 505 | gfs2_buffer_clear_tail(dibh, | ||
| 506 | sizeof(struct gfs2_dinode) + | ||
| 507 | sizeof(__be64)); | ||
| 508 | ptr = (__be64 *)(mp->mp_bh[i]->b_data + | ||
| 509 | sizeof(struct gfs2_meta_header)); | ||
| 510 | *ptr = zero_bn; | ||
| 511 | state = ALLOC_GROW_DEPTH; | ||
| 512 | for(i = zmpl; i < height; i++) { | ||
| 513 | if (mp->mp_bh[i] == NULL) | ||
| 514 | break; | ||
| 515 | brelse(mp->mp_bh[i]); | ||
| 516 | mp->mp_bh[i] = NULL; | ||
| 517 | } | ||
| 518 | i = zmpl; | ||
| 519 | } | ||
| 520 | if (n == 0) | ||
| 521 | break; | ||
| 522 | /* Branching from existing tree */ | ||
| 523 | case ALLOC_GROW_DEPTH: | ||
| 524 | if (i > 1 && i < height) | ||
| 525 | gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[i-1], 1); | ||
| 526 | for (; i < height && n > 0; i++, n--) | ||
| 527 | gfs2_indirect_init(mp, ip->i_gl, i, | ||
| 528 | mp->mp_list[i-1], bn++); | ||
| 529 | if (i == height) | ||
| 530 | state = ALLOC_DATA; | ||
| 531 | if (n == 0) | ||
| 532 | break; | ||
| 533 | /* Tree complete, adding data blocks */ | ||
| 534 | case ALLOC_DATA: | ||
| 535 | BUG_ON(n > dblks); | ||
| 536 | BUG_ON(mp->mp_bh[end_of_metadata] == NULL); | ||
| 537 | gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[end_of_metadata], 1); | ||
| 538 | dblks = n; | ||
| 539 | ptr = metapointer(end_of_metadata, mp); | ||
| 540 | dblock = bn; | ||
| 541 | while (n-- > 0) | ||
| 542 | *ptr++ = cpu_to_be64(bn++); | ||
| 543 | break; | ||
| 544 | } | ||
| 545 | } while (state != ALLOC_DATA); | ||
| 546 | |||
| 547 | ip->i_height = height; | ||
| 548 | gfs2_add_inode_blocks(&ip->i_inode, alloced); | ||
| 549 | gfs2_dinode_out(ip, mp->mp_bh[0]->b_data); | ||
| 550 | map_bh(bh_map, inode->i_sb, dblock); | ||
| 551 | bh_map->b_size = dblks << inode->i_blkbits; | ||
| 552 | set_buffer_new(bh_map); | ||
| 553 | return 0; | ||
| 554 | } | ||
| 555 | |||
| 443 | /** | 556 | /** |
| 444 | * gfs2_block_map - Map a block from an inode to a disk block | 557 | * gfs2_block_map - Map a block from an inode to a disk block |
| 445 | * @inode: The inode | 558 | * @inode: The inode |
| 446 | * @lblock: The logical block number | 559 | * @lblock: The logical block number |
| 447 | * @bh_map: The bh to be mapped | 560 | * @bh_map: The bh to be mapped |
| 561 | * @create: True if its ok to alloc blocks to satify the request | ||
| 448 | * | 562 | * |
| 449 | * Find the block number on the current device which corresponds to an | 563 | * Sets buffer_mapped() if successful, sets buffer_boundary() if a |
| 450 | * inode's block. If the block had to be created, "new" will be set. | 564 | * read of metadata will be required before the next block can be |
| 565 | * mapped. Sets buffer_new() if new blocks were allocated. | ||
| 451 | * | 566 | * |
| 452 | * Returns: errno | 567 | * Returns: errno |
| 453 | */ | 568 | */ |
| @@ -457,97 +572,78 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, | |||
| 457 | { | 572 | { |
| 458 | struct gfs2_inode *ip = GFS2_I(inode); | 573 | struct gfs2_inode *ip = GFS2_I(inode); |
| 459 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 574 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
| 460 | struct buffer_head *bh; | 575 | unsigned int bsize = sdp->sd_sb.sb_bsize; |
| 461 | unsigned int bsize; | 576 | const unsigned int maxlen = bh_map->b_size >> inode->i_blkbits; |
| 462 | unsigned int height; | 577 | const u64 *arr = sdp->sd_heightsize; |
| 463 | unsigned int end_of_metadata; | 578 | __be64 *ptr; |
| 464 | unsigned int x; | ||
| 465 | int error = 0; | ||
| 466 | int new = 0; | ||
| 467 | u64 dblock = 0; | ||
| 468 | int boundary; | ||
| 469 | unsigned int maxlen = bh_map->b_size >> inode->i_blkbits; | ||
| 470 | struct metapath mp; | ||
| 471 | u64 size; | 579 | u64 size; |
| 472 | struct buffer_head *dibh = NULL; | 580 | struct metapath mp; |
| 581 | int ret; | ||
| 582 | int eob; | ||
| 583 | unsigned int len; | ||
| 584 | struct buffer_head *bh; | ||
| 585 | u8 height; | ||
| 473 | 586 | ||
| 474 | BUG_ON(maxlen == 0); | 587 | BUG_ON(maxlen == 0); |
| 475 | 588 | ||
| 476 | if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip))) | 589 | memset(mp.mp_bh, 0, sizeof(mp.mp_bh)); |
| 477 | return 0; | 590 | bmap_lock(ip, create); |
| 478 | |||
| 479 | bmap_lock(inode, create); | ||
| 480 | clear_buffer_mapped(bh_map); | 591 | clear_buffer_mapped(bh_map); |
| 481 | clear_buffer_new(bh_map); | 592 | clear_buffer_new(bh_map); |
| 482 | clear_buffer_boundary(bh_map); | 593 | clear_buffer_boundary(bh_map); |
| 483 | bsize = gfs2_is_dir(ip) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize; | 594 | if (gfs2_is_dir(ip)) { |
| 484 | size = (lblock + 1) * bsize; | 595 | bsize = sdp->sd_jbsize; |
| 485 | 596 | arr = sdp->sd_jheightsize; | |
| 486 | if (size > ip->i_di.di_size) { | ||
| 487 | height = calc_tree_height(ip, size); | ||
| 488 | if (ip->i_di.di_height < height) { | ||
| 489 | if (!create) | ||
| 490 | goto out_ok; | ||
| 491 | |||
| 492 | error = build_height(inode, height); | ||
| 493 | if (error) | ||
| 494 | goto out_fail; | ||
| 495 | } | ||
| 496 | } | 597 | } |
| 497 | 598 | ||
| 498 | find_metapath(ip, lblock, &mp); | 599 | ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]); |
| 499 | end_of_metadata = ip->i_di.di_height - 1; | 600 | if (ret) |
| 500 | error = gfs2_meta_inode_buffer(ip, &bh); | 601 | goto out; |
| 501 | if (error) | ||
| 502 | goto out_fail; | ||
| 503 | dibh = bh; | ||
| 504 | get_bh(dibh); | ||
| 505 | 602 | ||
| 506 | for (x = 0; x < end_of_metadata; x++) { | 603 | height = ip->i_height; |
| 507 | lookup_block(ip, bh, x, &mp, create, &new, &dblock); | 604 | size = (lblock + 1) * bsize; |
| 508 | brelse(bh); | 605 | while (size > arr[height]) |
| 509 | if (!dblock) | 606 | height++; |
| 510 | goto out_ok; | 607 | find_metapath(sdp, lblock, &mp, height); |
| 608 | ret = 1; | ||
| 609 | if (height > ip->i_height || gfs2_is_stuffed(ip)) | ||
| 610 | goto do_alloc; | ||
| 611 | ret = lookup_metapath(ip, &mp); | ||
| 612 | if (ret < 0) | ||
| 613 | goto out; | ||
| 614 | if (ret != ip->i_height) | ||
| 615 | goto do_alloc; | ||
| 616 | ptr = metapointer(ip->i_height - 1, &mp); | ||
| 617 | if (*ptr == 0) | ||
| 618 | goto do_alloc; | ||
| 619 | map_bh(bh_map, inode->i_sb, be64_to_cpu(*ptr)); | ||
| 620 | bh = mp.mp_bh[ip->i_height - 1]; | ||
| 621 | len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen, &eob); | ||
| 622 | bh_map->b_size = (len << inode->i_blkbits); | ||
| 623 | if (eob) | ||
| 624 | set_buffer_boundary(bh_map); | ||
| 625 | ret = 0; | ||
| 626 | out: | ||
| 627 | release_metapath(&mp); | ||
| 628 | bmap_unlock(ip, create); | ||
| 629 | return ret; | ||
| 511 | 630 | ||
| 512 | error = gfs2_meta_indirect_buffer(ip, x+1, dblock, new, &bh); | 631 | do_alloc: |
| 513 | if (error) | 632 | /* All allocations are done here, firstly check create flag */ |
| 514 | goto out_fail; | 633 | if (!create) { |
| 634 | BUG_ON(gfs2_is_stuffed(ip)); | ||
| 635 | ret = 0; | ||
| 636 | goto out; | ||
| 515 | } | 637 | } |
| 516 | 638 | ||
| 517 | boundary = lookup_block(ip, bh, end_of_metadata, &mp, create, &new, &dblock); | 639 | /* At this point ret is the tree depth of already allocated blocks */ |
| 518 | if (dblock) { | 640 | ret = gfs2_bmap_alloc(inode, lblock, bh_map, &mp, ret, height, maxlen); |
| 519 | map_bh(bh_map, inode->i_sb, dblock); | 641 | goto out; |
| 520 | if (boundary) | ||
| 521 | set_buffer_boundary(bh_map); | ||
| 522 | if (new) { | ||
| 523 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 524 | gfs2_dinode_out(ip, dibh->b_data); | ||
| 525 | set_buffer_new(bh_map); | ||
| 526 | goto out_brelse; | ||
| 527 | } | ||
| 528 | while(--maxlen && !buffer_boundary(bh_map)) { | ||
| 529 | u64 eblock; | ||
| 530 | |||
| 531 | mp.mp_list[end_of_metadata]++; | ||
| 532 | boundary = lookup_block(ip, bh, end_of_metadata, &mp, 0, &new, &eblock); | ||
| 533 | if (eblock != ++dblock) | ||
| 534 | break; | ||
| 535 | bh_map->b_size += (1 << inode->i_blkbits); | ||
| 536 | if (boundary) | ||
| 537 | set_buffer_boundary(bh_map); | ||
| 538 | } | ||
| 539 | } | ||
| 540 | out_brelse: | ||
| 541 | brelse(bh); | ||
| 542 | out_ok: | ||
| 543 | error = 0; | ||
| 544 | out_fail: | ||
| 545 | if (dibh) | ||
| 546 | brelse(dibh); | ||
| 547 | bmap_unlock(inode, create); | ||
| 548 | return error; | ||
| 549 | } | 642 | } |
| 550 | 643 | ||
| 644 | /* | ||
| 645 | * Deprecated: do not use in new code | ||
| 646 | */ | ||
| 551 | int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen) | 647 | int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen) |
| 552 | { | 648 | { |
| 553 | struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 }; | 649 | struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 }; |
| @@ -558,7 +654,7 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi | |||
| 558 | BUG_ON(!dblock); | 654 | BUG_ON(!dblock); |
| 559 | BUG_ON(!new); | 655 | BUG_ON(!new); |
| 560 | 656 | ||
| 561 | bh.b_size = 1 << (inode->i_blkbits + 5); | 657 | bh.b_size = 1 << (inode->i_blkbits + (create ? 0 : 5)); |
| 562 | ret = gfs2_block_map(inode, lblock, &bh, create); | 658 | ret = gfs2_block_map(inode, lblock, &bh, create); |
| 563 | *extlen = bh.b_size >> inode->i_blkbits; | 659 | *extlen = bh.b_size >> inode->i_blkbits; |
| 564 | *dblock = bh.b_blocknr; | 660 | *dblock = bh.b_blocknr; |
| @@ -621,7 +717,7 @@ static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
| 621 | if (error) | 717 | if (error) |
| 622 | goto out; | 718 | goto out; |
| 623 | 719 | ||
| 624 | if (height < ip->i_di.di_height - 1) | 720 | if (height < ip->i_height - 1) |
| 625 | for (; top < bottom; top++, first = 0) { | 721 | for (; top < bottom; top++, first = 0) { |
| 626 | if (!*top) | 722 | if (!*top) |
| 627 | continue; | 723 | continue; |
| @@ -679,7 +775,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
| 679 | sm->sm_first = 0; | 775 | sm->sm_first = 0; |
| 680 | } | 776 | } |
| 681 | 777 | ||
| 682 | metadata = (height != ip->i_di.di_height - 1); | 778 | metadata = (height != ip->i_height - 1); |
| 683 | if (metadata) | 779 | if (metadata) |
| 684 | revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; | 780 | revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; |
| 685 | 781 | ||
| @@ -713,7 +809,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
| 713 | else | 809 | else |
| 714 | goto out; /* Nothing to do */ | 810 | goto out; /* Nothing to do */ |
| 715 | 811 | ||
| 716 | gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0); | 812 | gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE); |
| 717 | 813 | ||
| 718 | for (x = 0; x < rlist.rl_rgrps; x++) { | 814 | for (x = 0; x < rlist.rl_rgrps; x++) { |
| 719 | struct gfs2_rgrpd *rgd; | 815 | struct gfs2_rgrpd *rgd; |
| @@ -760,10 +856,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
| 760 | } | 856 | } |
| 761 | 857 | ||
| 762 | *p = 0; | 858 | *p = 0; |
| 763 | if (!ip->i_di.di_blocks) | 859 | gfs2_add_inode_blocks(&ip->i_inode, -1); |
| 764 | gfs2_consist_inode(ip); | ||
| 765 | ip->i_di.di_blocks--; | ||
| 766 | gfs2_set_inode_blocks(&ip->i_inode); | ||
| 767 | } | 860 | } |
| 768 | if (bstart) { | 861 | if (bstart) { |
| 769 | if (metadata) | 862 | if (metadata) |
| @@ -804,19 +897,16 @@ static int do_grow(struct gfs2_inode *ip, u64 size) | |||
| 804 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 897 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 805 | struct gfs2_alloc *al; | 898 | struct gfs2_alloc *al; |
| 806 | struct buffer_head *dibh; | 899 | struct buffer_head *dibh; |
| 807 | unsigned int h; | ||
| 808 | int error; | 900 | int error; |
| 809 | 901 | ||
| 810 | al = gfs2_alloc_get(ip); | 902 | al = gfs2_alloc_get(ip); |
| 903 | if (!al) | ||
| 904 | return -ENOMEM; | ||
| 811 | 905 | ||
| 812 | error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 906 | error = gfs2_quota_lock_check(ip); |
| 813 | if (error) | 907 | if (error) |
| 814 | goto out; | 908 | goto out; |
| 815 | 909 | ||
| 816 | error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); | ||
| 817 | if (error) | ||
| 818 | goto out_gunlock_q; | ||
| 819 | |||
| 820 | al->al_requested = sdp->sd_max_height + RES_DATA; | 910 | al->al_requested = sdp->sd_max_height + RES_DATA; |
| 821 | 911 | ||
| 822 | error = gfs2_inplace_reserve(ip); | 912 | error = gfs2_inplace_reserve(ip); |
| @@ -829,34 +919,25 @@ static int do_grow(struct gfs2_inode *ip, u64 size) | |||
| 829 | if (error) | 919 | if (error) |
| 830 | goto out_ipres; | 920 | goto out_ipres; |
| 831 | 921 | ||
| 922 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 923 | if (error) | ||
| 924 | goto out_end_trans; | ||
| 925 | |||
| 832 | if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { | 926 | if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { |
| 833 | if (gfs2_is_stuffed(ip)) { | 927 | if (gfs2_is_stuffed(ip)) { |
| 834 | error = gfs2_unstuff_dinode(ip, NULL); | 928 | error = gfs2_unstuff_dinode(ip, NULL); |
| 835 | if (error) | 929 | if (error) |
| 836 | goto out_end_trans; | 930 | goto out_brelse; |
| 837 | } | ||
| 838 | |||
| 839 | h = calc_tree_height(ip, size); | ||
| 840 | if (ip->i_di.di_height < h) { | ||
| 841 | down_write(&ip->i_rw_mutex); | ||
| 842 | error = build_height(&ip->i_inode, h); | ||
| 843 | up_write(&ip->i_rw_mutex); | ||
| 844 | if (error) | ||
| 845 | goto out_end_trans; | ||
| 846 | } | 931 | } |
| 847 | } | 932 | } |
| 848 | 933 | ||
| 849 | ip->i_di.di_size = size; | 934 | ip->i_di.di_size = size; |
| 850 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 935 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
| 851 | |||
| 852 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 853 | if (error) | ||
| 854 | goto out_end_trans; | ||
| 855 | |||
| 856 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 936 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
| 857 | gfs2_dinode_out(ip, dibh->b_data); | 937 | gfs2_dinode_out(ip, dibh->b_data); |
| 858 | brelse(dibh); | ||
| 859 | 938 | ||
| 939 | out_brelse: | ||
| 940 | brelse(dibh); | ||
| 860 | out_end_trans: | 941 | out_end_trans: |
| 861 | gfs2_trans_end(sdp); | 942 | gfs2_trans_end(sdp); |
| 862 | out_ipres: | 943 | out_ipres: |
| @@ -986,7 +1067,8 @@ out: | |||
| 986 | 1067 | ||
| 987 | static int trunc_dealloc(struct gfs2_inode *ip, u64 size) | 1068 | static int trunc_dealloc(struct gfs2_inode *ip, u64 size) |
| 988 | { | 1069 | { |
| 989 | unsigned int height = ip->i_di.di_height; | 1070 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 1071 | unsigned int height = ip->i_height; | ||
| 990 | u64 lblock; | 1072 | u64 lblock; |
| 991 | struct metapath mp; | 1073 | struct metapath mp; |
| 992 | int error; | 1074 | int error; |
| @@ -994,10 +1076,11 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size) | |||
| 994 | if (!size) | 1076 | if (!size) |
| 995 | lblock = 0; | 1077 | lblock = 0; |
| 996 | else | 1078 | else |
| 997 | lblock = (size - 1) >> GFS2_SB(&ip->i_inode)->sd_sb.sb_bsize_shift; | 1079 | lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift; |
| 998 | 1080 | ||
| 999 | find_metapath(ip, lblock, &mp); | 1081 | find_metapath(sdp, lblock, &mp, ip->i_height); |
| 1000 | gfs2_alloc_get(ip); | 1082 | if (!gfs2_alloc_get(ip)) |
| 1083 | return -ENOMEM; | ||
| 1001 | 1084 | ||
| 1002 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 1085 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); |
| 1003 | if (error) | 1086 | if (error) |
| @@ -1037,10 +1120,8 @@ static int trunc_end(struct gfs2_inode *ip) | |||
| 1037 | goto out; | 1120 | goto out; |
| 1038 | 1121 | ||
| 1039 | if (!ip->i_di.di_size) { | 1122 | if (!ip->i_di.di_size) { |
| 1040 | ip->i_di.di_height = 0; | 1123 | ip->i_height = 0; |
| 1041 | ip->i_di.di_goal_meta = | 1124 | ip->i_goal = ip->i_no_addr; |
| 1042 | ip->i_di.di_goal_data = | ||
| 1043 | ip->i_no_addr; | ||
| 1044 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | 1125 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); |
| 1045 | } | 1126 | } |
| 1046 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 1127 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
| @@ -1197,10 +1278,9 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | |||
| 1197 | unsigned int len, int *alloc_required) | 1278 | unsigned int len, int *alloc_required) |
| 1198 | { | 1279 | { |
| 1199 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1280 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 1200 | u64 lblock, lblock_stop, dblock; | 1281 | struct buffer_head bh; |
| 1201 | u32 extlen; | 1282 | unsigned int shift; |
| 1202 | int new = 0; | 1283 | u64 lblock, lblock_stop, size; |
| 1203 | int error = 0; | ||
| 1204 | 1284 | ||
| 1205 | *alloc_required = 0; | 1285 | *alloc_required = 0; |
| 1206 | 1286 | ||
| @@ -1214,6 +1294,8 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | |||
| 1214 | return 0; | 1294 | return 0; |
| 1215 | } | 1295 | } |
| 1216 | 1296 | ||
| 1297 | *alloc_required = 1; | ||
| 1298 | shift = sdp->sd_sb.sb_bsize_shift; | ||
| 1217 | if (gfs2_is_dir(ip)) { | 1299 | if (gfs2_is_dir(ip)) { |
| 1218 | unsigned int bsize = sdp->sd_jbsize; | 1300 | unsigned int bsize = sdp->sd_jbsize; |
| 1219 | lblock = offset; | 1301 | lblock = offset; |
| @@ -1221,27 +1303,25 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | |||
| 1221 | lblock_stop = offset + len + bsize - 1; | 1303 | lblock_stop = offset + len + bsize - 1; |
| 1222 | do_div(lblock_stop, bsize); | 1304 | do_div(lblock_stop, bsize); |
| 1223 | } else { | 1305 | } else { |
| 1224 | unsigned int shift = sdp->sd_sb.sb_bsize_shift; | ||
| 1225 | u64 end_of_file = (ip->i_di.di_size + sdp->sd_sb.sb_bsize - 1) >> shift; | 1306 | u64 end_of_file = (ip->i_di.di_size + sdp->sd_sb.sb_bsize - 1) >> shift; |
| 1226 | lblock = offset >> shift; | 1307 | lblock = offset >> shift; |
| 1227 | lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; | 1308 | lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; |
| 1228 | if (lblock_stop > end_of_file) { | 1309 | if (lblock_stop > end_of_file) |
| 1229 | *alloc_required = 1; | ||
| 1230 | return 0; | 1310 | return 0; |
| 1231 | } | ||
| 1232 | } | 1311 | } |
| 1233 | 1312 | ||
| 1234 | for (; lblock < lblock_stop; lblock += extlen) { | 1313 | size = (lblock_stop - lblock) << shift; |
| 1235 | error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen); | 1314 | do { |
| 1236 | if (error) | 1315 | bh.b_state = 0; |
| 1237 | return error; | 1316 | bh.b_size = size; |
| 1238 | 1317 | gfs2_block_map(&ip->i_inode, lblock, &bh, 0); | |
| 1239 | if (!dblock) { | 1318 | if (!buffer_mapped(&bh)) |
| 1240 | *alloc_required = 1; | ||
| 1241 | return 0; | 1319 | return 0; |
| 1242 | } | 1320 | size -= bh.b_size; |
| 1243 | } | 1321 | lblock += (bh.b_size >> ip->i_inode.i_blkbits); |
| 1322 | } while(size > 0); | ||
| 1244 | 1323 | ||
| 1324 | *alloc_required = 0; | ||
| 1245 | return 0; | 1325 | return 0; |
| 1246 | } | 1326 | } |
| 1247 | 1327 | ||
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index c34709512b19..eed040d8ba3a 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
| @@ -159,6 +159,7 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf, | |||
| 159 | unsigned int o; | 159 | unsigned int o; |
| 160 | int copied = 0; | 160 | int copied = 0; |
| 161 | int error = 0; | 161 | int error = 0; |
| 162 | int new = 0; | ||
| 162 | 163 | ||
| 163 | if (!size) | 164 | if (!size) |
| 164 | return 0; | 165 | return 0; |
| @@ -183,7 +184,6 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf, | |||
| 183 | while (copied < size) { | 184 | while (copied < size) { |
| 184 | unsigned int amount; | 185 | unsigned int amount; |
| 185 | struct buffer_head *bh; | 186 | struct buffer_head *bh; |
| 186 | int new = 0; | ||
| 187 | 187 | ||
| 188 | amount = size - copied; | 188 | amount = size - copied; |
| 189 | if (amount > sdp->sd_sb.sb_bsize - o) | 189 | if (amount > sdp->sd_sb.sb_bsize - o) |
| @@ -757,7 +757,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, | |||
| 757 | 757 | ||
| 758 | if (ip->i_di.di_flags & GFS2_DIF_EXHASH) { | 758 | if (ip->i_di.di_flags & GFS2_DIF_EXHASH) { |
| 759 | struct gfs2_leaf *leaf; | 759 | struct gfs2_leaf *leaf; |
| 760 | unsigned hsize = 1 << ip->i_di.di_depth; | 760 | unsigned hsize = 1 << ip->i_depth; |
| 761 | unsigned index; | 761 | unsigned index; |
| 762 | u64 ln; | 762 | u64 ln; |
| 763 | if (hsize * sizeof(u64) != ip->i_di.di_size) { | 763 | if (hsize * sizeof(u64) != ip->i_di.di_size) { |
| @@ -765,7 +765,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, | |||
| 765 | return ERR_PTR(-EIO); | 765 | return ERR_PTR(-EIO); |
| 766 | } | 766 | } |
| 767 | 767 | ||
| 768 | index = name->hash >> (32 - ip->i_di.di_depth); | 768 | index = name->hash >> (32 - ip->i_depth); |
| 769 | error = get_first_leaf(ip, index, &bh); | 769 | error = get_first_leaf(ip, index, &bh); |
| 770 | if (error) | 770 | if (error) |
| 771 | return ERR_PTR(error); | 771 | return ERR_PTR(error); |
| @@ -803,14 +803,15 @@ got_dent: | |||
| 803 | static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth) | 803 | static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, u16 depth) |
| 804 | { | 804 | { |
| 805 | struct gfs2_inode *ip = GFS2_I(inode); | 805 | struct gfs2_inode *ip = GFS2_I(inode); |
| 806 | u64 bn = gfs2_alloc_meta(ip); | 806 | unsigned int n = 1; |
| 807 | u64 bn = gfs2_alloc_block(ip, &n); | ||
| 807 | struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn); | 808 | struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn); |
| 808 | struct gfs2_leaf *leaf; | 809 | struct gfs2_leaf *leaf; |
| 809 | struct gfs2_dirent *dent; | 810 | struct gfs2_dirent *dent; |
| 810 | struct qstr name = { .name = "", .len = 0, .hash = 0 }; | 811 | struct qstr name = { .name = "", .len = 0, .hash = 0 }; |
| 811 | if (!bh) | 812 | if (!bh) |
| 812 | return NULL; | 813 | return NULL; |
| 813 | 814 | gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1); | |
| 814 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | 815 | gfs2_trans_add_bh(ip->i_gl, bh, 1); |
| 815 | gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); | 816 | gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); |
| 816 | leaf = (struct gfs2_leaf *)bh->b_data; | 817 | leaf = (struct gfs2_leaf *)bh->b_data; |
| @@ -905,12 +906,11 @@ static int dir_make_exhash(struct inode *inode) | |||
| 905 | *lp = cpu_to_be64(bn); | 906 | *lp = cpu_to_be64(bn); |
| 906 | 907 | ||
| 907 | dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2; | 908 | dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2; |
| 908 | dip->i_di.di_blocks++; | 909 | gfs2_add_inode_blocks(&dip->i_inode, 1); |
| 909 | gfs2_set_inode_blocks(&dip->i_inode); | ||
| 910 | dip->i_di.di_flags |= GFS2_DIF_EXHASH; | 910 | dip->i_di.di_flags |= GFS2_DIF_EXHASH; |
| 911 | 911 | ||
| 912 | for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; | 912 | for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; |
| 913 | dip->i_di.di_depth = y; | 913 | dip->i_depth = y; |
| 914 | 914 | ||
| 915 | gfs2_dinode_out(dip, dibh->b_data); | 915 | gfs2_dinode_out(dip, dibh->b_data); |
| 916 | 916 | ||
| @@ -941,7 +941,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) | |||
| 941 | int x, moved = 0; | 941 | int x, moved = 0; |
| 942 | int error; | 942 | int error; |
| 943 | 943 | ||
| 944 | index = name->hash >> (32 - dip->i_di.di_depth); | 944 | index = name->hash >> (32 - dip->i_depth); |
| 945 | error = get_leaf_nr(dip, index, &leaf_no); | 945 | error = get_leaf_nr(dip, index, &leaf_no); |
| 946 | if (error) | 946 | if (error) |
| 947 | return error; | 947 | return error; |
| @@ -952,7 +952,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) | |||
| 952 | return error; | 952 | return error; |
| 953 | 953 | ||
| 954 | oleaf = (struct gfs2_leaf *)obh->b_data; | 954 | oleaf = (struct gfs2_leaf *)obh->b_data; |
| 955 | if (dip->i_di.di_depth == be16_to_cpu(oleaf->lf_depth)) { | 955 | if (dip->i_depth == be16_to_cpu(oleaf->lf_depth)) { |
| 956 | brelse(obh); | 956 | brelse(obh); |
| 957 | return 1; /* can't split */ | 957 | return 1; /* can't split */ |
| 958 | } | 958 | } |
| @@ -967,10 +967,10 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) | |||
| 967 | bn = nbh->b_blocknr; | 967 | bn = nbh->b_blocknr; |
| 968 | 968 | ||
| 969 | /* Compute the start and len of leaf pointers in the hash table. */ | 969 | /* Compute the start and len of leaf pointers in the hash table. */ |
| 970 | len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth)); | 970 | len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth)); |
| 971 | half_len = len >> 1; | 971 | half_len = len >> 1; |
| 972 | if (!half_len) { | 972 | if (!half_len) { |
| 973 | printk(KERN_WARNING "di_depth %u lf_depth %u index %u\n", dip->i_di.di_depth, be16_to_cpu(oleaf->lf_depth), index); | 973 | printk(KERN_WARNING "i_depth %u lf_depth %u index %u\n", dip->i_depth, be16_to_cpu(oleaf->lf_depth), index); |
| 974 | gfs2_consist_inode(dip); | 974 | gfs2_consist_inode(dip); |
| 975 | error = -EIO; | 975 | error = -EIO; |
| 976 | goto fail_brelse; | 976 | goto fail_brelse; |
| @@ -997,7 +997,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) | |||
| 997 | kfree(lp); | 997 | kfree(lp); |
| 998 | 998 | ||
| 999 | /* Compute the divider */ | 999 | /* Compute the divider */ |
| 1000 | divider = (start + half_len) << (32 - dip->i_di.di_depth); | 1000 | divider = (start + half_len) << (32 - dip->i_depth); |
| 1001 | 1001 | ||
| 1002 | /* Copy the entries */ | 1002 | /* Copy the entries */ |
| 1003 | dirent_first(dip, obh, &dent); | 1003 | dirent_first(dip, obh, &dent); |
| @@ -1021,13 +1021,13 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) | |||
| 1021 | 1021 | ||
| 1022 | new->de_inum = dent->de_inum; /* No endian worries */ | 1022 | new->de_inum = dent->de_inum; /* No endian worries */ |
| 1023 | new->de_type = dent->de_type; /* No endian worries */ | 1023 | new->de_type = dent->de_type; /* No endian worries */ |
| 1024 | nleaf->lf_entries = cpu_to_be16(be16_to_cpu(nleaf->lf_entries)+1); | 1024 | be16_add_cpu(&nleaf->lf_entries, 1); |
| 1025 | 1025 | ||
| 1026 | dirent_del(dip, obh, prev, dent); | 1026 | dirent_del(dip, obh, prev, dent); |
| 1027 | 1027 | ||
| 1028 | if (!oleaf->lf_entries) | 1028 | if (!oleaf->lf_entries) |
| 1029 | gfs2_consist_inode(dip); | 1029 | gfs2_consist_inode(dip); |
| 1030 | oleaf->lf_entries = cpu_to_be16(be16_to_cpu(oleaf->lf_entries)-1); | 1030 | be16_add_cpu(&oleaf->lf_entries, -1); |
| 1031 | 1031 | ||
| 1032 | if (!prev) | 1032 | if (!prev) |
| 1033 | prev = dent; | 1033 | prev = dent; |
| @@ -1044,8 +1044,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) | |||
| 1044 | error = gfs2_meta_inode_buffer(dip, &dibh); | 1044 | error = gfs2_meta_inode_buffer(dip, &dibh); |
| 1045 | if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { | 1045 | if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { |
| 1046 | gfs2_trans_add_bh(dip->i_gl, dibh, 1); | 1046 | gfs2_trans_add_bh(dip->i_gl, dibh, 1); |
| 1047 | dip->i_di.di_blocks++; | 1047 | gfs2_add_inode_blocks(&dip->i_inode, 1); |
| 1048 | gfs2_set_inode_blocks(&dip->i_inode); | ||
| 1049 | gfs2_dinode_out(dip, dibh->b_data); | 1048 | gfs2_dinode_out(dip, dibh->b_data); |
| 1050 | brelse(dibh); | 1049 | brelse(dibh); |
| 1051 | } | 1050 | } |
| @@ -1082,7 +1081,7 @@ static int dir_double_exhash(struct gfs2_inode *dip) | |||
| 1082 | int x; | 1081 | int x; |
| 1083 | int error = 0; | 1082 | int error = 0; |
| 1084 | 1083 | ||
| 1085 | hsize = 1 << dip->i_di.di_depth; | 1084 | hsize = 1 << dip->i_depth; |
| 1086 | if (hsize * sizeof(u64) != dip->i_di.di_size) { | 1085 | if (hsize * sizeof(u64) != dip->i_di.di_size) { |
| 1087 | gfs2_consist_inode(dip); | 1086 | gfs2_consist_inode(dip); |
| 1088 | return -EIO; | 1087 | return -EIO; |
| @@ -1090,7 +1089,7 @@ static int dir_double_exhash(struct gfs2_inode *dip) | |||
| 1090 | 1089 | ||
| 1091 | /* Allocate both the "from" and "to" buffers in one big chunk */ | 1090 | /* Allocate both the "from" and "to" buffers in one big chunk */ |
| 1092 | 1091 | ||
| 1093 | buf = kcalloc(3, sdp->sd_hash_bsize, GFP_KERNEL | __GFP_NOFAIL); | 1092 | buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS | __GFP_NOFAIL); |
| 1094 | 1093 | ||
| 1095 | for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) { | 1094 | for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) { |
| 1096 | error = gfs2_dir_read_data(dip, (char *)buf, | 1095 | error = gfs2_dir_read_data(dip, (char *)buf, |
| @@ -1125,7 +1124,7 @@ static int dir_double_exhash(struct gfs2_inode *dip) | |||
| 1125 | 1124 | ||
| 1126 | error = gfs2_meta_inode_buffer(dip, &dibh); | 1125 | error = gfs2_meta_inode_buffer(dip, &dibh); |
| 1127 | if (!gfs2_assert_withdraw(sdp, !error)) { | 1126 | if (!gfs2_assert_withdraw(sdp, !error)) { |
| 1128 | dip->i_di.di_depth++; | 1127 | dip->i_depth++; |
| 1129 | gfs2_dinode_out(dip, dibh->b_data); | 1128 | gfs2_dinode_out(dip, dibh->b_data); |
| 1130 | brelse(dibh); | 1129 | brelse(dibh); |
| 1131 | } | 1130 | } |
| @@ -1370,16 +1369,16 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
| 1370 | int error = 0; | 1369 | int error = 0; |
| 1371 | unsigned depth = 0; | 1370 | unsigned depth = 0; |
| 1372 | 1371 | ||
| 1373 | hsize = 1 << dip->i_di.di_depth; | 1372 | hsize = 1 << dip->i_depth; |
| 1374 | if (hsize * sizeof(u64) != dip->i_di.di_size) { | 1373 | if (hsize * sizeof(u64) != dip->i_di.di_size) { |
| 1375 | gfs2_consist_inode(dip); | 1374 | gfs2_consist_inode(dip); |
| 1376 | return -EIO; | 1375 | return -EIO; |
| 1377 | } | 1376 | } |
| 1378 | 1377 | ||
| 1379 | hash = gfs2_dir_offset2hash(*offset); | 1378 | hash = gfs2_dir_offset2hash(*offset); |
| 1380 | index = hash >> (32 - dip->i_di.di_depth); | 1379 | index = hash >> (32 - dip->i_depth); |
| 1381 | 1380 | ||
| 1382 | lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL); | 1381 | lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); |
| 1383 | if (!lp) | 1382 | if (!lp) |
| 1384 | return -ENOMEM; | 1383 | return -ENOMEM; |
| 1385 | 1384 | ||
| @@ -1405,7 +1404,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
| 1405 | if (error) | 1404 | if (error) |
| 1406 | break; | 1405 | break; |
| 1407 | 1406 | ||
| 1408 | len = 1 << (dip->i_di.di_depth - depth); | 1407 | len = 1 << (dip->i_depth - depth); |
| 1409 | index = (index & ~(len - 1)) + len; | 1408 | index = (index & ~(len - 1)) + len; |
| 1410 | } | 1409 | } |
| 1411 | 1410 | ||
| @@ -1444,7 +1443,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | |||
| 1444 | 1443 | ||
| 1445 | error = -ENOMEM; | 1444 | error = -ENOMEM; |
| 1446 | /* 96 is max number of dirents which can be stuffed into an inode */ | 1445 | /* 96 is max number of dirents which can be stuffed into an inode */ |
| 1447 | darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_KERNEL); | 1446 | darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_NOFS); |
| 1448 | if (darr) { | 1447 | if (darr) { |
| 1449 | g.pdent = darr; | 1448 | g.pdent = darr; |
| 1450 | g.offset = 0; | 1449 | g.offset = 0; |
| @@ -1549,7 +1548,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) | |||
| 1549 | u32 index; | 1548 | u32 index; |
| 1550 | u64 bn; | 1549 | u64 bn; |
| 1551 | 1550 | ||
| 1552 | index = name->hash >> (32 - ip->i_di.di_depth); | 1551 | index = name->hash >> (32 - ip->i_depth); |
| 1553 | error = get_first_leaf(ip, index, &obh); | 1552 | error = get_first_leaf(ip, index, &obh); |
| 1554 | if (error) | 1553 | if (error) |
| 1555 | return error; | 1554 | return error; |
| @@ -1579,8 +1578,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) | |||
| 1579 | if (error) | 1578 | if (error) |
| 1580 | return error; | 1579 | return error; |
| 1581 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | 1580 | gfs2_trans_add_bh(ip->i_gl, bh, 1); |
| 1582 | ip->i_di.di_blocks++; | 1581 | gfs2_add_inode_blocks(&ip->i_inode, 1); |
| 1583 | gfs2_set_inode_blocks(&ip->i_inode); | ||
| 1584 | gfs2_dinode_out(ip, bh->b_data); | 1582 | gfs2_dinode_out(ip, bh->b_data); |
| 1585 | brelse(bh); | 1583 | brelse(bh); |
| 1586 | return 0; | 1584 | return 0; |
| @@ -1616,7 +1614,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, | |||
| 1616 | dent->de_type = cpu_to_be16(type); | 1614 | dent->de_type = cpu_to_be16(type); |
| 1617 | if (ip->i_di.di_flags & GFS2_DIF_EXHASH) { | 1615 | if (ip->i_di.di_flags & GFS2_DIF_EXHASH) { |
| 1618 | leaf = (struct gfs2_leaf *)bh->b_data; | 1616 | leaf = (struct gfs2_leaf *)bh->b_data; |
| 1619 | leaf->lf_entries = cpu_to_be16(be16_to_cpu(leaf->lf_entries) + 1); | 1617 | be16_add_cpu(&leaf->lf_entries, 1); |
| 1620 | } | 1618 | } |
| 1621 | brelse(bh); | 1619 | brelse(bh); |
| 1622 | error = gfs2_meta_inode_buffer(ip, &bh); | 1620 | error = gfs2_meta_inode_buffer(ip, &bh); |
| @@ -1641,7 +1639,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, | |||
| 1641 | continue; | 1639 | continue; |
| 1642 | if (error < 0) | 1640 | if (error < 0) |
| 1643 | break; | 1641 | break; |
| 1644 | if (ip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) { | 1642 | if (ip->i_depth < GFS2_DIR_MAX_DEPTH) { |
| 1645 | error = dir_double_exhash(ip); | 1643 | error = dir_double_exhash(ip); |
| 1646 | if (error) | 1644 | if (error) |
| 1647 | break; | 1645 | break; |
| @@ -1785,13 +1783,13 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) | |||
| 1785 | u64 leaf_no; | 1783 | u64 leaf_no; |
| 1786 | int error = 0; | 1784 | int error = 0; |
| 1787 | 1785 | ||
| 1788 | hsize = 1 << dip->i_di.di_depth; | 1786 | hsize = 1 << dip->i_depth; |
| 1789 | if (hsize * sizeof(u64) != dip->i_di.di_size) { | 1787 | if (hsize * sizeof(u64) != dip->i_di.di_size) { |
| 1790 | gfs2_consist_inode(dip); | 1788 | gfs2_consist_inode(dip); |
| 1791 | return -EIO; | 1789 | return -EIO; |
| 1792 | } | 1790 | } |
| 1793 | 1791 | ||
| 1794 | lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL); | 1792 | lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); |
| 1795 | if (!lp) | 1793 | if (!lp) |
| 1796 | return -ENOMEM; | 1794 | return -ENOMEM; |
| 1797 | 1795 | ||
| @@ -1817,7 +1815,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) | |||
| 1817 | if (error) | 1815 | if (error) |
| 1818 | goto out; | 1816 | goto out; |
| 1819 | leaf = (struct gfs2_leaf *)bh->b_data; | 1817 | leaf = (struct gfs2_leaf *)bh->b_data; |
| 1820 | len = 1 << (dip->i_di.di_depth - be16_to_cpu(leaf->lf_depth)); | 1818 | len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth)); |
| 1821 | brelse(bh); | 1819 | brelse(bh); |
| 1822 | 1820 | ||
| 1823 | error = lc(dip, index, len, leaf_no, data); | 1821 | error = lc(dip, index, len, leaf_no, data); |
| @@ -1866,15 +1864,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, | |||
| 1866 | 1864 | ||
| 1867 | memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); | 1865 | memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); |
| 1868 | 1866 | ||
| 1869 | ht = kzalloc(size, GFP_KERNEL); | 1867 | ht = kzalloc(size, GFP_NOFS); |
| 1870 | if (!ht) | 1868 | if (!ht) |
| 1871 | return -ENOMEM; | 1869 | return -ENOMEM; |
| 1872 | 1870 | ||
| 1873 | gfs2_alloc_get(dip); | 1871 | if (!gfs2_alloc_get(dip)) { |
| 1872 | error = -ENOMEM; | ||
| 1873 | goto out; | ||
| 1874 | } | ||
| 1874 | 1875 | ||
| 1875 | error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 1876 | error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); |
| 1876 | if (error) | 1877 | if (error) |
| 1877 | goto out; | 1878 | goto out_put; |
| 1878 | 1879 | ||
| 1879 | error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh); | 1880 | error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh); |
| 1880 | if (error) | 1881 | if (error) |
| @@ -1894,7 +1895,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, | |||
| 1894 | l_blocks++; | 1895 | l_blocks++; |
| 1895 | } | 1896 | } |
| 1896 | 1897 | ||
| 1897 | gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0); | 1898 | gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE); |
| 1898 | 1899 | ||
| 1899 | for (x = 0; x < rlist.rl_rgrps; x++) { | 1900 | for (x = 0; x < rlist.rl_rgrps; x++) { |
| 1900 | struct gfs2_rgrpd *rgd; | 1901 | struct gfs2_rgrpd *rgd; |
| @@ -1921,11 +1922,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, | |||
| 1921 | brelse(bh); | 1922 | brelse(bh); |
| 1922 | 1923 | ||
| 1923 | gfs2_free_meta(dip, blk, 1); | 1924 | gfs2_free_meta(dip, blk, 1); |
| 1924 | 1925 | gfs2_add_inode_blocks(&dip->i_inode, -1); | |
| 1925 | if (!dip->i_di.di_blocks) | ||
| 1926 | gfs2_consist_inode(dip); | ||
| 1927 | dip->i_di.di_blocks--; | ||
| 1928 | gfs2_set_inode_blocks(&dip->i_inode); | ||
| 1929 | } | 1926 | } |
| 1930 | 1927 | ||
| 1931 | error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size); | 1928 | error = gfs2_dir_write_data(dip, ht, index * sizeof(u64), size); |
| @@ -1952,8 +1949,9 @@ out_rlist: | |||
| 1952 | gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh); | 1949 | gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh); |
| 1953 | out_qs: | 1950 | out_qs: |
| 1954 | gfs2_quota_unhold(dip); | 1951 | gfs2_quota_unhold(dip); |
| 1955 | out: | 1952 | out_put: |
| 1956 | gfs2_alloc_put(dip); | 1953 | gfs2_alloc_put(dip); |
| 1954 | out: | ||
| 1957 | kfree(ht); | 1955 | kfree(ht); |
| 1958 | return error; | 1956 | return error; |
| 1959 | } | 1957 | } |
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index bee99704ea10..e3f76f451b0a 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c | |||
| @@ -277,10 +277,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, | |||
| 277 | } | 277 | } |
| 278 | 278 | ||
| 279 | *dataptrs = 0; | 279 | *dataptrs = 0; |
| 280 | if (!ip->i_di.di_blocks) | 280 | gfs2_add_inode_blocks(&ip->i_inode, -1); |
| 281 | gfs2_consist_inode(ip); | ||
| 282 | ip->i_di.di_blocks--; | ||
| 283 | gfs2_set_inode_blocks(&ip->i_inode); | ||
| 284 | } | 281 | } |
| 285 | if (bstart) | 282 | if (bstart) |
| 286 | gfs2_free_meta(ip, bstart, blen); | 283 | gfs2_free_meta(ip, bstart, blen); |
| @@ -321,6 +318,8 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, | |||
| 321 | int error; | 318 | int error; |
| 322 | 319 | ||
| 323 | al = gfs2_alloc_get(ip); | 320 | al = gfs2_alloc_get(ip); |
| 321 | if (!al) | ||
| 322 | return -ENOMEM; | ||
| 324 | 323 | ||
| 325 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 324 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); |
| 326 | if (error) | 325 | if (error) |
| @@ -449,7 +448,7 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, | |||
| 449 | unsigned int x; | 448 | unsigned int x; |
| 450 | int error = 0; | 449 | int error = 0; |
| 451 | 450 | ||
| 452 | bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL); | 451 | bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); |
| 453 | if (!bh) | 452 | if (!bh) |
| 454 | return -ENOMEM; | 453 | return -ENOMEM; |
| 455 | 454 | ||
| @@ -582,10 +581,11 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) | |||
| 582 | { | 581 | { |
| 583 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 582 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 584 | struct gfs2_ea_header *ea; | 583 | struct gfs2_ea_header *ea; |
| 584 | unsigned int n = 1; | ||
| 585 | u64 block; | 585 | u64 block; |
| 586 | 586 | ||
| 587 | block = gfs2_alloc_meta(ip); | 587 | block = gfs2_alloc_block(ip, &n); |
| 588 | 588 | gfs2_trans_add_unrevoke(sdp, block, 1); | |
| 589 | *bhp = gfs2_meta_new(ip->i_gl, block); | 589 | *bhp = gfs2_meta_new(ip->i_gl, block); |
| 590 | gfs2_trans_add_bh(ip->i_gl, *bhp, 1); | 590 | gfs2_trans_add_bh(ip->i_gl, *bhp, 1); |
| 591 | gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA); | 591 | gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA); |
| @@ -597,8 +597,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) | |||
| 597 | ea->ea_flags = GFS2_EAFLAG_LAST; | 597 | ea->ea_flags = GFS2_EAFLAG_LAST; |
| 598 | ea->ea_num_ptrs = 0; | 598 | ea->ea_num_ptrs = 0; |
| 599 | 599 | ||
| 600 | ip->i_di.di_blocks++; | 600 | gfs2_add_inode_blocks(&ip->i_inode, 1); |
| 601 | gfs2_set_inode_blocks(&ip->i_inode); | ||
| 602 | 601 | ||
| 603 | return 0; | 602 | return 0; |
| 604 | } | 603 | } |
| @@ -642,15 +641,15 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, | |||
| 642 | struct buffer_head *bh; | 641 | struct buffer_head *bh; |
| 643 | u64 block; | 642 | u64 block; |
| 644 | int mh_size = sizeof(struct gfs2_meta_header); | 643 | int mh_size = sizeof(struct gfs2_meta_header); |
| 644 | unsigned int n = 1; | ||
| 645 | 645 | ||
| 646 | block = gfs2_alloc_meta(ip); | 646 | block = gfs2_alloc_block(ip, &n); |
| 647 | 647 | gfs2_trans_add_unrevoke(sdp, block, 1); | |
| 648 | bh = gfs2_meta_new(ip->i_gl, block); | 648 | bh = gfs2_meta_new(ip->i_gl, block); |
| 649 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | 649 | gfs2_trans_add_bh(ip->i_gl, bh, 1); |
| 650 | gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED); | 650 | gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED); |
| 651 | 651 | ||
| 652 | ip->i_di.di_blocks++; | 652 | gfs2_add_inode_blocks(&ip->i_inode, 1); |
| 653 | gfs2_set_inode_blocks(&ip->i_inode); | ||
| 654 | 653 | ||
| 655 | copy = data_len > sdp->sd_jbsize ? sdp->sd_jbsize : | 654 | copy = data_len > sdp->sd_jbsize ? sdp->sd_jbsize : |
| 656 | data_len; | 655 | data_len; |
| @@ -684,15 +683,13 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
| 684 | int error; | 683 | int error; |
| 685 | 684 | ||
| 686 | al = gfs2_alloc_get(ip); | 685 | al = gfs2_alloc_get(ip); |
| 686 | if (!al) | ||
| 687 | return -ENOMEM; | ||
| 687 | 688 | ||
| 688 | error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 689 | error = gfs2_quota_lock_check(ip); |
| 689 | if (error) | 690 | if (error) |
| 690 | goto out; | 691 | goto out; |
| 691 | 692 | ||
| 692 | error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); | ||
| 693 | if (error) | ||
| 694 | goto out_gunlock_q; | ||
| 695 | |||
| 696 | al->al_requested = blks; | 693 | al->al_requested = blks; |
| 697 | 694 | ||
| 698 | error = gfs2_inplace_reserve(ip); | 695 | error = gfs2_inplace_reserve(ip); |
| @@ -966,9 +963,9 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
| 966 | gfs2_trans_add_bh(ip->i_gl, indbh, 1); | 963 | gfs2_trans_add_bh(ip->i_gl, indbh, 1); |
| 967 | } else { | 964 | } else { |
| 968 | u64 blk; | 965 | u64 blk; |
| 969 | 966 | unsigned int n = 1; | |
| 970 | blk = gfs2_alloc_meta(ip); | 967 | blk = gfs2_alloc_block(ip, &n); |
| 971 | 968 | gfs2_trans_add_unrevoke(sdp, blk, 1); | |
| 972 | indbh = gfs2_meta_new(ip->i_gl, blk); | 969 | indbh = gfs2_meta_new(ip->i_gl, blk); |
| 973 | gfs2_trans_add_bh(ip->i_gl, indbh, 1); | 970 | gfs2_trans_add_bh(ip->i_gl, indbh, 1); |
| 974 | gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); | 971 | gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); |
| @@ -978,8 +975,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
| 978 | *eablk = cpu_to_be64(ip->i_di.di_eattr); | 975 | *eablk = cpu_to_be64(ip->i_di.di_eattr); |
| 979 | ip->i_di.di_eattr = blk; | 976 | ip->i_di.di_eattr = blk; |
| 980 | ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT; | 977 | ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT; |
| 981 | ip->i_di.di_blocks++; | 978 | gfs2_add_inode_blocks(&ip->i_inode, 1); |
| 982 | gfs2_set_inode_blocks(&ip->i_inode); | ||
| 983 | 979 | ||
| 984 | eablk++; | 980 | eablk++; |
| 985 | } | 981 | } |
| @@ -1210,7 +1206,7 @@ static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, | |||
| 1210 | unsigned int x; | 1206 | unsigned int x; |
| 1211 | int error; | 1207 | int error; |
| 1212 | 1208 | ||
| 1213 | bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL); | 1209 | bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS); |
| 1214 | if (!bh) | 1210 | if (!bh) |
| 1215 | return -ENOMEM; | 1211 | return -ENOMEM; |
| 1216 | 1212 | ||
| @@ -1347,7 +1343,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) | |||
| 1347 | else | 1343 | else |
| 1348 | goto out; | 1344 | goto out; |
| 1349 | 1345 | ||
| 1350 | gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0); | 1346 | gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE); |
| 1351 | 1347 | ||
| 1352 | for (x = 0; x < rlist.rl_rgrps; x++) { | 1348 | for (x = 0; x < rlist.rl_rgrps; x++) { |
| 1353 | struct gfs2_rgrpd *rgd; | 1349 | struct gfs2_rgrpd *rgd; |
| @@ -1387,10 +1383,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) | |||
| 1387 | } | 1383 | } |
| 1388 | 1384 | ||
| 1389 | *eablk = 0; | 1385 | *eablk = 0; |
| 1390 | if (!ip->i_di.di_blocks) | 1386 | gfs2_add_inode_blocks(&ip->i_inode, -1); |
| 1391 | gfs2_consist_inode(ip); | ||
| 1392 | ip->i_di.di_blocks--; | ||
| 1393 | gfs2_set_inode_blocks(&ip->i_inode); | ||
| 1394 | } | 1387 | } |
| 1395 | if (bstart) | 1388 | if (bstart) |
| 1396 | gfs2_free_meta(ip, bstart, blen); | 1389 | gfs2_free_meta(ip, bstart, blen); |
| @@ -1442,10 +1435,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip) | |||
| 1442 | gfs2_free_meta(ip, ip->i_di.di_eattr, 1); | 1435 | gfs2_free_meta(ip, ip->i_di.di_eattr, 1); |
| 1443 | 1436 | ||
| 1444 | ip->i_di.di_eattr = 0; | 1437 | ip->i_di.di_eattr = 0; |
| 1445 | if (!ip->i_di.di_blocks) | 1438 | gfs2_add_inode_blocks(&ip->i_inode, -1); |
| 1446 | gfs2_consist_inode(ip); | ||
| 1447 | ip->i_di.di_blocks--; | ||
| 1448 | gfs2_set_inode_blocks(&ip->i_inode); | ||
| 1449 | 1439 | ||
| 1450 | error = gfs2_meta_inode_buffer(ip, &dibh); | 1440 | error = gfs2_meta_inode_buffer(ip, &dibh); |
| 1451 | if (!error) { | 1441 | if (!error) { |
| @@ -1474,6 +1464,8 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) | |||
| 1474 | int error; | 1464 | int error; |
| 1475 | 1465 | ||
| 1476 | al = gfs2_alloc_get(ip); | 1466 | al = gfs2_alloc_get(ip); |
| 1467 | if (!al) | ||
| 1468 | return -ENOMEM; | ||
| 1477 | 1469 | ||
| 1478 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 1470 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); |
| 1479 | if (error) | 1471 | if (error) |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 7175a4d06435..d636b3e80f5d 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
| 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
| 4 | * | 4 | * |
| 5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
| @@ -35,7 +35,6 @@ | |||
| 35 | #include "glock.h" | 35 | #include "glock.h" |
| 36 | #include "glops.h" | 36 | #include "glops.h" |
| 37 | #include "inode.h" | 37 | #include "inode.h" |
| 38 | #include "lm.h" | ||
| 39 | #include "lops.h" | 38 | #include "lops.h" |
| 40 | #include "meta_io.h" | 39 | #include "meta_io.h" |
| 41 | #include "quota.h" | 40 | #include "quota.h" |
| @@ -183,7 +182,8 @@ static void glock_free(struct gfs2_glock *gl) | |||
| 183 | struct gfs2_sbd *sdp = gl->gl_sbd; | 182 | struct gfs2_sbd *sdp = gl->gl_sbd; |
| 184 | struct inode *aspace = gl->gl_aspace; | 183 | struct inode *aspace = gl->gl_aspace; |
| 185 | 184 | ||
| 186 | gfs2_lm_put_lock(sdp, gl->gl_lock); | 185 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
| 186 | sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock); | ||
| 187 | 187 | ||
| 188 | if (aspace) | 188 | if (aspace) |
| 189 | gfs2_aspace_put(aspace); | 189 | gfs2_aspace_put(aspace); |
| @@ -197,7 +197,7 @@ static void glock_free(struct gfs2_glock *gl) | |||
| 197 | * | 197 | * |
| 198 | */ | 198 | */ |
| 199 | 199 | ||
| 200 | void gfs2_glock_hold(struct gfs2_glock *gl) | 200 | static void gfs2_glock_hold(struct gfs2_glock *gl) |
| 201 | { | 201 | { |
| 202 | atomic_inc(&gl->gl_ref); | 202 | atomic_inc(&gl->gl_ref); |
| 203 | } | 203 | } |
| @@ -293,6 +293,16 @@ static void glock_work_func(struct work_struct *work) | |||
| 293 | gfs2_glock_put(gl); | 293 | gfs2_glock_put(gl); |
| 294 | } | 294 | } |
| 295 | 295 | ||
| 296 | static int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
| 297 | void **lockp) | ||
| 298 | { | ||
| 299 | int error = -EIO; | ||
| 300 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 301 | error = sdp->sd_lockstruct.ls_ops->lm_get_lock( | ||
| 302 | sdp->sd_lockstruct.ls_lockspace, name, lockp); | ||
| 303 | return error; | ||
| 304 | } | ||
| 305 | |||
| 296 | /** | 306 | /** |
| 297 | * gfs2_glock_get() - Get a glock, or create one if one doesn't exist | 307 | * gfs2_glock_get() - Get a glock, or create one if one doesn't exist |
| 298 | * @sdp: The GFS2 superblock | 308 | * @sdp: The GFS2 superblock |
| @@ -338,8 +348,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
| 338 | gl->gl_ip = 0; | 348 | gl->gl_ip = 0; |
| 339 | gl->gl_ops = glops; | 349 | gl->gl_ops = glops; |
| 340 | gl->gl_req_gh = NULL; | 350 | gl->gl_req_gh = NULL; |
| 341 | gl->gl_req_bh = NULL; | ||
| 342 | gl->gl_vn = 0; | ||
| 343 | gl->gl_stamp = jiffies; | 351 | gl->gl_stamp = jiffies; |
| 344 | gl->gl_tchange = jiffies; | 352 | gl->gl_tchange = jiffies; |
| 345 | gl->gl_object = NULL; | 353 | gl->gl_object = NULL; |
| @@ -595,11 +603,12 @@ static void run_queue(struct gfs2_glock *gl) | |||
| 595 | blocked = rq_mutex(gh); | 603 | blocked = rq_mutex(gh); |
| 596 | } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { | 604 | } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { |
| 597 | blocked = rq_demote(gl); | 605 | blocked = rq_demote(gl); |
| 598 | if (gl->gl_waiters2 && !blocked) { | 606 | if (test_bit(GLF_WAITERS2, &gl->gl_flags) && |
| 607 | !blocked) { | ||
| 599 | set_bit(GLF_DEMOTE, &gl->gl_flags); | 608 | set_bit(GLF_DEMOTE, &gl->gl_flags); |
| 600 | gl->gl_demote_state = LM_ST_UNLOCKED; | 609 | gl->gl_demote_state = LM_ST_UNLOCKED; |
| 601 | } | 610 | } |
| 602 | gl->gl_waiters2 = 0; | 611 | clear_bit(GLF_WAITERS2, &gl->gl_flags); |
| 603 | } else if (!list_empty(&gl->gl_waiters3)) { | 612 | } else if (!list_empty(&gl->gl_waiters3)) { |
| 604 | gh = list_entry(gl->gl_waiters3.next, | 613 | gh = list_entry(gl->gl_waiters3.next, |
| 605 | struct gfs2_holder, gh_list); | 614 | struct gfs2_holder, gh_list); |
| @@ -710,7 +719,7 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, | |||
| 710 | } else if (gl->gl_demote_state != LM_ST_UNLOCKED && | 719 | } else if (gl->gl_demote_state != LM_ST_UNLOCKED && |
| 711 | gl->gl_demote_state != state) { | 720 | gl->gl_demote_state != state) { |
| 712 | if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) | 721 | if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) |
| 713 | gl->gl_waiters2 = 1; | 722 | set_bit(GLF_WAITERS2, &gl->gl_flags); |
| 714 | else | 723 | else |
| 715 | gl->gl_demote_state = LM_ST_UNLOCKED; | 724 | gl->gl_demote_state = LM_ST_UNLOCKED; |
| 716 | } | 725 | } |
| @@ -743,6 +752,43 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) | |||
| 743 | } | 752 | } |
| 744 | 753 | ||
| 745 | /** | 754 | /** |
| 755 | * drop_bh - Called after a lock module unlock completes | ||
| 756 | * @gl: the glock | ||
| 757 | * @ret: the return status | ||
| 758 | * | ||
| 759 | * Doesn't wake up the process waiting on the struct gfs2_holder (if any) | ||
| 760 | * Doesn't drop the reference on the glock the top half took out | ||
| 761 | * | ||
| 762 | */ | ||
| 763 | |||
| 764 | static void drop_bh(struct gfs2_glock *gl, unsigned int ret) | ||
| 765 | { | ||
| 766 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 767 | struct gfs2_holder *gh = gl->gl_req_gh; | ||
| 768 | |||
| 769 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | ||
| 770 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | ||
| 771 | gfs2_assert_warn(sdp, !ret); | ||
| 772 | |||
| 773 | state_change(gl, LM_ST_UNLOCKED); | ||
| 774 | |||
| 775 | if (test_and_clear_bit(GLF_CONV_DEADLK, &gl->gl_flags)) { | ||
| 776 | spin_lock(&gl->gl_spin); | ||
| 777 | gh->gh_error = 0; | ||
| 778 | spin_unlock(&gl->gl_spin); | ||
| 779 | gfs2_glock_xmote_th(gl, gl->gl_req_gh); | ||
| 780 | gfs2_glock_put(gl); | ||
| 781 | return; | ||
| 782 | } | ||
| 783 | |||
| 784 | spin_lock(&gl->gl_spin); | ||
| 785 | gfs2_demote_wake(gl); | ||
| 786 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 787 | spin_unlock(&gl->gl_spin); | ||
| 788 | gfs2_glock_put(gl); | ||
| 789 | } | ||
| 790 | |||
| 791 | /** | ||
| 746 | * xmote_bh - Called after the lock module is done acquiring a lock | 792 | * xmote_bh - Called after the lock module is done acquiring a lock |
| 747 | * @gl: The glock in question | 793 | * @gl: The glock in question |
| 748 | * @ret: the int returned from the lock module | 794 | * @ret: the int returned from the lock module |
| @@ -754,25 +800,19 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | |||
| 754 | struct gfs2_sbd *sdp = gl->gl_sbd; | 800 | struct gfs2_sbd *sdp = gl->gl_sbd; |
| 755 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 801 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
| 756 | struct gfs2_holder *gh = gl->gl_req_gh; | 802 | struct gfs2_holder *gh = gl->gl_req_gh; |
| 757 | int prev_state = gl->gl_state; | ||
| 758 | int op_done = 1; | 803 | int op_done = 1; |
| 759 | 804 | ||
| 805 | if (!gh && (ret & LM_OUT_ST_MASK) == LM_ST_UNLOCKED) { | ||
| 806 | drop_bh(gl, ret); | ||
| 807 | return; | ||
| 808 | } | ||
| 809 | |||
| 760 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | 810 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); |
| 761 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | 811 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); |
| 762 | gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); | 812 | gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC)); |
| 763 | 813 | ||
| 764 | state_change(gl, ret & LM_OUT_ST_MASK); | 814 | state_change(gl, ret & LM_OUT_ST_MASK); |
| 765 | 815 | ||
| 766 | if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) { | ||
| 767 | if (glops->go_inval) | ||
| 768 | glops->go_inval(gl, DIO_METADATA); | ||
| 769 | } else if (gl->gl_state == LM_ST_DEFERRED) { | ||
| 770 | /* We might not want to do this here. | ||
| 771 | Look at moving to the inode glops. */ | ||
| 772 | if (glops->go_inval) | ||
| 773 | glops->go_inval(gl, 0); | ||
| 774 | } | ||
| 775 | |||
| 776 | /* Deal with each possible exit condition */ | 816 | /* Deal with each possible exit condition */ |
| 777 | 817 | ||
| 778 | if (!gh) { | 818 | if (!gh) { |
| @@ -782,7 +822,6 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | |||
| 782 | } else { | 822 | } else { |
| 783 | spin_lock(&gl->gl_spin); | 823 | spin_lock(&gl->gl_spin); |
| 784 | if (gl->gl_state != gl->gl_demote_state) { | 824 | if (gl->gl_state != gl->gl_demote_state) { |
| 785 | gl->gl_req_bh = NULL; | ||
| 786 | spin_unlock(&gl->gl_spin); | 825 | spin_unlock(&gl->gl_spin); |
| 787 | gfs2_glock_drop_th(gl); | 826 | gfs2_glock_drop_th(gl); |
| 788 | gfs2_glock_put(gl); | 827 | gfs2_glock_put(gl); |
| @@ -793,6 +832,14 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) | |||
| 793 | } | 832 | } |
| 794 | } else { | 833 | } else { |
| 795 | spin_lock(&gl->gl_spin); | 834 | spin_lock(&gl->gl_spin); |
| 835 | if (ret & LM_OUT_CONV_DEADLK) { | ||
| 836 | gh->gh_error = 0; | ||
| 837 | set_bit(GLF_CONV_DEADLK, &gl->gl_flags); | ||
| 838 | spin_unlock(&gl->gl_spin); | ||
| 839 | gfs2_glock_drop_th(gl); | ||
| 840 | gfs2_glock_put(gl); | ||
| 841 | return; | ||
| 842 | } | ||
| 796 | list_del_init(&gh->gh_list); | 843 | list_del_init(&gh->gh_list); |
| 797 | gh->gh_error = -EIO; | 844 | gh->gh_error = -EIO; |
| 798 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | 845 | if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
| @@ -824,7 +871,6 @@ out: | |||
| 824 | if (op_done) { | 871 | if (op_done) { |
| 825 | spin_lock(&gl->gl_spin); | 872 | spin_lock(&gl->gl_spin); |
| 826 | gl->gl_req_gh = NULL; | 873 | gl->gl_req_gh = NULL; |
| 827 | gl->gl_req_bh = NULL; | ||
| 828 | clear_bit(GLF_LOCK, &gl->gl_flags); | 874 | clear_bit(GLF_LOCK, &gl->gl_flags); |
| 829 | spin_unlock(&gl->gl_spin); | 875 | spin_unlock(&gl->gl_spin); |
| 830 | } | 876 | } |
| @@ -835,6 +881,17 @@ out: | |||
| 835 | gfs2_holder_wake(gh); | 881 | gfs2_holder_wake(gh); |
| 836 | } | 882 | } |
| 837 | 883 | ||
| 884 | static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, | ||
| 885 | unsigned int cur_state, unsigned int req_state, | ||
| 886 | unsigned int flags) | ||
| 887 | { | ||
| 888 | int ret = 0; | ||
| 889 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 890 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state, | ||
| 891 | req_state, flags); | ||
| 892 | return ret; | ||
| 893 | } | ||
| 894 | |||
| 838 | /** | 895 | /** |
| 839 | * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock | 896 | * gfs2_glock_xmote_th - Call into the lock module to acquire or change a glock |
| 840 | * @gl: The glock in question | 897 | * @gl: The glock in question |
| @@ -856,6 +913,8 @@ static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) | |||
| 856 | 913 | ||
| 857 | if (glops->go_xmote_th) | 914 | if (glops->go_xmote_th) |
| 858 | glops->go_xmote_th(gl); | 915 | glops->go_xmote_th(gl); |
| 916 | if (state == LM_ST_DEFERRED && glops->go_inval) | ||
| 917 | glops->go_inval(gl, DIO_METADATA); | ||
| 859 | 918 | ||
| 860 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | 919 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); |
| 861 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | 920 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); |
| @@ -863,7 +922,6 @@ static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) | |||
| 863 | gfs2_assert_warn(sdp, state != gl->gl_state); | 922 | gfs2_assert_warn(sdp, state != gl->gl_state); |
| 864 | 923 | ||
| 865 | gfs2_glock_hold(gl); | 924 | gfs2_glock_hold(gl); |
| 866 | gl->gl_req_bh = xmote_bh; | ||
| 867 | 925 | ||
| 868 | lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state, lck_flags); | 926 | lck_ret = gfs2_lm_lock(sdp, gl->gl_lock, gl->gl_state, state, lck_flags); |
| 869 | 927 | ||
| @@ -876,49 +934,13 @@ static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) | |||
| 876 | xmote_bh(gl, lck_ret); | 934 | xmote_bh(gl, lck_ret); |
| 877 | } | 935 | } |
| 878 | 936 | ||
| 879 | /** | 937 | static unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock, |
| 880 | * drop_bh - Called after a lock module unlock completes | 938 | unsigned int cur_state) |
| 881 | * @gl: the glock | ||
| 882 | * @ret: the return status | ||
| 883 | * | ||
| 884 | * Doesn't wake up the process waiting on the struct gfs2_holder (if any) | ||
| 885 | * Doesn't drop the reference on the glock the top half took out | ||
| 886 | * | ||
| 887 | */ | ||
| 888 | |||
| 889 | static void drop_bh(struct gfs2_glock *gl, unsigned int ret) | ||
| 890 | { | 939 | { |
| 891 | struct gfs2_sbd *sdp = gl->gl_sbd; | 940 | int ret = 0; |
| 892 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 941 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
| 893 | struct gfs2_holder *gh = gl->gl_req_gh; | 942 | ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state); |
| 894 | 943 | return ret; | |
| 895 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | ||
| 896 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | ||
| 897 | gfs2_assert_warn(sdp, !ret); | ||
| 898 | |||
| 899 | state_change(gl, LM_ST_UNLOCKED); | ||
| 900 | |||
| 901 | if (glops->go_inval) | ||
| 902 | glops->go_inval(gl, DIO_METADATA); | ||
| 903 | |||
| 904 | if (gh) { | ||
| 905 | spin_lock(&gl->gl_spin); | ||
| 906 | list_del_init(&gh->gh_list); | ||
| 907 | gh->gh_error = 0; | ||
| 908 | spin_unlock(&gl->gl_spin); | ||
| 909 | } | ||
| 910 | |||
| 911 | spin_lock(&gl->gl_spin); | ||
| 912 | gfs2_demote_wake(gl); | ||
| 913 | gl->gl_req_gh = NULL; | ||
| 914 | gl->gl_req_bh = NULL; | ||
| 915 | clear_bit(GLF_LOCK, &gl->gl_flags); | ||
| 916 | spin_unlock(&gl->gl_spin); | ||
| 917 | |||
| 918 | gfs2_glock_put(gl); | ||
| 919 | |||
| 920 | if (gh) | ||
| 921 | gfs2_holder_wake(gh); | ||
| 922 | } | 944 | } |
| 923 | 945 | ||
| 924 | /** | 946 | /** |
| @@ -935,13 +957,14 @@ static void gfs2_glock_drop_th(struct gfs2_glock *gl) | |||
| 935 | 957 | ||
| 936 | if (glops->go_xmote_th) | 958 | if (glops->go_xmote_th) |
| 937 | glops->go_xmote_th(gl); | 959 | glops->go_xmote_th(gl); |
| 960 | if (glops->go_inval) | ||
| 961 | glops->go_inval(gl, DIO_METADATA); | ||
| 938 | 962 | ||
| 939 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); | 963 | gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); |
| 940 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); | 964 | gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); |
| 941 | gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); | 965 | gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED); |
| 942 | 966 | ||
| 943 | gfs2_glock_hold(gl); | 967 | gfs2_glock_hold(gl); |
| 944 | gl->gl_req_bh = drop_bh; | ||
| 945 | 968 | ||
| 946 | ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state); | 969 | ret = gfs2_lm_unlock(sdp, gl->gl_lock, gl->gl_state); |
| 947 | 970 | ||
| @@ -964,16 +987,17 @@ static void gfs2_glock_drop_th(struct gfs2_glock *gl) | |||
| 964 | static void do_cancels(struct gfs2_holder *gh) | 987 | static void do_cancels(struct gfs2_holder *gh) |
| 965 | { | 988 | { |
| 966 | struct gfs2_glock *gl = gh->gh_gl; | 989 | struct gfs2_glock *gl = gh->gh_gl; |
| 990 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 967 | 991 | ||
| 968 | spin_lock(&gl->gl_spin); | 992 | spin_lock(&gl->gl_spin); |
| 969 | 993 | ||
| 970 | while (gl->gl_req_gh != gh && | 994 | while (gl->gl_req_gh != gh && |
| 971 | !test_bit(HIF_HOLDER, &gh->gh_iflags) && | 995 | !test_bit(HIF_HOLDER, &gh->gh_iflags) && |
| 972 | !list_empty(&gh->gh_list)) { | 996 | !list_empty(&gh->gh_list)) { |
| 973 | if (gl->gl_req_bh && !(gl->gl_req_gh && | 997 | if (!(gl->gl_req_gh && (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) { |
| 974 | (gl->gl_req_gh->gh_flags & GL_NOCANCEL))) { | ||
| 975 | spin_unlock(&gl->gl_spin); | 998 | spin_unlock(&gl->gl_spin); |
| 976 | gfs2_lm_cancel(gl->gl_sbd, gl->gl_lock); | 999 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
| 1000 | sdp->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock); | ||
| 977 | msleep(100); | 1001 | msleep(100); |
| 978 | spin_lock(&gl->gl_spin); | 1002 | spin_lock(&gl->gl_spin); |
| 979 | } else { | 1003 | } else { |
| @@ -1041,7 +1065,6 @@ static int glock_wait_internal(struct gfs2_holder *gh) | |||
| 1041 | 1065 | ||
| 1042 | spin_lock(&gl->gl_spin); | 1066 | spin_lock(&gl->gl_spin); |
| 1043 | gl->gl_req_gh = NULL; | 1067 | gl->gl_req_gh = NULL; |
| 1044 | gl->gl_req_bh = NULL; | ||
| 1045 | clear_bit(GLF_LOCK, &gl->gl_flags); | 1068 | clear_bit(GLF_LOCK, &gl->gl_flags); |
| 1046 | run_queue(gl); | 1069 | run_queue(gl); |
| 1047 | spin_unlock(&gl->gl_spin); | 1070 | spin_unlock(&gl->gl_spin); |
| @@ -1428,6 +1451,14 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) | |||
| 1428 | gfs2_glock_dq_uninit(&ghs[x]); | 1451 | gfs2_glock_dq_uninit(&ghs[x]); |
| 1429 | } | 1452 | } |
| 1430 | 1453 | ||
| 1454 | static int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp) | ||
| 1455 | { | ||
| 1456 | int error = -EIO; | ||
| 1457 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 1458 | error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp); | ||
| 1459 | return error; | ||
| 1460 | } | ||
| 1461 | |||
| 1431 | /** | 1462 | /** |
| 1432 | * gfs2_lvb_hold - attach a LVB from a glock | 1463 | * gfs2_lvb_hold - attach a LVB from a glock |
| 1433 | * @gl: The glock in question | 1464 | * @gl: The glock in question |
| @@ -1463,12 +1494,15 @@ int gfs2_lvb_hold(struct gfs2_glock *gl) | |||
| 1463 | 1494 | ||
| 1464 | void gfs2_lvb_unhold(struct gfs2_glock *gl) | 1495 | void gfs2_lvb_unhold(struct gfs2_glock *gl) |
| 1465 | { | 1496 | { |
| 1497 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 1498 | |||
| 1466 | gfs2_glock_hold(gl); | 1499 | gfs2_glock_hold(gl); |
| 1467 | gfs2_glmutex_lock(gl); | 1500 | gfs2_glmutex_lock(gl); |
| 1468 | 1501 | ||
| 1469 | gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0); | 1502 | gfs2_assert(gl->gl_sbd, atomic_read(&gl->gl_lvb_count) > 0); |
| 1470 | if (atomic_dec_and_test(&gl->gl_lvb_count)) { | 1503 | if (atomic_dec_and_test(&gl->gl_lvb_count)) { |
| 1471 | gfs2_lm_unhold_lvb(gl->gl_sbd, gl->gl_lock, gl->gl_lvb); | 1504 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) |
| 1505 | sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock, gl->gl_lvb); | ||
| 1472 | gl->gl_lvb = NULL; | 1506 | gl->gl_lvb = NULL; |
| 1473 | gfs2_glock_put(gl); | 1507 | gfs2_glock_put(gl); |
| 1474 | } | 1508 | } |
| @@ -1534,8 +1568,7 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) | |||
| 1534 | gl = gfs2_glock_find(sdp, &async->lc_name); | 1568 | gl = gfs2_glock_find(sdp, &async->lc_name); |
| 1535 | if (gfs2_assert_warn(sdp, gl)) | 1569 | if (gfs2_assert_warn(sdp, gl)) |
| 1536 | return; | 1570 | return; |
| 1537 | if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) | 1571 | xmote_bh(gl, async->lc_ret); |
| 1538 | gl->gl_req_bh(gl, async->lc_ret); | ||
| 1539 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | 1572 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) |
| 1540 | gfs2_glock_put(gl); | 1573 | gfs2_glock_put(gl); |
| 1541 | up_read(&gfs2_umount_flush_sem); | 1574 | up_read(&gfs2_umount_flush_sem); |
| @@ -1594,10 +1627,10 @@ void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) | |||
| 1594 | gfs2_glock_hold(gl); | 1627 | gfs2_glock_hold(gl); |
| 1595 | list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list); | 1628 | list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list); |
| 1596 | atomic_inc(&sdp->sd_reclaim_count); | 1629 | atomic_inc(&sdp->sd_reclaim_count); |
| 1597 | } | 1630 | spin_unlock(&sdp->sd_reclaim_lock); |
| 1598 | spin_unlock(&sdp->sd_reclaim_lock); | 1631 | wake_up(&sdp->sd_reclaim_wq); |
| 1599 | 1632 | } else | |
| 1600 | wake_up(&sdp->sd_reclaim_wq); | 1633 | spin_unlock(&sdp->sd_reclaim_lock); |
| 1601 | } | 1634 | } |
| 1602 | 1635 | ||
| 1603 | /** | 1636 | /** |
| @@ -1897,7 +1930,6 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) | |||
| 1897 | print_dbg(gi, " gl_owner = -1\n"); | 1930 | print_dbg(gi, " gl_owner = -1\n"); |
| 1898 | print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); | 1931 | print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); |
| 1899 | print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); | 1932 | print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); |
| 1900 | print_dbg(gi, " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); | ||
| 1901 | print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); | 1933 | print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); |
| 1902 | print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); | 1934 | print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); |
| 1903 | print_dbg(gi, " reclaim = %s\n", | 1935 | print_dbg(gi, " reclaim = %s\n", |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 2f9c6d136b37..cdad3e6f8150 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
| @@ -32,24 +32,23 @@ | |||
| 32 | #define GLR_TRYFAILED 13 | 32 | #define GLR_TRYFAILED 13 |
| 33 | #define GLR_CANCELED 14 | 33 | #define GLR_CANCELED 14 |
| 34 | 34 | ||
| 35 | static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) | 35 | static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) |
| 36 | { | 36 | { |
| 37 | struct gfs2_holder *gh; | 37 | struct gfs2_holder *gh; |
| 38 | int locked = 0; | ||
| 39 | struct pid *pid; | 38 | struct pid *pid; |
| 40 | 39 | ||
| 41 | /* Look in glock's list of holders for one with current task as owner */ | 40 | /* Look in glock's list of holders for one with current task as owner */ |
| 42 | spin_lock(&gl->gl_spin); | 41 | spin_lock(&gl->gl_spin); |
| 43 | pid = task_pid(current); | 42 | pid = task_pid(current); |
| 44 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { | 43 | list_for_each_entry(gh, &gl->gl_holders, gh_list) { |
| 45 | if (gh->gh_owner_pid == pid) { | 44 | if (gh->gh_owner_pid == pid) |
| 46 | locked = 1; | 45 | goto out; |
| 47 | break; | ||
| 48 | } | ||
| 49 | } | 46 | } |
| 47 | gh = NULL; | ||
| 48 | out: | ||
| 50 | spin_unlock(&gl->gl_spin); | 49 | spin_unlock(&gl->gl_spin); |
| 51 | 50 | ||
| 52 | return locked; | 51 | return gh; |
| 53 | } | 52 | } |
| 54 | 53 | ||
| 55 | static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl) | 54 | static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl) |
| @@ -79,7 +78,6 @@ static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl) | |||
| 79 | int gfs2_glock_get(struct gfs2_sbd *sdp, | 78 | int gfs2_glock_get(struct gfs2_sbd *sdp, |
| 80 | u64 number, const struct gfs2_glock_operations *glops, | 79 | u64 number, const struct gfs2_glock_operations *glops, |
| 81 | int create, struct gfs2_glock **glp); | 80 | int create, struct gfs2_glock **glp); |
| 82 | void gfs2_glock_hold(struct gfs2_glock *gl); | ||
| 83 | int gfs2_glock_put(struct gfs2_glock *gl); | 81 | int gfs2_glock_put(struct gfs2_glock *gl); |
| 84 | void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, | 82 | void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, |
| 85 | struct gfs2_holder *gh); | 83 | struct gfs2_holder *gh); |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index c663b7a0f410..d31badadef8f 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
| 4 | * | 4 | * |
| 5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
| @@ -126,7 +126,13 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags) | |||
| 126 | return; | 126 | return; |
| 127 | 127 | ||
| 128 | gfs2_meta_inval(gl); | 128 | gfs2_meta_inval(gl); |
| 129 | gl->gl_vn++; | 129 | if (gl->gl_object == GFS2_I(gl->gl_sbd->sd_rindex)) |
| 130 | gl->gl_sbd->sd_rindex_uptodate = 0; | ||
| 131 | else if (gl->gl_ops == &gfs2_rgrp_glops && gl->gl_object) { | ||
| 132 | struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object; | ||
| 133 | |||
| 134 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; | ||
| 135 | } | ||
| 130 | } | 136 | } |
| 131 | 137 | ||
| 132 | /** | 138 | /** |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 525dcae352d6..9c2c0b90b22a 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
| 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
| 4 | * | 4 | * |
| 5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
| @@ -44,7 +44,6 @@ struct gfs2_log_header_host { | |||
| 44 | 44 | ||
| 45 | struct gfs2_log_operations { | 45 | struct gfs2_log_operations { |
| 46 | void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le); | 46 | void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le); |
| 47 | void (*lo_incore_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr); | ||
| 48 | void (*lo_before_commit) (struct gfs2_sbd *sdp); | 47 | void (*lo_before_commit) (struct gfs2_sbd *sdp); |
| 49 | void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); | 48 | void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); |
| 50 | void (*lo_before_scan) (struct gfs2_jdesc *jd, | 49 | void (*lo_before_scan) (struct gfs2_jdesc *jd, |
| @@ -70,7 +69,6 @@ struct gfs2_bitmap { | |||
| 70 | }; | 69 | }; |
| 71 | 70 | ||
| 72 | struct gfs2_rgrp_host { | 71 | struct gfs2_rgrp_host { |
| 73 | u32 rg_flags; | ||
| 74 | u32 rg_free; | 72 | u32 rg_free; |
| 75 | u32 rg_dinodes; | 73 | u32 rg_dinodes; |
| 76 | u64 rg_igeneration; | 74 | u64 rg_igeneration; |
| @@ -87,17 +85,17 @@ struct gfs2_rgrpd { | |||
| 87 | u32 rd_data; /* num of data blocks in rgrp */ | 85 | u32 rd_data; /* num of data blocks in rgrp */ |
| 88 | u32 rd_bitbytes; /* number of bytes in data bitmaps */ | 86 | u32 rd_bitbytes; /* number of bytes in data bitmaps */ |
| 89 | struct gfs2_rgrp_host rd_rg; | 87 | struct gfs2_rgrp_host rd_rg; |
| 90 | u64 rd_rg_vn; | ||
| 91 | struct gfs2_bitmap *rd_bits; | 88 | struct gfs2_bitmap *rd_bits; |
| 92 | unsigned int rd_bh_count; | 89 | unsigned int rd_bh_count; |
| 93 | struct mutex rd_mutex; | 90 | struct mutex rd_mutex; |
| 94 | u32 rd_free_clone; | 91 | u32 rd_free_clone; |
| 95 | struct gfs2_log_element rd_le; | 92 | struct gfs2_log_element rd_le; |
| 96 | u32 rd_last_alloc_data; | 93 | u32 rd_last_alloc; |
| 97 | u32 rd_last_alloc_meta; | ||
| 98 | struct gfs2_sbd *rd_sbd; | 94 | struct gfs2_sbd *rd_sbd; |
| 99 | unsigned long rd_flags; | 95 | unsigned char rd_flags; |
| 100 | #define GFS2_RDF_CHECK 0x0001 /* Need to check for unlinked inodes */ | 96 | #define GFS2_RDF_CHECK 0x01 /* Need to check for unlinked inodes */ |
| 97 | #define GFS2_RDF_NOALLOC 0x02 /* rg prohibits allocation */ | ||
| 98 | #define GFS2_RDF_UPTODATE 0x04 /* rg is up to date */ | ||
| 101 | }; | 99 | }; |
| 102 | 100 | ||
| 103 | enum gfs2_state_bits { | 101 | enum gfs2_state_bits { |
| @@ -168,6 +166,8 @@ enum { | |||
| 168 | GLF_DIRTY = 5, | 166 | GLF_DIRTY = 5, |
| 169 | GLF_DEMOTE_IN_PROGRESS = 6, | 167 | GLF_DEMOTE_IN_PROGRESS = 6, |
| 170 | GLF_LFLUSH = 7, | 168 | GLF_LFLUSH = 7, |
| 169 | GLF_WAITERS2 = 8, | ||
| 170 | GLF_CONV_DEADLK = 9, | ||
| 171 | }; | 171 | }; |
| 172 | 172 | ||
| 173 | struct gfs2_glock { | 173 | struct gfs2_glock { |
| @@ -187,18 +187,15 @@ struct gfs2_glock { | |||
| 187 | struct list_head gl_holders; | 187 | struct list_head gl_holders; |
| 188 | struct list_head gl_waiters1; /* HIF_MUTEX */ | 188 | struct list_head gl_waiters1; /* HIF_MUTEX */ |
| 189 | struct list_head gl_waiters3; /* HIF_PROMOTE */ | 189 | struct list_head gl_waiters3; /* HIF_PROMOTE */ |
| 190 | int gl_waiters2; /* GIF_DEMOTE */ | ||
| 191 | 190 | ||
| 192 | const struct gfs2_glock_operations *gl_ops; | 191 | const struct gfs2_glock_operations *gl_ops; |
| 193 | 192 | ||
| 194 | struct gfs2_holder *gl_req_gh; | 193 | struct gfs2_holder *gl_req_gh; |
| 195 | gfs2_glop_bh_t gl_req_bh; | ||
| 196 | 194 | ||
| 197 | void *gl_lock; | 195 | void *gl_lock; |
| 198 | char *gl_lvb; | 196 | char *gl_lvb; |
| 199 | atomic_t gl_lvb_count; | 197 | atomic_t gl_lvb_count; |
| 200 | 198 | ||
| 201 | u64 gl_vn; | ||
| 202 | unsigned long gl_stamp; | 199 | unsigned long gl_stamp; |
| 203 | unsigned long gl_tchange; | 200 | unsigned long gl_tchange; |
| 204 | void *gl_object; | 201 | void *gl_object; |
| @@ -213,6 +210,8 @@ struct gfs2_glock { | |||
| 213 | struct delayed_work gl_work; | 210 | struct delayed_work gl_work; |
| 214 | }; | 211 | }; |
| 215 | 212 | ||
| 213 | #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ | ||
| 214 | |||
| 216 | struct gfs2_alloc { | 215 | struct gfs2_alloc { |
| 217 | /* Quota stuff */ | 216 | /* Quota stuff */ |
| 218 | 217 | ||
| @@ -241,14 +240,9 @@ enum { | |||
| 241 | 240 | ||
| 242 | struct gfs2_dinode_host { | 241 | struct gfs2_dinode_host { |
| 243 | u64 di_size; /* number of bytes in file */ | 242 | u64 di_size; /* number of bytes in file */ |
| 244 | u64 di_blocks; /* number of blocks in file */ | ||
| 245 | u64 di_goal_meta; /* rgrp to alloc from next */ | ||
| 246 | u64 di_goal_data; /* data block goal */ | ||
| 247 | u64 di_generation; /* generation number for NFS */ | 243 | u64 di_generation; /* generation number for NFS */ |
| 248 | u32 di_flags; /* GFS2_DIF_... */ | 244 | u32 di_flags; /* GFS2_DIF_... */ |
| 249 | u16 di_height; /* height of metadata */ | ||
| 250 | /* These only apply to directories */ | 245 | /* These only apply to directories */ |
| 251 | u16 di_depth; /* Number of bits in the table */ | ||
| 252 | u32 di_entries; /* The number of entries in the directory */ | 246 | u32 di_entries; /* The number of entries in the directory */ |
| 253 | u64 di_eattr; /* extended attribute block number */ | 247 | u64 di_eattr; /* extended attribute block number */ |
| 254 | }; | 248 | }; |
| @@ -265,9 +259,10 @@ struct gfs2_inode { | |||
| 265 | struct gfs2_holder i_iopen_gh; | 259 | struct gfs2_holder i_iopen_gh; |
| 266 | struct gfs2_holder i_gh; /* for prepare/commit_write only */ | 260 | struct gfs2_holder i_gh; /* for prepare/commit_write only */ |
| 267 | struct gfs2_alloc *i_alloc; | 261 | struct gfs2_alloc *i_alloc; |
| 268 | u64 i_last_rg_alloc; | 262 | u64 i_goal; /* goal block for allocations */ |
| 269 | |||
| 270 | struct rw_semaphore i_rw_mutex; | 263 | struct rw_semaphore i_rw_mutex; |
| 264 | u8 i_height; | ||
| 265 | u8 i_depth; | ||
| 271 | }; | 266 | }; |
| 272 | 267 | ||
| 273 | /* | 268 | /* |
| @@ -490,9 +485,9 @@ struct gfs2_sbd { | |||
| 490 | u32 sd_qc_per_block; | 485 | u32 sd_qc_per_block; |
| 491 | u32 sd_max_dirres; /* Max blocks needed to add a directory entry */ | 486 | u32 sd_max_dirres; /* Max blocks needed to add a directory entry */ |
| 492 | u32 sd_max_height; /* Max height of a file's metadata tree */ | 487 | u32 sd_max_height; /* Max height of a file's metadata tree */ |
| 493 | u64 sd_heightsize[GFS2_MAX_META_HEIGHT]; | 488 | u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1]; |
| 494 | u32 sd_max_jheight; /* Max height of journaled file's meta tree */ | 489 | u32 sd_max_jheight; /* Max height of journaled file's meta tree */ |
| 495 | u64 sd_jheightsize[GFS2_MAX_META_HEIGHT]; | 490 | u64 sd_jheightsize[GFS2_MAX_META_HEIGHT + 1]; |
| 496 | 491 | ||
| 497 | struct gfs2_args sd_args; /* Mount arguments */ | 492 | struct gfs2_args sd_args; /* Mount arguments */ |
| 498 | struct gfs2_tune sd_tune; /* Filesystem tuning structure */ | 493 | struct gfs2_tune sd_tune; /* Filesystem tuning structure */ |
| @@ -533,7 +528,7 @@ struct gfs2_sbd { | |||
| 533 | 528 | ||
| 534 | /* Resource group stuff */ | 529 | /* Resource group stuff */ |
| 535 | 530 | ||
| 536 | u64 sd_rindex_vn; | 531 | int sd_rindex_uptodate; |
| 537 | spinlock_t sd_rindex_spin; | 532 | spinlock_t sd_rindex_spin; |
| 538 | struct mutex sd_rindex_mutex; | 533 | struct mutex sd_rindex_mutex; |
| 539 | struct list_head sd_rindex_list; | 534 | struct list_head sd_rindex_list; |
| @@ -637,9 +632,6 @@ struct gfs2_sbd { | |||
| 637 | 632 | ||
| 638 | /* Counters */ | 633 | /* Counters */ |
| 639 | 634 | ||
| 640 | atomic_t sd_glock_count; | ||
| 641 | atomic_t sd_glock_held_count; | ||
| 642 | atomic_t sd_inode_count; | ||
| 643 | atomic_t sd_reclaimed; | 635 | atomic_t sd_reclaimed; |
| 644 | 636 | ||
| 645 | char sd_fsname[GFS2_FSNAME_LEN]; | 637 | char sd_fsname[GFS2_FSNAME_LEN]; |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 37725ade3c51..3a9ef526c308 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
| 4 | * | 4 | * |
| 5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
| @@ -149,7 +149,8 @@ void gfs2_set_iop(struct inode *inode) | |||
| 149 | } else if (S_ISLNK(mode)) { | 149 | } else if (S_ISLNK(mode)) { |
| 150 | inode->i_op = &gfs2_symlink_iops; | 150 | inode->i_op = &gfs2_symlink_iops; |
| 151 | } else { | 151 | } else { |
| 152 | inode->i_op = &gfs2_dev_iops; | 152 | inode->i_op = &gfs2_file_iops; |
| 153 | init_special_inode(inode, inode->i_mode, inode->i_rdev); | ||
| 153 | } | 154 | } |
| 154 | 155 | ||
| 155 | unlock_new_inode(inode); | 156 | unlock_new_inode(inode); |
| @@ -248,12 +249,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
| 248 | { | 249 | { |
| 249 | struct gfs2_dinode_host *di = &ip->i_di; | 250 | struct gfs2_dinode_host *di = &ip->i_di; |
| 250 | const struct gfs2_dinode *str = buf; | 251 | const struct gfs2_dinode *str = buf; |
| 252 | u16 height, depth; | ||
| 251 | 253 | ||
| 252 | if (ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)) { | 254 | if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) |
| 253 | if (gfs2_consist_inode(ip)) | 255 | goto corrupt; |
| 254 | gfs2_dinode_print(ip); | ||
| 255 | return -EIO; | ||
| 256 | } | ||
| 257 | ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); | 256 | ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); |
| 258 | ip->i_inode.i_mode = be32_to_cpu(str->di_mode); | 257 | ip->i_inode.i_mode = be32_to_cpu(str->di_mode); |
| 259 | ip->i_inode.i_rdev = 0; | 258 | ip->i_inode.i_rdev = 0; |
| @@ -275,8 +274,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
| 275 | ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); | 274 | ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); |
| 276 | di->di_size = be64_to_cpu(str->di_size); | 275 | di->di_size = be64_to_cpu(str->di_size); |
| 277 | i_size_write(&ip->i_inode, di->di_size); | 276 | i_size_write(&ip->i_inode, di->di_size); |
| 278 | di->di_blocks = be64_to_cpu(str->di_blocks); | 277 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); |
| 279 | gfs2_set_inode_blocks(&ip->i_inode); | ||
| 280 | ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); | 278 | ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); |
| 281 | ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); | 279 | ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); |
| 282 | ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); | 280 | ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); |
| @@ -284,15 +282,20 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
| 284 | ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); | 282 | ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); |
| 285 | ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); | 283 | ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); |
| 286 | 284 | ||
| 287 | di->di_goal_meta = be64_to_cpu(str->di_goal_meta); | 285 | ip->i_goal = be64_to_cpu(str->di_goal_meta); |
| 288 | di->di_goal_data = be64_to_cpu(str->di_goal_data); | ||
| 289 | di->di_generation = be64_to_cpu(str->di_generation); | 286 | di->di_generation = be64_to_cpu(str->di_generation); |
| 290 | 287 | ||
| 291 | di->di_flags = be32_to_cpu(str->di_flags); | 288 | di->di_flags = be32_to_cpu(str->di_flags); |
| 292 | gfs2_set_inode_flags(&ip->i_inode); | 289 | gfs2_set_inode_flags(&ip->i_inode); |
| 293 | di->di_height = be16_to_cpu(str->di_height); | 290 | height = be16_to_cpu(str->di_height); |
| 294 | 291 | if (unlikely(height > GFS2_MAX_META_HEIGHT)) | |
| 295 | di->di_depth = be16_to_cpu(str->di_depth); | 292 | goto corrupt; |
| 293 | ip->i_height = (u8)height; | ||
| 294 | |||
| 295 | depth = be16_to_cpu(str->di_depth); | ||
| 296 | if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) | ||
| 297 | goto corrupt; | ||
| 298 | ip->i_depth = (u8)depth; | ||
| 296 | di->di_entries = be32_to_cpu(str->di_entries); | 299 | di->di_entries = be32_to_cpu(str->di_entries); |
| 297 | 300 | ||
| 298 | di->di_eattr = be64_to_cpu(str->di_eattr); | 301 | di->di_eattr = be64_to_cpu(str->di_eattr); |
| @@ -300,6 +303,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
| 300 | gfs2_set_aops(&ip->i_inode); | 303 | gfs2_set_aops(&ip->i_inode); |
| 301 | 304 | ||
| 302 | return 0; | 305 | return 0; |
| 306 | corrupt: | ||
| 307 | if (gfs2_consist_inode(ip)) | ||
| 308 | gfs2_dinode_print(ip); | ||
| 309 | return -EIO; | ||
| 303 | } | 310 | } |
| 304 | 311 | ||
| 305 | /** | 312 | /** |
| @@ -337,13 +344,15 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip) | |||
| 337 | struct gfs2_rgrpd *rgd; | 344 | struct gfs2_rgrpd *rgd; |
| 338 | int error; | 345 | int error; |
| 339 | 346 | ||
| 340 | if (ip->i_di.di_blocks != 1) { | 347 | if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { |
| 341 | if (gfs2_consist_inode(ip)) | 348 | if (gfs2_consist_inode(ip)) |
| 342 | gfs2_dinode_print(ip); | 349 | gfs2_dinode_print(ip); |
| 343 | return -EIO; | 350 | return -EIO; |
| 344 | } | 351 | } |
| 345 | 352 | ||
| 346 | al = gfs2_alloc_get(ip); | 353 | al = gfs2_alloc_get(ip); |
| 354 | if (!al) | ||
| 355 | return -ENOMEM; | ||
| 347 | 356 | ||
| 348 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 357 | error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); |
| 349 | if (error) | 358 | if (error) |
| @@ -487,7 +496,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, | |||
| 487 | return dir; | 496 | return dir; |
| 488 | } | 497 | } |
| 489 | 498 | ||
| 490 | if (gfs2_glock_is_locked_by_me(dip->i_gl) == 0) { | 499 | if (gfs2_glock_is_locked_by_me(dip->i_gl) == NULL) { |
| 491 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); | 500 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); |
| 492 | if (error) | 501 | if (error) |
| 493 | return ERR_PTR(error); | 502 | return ERR_PTR(error); |
| @@ -818,7 +827,8 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, | |||
| 818 | int error; | 827 | int error; |
| 819 | 828 | ||
| 820 | munge_mode_uid_gid(dip, &mode, &uid, &gid); | 829 | munge_mode_uid_gid(dip, &mode, &uid, &gid); |
| 821 | gfs2_alloc_get(dip); | 830 | if (!gfs2_alloc_get(dip)) |
| 831 | return -ENOMEM; | ||
| 822 | 832 | ||
| 823 | error = gfs2_quota_lock(dip, uid, gid); | 833 | error = gfs2_quota_lock(dip, uid, gid); |
| 824 | if (error) | 834 | if (error) |
| @@ -853,6 +863,8 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, | |||
| 853 | int error; | 863 | int error; |
| 854 | 864 | ||
| 855 | al = gfs2_alloc_get(dip); | 865 | al = gfs2_alloc_get(dip); |
| 866 | if (!al) | ||
| 867 | return -ENOMEM; | ||
| 856 | 868 | ||
| 857 | error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 869 | error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); |
| 858 | if (error) | 870 | if (error) |
| @@ -1219,7 +1231,7 @@ int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) | |||
| 1219 | 1231 | ||
| 1220 | x = ip->i_di.di_size + 1; | 1232 | x = ip->i_di.di_size + 1; |
| 1221 | if (x > *len) { | 1233 | if (x > *len) { |
| 1222 | *buf = kmalloc(x, GFP_KERNEL); | 1234 | *buf = kmalloc(x, GFP_NOFS); |
| 1223 | if (!*buf) { | 1235 | if (!*buf) { |
| 1224 | error = -ENOMEM; | 1236 | error = -ENOMEM; |
| 1225 | goto out_brelse; | 1237 | goto out_brelse; |
| @@ -1391,21 +1403,21 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) | |||
| 1391 | str->di_gid = cpu_to_be32(ip->i_inode.i_gid); | 1403 | str->di_gid = cpu_to_be32(ip->i_inode.i_gid); |
| 1392 | str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); | 1404 | str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); |
| 1393 | str->di_size = cpu_to_be64(di->di_size); | 1405 | str->di_size = cpu_to_be64(di->di_size); |
| 1394 | str->di_blocks = cpu_to_be64(di->di_blocks); | 1406 | str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); |
| 1395 | str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); | 1407 | str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); |
| 1396 | str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); | 1408 | str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); |
| 1397 | str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); | 1409 | str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); |
| 1398 | 1410 | ||
| 1399 | str->di_goal_meta = cpu_to_be64(di->di_goal_meta); | 1411 | str->di_goal_meta = cpu_to_be64(ip->i_goal); |
| 1400 | str->di_goal_data = cpu_to_be64(di->di_goal_data); | 1412 | str->di_goal_data = cpu_to_be64(ip->i_goal); |
| 1401 | str->di_generation = cpu_to_be64(di->di_generation); | 1413 | str->di_generation = cpu_to_be64(di->di_generation); |
| 1402 | 1414 | ||
| 1403 | str->di_flags = cpu_to_be32(di->di_flags); | 1415 | str->di_flags = cpu_to_be32(di->di_flags); |
| 1404 | str->di_height = cpu_to_be16(di->di_height); | 1416 | str->di_height = cpu_to_be16(ip->i_height); |
| 1405 | str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && | 1417 | str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && |
| 1406 | !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ? | 1418 | !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ? |
| 1407 | GFS2_FORMAT_DE : 0); | 1419 | GFS2_FORMAT_DE : 0); |
| 1408 | str->di_depth = cpu_to_be16(di->di_depth); | 1420 | str->di_depth = cpu_to_be16(ip->i_depth); |
| 1409 | str->di_entries = cpu_to_be32(di->di_entries); | 1421 | str->di_entries = cpu_to_be32(di->di_entries); |
| 1410 | 1422 | ||
| 1411 | str->di_eattr = cpu_to_be64(di->di_eattr); | 1423 | str->di_eattr = cpu_to_be64(di->di_eattr); |
| @@ -1423,15 +1435,13 @@ void gfs2_dinode_print(const struct gfs2_inode *ip) | |||
| 1423 | printk(KERN_INFO " no_addr = %llu\n", | 1435 | printk(KERN_INFO " no_addr = %llu\n", |
| 1424 | (unsigned long long)ip->i_no_addr); | 1436 | (unsigned long long)ip->i_no_addr); |
| 1425 | printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size); | 1437 | printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size); |
| 1426 | printk(KERN_INFO " di_blocks = %llu\n", | 1438 | printk(KERN_INFO " blocks = %llu\n", |
| 1427 | (unsigned long long)di->di_blocks); | 1439 | (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); |
| 1428 | printk(KERN_INFO " di_goal_meta = %llu\n", | 1440 | printk(KERN_INFO " i_goal = %llu\n", |
| 1429 | (unsigned long long)di->di_goal_meta); | 1441 | (unsigned long long)ip->i_goal); |
| 1430 | printk(KERN_INFO " di_goal_data = %llu\n", | ||
| 1431 | (unsigned long long)di->di_goal_data); | ||
| 1432 | printk(KERN_INFO " di_flags = 0x%.8X\n", di->di_flags); | 1442 | printk(KERN_INFO " di_flags = 0x%.8X\n", di->di_flags); |
| 1433 | printk(KERN_INFO " di_height = %u\n", di->di_height); | 1443 | printk(KERN_INFO " i_height = %u\n", ip->i_height); |
| 1434 | printk(KERN_INFO " di_depth = %u\n", di->di_depth); | 1444 | printk(KERN_INFO " i_depth = %u\n", ip->i_depth); |
| 1435 | printk(KERN_INFO " di_entries = %u\n", di->di_entries); | 1445 | printk(KERN_INFO " di_entries = %u\n", di->di_entries); |
| 1436 | printk(KERN_INFO " di_eattr = %llu\n", | 1446 | printk(KERN_INFO " di_eattr = %llu\n", |
| 1437 | (unsigned long long)di->di_eattr); | 1447 | (unsigned long long)di->di_eattr); |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index d44650662615..580da454b38f 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
| @@ -10,9 +10,11 @@ | |||
| 10 | #ifndef __INODE_DOT_H__ | 10 | #ifndef __INODE_DOT_H__ |
| 11 | #define __INODE_DOT_H__ | 11 | #define __INODE_DOT_H__ |
| 12 | 12 | ||
| 13 | #include "util.h" | ||
| 14 | |||
| 13 | static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) | 15 | static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) |
| 14 | { | 16 | { |
| 15 | return !ip->i_di.di_height; | 17 | return !ip->i_height; |
| 16 | } | 18 | } |
| 17 | 19 | ||
| 18 | static inline int gfs2_is_jdata(const struct gfs2_inode *ip) | 20 | static inline int gfs2_is_jdata(const struct gfs2_inode *ip) |
| @@ -37,13 +39,25 @@ static inline int gfs2_is_dir(const struct gfs2_inode *ip) | |||
| 37 | return S_ISDIR(ip->i_inode.i_mode); | 39 | return S_ISDIR(ip->i_inode.i_mode); |
| 38 | } | 40 | } |
| 39 | 41 | ||
| 40 | static inline void gfs2_set_inode_blocks(struct inode *inode) | 42 | static inline void gfs2_set_inode_blocks(struct inode *inode, u64 blocks) |
| 43 | { | ||
| 44 | inode->i_blocks = blocks << | ||
| 45 | (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); | ||
| 46 | } | ||
| 47 | |||
| 48 | static inline u64 gfs2_get_inode_blocks(const struct inode *inode) | ||
| 41 | { | 49 | { |
| 42 | struct gfs2_inode *ip = GFS2_I(inode); | 50 | return inode->i_blocks >> |
| 43 | inode->i_blocks = ip->i_di.di_blocks << | ||
| 44 | (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); | 51 | (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); |
| 45 | } | 52 | } |
| 46 | 53 | ||
| 54 | static inline void gfs2_add_inode_blocks(struct inode *inode, s64 change) | ||
| 55 | { | ||
| 56 | gfs2_assert(GFS2_SB(inode), (change >= 0 || inode->i_blocks > -change)); | ||
| 57 | change *= (GFS2_SB(inode)->sd_sb.sb_bsize/GFS2_BASIC_BLOCK); | ||
| 58 | inode->i_blocks += change; | ||
| 59 | } | ||
| 60 | |||
| 47 | static inline int gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr, | 61 | static inline int gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr, |
| 48 | u64 no_formal_ino) | 62 | u64 no_formal_ino) |
| 49 | { | 63 | { |
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c deleted file mode 100644 index cfcc39b86a53..000000000000 --- a/fs/gfs2/lm.c +++ /dev/null | |||
| @@ -1,210 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/slab.h> | ||
| 11 | #include <linux/spinlock.h> | ||
| 12 | #include <linux/completion.h> | ||
| 13 | #include <linux/buffer_head.h> | ||
| 14 | #include <linux/delay.h> | ||
| 15 | #include <linux/gfs2_ondisk.h> | ||
| 16 | #include <linux/lm_interface.h> | ||
| 17 | |||
| 18 | #include "gfs2.h" | ||
| 19 | #include "incore.h" | ||
| 20 | #include "glock.h" | ||
| 21 | #include "lm.h" | ||
| 22 | #include "super.h" | ||
| 23 | #include "util.h" | ||
| 24 | |||
| 25 | /** | ||
| 26 | * gfs2_lm_mount - mount a locking protocol | ||
| 27 | * @sdp: the filesystem | ||
| 28 | * @args: mount arguements | ||
| 29 | * @silent: if 1, don't complain if the FS isn't a GFS2 fs | ||
| 30 | * | ||
| 31 | * Returns: errno | ||
| 32 | */ | ||
| 33 | |||
| 34 | int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) | ||
| 35 | { | ||
| 36 | char *proto = sdp->sd_proto_name; | ||
| 37 | char *table = sdp->sd_table_name; | ||
| 38 | int flags = 0; | ||
| 39 | int error; | ||
| 40 | |||
| 41 | if (sdp->sd_args.ar_spectator) | ||
| 42 | flags |= LM_MFLAG_SPECTATOR; | ||
| 43 | |||
| 44 | fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table); | ||
| 45 | |||
| 46 | error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata, | ||
| 47 | gfs2_glock_cb, sdp, | ||
| 48 | GFS2_MIN_LVB_SIZE, flags, | ||
| 49 | &sdp->sd_lockstruct, &sdp->sd_kobj); | ||
| 50 | if (error) { | ||
| 51 | fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n", | ||
| 52 | proto, table, sdp->sd_args.ar_hostdata); | ||
| 53 | goto out; | ||
| 54 | } | ||
| 55 | |||
| 56 | if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) || | ||
| 57 | gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) || | ||
| 58 | gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >= | ||
| 59 | GFS2_MIN_LVB_SIZE)) { | ||
| 60 | gfs2_unmount_lockproto(&sdp->sd_lockstruct); | ||
| 61 | goto out; | ||
| 62 | } | ||
| 63 | |||
| 64 | if (sdp->sd_args.ar_spectator) | ||
| 65 | snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table); | ||
| 66 | else | ||
| 67 | snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table, | ||
| 68 | sdp->sd_lockstruct.ls_jid); | ||
| 69 | |||
| 70 | fs_info(sdp, "Joined cluster. Now mounting FS...\n"); | ||
| 71 | |||
| 72 | if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) && | ||
| 73 | !sdp->sd_args.ar_ignore_local_fs) { | ||
| 74 | sdp->sd_args.ar_localflocks = 1; | ||
| 75 | sdp->sd_args.ar_localcaching = 1; | ||
| 76 | } | ||
| 77 | |||
| 78 | out: | ||
| 79 | return error; | ||
| 80 | } | ||
| 81 | |||
| 82 | void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) | ||
| 83 | { | ||
| 84 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 85 | sdp->sd_lockstruct.ls_ops->lm_others_may_mount( | ||
| 86 | sdp->sd_lockstruct.ls_lockspace); | ||
| 87 | } | ||
| 88 | |||
| 89 | void gfs2_lm_unmount(struct gfs2_sbd *sdp) | ||
| 90 | { | ||
| 91 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 92 | gfs2_unmount_lockproto(&sdp->sd_lockstruct); | ||
| 93 | } | ||
| 94 | |||
| 95 | int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) | ||
| 96 | { | ||
| 97 | va_list args; | ||
| 98 | |||
| 99 | if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
| 100 | return 0; | ||
| 101 | |||
| 102 | va_start(args, fmt); | ||
| 103 | vprintk(fmt, args); | ||
| 104 | va_end(args); | ||
| 105 | |||
| 106 | fs_err(sdp, "about to withdraw this file system\n"); | ||
| 107 | BUG_ON(sdp->sd_args.ar_debug); | ||
| 108 | |||
| 109 | fs_err(sdp, "telling LM to withdraw\n"); | ||
| 110 | gfs2_withdraw_lockproto(&sdp->sd_lockstruct); | ||
| 111 | fs_err(sdp, "withdrawn\n"); | ||
| 112 | dump_stack(); | ||
| 113 | |||
| 114 | return -1; | ||
| 115 | } | ||
| 116 | |||
| 117 | int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
| 118 | void **lockp) | ||
| 119 | { | ||
| 120 | int error = -EIO; | ||
| 121 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 122 | error = sdp->sd_lockstruct.ls_ops->lm_get_lock( | ||
| 123 | sdp->sd_lockstruct.ls_lockspace, name, lockp); | ||
| 124 | return error; | ||
| 125 | } | ||
| 126 | |||
| 127 | void gfs2_lm_put_lock(struct gfs2_sbd *sdp, void *lock) | ||
| 128 | { | ||
| 129 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 130 | sdp->sd_lockstruct.ls_ops->lm_put_lock(lock); | ||
| 131 | } | ||
| 132 | |||
| 133 | unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, | ||
| 134 | unsigned int cur_state, unsigned int req_state, | ||
| 135 | unsigned int flags) | ||
| 136 | { | ||
| 137 | int ret = 0; | ||
| 138 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 139 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state, | ||
| 140 | req_state, flags); | ||
| 141 | return ret; | ||
| 142 | } | ||
| 143 | |||
| 144 | unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock, | ||
| 145 | unsigned int cur_state) | ||
| 146 | { | ||
| 147 | int ret = 0; | ||
| 148 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 149 | ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state); | ||
| 150 | return ret; | ||
| 151 | } | ||
| 152 | |||
| 153 | void gfs2_lm_cancel(struct gfs2_sbd *sdp, void *lock) | ||
| 154 | { | ||
| 155 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 156 | sdp->sd_lockstruct.ls_ops->lm_cancel(lock); | ||
| 157 | } | ||
| 158 | |||
| 159 | int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp) | ||
| 160 | { | ||
| 161 | int error = -EIO; | ||
| 162 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 163 | error = sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp); | ||
| 164 | return error; | ||
| 165 | } | ||
| 166 | |||
| 167 | void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, void *lock, char *lvb) | ||
| 168 | { | ||
| 169 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 170 | sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(lock, lvb); | ||
| 171 | } | ||
| 172 | |||
| 173 | int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
| 174 | struct file *file, struct file_lock *fl) | ||
| 175 | { | ||
| 176 | int error = -EIO; | ||
| 177 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 178 | error = sdp->sd_lockstruct.ls_ops->lm_plock_get( | ||
| 179 | sdp->sd_lockstruct.ls_lockspace, name, file, fl); | ||
| 180 | return error; | ||
| 181 | } | ||
| 182 | |||
| 183 | int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
| 184 | struct file *file, int cmd, struct file_lock *fl) | ||
| 185 | { | ||
| 186 | int error = -EIO; | ||
| 187 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 188 | error = sdp->sd_lockstruct.ls_ops->lm_plock( | ||
| 189 | sdp->sd_lockstruct.ls_lockspace, name, file, cmd, fl); | ||
| 190 | return error; | ||
| 191 | } | ||
| 192 | |||
| 193 | int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
| 194 | struct file *file, struct file_lock *fl) | ||
| 195 | { | ||
| 196 | int error = -EIO; | ||
| 197 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 198 | error = sdp->sd_lockstruct.ls_ops->lm_punlock( | ||
| 199 | sdp->sd_lockstruct.ls_lockspace, name, file, fl); | ||
| 200 | return error; | ||
| 201 | } | ||
| 202 | |||
| 203 | void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, | ||
| 204 | unsigned int message) | ||
| 205 | { | ||
| 206 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 207 | sdp->sd_lockstruct.ls_ops->lm_recovery_done( | ||
| 208 | sdp->sd_lockstruct.ls_lockspace, jid, message); | ||
| 209 | } | ||
| 210 | |||
diff --git a/fs/gfs2/lm.h b/fs/gfs2/lm.h deleted file mode 100644 index 21cdc30ee08c..000000000000 --- a/fs/gfs2/lm.h +++ /dev/null | |||
| @@ -1,42 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | ||
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | ||
| 4 | * | ||
| 5 | * This copyrighted material is made available to anyone wishing to use, | ||
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | ||
| 7 | * of the GNU General Public License version 2. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef __LM_DOT_H__ | ||
| 11 | #define __LM_DOT_H__ | ||
| 12 | |||
| 13 | struct gfs2_sbd; | ||
| 14 | |||
| 15 | #define GFS2_MIN_LVB_SIZE 32 | ||
| 16 | |||
| 17 | int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent); | ||
| 18 | void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp); | ||
| 19 | void gfs2_lm_unmount(struct gfs2_sbd *sdp); | ||
| 20 | int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) | ||
| 21 | __attribute__ ((format(printf, 2, 3))); | ||
| 22 | int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
| 23 | void **lockp); | ||
| 24 | void gfs2_lm_put_lock(struct gfs2_sbd *sdp, void *lock); | ||
| 25 | unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, | ||
| 26 | unsigned int cur_state, unsigned int req_state, | ||
| 27 | unsigned int flags); | ||
| 28 | unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock, | ||
| 29 | unsigned int cur_state); | ||
| 30 | void gfs2_lm_cancel(struct gfs2_sbd *sdp, void *lock); | ||
| 31 | int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp); | ||
| 32 | void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, void *lock, char *lvb); | ||
| 33 | int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
| 34 | struct file *file, struct file_lock *fl); | ||
| 35 | int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
| 36 | struct file *file, int cmd, struct file_lock *fl); | ||
| 37 | int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
| 38 | struct file *file, struct file_lock *fl); | ||
| 39 | void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, | ||
| 40 | unsigned int message); | ||
| 41 | |||
| 42 | #endif /* __LM_DOT_H__ */ | ||
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c index 542a797ac89a..cf7ea8abec87 100644 --- a/fs/gfs2/locking/dlm/lock.c +++ b/fs/gfs2/locking/dlm/lock.c | |||
| @@ -137,7 +137,8 @@ static inline unsigned int make_flags(struct gdlm_lock *lp, | |||
| 137 | 137 | ||
| 138 | /* Conversion deadlock avoidance by DLM */ | 138 | /* Conversion deadlock avoidance by DLM */ |
| 139 | 139 | ||
| 140 | if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) && | 140 | if (!(lp->ls->fsflags & LM_MFLAG_CONV_NODROP) && |
| 141 | !test_bit(LFL_FORCE_PROMOTE, &lp->flags) && | ||
| 141 | !(lkf & DLM_LKF_NOQUEUE) && | 142 | !(lkf & DLM_LKF_NOQUEUE) && |
| 142 | cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req) | 143 | cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req) |
| 143 | lkf |= DLM_LKF_CONVDEADLK; | 144 | lkf |= DLM_LKF_CONVDEADLK; |
| @@ -164,7 +165,7 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name, | |||
| 164 | { | 165 | { |
| 165 | struct gdlm_lock *lp; | 166 | struct gdlm_lock *lp; |
| 166 | 167 | ||
| 167 | lp = kzalloc(sizeof(struct gdlm_lock), GFP_KERNEL); | 168 | lp = kzalloc(sizeof(struct gdlm_lock), GFP_NOFS); |
| 168 | if (!lp) | 169 | if (!lp) |
| 169 | return -ENOMEM; | 170 | return -ENOMEM; |
| 170 | 171 | ||
| @@ -382,7 +383,7 @@ static int gdlm_add_lvb(struct gdlm_lock *lp) | |||
| 382 | { | 383 | { |
| 383 | char *lvb; | 384 | char *lvb; |
| 384 | 385 | ||
| 385 | lvb = kzalloc(GDLM_LVB_SIZE, GFP_KERNEL); | 386 | lvb = kzalloc(GDLM_LVB_SIZE, GFP_NOFS); |
| 386 | if (!lvb) | 387 | if (!lvb) |
| 387 | return -ENOMEM; | 388 | return -ENOMEM; |
| 388 | 389 | ||
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h index 9e8265d28377..58fcf8c5bf39 100644 --- a/fs/gfs2/locking/dlm/lock_dlm.h +++ b/fs/gfs2/locking/dlm/lock_dlm.h | |||
| @@ -183,5 +183,10 @@ int gdlm_plock_get(void *, struct lm_lockname *, struct file *, | |||
| 183 | struct file_lock *); | 183 | struct file_lock *); |
| 184 | int gdlm_punlock(void *, struct lm_lockname *, struct file *, | 184 | int gdlm_punlock(void *, struct lm_lockname *, struct file *, |
| 185 | struct file_lock *); | 185 | struct file_lock *); |
| 186 | |||
| 187 | /* mount.c */ | ||
| 188 | |||
| 189 | extern const struct lm_lockops gdlm_ops; | ||
| 190 | |||
| 186 | #endif | 191 | #endif |
| 187 | 192 | ||
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c index a0e7eda643ed..36a225850bd8 100644 --- a/fs/gfs2/locking/dlm/main.c +++ b/fs/gfs2/locking/dlm/main.c | |||
| @@ -11,8 +11,6 @@ | |||
| 11 | 11 | ||
| 12 | #include "lock_dlm.h" | 12 | #include "lock_dlm.h" |
| 13 | 13 | ||
| 14 | extern struct lm_lockops gdlm_ops; | ||
| 15 | |||
| 16 | static int __init init_lock_dlm(void) | 14 | static int __init init_lock_dlm(void) |
| 17 | { | 15 | { |
| 18 | int error; | 16 | int error; |
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c index a87b09839761..8479da47049c 100644 --- a/fs/gfs2/locking/dlm/sysfs.c +++ b/fs/gfs2/locking/dlm/sysfs.c | |||
| @@ -12,8 +12,6 @@ | |||
| 12 | 12 | ||
| 13 | #include "lock_dlm.h" | 13 | #include "lock_dlm.h" |
| 14 | 14 | ||
| 15 | extern struct lm_lockops gdlm_ops; | ||
| 16 | |||
| 17 | static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf) | 15 | static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf) |
| 18 | { | 16 | { |
| 19 | return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name); | 17 | return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name); |
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c index 521694fc19d6..e53db6fd28ab 100644 --- a/fs/gfs2/locking/dlm/thread.c +++ b/fs/gfs2/locking/dlm/thread.c | |||
| @@ -135,7 +135,15 @@ static void process_complete(struct gdlm_lock *lp) | |||
| 135 | lp->lksb.sb_status, lp->lockname.ln_type, | 135 | lp->lksb.sb_status, lp->lockname.ln_type, |
| 136 | (unsigned long long)lp->lockname.ln_number, | 136 | (unsigned long long)lp->lockname.ln_number, |
| 137 | lp->flags); | 137 | lp->flags); |
| 138 | return; | 138 | if (lp->lksb.sb_status == -EDEADLOCK && |
| 139 | lp->ls->fsflags & LM_MFLAG_CONV_NODROP) { | ||
| 140 | lp->req = lp->cur; | ||
| 141 | acb.lc_ret |= LM_OUT_CONV_DEADLK; | ||
| 142 | if (lp->cur == DLM_LOCK_IV) | ||
| 143 | lp->lksb.sb_lkid = 0; | ||
| 144 | goto out; | ||
| 145 | } else | ||
| 146 | return; | ||
| 139 | } | 147 | } |
| 140 | 148 | ||
| 141 | /* | 149 | /* |
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c index d3b8ce6fbbe3..284a5ece8d94 100644 --- a/fs/gfs2/locking/nolock/main.c +++ b/fs/gfs2/locking/nolock/main.c | |||
| @@ -140,7 +140,7 @@ static int nolock_hold_lvb(void *lock, char **lvbp) | |||
| 140 | struct nolock_lockspace *nl = lock; | 140 | struct nolock_lockspace *nl = lock; |
| 141 | int error = 0; | 141 | int error = 0; |
| 142 | 142 | ||
| 143 | *lvbp = kzalloc(nl->nl_lvb_size, GFP_KERNEL); | 143 | *lvbp = kzalloc(nl->nl_lvb_size, GFP_NOFS); |
| 144 | if (!*lvbp) | 144 | if (!*lvbp) |
| 145 | error = -ENOMEM; | 145 | error = -ENOMEM; |
| 146 | 146 | ||
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 161ab6f2058e..548264b1836d 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
| @@ -769,8 +769,8 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | |||
| 769 | sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; | 769 | sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; |
| 770 | gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0); | 770 | gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0); |
| 771 | reserved = calc_reserved(sdp); | 771 | reserved = calc_reserved(sdp); |
| 772 | gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved); | ||
| 772 | unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; | 773 | unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; |
| 773 | gfs2_assert_withdraw(sdp, unused >= 0); | ||
| 774 | atomic_add(unused, &sdp->sd_log_blks_free); | 774 | atomic_add(unused, &sdp->sd_log_blks_free); |
| 775 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= | 775 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= |
| 776 | sdp->sd_jdesc->jd_blocks); | 776 | sdp->sd_jdesc->jd_blocks); |
| @@ -779,6 +779,21 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | |||
| 779 | gfs2_log_unlock(sdp); | 779 | gfs2_log_unlock(sdp); |
| 780 | } | 780 | } |
| 781 | 781 | ||
| 782 | static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | ||
| 783 | { | ||
| 784 | struct list_head *head = &tr->tr_list_buf; | ||
| 785 | struct gfs2_bufdata *bd; | ||
| 786 | |||
| 787 | gfs2_log_lock(sdp); | ||
| 788 | while (!list_empty(head)) { | ||
| 789 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); | ||
| 790 | list_del_init(&bd->bd_list_tr); | ||
| 791 | tr->tr_num_buf--; | ||
| 792 | } | ||
| 793 | gfs2_log_unlock(sdp); | ||
| 794 | gfs2_assert_warn(sdp, !tr->tr_num_buf); | ||
| 795 | } | ||
| 796 | |||
| 782 | /** | 797 | /** |
| 783 | * gfs2_log_commit - Commit a transaction to the log | 798 | * gfs2_log_commit - Commit a transaction to the log |
| 784 | * @sdp: the filesystem | 799 | * @sdp: the filesystem |
| @@ -790,7 +805,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | |||
| 790 | void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | 805 | void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
| 791 | { | 806 | { |
| 792 | log_refund(sdp, tr); | 807 | log_refund(sdp, tr); |
| 793 | lops_incore_commit(sdp, tr); | 808 | buf_lo_incore_commit(sdp, tr); |
| 794 | 809 | ||
| 795 | sdp->sd_vfs->s_dirt = 1; | 810 | sdp->sd_vfs->s_dirt = 1; |
| 796 | up_read(&sdp->sd_log_flush_lock); | 811 | up_read(&sdp->sd_log_flush_lock); |
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index fae59d69d01a..4390f6f4047d 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
| @@ -152,21 +152,6 @@ out: | |||
| 152 | unlock_buffer(bd->bd_bh); | 152 | unlock_buffer(bd->bd_bh); |
| 153 | } | 153 | } |
| 154 | 154 | ||
| 155 | static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | ||
| 156 | { | ||
| 157 | struct list_head *head = &tr->tr_list_buf; | ||
| 158 | struct gfs2_bufdata *bd; | ||
| 159 | |||
| 160 | gfs2_log_lock(sdp); | ||
| 161 | while (!list_empty(head)) { | ||
| 162 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); | ||
| 163 | list_del_init(&bd->bd_list_tr); | ||
| 164 | tr->tr_num_buf--; | ||
| 165 | } | ||
| 166 | gfs2_log_unlock(sdp); | ||
| 167 | gfs2_assert_warn(sdp, !tr->tr_num_buf); | ||
| 168 | } | ||
| 169 | |||
| 170 | static void buf_lo_before_commit(struct gfs2_sbd *sdp) | 155 | static void buf_lo_before_commit(struct gfs2_sbd *sdp) |
| 171 | { | 156 | { |
| 172 | struct buffer_head *bh; | 157 | struct buffer_head *bh; |
| @@ -419,8 +404,10 @@ static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | |||
| 419 | blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset)); | 404 | blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset)); |
| 420 | 405 | ||
| 421 | error = gfs2_revoke_add(sdp, blkno, start); | 406 | error = gfs2_revoke_add(sdp, blkno, start); |
| 422 | if (error < 0) | 407 | if (error < 0) { |
| 408 | brelse(bh); | ||
| 423 | return error; | 409 | return error; |
| 410 | } | ||
| 424 | else if (error) | 411 | else if (error) |
| 425 | sdp->sd_found_revokes++; | 412 | sdp->sd_found_revokes++; |
| 426 | 413 | ||
| @@ -737,7 +724,6 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | |||
| 737 | 724 | ||
| 738 | const struct gfs2_log_operations gfs2_buf_lops = { | 725 | const struct gfs2_log_operations gfs2_buf_lops = { |
| 739 | .lo_add = buf_lo_add, | 726 | .lo_add = buf_lo_add, |
| 740 | .lo_incore_commit = buf_lo_incore_commit, | ||
| 741 | .lo_before_commit = buf_lo_before_commit, | 727 | .lo_before_commit = buf_lo_before_commit, |
| 742 | .lo_after_commit = buf_lo_after_commit, | 728 | .lo_after_commit = buf_lo_after_commit, |
| 743 | .lo_before_scan = buf_lo_before_scan, | 729 | .lo_before_scan = buf_lo_before_scan, |
| @@ -763,7 +749,6 @@ const struct gfs2_log_operations gfs2_rg_lops = { | |||
| 763 | 749 | ||
| 764 | const struct gfs2_log_operations gfs2_databuf_lops = { | 750 | const struct gfs2_log_operations gfs2_databuf_lops = { |
| 765 | .lo_add = databuf_lo_add, | 751 | .lo_add = databuf_lo_add, |
| 766 | .lo_incore_commit = buf_lo_incore_commit, | ||
| 767 | .lo_before_commit = databuf_lo_before_commit, | 752 | .lo_before_commit = databuf_lo_before_commit, |
| 768 | .lo_after_commit = databuf_lo_after_commit, | 753 | .lo_after_commit = databuf_lo_after_commit, |
| 769 | .lo_scan_elements = databuf_lo_scan_elements, | 754 | .lo_scan_elements = databuf_lo_scan_elements, |
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 41a00df75587..3c0b2737658a 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
| 4 | * | 4 | * |
| 5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
| @@ -57,15 +57,6 @@ static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | |||
| 57 | le->le_ops->lo_add(sdp, le); | 57 | le->le_ops->lo_add(sdp, le); |
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | static inline void lops_incore_commit(struct gfs2_sbd *sdp, | ||
| 61 | struct gfs2_trans *tr) | ||
| 62 | { | ||
| 63 | int x; | ||
| 64 | for (x = 0; gfs2_log_ops[x]; x++) | ||
| 65 | if (gfs2_log_ops[x]->lo_incore_commit) | ||
| 66 | gfs2_log_ops[x]->lo_incore_commit(sdp, tr); | ||
| 67 | } | ||
| 68 | |||
| 69 | static inline void lops_before_commit(struct gfs2_sbd *sdp) | 60 | static inline void lops_before_commit(struct gfs2_sbd *sdp) |
| 70 | { | 61 | { |
| 71 | int x; | 62 | int x; |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 9c7765c12d62..053e2ebbbd50 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
| @@ -89,6 +89,12 @@ static int __init init_gfs2_fs(void) | |||
| 89 | if (!gfs2_bufdata_cachep) | 89 | if (!gfs2_bufdata_cachep) |
| 90 | goto fail; | 90 | goto fail; |
| 91 | 91 | ||
| 92 | gfs2_rgrpd_cachep = kmem_cache_create("gfs2_rgrpd", | ||
| 93 | sizeof(struct gfs2_rgrpd), | ||
| 94 | 0, 0, NULL); | ||
| 95 | if (!gfs2_rgrpd_cachep) | ||
| 96 | goto fail; | ||
| 97 | |||
| 92 | error = register_filesystem(&gfs2_fs_type); | 98 | error = register_filesystem(&gfs2_fs_type); |
| 93 | if (error) | 99 | if (error) |
| 94 | goto fail; | 100 | goto fail; |
| @@ -108,6 +114,9 @@ fail_unregister: | |||
| 108 | fail: | 114 | fail: |
| 109 | gfs2_glock_exit(); | 115 | gfs2_glock_exit(); |
| 110 | 116 | ||
| 117 | if (gfs2_rgrpd_cachep) | ||
| 118 | kmem_cache_destroy(gfs2_rgrpd_cachep); | ||
| 119 | |||
| 111 | if (gfs2_bufdata_cachep) | 120 | if (gfs2_bufdata_cachep) |
| 112 | kmem_cache_destroy(gfs2_bufdata_cachep); | 121 | kmem_cache_destroy(gfs2_bufdata_cachep); |
| 113 | 122 | ||
| @@ -133,6 +142,7 @@ static void __exit exit_gfs2_fs(void) | |||
| 133 | unregister_filesystem(&gfs2_fs_type); | 142 | unregister_filesystem(&gfs2_fs_type); |
| 134 | unregister_filesystem(&gfs2meta_fs_type); | 143 | unregister_filesystem(&gfs2meta_fs_type); |
| 135 | 144 | ||
| 145 | kmem_cache_destroy(gfs2_rgrpd_cachep); | ||
| 136 | kmem_cache_destroy(gfs2_bufdata_cachep); | 146 | kmem_cache_destroy(gfs2_bufdata_cachep); |
| 137 | kmem_cache_destroy(gfs2_inode_cachep); | 147 | kmem_cache_destroy(gfs2_inode_cachep); |
| 138 | kmem_cache_destroy(gfs2_glock_cachep); | 148 | kmem_cache_destroy(gfs2_glock_cachep); |
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index ac772b6d9dbb..90a04a6e3789 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
| 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
| 4 | * | 4 | * |
| 5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
| @@ -21,7 +21,6 @@ | |||
| 21 | #include <linux/gfs2_ondisk.h> | 21 | #include <linux/gfs2_ondisk.h> |
| 22 | #include <linux/lm_interface.h> | 22 | #include <linux/lm_interface.h> |
| 23 | #include <linux/backing-dev.h> | 23 | #include <linux/backing-dev.h> |
| 24 | #include <linux/pagevec.h> | ||
| 25 | 24 | ||
| 26 | #include "gfs2.h" | 25 | #include "gfs2.h" |
| 27 | #include "incore.h" | 26 | #include "incore.h" |
| @@ -104,11 +103,9 @@ static int gfs2_writepage_common(struct page *page, | |||
| 104 | loff_t i_size = i_size_read(inode); | 103 | loff_t i_size = i_size_read(inode); |
| 105 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | 104 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; |
| 106 | unsigned offset; | 105 | unsigned offset; |
| 107 | int ret = -EIO; | ||
| 108 | 106 | ||
| 109 | if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) | 107 | if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) |
| 110 | goto out; | 108 | goto out; |
| 111 | ret = 0; | ||
| 112 | if (current->journal_info) | 109 | if (current->journal_info) |
| 113 | goto redirty; | 110 | goto redirty; |
| 114 | /* Is the page fully outside i_size? (truncate in progress) */ | 111 | /* Is the page fully outside i_size? (truncate in progress) */ |
| @@ -280,7 +277,7 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, | |||
| 280 | int i; | 277 | int i; |
| 281 | int ret; | 278 | int ret; |
| 282 | 279 | ||
| 283 | ret = gfs2_trans_begin(sdp, nrblocks, 0); | 280 | ret = gfs2_trans_begin(sdp, nrblocks, nrblocks); |
| 284 | if (ret < 0) | 281 | if (ret < 0) |
| 285 | return ret; | 282 | return ret; |
| 286 | 283 | ||
| @@ -510,23 +507,26 @@ static int __gfs2_readpage(void *file, struct page *page) | |||
| 510 | static int gfs2_readpage(struct file *file, struct page *page) | 507 | static int gfs2_readpage(struct file *file, struct page *page) |
| 511 | { | 508 | { |
| 512 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | 509 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); |
| 513 | struct gfs2_holder gh; | 510 | struct gfs2_holder *gh; |
| 514 | int error; | 511 | int error; |
| 515 | 512 | ||
| 516 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); | 513 | gh = gfs2_glock_is_locked_by_me(ip->i_gl); |
| 517 | error = gfs2_glock_nq_atime(&gh); | 514 | if (!gh) { |
| 518 | if (unlikely(error)) { | 515 | gh = kmalloc(sizeof(struct gfs2_holder), GFP_NOFS); |
| 516 | if (!gh) | ||
| 517 | return -ENOBUFS; | ||
| 518 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, gh); | ||
| 519 | unlock_page(page); | 519 | unlock_page(page); |
| 520 | goto out; | 520 | error = gfs2_glock_nq_atime(gh); |
| 521 | if (likely(error != 0)) | ||
| 522 | goto out; | ||
| 523 | return AOP_TRUNCATED_PAGE; | ||
| 521 | } | 524 | } |
| 522 | error = __gfs2_readpage(file, page); | 525 | error = __gfs2_readpage(file, page); |
| 523 | gfs2_glock_dq(&gh); | 526 | gfs2_glock_dq(gh); |
| 524 | out: | 527 | out: |
| 525 | gfs2_holder_uninit(&gh); | 528 | gfs2_holder_uninit(gh); |
| 526 | if (error == GLR_TRYFAILED) { | 529 | kfree(gh); |
| 527 | yield(); | ||
| 528 | return AOP_TRUNCATED_PAGE; | ||
| 529 | } | ||
| 530 | return error; | 530 | return error; |
| 531 | } | 531 | } |
| 532 | 532 | ||
| @@ -648,15 +648,15 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
| 648 | 648 | ||
| 649 | if (alloc_required) { | 649 | if (alloc_required) { |
| 650 | al = gfs2_alloc_get(ip); | 650 | al = gfs2_alloc_get(ip); |
| 651 | if (!al) { | ||
| 652 | error = -ENOMEM; | ||
| 653 | goto out_unlock; | ||
| 654 | } | ||
| 651 | 655 | ||
| 652 | error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 656 | error = gfs2_quota_lock_check(ip); |
| 653 | if (error) | 657 | if (error) |
| 654 | goto out_alloc_put; | 658 | goto out_alloc_put; |
| 655 | 659 | ||
| 656 | error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); | ||
| 657 | if (error) | ||
| 658 | goto out_qunlock; | ||
| 659 | |||
| 660 | al->al_requested = data_blocks + ind_blocks; | 660 | al->al_requested = data_blocks + ind_blocks; |
| 661 | error = gfs2_inplace_reserve(ip); | 661 | error = gfs2_inplace_reserve(ip); |
| 662 | if (error) | 662 | if (error) |
| @@ -828,7 +828,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
| 828 | unsigned int to = from + len; | 828 | unsigned int to = from + len; |
| 829 | int ret; | 829 | int ret; |
| 830 | 830 | ||
| 831 | BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == 0); | 831 | BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL); |
| 832 | 832 | ||
| 833 | ret = gfs2_meta_inode_buffer(ip, &dibh); | 833 | ret = gfs2_meta_inode_buffer(ip, &dibh); |
| 834 | if (unlikely(ret)) { | 834 | if (unlikely(ret)) { |
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c index 793e334d098e..4a5e676b4420 100644 --- a/fs/gfs2/ops_dentry.c +++ b/fs/gfs2/ops_dentry.c | |||
| @@ -43,7 +43,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) | |||
| 43 | struct gfs2_holder d_gh; | 43 | struct gfs2_holder d_gh; |
| 44 | struct gfs2_inode *ip = NULL; | 44 | struct gfs2_inode *ip = NULL; |
| 45 | int error; | 45 | int error; |
| 46 | int had_lock=0; | 46 | int had_lock = 0; |
| 47 | 47 | ||
| 48 | if (inode) { | 48 | if (inode) { |
| 49 | if (is_bad_inode(inode)) | 49 | if (is_bad_inode(inode)) |
| @@ -54,7 +54,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) | |||
| 54 | if (sdp->sd_args.ar_localcaching) | 54 | if (sdp->sd_args.ar_localcaching) |
| 55 | goto valid; | 55 | goto valid; |
| 56 | 56 | ||
| 57 | had_lock = gfs2_glock_is_locked_by_me(dip->i_gl); | 57 | had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); |
| 58 | if (!had_lock) { | 58 | if (!had_lock) { |
| 59 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); | 59 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); |
| 60 | if (error) | 60 | if (error) |
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index 334c7f85351b..990d9f4bc463 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c | |||
| @@ -204,8 +204,6 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, | |||
| 204 | inode = gfs2_inode_lookup(sb, DT_UNKNOWN, | 204 | inode = gfs2_inode_lookup(sb, DT_UNKNOWN, |
| 205 | inum->no_addr, | 205 | inum->no_addr, |
| 206 | 0, 0); | 206 | 0, 0); |
| 207 | if (!inode) | ||
| 208 | goto fail; | ||
| 209 | if (IS_ERR(inode)) { | 207 | if (IS_ERR(inode)) { |
| 210 | error = PTR_ERR(inode); | 208 | error = PTR_ERR(inode); |
| 211 | goto fail; | 209 | goto fail; |
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index f4842f2548cd..e1b7d525a066 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c | |||
| @@ -30,7 +30,6 @@ | |||
| 30 | #include "glock.h" | 30 | #include "glock.h" |
| 31 | #include "glops.h" | 31 | #include "glops.h" |
| 32 | #include "inode.h" | 32 | #include "inode.h" |
| 33 | #include "lm.h" | ||
| 34 | #include "log.h" | 33 | #include "log.h" |
| 35 | #include "meta_io.h" | 34 | #include "meta_io.h" |
| 36 | #include "quota.h" | 35 | #include "quota.h" |
| @@ -39,6 +38,7 @@ | |||
| 39 | #include "util.h" | 38 | #include "util.h" |
| 40 | #include "eaops.h" | 39 | #include "eaops.h" |
| 41 | #include "ops_address.h" | 40 | #include "ops_address.h" |
| 41 | #include "ops_inode.h" | ||
| 42 | 42 | ||
| 43 | /** | 43 | /** |
| 44 | * gfs2_llseek - seek to a location in a file | 44 | * gfs2_llseek - seek to a location in a file |
| @@ -369,12 +369,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
| 369 | if (al == NULL) | 369 | if (al == NULL) |
| 370 | goto out_unlock; | 370 | goto out_unlock; |
| 371 | 371 | ||
| 372 | ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 372 | ret = gfs2_quota_lock_check(ip); |
| 373 | if (ret) | 373 | if (ret) |
| 374 | goto out_alloc_put; | 374 | goto out_alloc_put; |
| 375 | ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); | ||
| 376 | if (ret) | ||
| 377 | goto out_quota_unlock; | ||
| 378 | al->al_requested = data_blocks + ind_blocks; | 375 | al->al_requested = data_blocks + ind_blocks; |
| 379 | ret = gfs2_inplace_reserve(ip); | 376 | ret = gfs2_inplace_reserve(ip); |
| 380 | if (ret) | 377 | if (ret) |
| @@ -596,6 +593,36 @@ static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl) | |||
| 596 | return generic_setlease(file, arg, fl); | 593 | return generic_setlease(file, arg, fl); |
| 597 | } | 594 | } |
| 598 | 595 | ||
| 596 | static int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
| 597 | struct file *file, struct file_lock *fl) | ||
| 598 | { | ||
| 599 | int error = -EIO; | ||
| 600 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 601 | error = sdp->sd_lockstruct.ls_ops->lm_plock_get( | ||
| 602 | sdp->sd_lockstruct.ls_lockspace, name, file, fl); | ||
| 603 | return error; | ||
| 604 | } | ||
| 605 | |||
| 606 | static int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
| 607 | struct file *file, int cmd, struct file_lock *fl) | ||
| 608 | { | ||
| 609 | int error = -EIO; | ||
| 610 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 611 | error = sdp->sd_lockstruct.ls_ops->lm_plock( | ||
| 612 | sdp->sd_lockstruct.ls_lockspace, name, file, cmd, fl); | ||
| 613 | return error; | ||
| 614 | } | ||
| 615 | |||
| 616 | static int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name, | ||
| 617 | struct file *file, struct file_lock *fl) | ||
| 618 | { | ||
| 619 | int error = -EIO; | ||
| 620 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 621 | error = sdp->sd_lockstruct.ls_ops->lm_punlock( | ||
| 622 | sdp->sd_lockstruct.ls_lockspace, name, file, fl); | ||
| 623 | return error; | ||
| 624 | } | ||
| 625 | |||
| 599 | /** | 626 | /** |
| 600 | * gfs2_lock - acquire/release a posix lock on a file | 627 | * gfs2_lock - acquire/release a posix lock on a file |
| 601 | * @file: the file pointer | 628 | * @file: the file pointer |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 4bee6aa845e4..ef9c6c4f80f6 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
| 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
| 4 | * | 4 | * |
| 5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
| @@ -26,7 +26,6 @@ | |||
| 26 | #include "glock.h" | 26 | #include "glock.h" |
| 27 | #include "glops.h" | 27 | #include "glops.h" |
| 28 | #include "inode.h" | 28 | #include "inode.h" |
| 29 | #include "lm.h" | ||
| 30 | #include "mount.h" | 29 | #include "mount.h" |
| 31 | #include "ops_fstype.h" | 30 | #include "ops_fstype.h" |
| 32 | #include "ops_dentry.h" | 31 | #include "ops_dentry.h" |
| @@ -363,6 +362,13 @@ static int map_journal_extents(struct gfs2_sbd *sdp) | |||
| 363 | return rc; | 362 | return rc; |
| 364 | } | 363 | } |
| 365 | 364 | ||
| 365 | static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) | ||
| 366 | { | ||
| 367 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 368 | sdp->sd_lockstruct.ls_ops->lm_others_may_mount( | ||
| 369 | sdp->sd_lockstruct.ls_lockspace); | ||
| 370 | } | ||
| 371 | |||
| 366 | static int init_journal(struct gfs2_sbd *sdp, int undo) | 372 | static int init_journal(struct gfs2_sbd *sdp, int undo) |
| 367 | { | 373 | { |
| 368 | struct gfs2_holder ji_gh; | 374 | struct gfs2_holder ji_gh; |
| @@ -542,7 +548,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
| 542 | } | 548 | } |
| 543 | ip = GFS2_I(sdp->sd_rindex); | 549 | ip = GFS2_I(sdp->sd_rindex); |
| 544 | set_bit(GLF_STICKY, &ip->i_gl->gl_flags); | 550 | set_bit(GLF_STICKY, &ip->i_gl->gl_flags); |
| 545 | sdp->sd_rindex_vn = ip->i_gl->gl_vn - 1; | 551 | sdp->sd_rindex_uptodate = 0; |
| 546 | 552 | ||
| 547 | /* Read in the quota inode */ | 553 | /* Read in the quota inode */ |
| 548 | sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota"); | 554 | sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota"); |
| @@ -705,6 +711,69 @@ fail: | |||
| 705 | } | 711 | } |
| 706 | 712 | ||
| 707 | /** | 713 | /** |
| 714 | * gfs2_lm_mount - mount a locking protocol | ||
| 715 | * @sdp: the filesystem | ||
| 716 | * @args: mount arguments | ||
| 717 | * @silent: if 1, don't complain if the FS isn't a GFS2 fs | ||
| 718 | * | ||
| 719 | * Returns: errno | ||
| 720 | */ | ||
| 721 | |||
| 722 | static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) | ||
| 723 | { | ||
| 724 | char *proto = sdp->sd_proto_name; | ||
| 725 | char *table = sdp->sd_table_name; | ||
| 726 | int flags = LM_MFLAG_CONV_NODROP; | ||
| 727 | int error; | ||
| 728 | |||
| 729 | if (sdp->sd_args.ar_spectator) | ||
| 730 | flags |= LM_MFLAG_SPECTATOR; | ||
| 731 | |||
| 732 | fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table); | ||
| 733 | |||
| 734 | error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata, | ||
| 735 | gfs2_glock_cb, sdp, | ||
| 736 | GFS2_MIN_LVB_SIZE, flags, | ||
| 737 | &sdp->sd_lockstruct, &sdp->sd_kobj); | ||
| 738 | if (error) { | ||
| 739 | fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n", | ||
| 740 | proto, table, sdp->sd_args.ar_hostdata); | ||
| 741 | goto out; | ||
| 742 | } | ||
| 743 | |||
| 744 | if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) || | ||
| 745 | gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) || | ||
| 746 | gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >= | ||
| 747 | GFS2_MIN_LVB_SIZE)) { | ||
| 748 | gfs2_unmount_lockproto(&sdp->sd_lockstruct); | ||
| 749 | goto out; | ||
| 750 | } | ||
| 751 | |||
| 752 | if (sdp->sd_args.ar_spectator) | ||
| 753 | snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table); | ||
| 754 | else | ||
| 755 | snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table, | ||
| 756 | sdp->sd_lockstruct.ls_jid); | ||
| 757 | |||
| 758 | fs_info(sdp, "Joined cluster. Now mounting FS...\n"); | ||
| 759 | |||
| 760 | if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) && | ||
| 761 | !sdp->sd_args.ar_ignore_local_fs) { | ||
| 762 | sdp->sd_args.ar_localflocks = 1; | ||
| 763 | sdp->sd_args.ar_localcaching = 1; | ||
| 764 | } | ||
| 765 | |||
| 766 | out: | ||
| 767 | return error; | ||
| 768 | } | ||
| 769 | |||
| 770 | void gfs2_lm_unmount(struct gfs2_sbd *sdp) | ||
| 771 | { | ||
| 772 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 773 | gfs2_unmount_lockproto(&sdp->sd_lockstruct); | ||
| 774 | } | ||
| 775 | |||
| 776 | /** | ||
| 708 | * fill_super - Read in superblock | 777 | * fill_super - Read in superblock |
| 709 | * @sb: The VFS superblock | 778 | * @sb: The VFS superblock |
| 710 | * @data: Mount options | 779 | * @data: Mount options |
| @@ -874,7 +943,6 @@ static struct super_block* get_gfs2_sb(const char *dev_name) | |||
| 874 | { | 943 | { |
| 875 | struct kstat stat; | 944 | struct kstat stat; |
| 876 | struct nameidata nd; | 945 | struct nameidata nd; |
| 877 | struct file_system_type *fstype; | ||
| 878 | struct super_block *sb = NULL, *s; | 946 | struct super_block *sb = NULL, *s; |
| 879 | int error; | 947 | int error; |
| 880 | 948 | ||
| @@ -886,8 +954,7 @@ static struct super_block* get_gfs2_sb(const char *dev_name) | |||
| 886 | } | 954 | } |
| 887 | error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat); | 955 | error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat); |
| 888 | 956 | ||
| 889 | fstype = get_fs_type("gfs2"); | 957 | list_for_each_entry(s, &gfs2_fs_type.fs_supers, s_instances) { |
| 890 | list_for_each_entry(s, &fstype->fs_supers, s_instances) { | ||
| 891 | if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || | 958 | if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || |
| 892 | (S_ISDIR(stat.mode) && | 959 | (S_ISDIR(stat.mode) && |
| 893 | s == nd.path.dentry->d_inode->i_sb)) { | 960 | s == nd.path.dentry->d_inode->i_sb)) { |
| @@ -931,7 +998,6 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, | |||
| 931 | error = PTR_ERR(new); | 998 | error = PTR_ERR(new); |
| 932 | goto error; | 999 | goto error; |
| 933 | } | 1000 | } |
| 934 | module_put(fs_type->owner); | ||
| 935 | new->s_flags = flags; | 1001 | new->s_flags = flags; |
| 936 | strlcpy(new->s_id, sb->s_id, sizeof(new->s_id)); | 1002 | strlcpy(new->s_id, sb->s_id, sizeof(new->s_id)); |
| 937 | sb_set_blocksize(new, sb->s_blocksize); | 1003 | sb_set_blocksize(new, sb->s_blocksize); |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index e87412902bed..2686ad4c0029 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
| @@ -200,15 +200,15 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
| 200 | 200 | ||
| 201 | if (alloc_required) { | 201 | if (alloc_required) { |
| 202 | struct gfs2_alloc *al = gfs2_alloc_get(dip); | 202 | struct gfs2_alloc *al = gfs2_alloc_get(dip); |
| 203 | if (!al) { | ||
| 204 | error = -ENOMEM; | ||
| 205 | goto out_gunlock; | ||
| 206 | } | ||
| 203 | 207 | ||
| 204 | error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 208 | error = gfs2_quota_lock_check(dip); |
| 205 | if (error) | 209 | if (error) |
| 206 | goto out_alloc; | 210 | goto out_alloc; |
| 207 | 211 | ||
| 208 | error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); | ||
| 209 | if (error) | ||
| 210 | goto out_gunlock_q; | ||
| 211 | |||
| 212 | al->al_requested = sdp->sd_max_dirres; | 212 | al->al_requested = sdp->sd_max_dirres; |
| 213 | 213 | ||
| 214 | error = gfs2_inplace_reserve(dip); | 214 | error = gfs2_inplace_reserve(dip); |
| @@ -716,15 +716,15 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 716 | 716 | ||
| 717 | if (alloc_required) { | 717 | if (alloc_required) { |
| 718 | struct gfs2_alloc *al = gfs2_alloc_get(ndip); | 718 | struct gfs2_alloc *al = gfs2_alloc_get(ndip); |
| 719 | if (!al) { | ||
| 720 | error = -ENOMEM; | ||
| 721 | goto out_gunlock; | ||
| 722 | } | ||
| 719 | 723 | ||
| 720 | error = gfs2_quota_lock(ndip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | 724 | error = gfs2_quota_lock_check(ndip); |
| 721 | if (error) | 725 | if (error) |
| 722 | goto out_alloc; | 726 | goto out_alloc; |
| 723 | 727 | ||
| 724 | error = gfs2_quota_check(ndip, ndip->i_inode.i_uid, ndip->i_inode.i_gid); | ||
| 725 | if (error) | ||
| 726 | goto out_gunlock_q; | ||
| 727 | |||
| 728 | al->al_requested = sdp->sd_max_dirres; | 728 | al->al_requested = sdp->sd_max_dirres; |
| 729 | 729 | ||
| 730 | error = gfs2_inplace_reserve(ndip); | 730 | error = gfs2_inplace_reserve(ndip); |
| @@ -898,7 +898,7 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd) | |||
| 898 | int error; | 898 | int error; |
| 899 | int unlock = 0; | 899 | int unlock = 0; |
| 900 | 900 | ||
| 901 | if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) { | 901 | if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { |
| 902 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); | 902 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); |
| 903 | if (error) | 903 | if (error) |
| 904 | return error; | 904 | return error; |
| @@ -953,7 +953,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) | |||
| 953 | if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) | 953 | if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) |
| 954 | ogid = ngid = NO_QUOTA_CHANGE; | 954 | ogid = ngid = NO_QUOTA_CHANGE; |
| 955 | 955 | ||
| 956 | gfs2_alloc_get(ip); | 956 | if (!gfs2_alloc_get(ip)) |
| 957 | return -ENOMEM; | ||
| 957 | 958 | ||
| 958 | error = gfs2_quota_lock(ip, nuid, ngid); | 959 | error = gfs2_quota_lock(ip, nuid, ngid); |
| 959 | if (error) | 960 | if (error) |
| @@ -981,8 +982,9 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) | |||
| 981 | brelse(dibh); | 982 | brelse(dibh); |
| 982 | 983 | ||
| 983 | if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { | 984 | if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { |
| 984 | gfs2_quota_change(ip, -ip->i_di.di_blocks, ouid, ogid); | 985 | u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); |
| 985 | gfs2_quota_change(ip, ip->i_di.di_blocks, nuid, ngid); | 986 | gfs2_quota_change(ip, -blocks, ouid, ogid); |
| 987 | gfs2_quota_change(ip, blocks, nuid, ngid); | ||
| 986 | } | 988 | } |
| 987 | 989 | ||
| 988 | out_end_trans: | 990 | out_end_trans: |
| @@ -1064,7 +1066,7 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
| 1064 | int error; | 1066 | int error; |
| 1065 | int unlock = 0; | 1067 | int unlock = 0; |
| 1066 | 1068 | ||
| 1067 | if (gfs2_glock_is_locked_by_me(ip->i_gl) == 0) { | 1069 | if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { |
| 1068 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); | 1070 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); |
| 1069 | if (error) | 1071 | if (error) |
| 1070 | return error; | 1072 | return error; |
| @@ -1148,16 +1150,6 @@ const struct inode_operations gfs2_file_iops = { | |||
| 1148 | .removexattr = gfs2_removexattr, | 1150 | .removexattr = gfs2_removexattr, |
| 1149 | }; | 1151 | }; |
| 1150 | 1152 | ||
| 1151 | const struct inode_operations gfs2_dev_iops = { | ||
| 1152 | .permission = gfs2_permission, | ||
| 1153 | .setattr = gfs2_setattr, | ||
| 1154 | .getattr = gfs2_getattr, | ||
| 1155 | .setxattr = gfs2_setxattr, | ||
| 1156 | .getxattr = gfs2_getxattr, | ||
| 1157 | .listxattr = gfs2_listxattr, | ||
| 1158 | .removexattr = gfs2_removexattr, | ||
| 1159 | }; | ||
| 1160 | |||
| 1161 | const struct inode_operations gfs2_dir_iops = { | 1153 | const struct inode_operations gfs2_dir_iops = { |
| 1162 | .create = gfs2_create, | 1154 | .create = gfs2_create, |
| 1163 | .lookup = gfs2_lookup, | 1155 | .lookup = gfs2_lookup, |
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h index fd8cee231e1d..14b4b797622a 100644 --- a/fs/gfs2/ops_inode.h +++ b/fs/gfs2/ops_inode.h | |||
| @@ -15,7 +15,6 @@ | |||
| 15 | extern const struct inode_operations gfs2_file_iops; | 15 | extern const struct inode_operations gfs2_file_iops; |
| 16 | extern const struct inode_operations gfs2_dir_iops; | 16 | extern const struct inode_operations gfs2_dir_iops; |
| 17 | extern const struct inode_operations gfs2_symlink_iops; | 17 | extern const struct inode_operations gfs2_symlink_iops; |
| 18 | extern const struct inode_operations gfs2_dev_iops; | ||
| 19 | extern const struct file_operations gfs2_file_fops; | 18 | extern const struct file_operations gfs2_file_fops; |
| 20 | extern const struct file_operations gfs2_dir_fops; | 19 | extern const struct file_operations gfs2_dir_fops; |
| 21 | extern const struct file_operations gfs2_file_fops_nolock; | 20 | extern const struct file_operations gfs2_file_fops_nolock; |
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 5e524217944a..2278c68b7e35 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c | |||
| @@ -25,7 +25,6 @@ | |||
| 25 | #include "incore.h" | 25 | #include "incore.h" |
| 26 | #include "glock.h" | 26 | #include "glock.h" |
| 27 | #include "inode.h" | 27 | #include "inode.h" |
| 28 | #include "lm.h" | ||
| 29 | #include "log.h" | 28 | #include "log.h" |
| 30 | #include "mount.h" | 29 | #include "mount.h" |
| 31 | #include "ops_super.h" | 30 | #include "ops_super.h" |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index a08dabd6ce90..56aaf915c59a 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
| @@ -94,7 +94,7 @@ static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id, | |||
| 94 | struct gfs2_quota_data *qd; | 94 | struct gfs2_quota_data *qd; |
| 95 | int error; | 95 | int error; |
| 96 | 96 | ||
| 97 | qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_KERNEL); | 97 | qd = kzalloc(sizeof(struct gfs2_quota_data), GFP_NOFS); |
| 98 | if (!qd) | 98 | if (!qd) |
| 99 | return -ENOMEM; | 99 | return -ENOMEM; |
| 100 | 100 | ||
| @@ -616,16 +616,9 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, | |||
| 616 | s64 value; | 616 | s64 value; |
| 617 | int err = -EIO; | 617 | int err = -EIO; |
| 618 | 618 | ||
| 619 | if (gfs2_is_stuffed(ip)) { | 619 | if (gfs2_is_stuffed(ip)) |
| 620 | struct gfs2_alloc *al = NULL; | ||
| 621 | al = gfs2_alloc_get(ip); | ||
| 622 | /* just request 1 blk */ | ||
| 623 | al->al_requested = 1; | ||
| 624 | gfs2_inplace_reserve(ip); | ||
| 625 | gfs2_unstuff_dinode(ip, NULL); | 620 | gfs2_unstuff_dinode(ip, NULL); |
| 626 | gfs2_inplace_release(ip); | 621 | |
| 627 | gfs2_alloc_put(ip); | ||
| 628 | } | ||
| 629 | page = grab_cache_page(mapping, index); | 622 | page = grab_cache_page(mapping, index); |
| 630 | if (!page) | 623 | if (!page) |
| 631 | return -ENOMEM; | 624 | return -ENOMEM; |
| @@ -690,14 +683,14 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
| 690 | unsigned int qx, x; | 683 | unsigned int qx, x; |
| 691 | struct gfs2_quota_data *qd; | 684 | struct gfs2_quota_data *qd; |
| 692 | loff_t offset; | 685 | loff_t offset; |
| 693 | unsigned int nalloc = 0; | 686 | unsigned int nalloc = 0, blocks; |
| 694 | struct gfs2_alloc *al = NULL; | 687 | struct gfs2_alloc *al = NULL; |
| 695 | int error; | 688 | int error; |
| 696 | 689 | ||
| 697 | gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), | 690 | gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), |
| 698 | &data_blocks, &ind_blocks); | 691 | &data_blocks, &ind_blocks); |
| 699 | 692 | ||
| 700 | ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_KERNEL); | 693 | ghs = kcalloc(num_qd, sizeof(struct gfs2_holder), GFP_NOFS); |
| 701 | if (!ghs) | 694 | if (!ghs) |
| 702 | return -ENOMEM; | 695 | return -ENOMEM; |
| 703 | 696 | ||
| @@ -727,30 +720,33 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
| 727 | nalloc++; | 720 | nalloc++; |
| 728 | } | 721 | } |
| 729 | 722 | ||
| 730 | if (nalloc) { | 723 | al = gfs2_alloc_get(ip); |
| 731 | al = gfs2_alloc_get(ip); | 724 | if (!al) { |
| 725 | error = -ENOMEM; | ||
| 726 | goto out_gunlock; | ||
| 727 | } | ||
| 728 | /* | ||
| 729 | * 1 blk for unstuffing inode if stuffed. We add this extra | ||
| 730 | * block to the reservation unconditionally. If the inode | ||
| 731 | * doesn't need unstuffing, the block will be released to the | ||
| 732 | * rgrp since it won't be allocated during the transaction | ||
| 733 | */ | ||
| 734 | al->al_requested = 1; | ||
| 735 | /* +1 in the end for block requested above for unstuffing */ | ||
| 736 | blocks = num_qd * data_blocks + RES_DINODE + num_qd + 1; | ||
| 732 | 737 | ||
| 733 | al->al_requested = nalloc * (data_blocks + ind_blocks); | 738 | if (nalloc) |
| 739 | al->al_requested += nalloc * (data_blocks + ind_blocks); | ||
| 740 | error = gfs2_inplace_reserve(ip); | ||
| 741 | if (error) | ||
| 742 | goto out_alloc; | ||
| 734 | 743 | ||
| 735 | error = gfs2_inplace_reserve(ip); | 744 | if (nalloc) |
| 736 | if (error) | 745 | blocks += al->al_rgd->rd_length + nalloc * ind_blocks + RES_STATFS; |
| 737 | goto out_alloc; | 746 | |
| 738 | 747 | error = gfs2_trans_begin(sdp, blocks, 0); | |
| 739 | error = gfs2_trans_begin(sdp, | 748 | if (error) |
| 740 | al->al_rgd->rd_length + | 749 | goto out_ipres; |
| 741 | num_qd * data_blocks + | ||
| 742 | nalloc * ind_blocks + | ||
| 743 | RES_DINODE + num_qd + | ||
| 744 | RES_STATFS, 0); | ||
| 745 | if (error) | ||
| 746 | goto out_ipres; | ||
| 747 | } else { | ||
| 748 | error = gfs2_trans_begin(sdp, | ||
| 749 | num_qd * data_blocks + | ||
| 750 | RES_DINODE + num_qd, 0); | ||
| 751 | if (error) | ||
| 752 | goto out_gunlock; | ||
| 753 | } | ||
| 754 | 750 | ||
| 755 | for (x = 0; x < num_qd; x++) { | 751 | for (x = 0; x < num_qd; x++) { |
| 756 | qd = qda[x]; | 752 | qd = qda[x]; |
| @@ -769,11 +765,9 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
| 769 | out_end_trans: | 765 | out_end_trans: |
| 770 | gfs2_trans_end(sdp); | 766 | gfs2_trans_end(sdp); |
| 771 | out_ipres: | 767 | out_ipres: |
| 772 | if (nalloc) | 768 | gfs2_inplace_release(ip); |
| 773 | gfs2_inplace_release(ip); | ||
| 774 | out_alloc: | 769 | out_alloc: |
| 775 | if (nalloc) | 770 | gfs2_alloc_put(ip); |
| 776 | gfs2_alloc_put(ip); | ||
| 777 | out_gunlock: | 771 | out_gunlock: |
| 778 | gfs2_glock_dq_uninit(&i_gh); | 772 | gfs2_glock_dq_uninit(&i_gh); |
| 779 | out: | 773 | out: |
| @@ -1124,12 +1118,12 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) | |||
| 1124 | error = -ENOMEM; | 1118 | error = -ENOMEM; |
| 1125 | 1119 | ||
| 1126 | sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks, | 1120 | sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks, |
| 1127 | sizeof(unsigned char *), GFP_KERNEL); | 1121 | sizeof(unsigned char *), GFP_NOFS); |
| 1128 | if (!sdp->sd_quota_bitmap) | 1122 | if (!sdp->sd_quota_bitmap) |
| 1129 | return error; | 1123 | return error; |
| 1130 | 1124 | ||
| 1131 | for (x = 0; x < sdp->sd_quota_chunks; x++) { | 1125 | for (x = 0; x < sdp->sd_quota_chunks; x++) { |
| 1132 | sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL); | 1126 | sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_NOFS); |
| 1133 | if (!sdp->sd_quota_bitmap[x]) | 1127 | if (!sdp->sd_quota_bitmap[x]) |
| 1134 | goto fail; | 1128 | goto fail; |
| 1135 | } | 1129 | } |
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index a8be1417051f..3b7f4b0e5dfe 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h | |||
| @@ -32,4 +32,21 @@ int gfs2_quota_init(struct gfs2_sbd *sdp); | |||
| 32 | void gfs2_quota_scan(struct gfs2_sbd *sdp); | 32 | void gfs2_quota_scan(struct gfs2_sbd *sdp); |
| 33 | void gfs2_quota_cleanup(struct gfs2_sbd *sdp); | 33 | void gfs2_quota_cleanup(struct gfs2_sbd *sdp); |
| 34 | 34 | ||
| 35 | static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) | ||
| 36 | { | ||
| 37 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 38 | int ret; | ||
| 39 | if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) | ||
| 40 | return 0; | ||
| 41 | ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); | ||
| 42 | if (ret) | ||
| 43 | return ret; | ||
| 44 | if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) | ||
| 45 | return 0; | ||
| 46 | ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); | ||
| 47 | if (ret) | ||
| 48 | gfs2_quota_unlock(ip); | ||
| 49 | return ret; | ||
| 50 | } | ||
| 51 | |||
| 35 | #endif /* __QUOTA_DOT_H__ */ | 52 | #endif /* __QUOTA_DOT_H__ */ |
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 6fb07d67ca8a..2888e4b4b1c5 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include "bmap.h" | 20 | #include "bmap.h" |
| 21 | #include "glock.h" | 21 | #include "glock.h" |
| 22 | #include "glops.h" | 22 | #include "glops.h" |
| 23 | #include "lm.h" | ||
| 24 | #include "lops.h" | 23 | #include "lops.h" |
| 25 | #include "meta_io.h" | 24 | #include "meta_io.h" |
| 26 | #include "recovery.h" | 25 | #include "recovery.h" |
| @@ -69,7 +68,7 @@ int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) | |||
| 69 | return 0; | 68 | return 0; |
| 70 | } | 69 | } |
| 71 | 70 | ||
| 72 | rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_KERNEL); | 71 | rr = kmalloc(sizeof(struct gfs2_revoke_replay), GFP_NOFS); |
| 73 | if (!rr) | 72 | if (!rr) |
| 74 | return -ENOMEM; | 73 | return -ENOMEM; |
| 75 | 74 | ||
| @@ -150,7 +149,7 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk, | |||
| 150 | struct gfs2_log_header_host *head) | 149 | struct gfs2_log_header_host *head) |
| 151 | { | 150 | { |
| 152 | struct buffer_head *bh; | 151 | struct buffer_head *bh; |
| 153 | struct gfs2_log_header_host lh; | 152 | struct gfs2_log_header_host uninitialized_var(lh); |
| 154 | const u32 nothing = 0; | 153 | const u32 nothing = 0; |
| 155 | u32 hash; | 154 | u32 hash; |
| 156 | int error; | 155 | int error; |
| @@ -425,6 +424,16 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea | |||
| 425 | return error; | 424 | return error; |
| 426 | } | 425 | } |
| 427 | 426 | ||
| 427 | |||
| 428 | static void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, | ||
| 429 | unsigned int message) | ||
| 430 | { | ||
| 431 | if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) | ||
| 432 | sdp->sd_lockstruct.ls_ops->lm_recovery_done( | ||
| 433 | sdp->sd_lockstruct.ls_lockspace, jid, message); | ||
| 434 | } | ||
| 435 | |||
| 436 | |||
| 428 | /** | 437 | /** |
| 429 | * gfs2_recover_journal - recovery a given journal | 438 | * gfs2_recover_journal - recovery a given journal |
| 430 | * @jd: the struct gfs2_jdesc describing the journal | 439 | * @jd: the struct gfs2_jdesc describing the journal |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 3552110b2e5f..7e8f0b1d6c6e 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
| 3 | * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
| 4 | * | 4 | * |
| 5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <linux/fs.h> | 14 | #include <linux/fs.h> |
| 15 | #include <linux/gfs2_ondisk.h> | 15 | #include <linux/gfs2_ondisk.h> |
| 16 | #include <linux/lm_interface.h> | 16 | #include <linux/lm_interface.h> |
| 17 | #include <linux/prefetch.h> | ||
| 17 | 18 | ||
| 18 | #include "gfs2.h" | 19 | #include "gfs2.h" |
| 19 | #include "incore.h" | 20 | #include "incore.h" |
| @@ -33,6 +34,16 @@ | |||
| 33 | #define BFITNOENT ((u32)~0) | 34 | #define BFITNOENT ((u32)~0) |
| 34 | #define NO_BLOCK ((u64)~0) | 35 | #define NO_BLOCK ((u64)~0) |
| 35 | 36 | ||
| 37 | #if BITS_PER_LONG == 32 | ||
| 38 | #define LBITMASK (0x55555555UL) | ||
| 39 | #define LBITSKIP55 (0x55555555UL) | ||
| 40 | #define LBITSKIP00 (0x00000000UL) | ||
| 41 | #else | ||
| 42 | #define LBITMASK (0x5555555555555555UL) | ||
| 43 | #define LBITSKIP55 (0x5555555555555555UL) | ||
| 44 | #define LBITSKIP00 (0x0000000000000000UL) | ||
| 45 | #endif | ||
| 46 | |||
| 36 | /* | 47 | /* |
| 37 | * These routines are used by the resource group routines (rgrp.c) | 48 | * These routines are used by the resource group routines (rgrp.c) |
| 38 | * to keep track of block allocation. Each block is represented by two | 49 | * to keep track of block allocation. Each block is represented by two |
| @@ -53,7 +64,8 @@ static const char valid_change[16] = { | |||
| 53 | }; | 64 | }; |
| 54 | 65 | ||
| 55 | static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, | 66 | static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, |
| 56 | unsigned char old_state, unsigned char new_state); | 67 | unsigned char old_state, unsigned char new_state, |
| 68 | unsigned int *n); | ||
| 57 | 69 | ||
| 58 | /** | 70 | /** |
| 59 | * gfs2_setbit - Set a bit in the bitmaps | 71 | * gfs2_setbit - Set a bit in the bitmaps |
| @@ -64,26 +76,32 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, | |||
| 64 | * | 76 | * |
| 65 | */ | 77 | */ |
| 66 | 78 | ||
| 67 | static void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, | 79 | static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, |
| 68 | unsigned int buflen, u32 block, | 80 | unsigned char *buf2, unsigned int offset, |
| 69 | unsigned char new_state) | 81 | unsigned int buflen, u32 block, |
| 82 | unsigned char new_state) | ||
| 70 | { | 83 | { |
| 71 | unsigned char *byte, *end, cur_state; | 84 | unsigned char *byte1, *byte2, *end, cur_state; |
| 72 | unsigned int bit; | 85 | const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; |
| 73 | 86 | ||
| 74 | byte = buffer + (block / GFS2_NBBY); | 87 | byte1 = buf1 + offset + (block / GFS2_NBBY); |
| 75 | bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; | 88 | end = buf1 + offset + buflen; |
| 76 | end = buffer + buflen; | ||
| 77 | 89 | ||
| 78 | gfs2_assert(rgd->rd_sbd, byte < end); | 90 | BUG_ON(byte1 >= end); |
| 79 | 91 | ||
| 80 | cur_state = (*byte >> bit) & GFS2_BIT_MASK; | 92 | cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; |
| 81 | 93 | ||
| 82 | if (valid_change[new_state * 4 + cur_state]) { | 94 | if (unlikely(!valid_change[new_state * 4 + cur_state])) { |
| 83 | *byte ^= cur_state << bit; | ||
| 84 | *byte |= new_state << bit; | ||
| 85 | } else | ||
| 86 | gfs2_consist_rgrpd(rgd); | 95 | gfs2_consist_rgrpd(rgd); |
| 96 | return; | ||
| 97 | } | ||
| 98 | *byte1 ^= (cur_state ^ new_state) << bit; | ||
| 99 | |||
| 100 | if (buf2) { | ||
| 101 | byte2 = buf2 + offset + (block / GFS2_NBBY); | ||
| 102 | cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; | ||
| 103 | *byte2 ^= (cur_state ^ new_state) << bit; | ||
| 104 | } | ||
| 87 | } | 105 | } |
| 88 | 106 | ||
| 89 | /** | 107 | /** |
| @@ -94,10 +112,12 @@ static void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, | |||
| 94 | * | 112 | * |
| 95 | */ | 113 | */ |
| 96 | 114 | ||
| 97 | static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, | 115 | static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, |
| 98 | unsigned int buflen, u32 block) | 116 | const unsigned char *buffer, |
| 117 | unsigned int buflen, u32 block) | ||
| 99 | { | 118 | { |
| 100 | unsigned char *byte, *end, cur_state; | 119 | const unsigned char *byte, *end; |
| 120 | unsigned char cur_state; | ||
| 101 | unsigned int bit; | 121 | unsigned int bit; |
| 102 | 122 | ||
| 103 | byte = buffer + (block / GFS2_NBBY); | 123 | byte = buffer + (block / GFS2_NBBY); |
| @@ -126,47 +146,66 @@ static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, | |||
| 126 | * Return: the block number (bitmap buffer scope) that was found | 146 | * Return: the block number (bitmap buffer scope) that was found |
| 127 | */ | 147 | */ |
| 128 | 148 | ||
| 129 | static u32 gfs2_bitfit(unsigned char *buffer, unsigned int buflen, u32 goal, | 149 | static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal, |
| 130 | unsigned char old_state) | 150 | u8 old_state) |
| 131 | { | 151 | { |
| 132 | unsigned char *byte; | 152 | const u8 *byte, *start, *end; |
| 133 | u32 blk = goal; | 153 | int bit, startbit; |
| 134 | unsigned int bit, bitlong; | 154 | u32 g1, g2, misaligned; |
| 135 | unsigned long *plong, plong55; | 155 | unsigned long *plong; |
| 136 | 156 | unsigned long lskipval; | |
| 137 | byte = buffer + (goal / GFS2_NBBY); | 157 | |
| 138 | plong = (unsigned long *)(buffer + (goal / GFS2_NBBY)); | 158 | lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55; |
| 139 | bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; | 159 | g1 = (goal / GFS2_NBBY); |
| 140 | bitlong = bit; | 160 | start = buffer + g1; |
| 141 | #if BITS_PER_LONG == 32 | 161 | byte = start; |
| 142 | plong55 = 0x55555555; | 162 | end = buffer + buflen; |
| 143 | #else | 163 | g2 = ALIGN(g1, sizeof(unsigned long)); |
| 144 | plong55 = 0x5555555555555555; | 164 | plong = (unsigned long *)(buffer + g2); |
| 145 | #endif | 165 | startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; |
| 146 | while (byte < buffer + buflen) { | 166 | misaligned = g2 - g1; |
| 147 | 167 | if (!misaligned) | |
| 148 | if (bitlong == 0 && old_state == 0 && *plong == plong55) { | 168 | goto ulong_aligned; |
| 149 | plong++; | 169 | /* parse the bitmap a byte at a time */ |
| 150 | byte += sizeof(unsigned long); | 170 | misaligned: |
| 151 | blk += sizeof(unsigned long) * GFS2_NBBY; | 171 | while (byte < end) { |
| 152 | continue; | 172 | if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) { |
| 173 | return goal + | ||
| 174 | (((byte - start) * GFS2_NBBY) + | ||
| 175 | ((bit - startbit) >> 1)); | ||
| 153 | } | 176 | } |
| 154 | if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) | ||
| 155 | return blk; | ||
| 156 | bit += GFS2_BIT_SIZE; | 177 | bit += GFS2_BIT_SIZE; |
| 157 | if (bit >= 8) { | 178 | if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) { |
| 158 | bit = 0; | 179 | bit = 0; |
| 159 | byte++; | 180 | byte++; |
| 181 | misaligned--; | ||
| 182 | if (!misaligned) { | ||
| 183 | plong = (unsigned long *)byte; | ||
| 184 | goto ulong_aligned; | ||
| 185 | } | ||
| 160 | } | 186 | } |
| 161 | bitlong += GFS2_BIT_SIZE; | ||
| 162 | if (bitlong >= sizeof(unsigned long) * 8) { | ||
| 163 | bitlong = 0; | ||
| 164 | plong++; | ||
| 165 | } | ||
| 166 | |||
| 167 | blk++; | ||
| 168 | } | 187 | } |
| 188 | return BFITNOENT; | ||
| 169 | 189 | ||
| 190 | /* parse the bitmap an unsigned long at a time */ | ||
| 191 | ulong_aligned: | ||
| 192 | /* Stop at "end - 1" or else prefetch can go past the end and segfault. | ||
| 193 | We could "if" it but we'd lose some of the performance gained. | ||
| 194 | This way will only slow down searching the very last 4/8 bytes | ||
| 195 | depending on architecture. I've experimented with several ways | ||
| 196 | of writing this section such as using an else before the goto | ||
| 197 | but this one seems to be the fastest. */ | ||
| 198 | while ((unsigned char *)plong < end - 1) { | ||
| 199 | prefetch(plong + 1); | ||
| 200 | if (((*plong) & LBITMASK) != lskipval) | ||
| 201 | break; | ||
| 202 | plong++; | ||
| 203 | } | ||
| 204 | if ((unsigned char *)plong < end) { | ||
| 205 | byte = (const u8 *)plong; | ||
| 206 | misaligned += sizeof(unsigned long) - 1; | ||
| 207 | goto misaligned; | ||
| 208 | } | ||
| 170 | return BFITNOENT; | 209 | return BFITNOENT; |
| 171 | } | 210 | } |
| 172 | 211 | ||
| @@ -179,14 +218,14 @@ static u32 gfs2_bitfit(unsigned char *buffer, unsigned int buflen, u32 goal, | |||
| 179 | * Returns: The number of bits | 218 | * Returns: The number of bits |
| 180 | */ | 219 | */ |
| 181 | 220 | ||
| 182 | static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, unsigned char *buffer, | 221 | static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer, |
| 183 | unsigned int buflen, unsigned char state) | 222 | unsigned int buflen, u8 state) |
| 184 | { | 223 | { |
| 185 | unsigned char *byte = buffer; | 224 | const u8 *byte = buffer; |
| 186 | unsigned char *end = buffer + buflen; | 225 | const u8 *end = buffer + buflen; |
| 187 | unsigned char state1 = state << 2; | 226 | const u8 state1 = state << 2; |
| 188 | unsigned char state2 = state << 4; | 227 | const u8 state2 = state << 4; |
| 189 | unsigned char state3 = state << 6; | 228 | const u8 state3 = state << 6; |
| 190 | u32 count = 0; | 229 | u32 count = 0; |
| 191 | 230 | ||
| 192 | for (; byte < end; byte++) { | 231 | for (; byte < end; byte++) { |
| @@ -353,7 +392,7 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp) | |||
| 353 | } | 392 | } |
| 354 | 393 | ||
| 355 | kfree(rgd->rd_bits); | 394 | kfree(rgd->rd_bits); |
| 356 | kfree(rgd); | 395 | kmem_cache_free(gfs2_rgrpd_cachep, rgd); |
| 357 | } | 396 | } |
| 358 | } | 397 | } |
| 359 | 398 | ||
| @@ -516,7 +555,7 @@ static int read_rindex_entry(struct gfs2_inode *ip, | |||
| 516 | return error; | 555 | return error; |
| 517 | } | 556 | } |
| 518 | 557 | ||
| 519 | rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS); | 558 | rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS); |
| 520 | error = -ENOMEM; | 559 | error = -ENOMEM; |
| 521 | if (!rgd) | 560 | if (!rgd) |
| 522 | return error; | 561 | return error; |
| @@ -539,7 +578,7 @@ static int read_rindex_entry(struct gfs2_inode *ip, | |||
| 539 | return error; | 578 | return error; |
| 540 | 579 | ||
| 541 | rgd->rd_gl->gl_object = rgd; | 580 | rgd->rd_gl->gl_object = rgd; |
| 542 | rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1; | 581 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; |
| 543 | rgd->rd_flags |= GFS2_RDF_CHECK; | 582 | rgd->rd_flags |= GFS2_RDF_CHECK; |
| 544 | return error; | 583 | return error; |
| 545 | } | 584 | } |
| @@ -575,7 +614,7 @@ static int gfs2_ri_update(struct gfs2_inode *ip) | |||
| 575 | } | 614 | } |
| 576 | } | 615 | } |
| 577 | 616 | ||
| 578 | sdp->sd_rindex_vn = ip->i_gl->gl_vn; | 617 | sdp->sd_rindex_uptodate = 1; |
| 579 | return 0; | 618 | return 0; |
| 580 | } | 619 | } |
| 581 | 620 | ||
| @@ -609,7 +648,7 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip) | |||
| 609 | } | 648 | } |
| 610 | } | 649 | } |
| 611 | 650 | ||
| 612 | sdp->sd_rindex_vn = ip->i_gl->gl_vn; | 651 | sdp->sd_rindex_uptodate = 1; |
| 613 | return 0; | 652 | return 0; |
| 614 | } | 653 | } |
| 615 | 654 | ||
| @@ -642,9 +681,9 @@ int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh) | |||
| 642 | return error; | 681 | return error; |
| 643 | 682 | ||
| 644 | /* Read new copy from disk if we don't have the latest */ | 683 | /* Read new copy from disk if we don't have the latest */ |
| 645 | if (sdp->sd_rindex_vn != gl->gl_vn) { | 684 | if (!sdp->sd_rindex_uptodate) { |
| 646 | mutex_lock(&sdp->sd_rindex_mutex); | 685 | mutex_lock(&sdp->sd_rindex_mutex); |
| 647 | if (sdp->sd_rindex_vn != gl->gl_vn) { | 686 | if (!sdp->sd_rindex_uptodate) { |
| 648 | error = gfs2_ri_update(ip); | 687 | error = gfs2_ri_update(ip); |
| 649 | if (error) | 688 | if (error) |
| 650 | gfs2_glock_dq_uninit(ri_gh); | 689 | gfs2_glock_dq_uninit(ri_gh); |
| @@ -655,21 +694,31 @@ int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh) | |||
| 655 | return error; | 694 | return error; |
| 656 | } | 695 | } |
| 657 | 696 | ||
| 658 | static void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf) | 697 | static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) |
| 659 | { | 698 | { |
| 660 | const struct gfs2_rgrp *str = buf; | 699 | const struct gfs2_rgrp *str = buf; |
| 700 | struct gfs2_rgrp_host *rg = &rgd->rd_rg; | ||
| 701 | u32 rg_flags; | ||
| 661 | 702 | ||
| 662 | rg->rg_flags = be32_to_cpu(str->rg_flags); | 703 | rg_flags = be32_to_cpu(str->rg_flags); |
| 704 | if (rg_flags & GFS2_RGF_NOALLOC) | ||
| 705 | rgd->rd_flags |= GFS2_RDF_NOALLOC; | ||
| 706 | else | ||
| 707 | rgd->rd_flags &= ~GFS2_RDF_NOALLOC; | ||
| 663 | rg->rg_free = be32_to_cpu(str->rg_free); | 708 | rg->rg_free = be32_to_cpu(str->rg_free); |
| 664 | rg->rg_dinodes = be32_to_cpu(str->rg_dinodes); | 709 | rg->rg_dinodes = be32_to_cpu(str->rg_dinodes); |
| 665 | rg->rg_igeneration = be64_to_cpu(str->rg_igeneration); | 710 | rg->rg_igeneration = be64_to_cpu(str->rg_igeneration); |
| 666 | } | 711 | } |
| 667 | 712 | ||
| 668 | static void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf) | 713 | static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) |
| 669 | { | 714 | { |
| 670 | struct gfs2_rgrp *str = buf; | 715 | struct gfs2_rgrp *str = buf; |
| 716 | struct gfs2_rgrp_host *rg = &rgd->rd_rg; | ||
| 717 | u32 rg_flags = 0; | ||
| 671 | 718 | ||
| 672 | str->rg_flags = cpu_to_be32(rg->rg_flags); | 719 | if (rgd->rd_flags & GFS2_RDF_NOALLOC) |
| 720 | rg_flags |= GFS2_RGF_NOALLOC; | ||
| 721 | str->rg_flags = cpu_to_be32(rg_flags); | ||
| 673 | str->rg_free = cpu_to_be32(rg->rg_free); | 722 | str->rg_free = cpu_to_be32(rg->rg_free); |
| 674 | str->rg_dinodes = cpu_to_be32(rg->rg_dinodes); | 723 | str->rg_dinodes = cpu_to_be32(rg->rg_dinodes); |
| 675 | str->__pad = cpu_to_be32(0); | 724 | str->__pad = cpu_to_be32(0); |
| @@ -726,9 +775,9 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) | |||
| 726 | } | 775 | } |
| 727 | } | 776 | } |
| 728 | 777 | ||
| 729 | if (rgd->rd_rg_vn != gl->gl_vn) { | 778 | if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { |
| 730 | gfs2_rgrp_in(&rgd->rd_rg, (rgd->rd_bits[0].bi_bh)->b_data); | 779 | gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); |
| 731 | rgd->rd_rg_vn = gl->gl_vn; | 780 | rgd->rd_flags |= GFS2_RDF_UPTODATE; |
| 732 | } | 781 | } |
| 733 | 782 | ||
| 734 | spin_lock(&sdp->sd_rindex_spin); | 783 | spin_lock(&sdp->sd_rindex_spin); |
| @@ -840,7 +889,7 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) | |||
| 840 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 889 | struct gfs2_sbd *sdp = rgd->rd_sbd; |
| 841 | int ret = 0; | 890 | int ret = 0; |
| 842 | 891 | ||
| 843 | if (rgd->rd_rg.rg_flags & GFS2_RGF_NOALLOC) | 892 | if (rgd->rd_flags & GFS2_RDF_NOALLOC) |
| 844 | return 0; | 893 | return 0; |
| 845 | 894 | ||
| 846 | spin_lock(&sdp->sd_rindex_spin); | 895 | spin_lock(&sdp->sd_rindex_spin); |
| @@ -866,13 +915,15 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) | |||
| 866 | u32 goal = 0, block; | 915 | u32 goal = 0, block; |
| 867 | u64 no_addr; | 916 | u64 no_addr; |
| 868 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 917 | struct gfs2_sbd *sdp = rgd->rd_sbd; |
| 918 | unsigned int n; | ||
| 869 | 919 | ||
| 870 | for(;;) { | 920 | for(;;) { |
| 871 | if (goal >= rgd->rd_data) | 921 | if (goal >= rgd->rd_data) |
| 872 | break; | 922 | break; |
| 873 | down_write(&sdp->sd_log_flush_lock); | 923 | down_write(&sdp->sd_log_flush_lock); |
| 924 | n = 1; | ||
| 874 | block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, | 925 | block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, |
| 875 | GFS2_BLKST_UNLINKED); | 926 | GFS2_BLKST_UNLINKED, &n); |
| 876 | up_write(&sdp->sd_log_flush_lock); | 927 | up_write(&sdp->sd_log_flush_lock); |
| 877 | if (block == BFITNOENT) | 928 | if (block == BFITNOENT) |
| 878 | break; | 929 | break; |
| @@ -904,24 +955,20 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) | |||
| 904 | static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp, | 955 | static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp, |
| 905 | u64 rglast) | 956 | u64 rglast) |
| 906 | { | 957 | { |
| 907 | struct gfs2_rgrpd *rgd = NULL; | 958 | struct gfs2_rgrpd *rgd; |
| 908 | 959 | ||
| 909 | spin_lock(&sdp->sd_rindex_spin); | 960 | spin_lock(&sdp->sd_rindex_spin); |
| 910 | 961 | ||
| 911 | if (list_empty(&sdp->sd_rindex_recent_list)) | 962 | if (rglast) { |
| 912 | goto out; | 963 | list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) { |
| 913 | 964 | if (rgrp_contains_block(rgd, rglast)) | |
| 914 | if (!rglast) | 965 | goto out; |
| 915 | goto first; | 966 | } |
| 916 | |||
| 917 | list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) { | ||
| 918 | if (rgd->rd_addr == rglast) | ||
| 919 | goto out; | ||
| 920 | } | 967 | } |
| 921 | 968 | rgd = NULL; | |
| 922 | first: | 969 | if (!list_empty(&sdp->sd_rindex_recent_list)) |
| 923 | rgd = list_entry(sdp->sd_rindex_recent_list.next, struct gfs2_rgrpd, | 970 | rgd = list_entry(sdp->sd_rindex_recent_list.next, |
| 924 | rd_recent); | 971 | struct gfs2_rgrpd, rd_recent); |
| 925 | out: | 972 | out: |
| 926 | spin_unlock(&sdp->sd_rindex_spin); | 973 | spin_unlock(&sdp->sd_rindex_spin); |
| 927 | return rgd; | 974 | return rgd; |
| @@ -1067,7 +1114,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
| 1067 | 1114 | ||
| 1068 | /* Try recently successful rgrps */ | 1115 | /* Try recently successful rgrps */ |
| 1069 | 1116 | ||
| 1070 | rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc); | 1117 | rgd = recent_rgrp_first(sdp, ip->i_goal); |
| 1071 | 1118 | ||
| 1072 | while (rgd) { | 1119 | while (rgd) { |
| 1073 | rg_locked = 0; | 1120 | rg_locked = 0; |
| @@ -1151,8 +1198,6 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) | |||
| 1151 | } | 1198 | } |
| 1152 | 1199 | ||
| 1153 | out: | 1200 | out: |
| 1154 | ip->i_last_rg_alloc = rgd->rd_addr; | ||
| 1155 | |||
| 1156 | if (begin) { | 1201 | if (begin) { |
| 1157 | recent_rgrp_add(rgd); | 1202 | recent_rgrp_add(rgd); |
| 1158 | rgd = gfs2_rgrpd_get_next(rgd); | 1203 | rgd = gfs2_rgrpd_get_next(rgd); |
| @@ -1275,6 +1320,7 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) | |||
| 1275 | * @goal: the goal block within the RG (start here to search for avail block) | 1320 | * @goal: the goal block within the RG (start here to search for avail block) |
| 1276 | * @old_state: GFS2_BLKST_XXX the before-allocation state to find | 1321 | * @old_state: GFS2_BLKST_XXX the before-allocation state to find |
| 1277 | * @new_state: GFS2_BLKST_XXX the after-allocation block state | 1322 | * @new_state: GFS2_BLKST_XXX the after-allocation block state |
| 1323 | * @n: The extent length | ||
| 1278 | * | 1324 | * |
| 1279 | * Walk rgrp's bitmap to find bits that represent a block in @old_state. | 1325 | * Walk rgrp's bitmap to find bits that represent a block in @old_state. |
| 1280 | * Add the found bitmap buffer to the transaction. | 1326 | * Add the found bitmap buffer to the transaction. |
| @@ -1290,13 +1336,17 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) | |||
| 1290 | */ | 1336 | */ |
| 1291 | 1337 | ||
| 1292 | static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, | 1338 | static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, |
| 1293 | unsigned char old_state, unsigned char new_state) | 1339 | unsigned char old_state, unsigned char new_state, |
| 1340 | unsigned int *n) | ||
| 1294 | { | 1341 | { |
| 1295 | struct gfs2_bitmap *bi = NULL; | 1342 | struct gfs2_bitmap *bi = NULL; |
| 1296 | u32 length = rgd->rd_length; | 1343 | const u32 length = rgd->rd_length; |
| 1297 | u32 blk = 0; | 1344 | u32 blk = 0; |
| 1298 | unsigned int buf, x; | 1345 | unsigned int buf, x; |
| 1346 | const unsigned int elen = *n; | ||
| 1347 | const u8 *buffer; | ||
| 1299 | 1348 | ||
| 1349 | *n = 0; | ||
| 1300 | /* Find bitmap block that contains bits for goal block */ | 1350 | /* Find bitmap block that contains bits for goal block */ |
| 1301 | for (buf = 0; buf < length; buf++) { | 1351 | for (buf = 0; buf < length; buf++) { |
| 1302 | bi = rgd->rd_bits + buf; | 1352 | bi = rgd->rd_bits + buf; |
| @@ -1317,12 +1367,11 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, | |||
| 1317 | for (x = 0; x <= length; x++) { | 1367 | for (x = 0; x <= length; x++) { |
| 1318 | /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone | 1368 | /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone |
| 1319 | bitmaps, so we must search the originals for that. */ | 1369 | bitmaps, so we must search the originals for that. */ |
| 1370 | buffer = bi->bi_bh->b_data + bi->bi_offset; | ||
| 1320 | if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) | 1371 | if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) |
| 1321 | blk = gfs2_bitfit(bi->bi_clone + bi->bi_offset, | 1372 | buffer = bi->bi_clone + bi->bi_offset; |
| 1322 | bi->bi_len, goal, old_state); | 1373 | |
| 1323 | else | 1374 | blk = gfs2_bitfit(buffer, bi->bi_len, goal, old_state); |
| 1324 | blk = gfs2_bitfit(bi->bi_bh->b_data + bi->bi_offset, | ||
| 1325 | bi->bi_len, goal, old_state); | ||
| 1326 | if (blk != BFITNOENT) | 1375 | if (blk != BFITNOENT) |
| 1327 | break; | 1376 | break; |
| 1328 | 1377 | ||
| @@ -1333,12 +1382,23 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, | |||
| 1333 | } | 1382 | } |
| 1334 | 1383 | ||
| 1335 | if (blk != BFITNOENT && old_state != new_state) { | 1384 | if (blk != BFITNOENT && old_state != new_state) { |
| 1385 | *n = 1; | ||
| 1336 | gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); | 1386 | gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); |
| 1337 | gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset, | 1387 | gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, |
| 1338 | bi->bi_len, blk, new_state); | 1388 | bi->bi_len, blk, new_state); |
| 1339 | if (bi->bi_clone) | 1389 | goal = blk; |
| 1340 | gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset, | 1390 | while (*n < elen) { |
| 1341 | bi->bi_len, blk, new_state); | 1391 | goal++; |
| 1392 | if (goal >= (bi->bi_len * GFS2_NBBY)) | ||
| 1393 | break; | ||
| 1394 | if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != | ||
| 1395 | GFS2_BLKST_FREE) | ||
| 1396 | break; | ||
| 1397 | gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, | ||
| 1398 | bi->bi_offset, bi->bi_len, goal, | ||
| 1399 | new_state); | ||
| 1400 | (*n)++; | ||
| 1401 | } | ||
| 1342 | } | 1402 | } |
| 1343 | 1403 | ||
| 1344 | return (blk == BFITNOENT) ? blk : (bi->bi_start * GFS2_NBBY) + blk; | 1404 | return (blk == BFITNOENT) ? blk : (bi->bi_start * GFS2_NBBY) + blk; |
| @@ -1393,7 +1453,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, | |||
| 1393 | bi->bi_len); | 1453 | bi->bi_len); |
| 1394 | } | 1454 | } |
| 1395 | gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); | 1455 | gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); |
| 1396 | gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset, | 1456 | gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset, |
| 1397 | bi->bi_len, buf_blk, new_state); | 1457 | bi->bi_len, buf_blk, new_state); |
| 1398 | } | 1458 | } |
| 1399 | 1459 | ||
| @@ -1401,13 +1461,13 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, | |||
| 1401 | } | 1461 | } |
| 1402 | 1462 | ||
| 1403 | /** | 1463 | /** |
| 1404 | * gfs2_alloc_data - Allocate a data block | 1464 | * gfs2_alloc_block - Allocate a block |
| 1405 | * @ip: the inode to allocate the data block for | 1465 | * @ip: the inode to allocate the block for |
| 1406 | * | 1466 | * |
| 1407 | * Returns: the allocated block | 1467 | * Returns: the allocated block |
| 1408 | */ | 1468 | */ |
| 1409 | 1469 | ||
| 1410 | u64 gfs2_alloc_data(struct gfs2_inode *ip) | 1470 | u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) |
| 1411 | { | 1471 | { |
| 1412 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1472 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
| 1413 | struct gfs2_alloc *al = ip->i_alloc; | 1473 | struct gfs2_alloc *al = ip->i_alloc; |
| @@ -1415,77 +1475,31 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip) | |||
| 1415 | u32 goal, blk; | 1475 | u32 goal, blk; |
| 1416 | u64 block; | 1476 | u64 block; |
| 1417 | 1477 | ||
| 1418 | if (rgrp_contains_block(rgd, ip->i_di.di_goal_data)) | 1478 | if (rgrp_contains_block(rgd, ip->i_goal)) |
| 1419 | goal = ip->i_di.di_goal_data - rgd->rd_data0; | 1479 | goal = ip->i_goal - rgd->rd_data0; |
| 1420 | else | 1480 | else |
| 1421 | goal = rgd->rd_last_alloc_data; | 1481 | goal = rgd->rd_last_alloc; |
| 1422 | 1482 | ||
| 1423 | blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED); | 1483 | blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n); |
| 1424 | BUG_ON(blk == BFITNOENT); | 1484 | BUG_ON(blk == BFITNOENT); |
| 1425 | rgd->rd_last_alloc_data = blk; | ||
| 1426 | 1485 | ||
| 1486 | rgd->rd_last_alloc = blk; | ||
| 1427 | block = rgd->rd_data0 + blk; | 1487 | block = rgd->rd_data0 + blk; |
| 1428 | ip->i_di.di_goal_data = block; | 1488 | ip->i_goal = block; |
| 1429 | 1489 | ||
| 1430 | gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); | 1490 | gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free >= *n); |
| 1431 | rgd->rd_rg.rg_free--; | 1491 | rgd->rd_rg.rg_free -= *n; |
| 1432 | 1492 | ||
| 1433 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1493 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
| 1434 | gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); | 1494 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
| 1435 | 1495 | ||
| 1436 | al->al_alloced++; | 1496 | al->al_alloced += *n; |
| 1437 | 1497 | ||
| 1438 | gfs2_statfs_change(sdp, 0, -1, 0); | 1498 | gfs2_statfs_change(sdp, 0, -*n, 0); |
| 1439 | gfs2_quota_change(ip, +1, ip->i_inode.i_uid, ip->i_inode.i_gid); | 1499 | gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid); |
| 1440 | 1500 | ||
| 1441 | spin_lock(&sdp->sd_rindex_spin); | 1501 | spin_lock(&sdp->sd_rindex_spin); |
| 1442 | rgd->rd_free_clone--; | 1502 | rgd->rd_free_clone -= *n; |
| 1443 | spin_unlock(&sdp->sd_rindex_spin); | ||
| 1444 | |||
| 1445 | return block; | ||
| 1446 | } | ||
| 1447 | |||
| 1448 | /** | ||
| 1449 | * gfs2_alloc_meta - Allocate a metadata block | ||
| 1450 | * @ip: the inode to allocate the metadata block for | ||
| 1451 | * | ||
| 1452 | * Returns: the allocated block | ||
| 1453 | */ | ||
| 1454 | |||
| 1455 | u64 gfs2_alloc_meta(struct gfs2_inode *ip) | ||
| 1456 | { | ||
| 1457 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
| 1458 | struct gfs2_alloc *al = ip->i_alloc; | ||
| 1459 | struct gfs2_rgrpd *rgd = al->al_rgd; | ||
| 1460 | u32 goal, blk; | ||
| 1461 | u64 block; | ||
| 1462 | |||
| 1463 | if (rgrp_contains_block(rgd, ip->i_di.di_goal_meta)) | ||
| 1464 | goal = ip->i_di.di_goal_meta - rgd->rd_data0; | ||
| 1465 | else | ||
| 1466 | goal = rgd->rd_last_alloc_meta; | ||
| 1467 | |||
| 1468 | blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED); | ||
| 1469 | BUG_ON(blk == BFITNOENT); | ||
| 1470 | rgd->rd_last_alloc_meta = blk; | ||
| 1471 | |||
| 1472 | block = rgd->rd_data0 + blk; | ||
| 1473 | ip->i_di.di_goal_meta = block; | ||
| 1474 | |||
| 1475 | gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); | ||
| 1476 | rgd->rd_rg.rg_free--; | ||
| 1477 | |||
| 1478 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | ||
| 1479 | gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); | ||
| 1480 | |||
| 1481 | al->al_alloced++; | ||
| 1482 | |||
| 1483 | gfs2_statfs_change(sdp, 0, -1, 0); | ||
| 1484 | gfs2_quota_change(ip, +1, ip->i_inode.i_uid, ip->i_inode.i_gid); | ||
| 1485 | gfs2_trans_add_unrevoke(sdp, block); | ||
| 1486 | |||
| 1487 | spin_lock(&sdp->sd_rindex_spin); | ||
| 1488 | rgd->rd_free_clone--; | ||
| 1489 | spin_unlock(&sdp->sd_rindex_spin); | 1503 | spin_unlock(&sdp->sd_rindex_spin); |
| 1490 | 1504 | ||
| 1491 | return block; | 1505 | return block; |
| @@ -1505,12 +1519,13 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) | |||
| 1505 | struct gfs2_rgrpd *rgd = al->al_rgd; | 1519 | struct gfs2_rgrpd *rgd = al->al_rgd; |
| 1506 | u32 blk; | 1520 | u32 blk; |
| 1507 | u64 block; | 1521 | u64 block; |
| 1522 | unsigned int n = 1; | ||
| 1508 | 1523 | ||
| 1509 | blk = rgblk_search(rgd, rgd->rd_last_alloc_meta, | 1524 | blk = rgblk_search(rgd, rgd->rd_last_alloc, |
| 1510 | GFS2_BLKST_FREE, GFS2_BLKST_DINODE); | 1525 | GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n); |
| 1511 | BUG_ON(blk == BFITNOENT); | 1526 | BUG_ON(blk == BFITNOENT); |
| 1512 | 1527 | ||
| 1513 | rgd->rd_last_alloc_meta = blk; | 1528 | rgd->rd_last_alloc = blk; |
| 1514 | 1529 | ||
| 1515 | block = rgd->rd_data0 + blk; | 1530 | block = rgd->rd_data0 + blk; |
| 1516 | 1531 | ||
| @@ -1519,12 +1534,12 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) | |||
| 1519 | rgd->rd_rg.rg_dinodes++; | 1534 | rgd->rd_rg.rg_dinodes++; |
| 1520 | *generation = rgd->rd_rg.rg_igeneration++; | 1535 | *generation = rgd->rd_rg.rg_igeneration++; |
| 1521 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1536 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
| 1522 | gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); | 1537 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
| 1523 | 1538 | ||
| 1524 | al->al_alloced++; | 1539 | al->al_alloced++; |
| 1525 | 1540 | ||
| 1526 | gfs2_statfs_change(sdp, 0, -1, +1); | 1541 | gfs2_statfs_change(sdp, 0, -1, +1); |
| 1527 | gfs2_trans_add_unrevoke(sdp, block); | 1542 | gfs2_trans_add_unrevoke(sdp, block, 1); |
| 1528 | 1543 | ||
| 1529 | spin_lock(&sdp->sd_rindex_spin); | 1544 | spin_lock(&sdp->sd_rindex_spin); |
| 1530 | rgd->rd_free_clone--; | 1545 | rgd->rd_free_clone--; |
| @@ -1553,7 +1568,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) | |||
| 1553 | rgd->rd_rg.rg_free += blen; | 1568 | rgd->rd_rg.rg_free += blen; |
| 1554 | 1569 | ||
| 1555 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1570 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
| 1556 | gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); | 1571 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
| 1557 | 1572 | ||
| 1558 | gfs2_trans_add_rg(rgd); | 1573 | gfs2_trans_add_rg(rgd); |
| 1559 | 1574 | ||
| @@ -1581,7 +1596,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) | |||
| 1581 | rgd->rd_rg.rg_free += blen; | 1596 | rgd->rd_rg.rg_free += blen; |
| 1582 | 1597 | ||
| 1583 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1598 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
| 1584 | gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); | 1599 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
| 1585 | 1600 | ||
| 1586 | gfs2_trans_add_rg(rgd); | 1601 | gfs2_trans_add_rg(rgd); |
| 1587 | 1602 | ||
| @@ -1601,7 +1616,7 @@ void gfs2_unlink_di(struct inode *inode) | |||
| 1601 | if (!rgd) | 1616 | if (!rgd) |
| 1602 | return; | 1617 | return; |
| 1603 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1618 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
| 1604 | gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); | 1619 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
| 1605 | gfs2_trans_add_rg(rgd); | 1620 | gfs2_trans_add_rg(rgd); |
| 1606 | } | 1621 | } |
| 1607 | 1622 | ||
| @@ -1621,7 +1636,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) | |||
| 1621 | rgd->rd_rg.rg_free++; | 1636 | rgd->rd_rg.rg_free++; |
| 1622 | 1637 | ||
| 1623 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); | 1638 | gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); |
| 1624 | gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); | 1639 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
| 1625 | 1640 | ||
| 1626 | gfs2_statfs_change(sdp, 0, +1, -1); | 1641 | gfs2_statfs_change(sdp, 0, +1, -1); |
| 1627 | gfs2_trans_add_rg(rgd); | 1642 | gfs2_trans_add_rg(rgd); |
| @@ -1699,8 +1714,7 @@ void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, | |||
| 1699 | * | 1714 | * |
| 1700 | */ | 1715 | */ |
| 1701 | 1716 | ||
| 1702 | void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state, | 1717 | void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state) |
| 1703 | int flags) | ||
| 1704 | { | 1718 | { |
| 1705 | unsigned int x; | 1719 | unsigned int x; |
| 1706 | 1720 | ||
| @@ -1708,7 +1722,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state, | |||
| 1708 | GFP_NOFS | __GFP_NOFAIL); | 1722 | GFP_NOFS | __GFP_NOFAIL); |
| 1709 | for (x = 0; x < rlist->rl_rgrps; x++) | 1723 | for (x = 0; x < rlist->rl_rgrps; x++) |
| 1710 | gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, | 1724 | gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, |
| 1711 | state, flags, | 1725 | state, 0, |
| 1712 | &rlist->rl_ghs[x]); | 1726 | &rlist->rl_ghs[x]); |
| 1713 | } | 1727 | } |
| 1714 | 1728 | ||
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 149bb161f4b6..3181c7e624bf 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 2 | * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
| 3 | * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. | 3 | * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
| 4 | * | 4 | * |
| 5 | * This copyrighted material is made available to anyone wishing to use, | 5 | * This copyrighted material is made available to anyone wishing to use, |
| 6 | * modify, copy, or redistribute it subject to the terms and conditions | 6 | * modify, copy, or redistribute it subject to the terms and conditions |
| @@ -46,8 +46,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip); | |||
| 46 | 46 | ||
| 47 | unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block); | 47 | unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block); |
| 48 | 48 | ||
| 49 | u64 gfs2_alloc_data(struct gfs2_inode *ip); | 49 | u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n); |
| 50 | u64 gfs2_alloc_meta(struct gfs2_inode *ip); | ||
| 51 | u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation); | 50 | u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation); |
| 52 | 51 | ||
| 53 | void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); | 52 | void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); |
| @@ -64,8 +63,7 @@ struct gfs2_rgrp_list { | |||
| 64 | 63 | ||
| 65 | void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, | 64 | void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, |
| 66 | u64 block); | 65 | u64 block); |
| 67 | void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state, | 66 | void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); |
| 68 | int flags); | ||
| 69 | void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); | 67 | void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); |
| 70 | u64 gfs2_ri_total(struct gfs2_sbd *sdp); | 68 | u64 gfs2_ri_total(struct gfs2_sbd *sdp); |
| 71 | 69 | ||
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ef0562c3bc71..7aeacbc65f35 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
| @@ -210,7 +210,7 @@ int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | |||
| 210 | struct page *page; | 210 | struct page *page; |
| 211 | struct bio *bio; | 211 | struct bio *bio; |
| 212 | 212 | ||
| 213 | page = alloc_page(GFP_KERNEL); | 213 | page = alloc_page(GFP_NOFS); |
| 214 | if (unlikely(!page)) | 214 | if (unlikely(!page)) |
| 215 | return -ENOBUFS; | 215 | return -ENOBUFS; |
| 216 | 216 | ||
| @@ -218,7 +218,7 @@ int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | |||
| 218 | ClearPageDirty(page); | 218 | ClearPageDirty(page); |
| 219 | lock_page(page); | 219 | lock_page(page); |
| 220 | 220 | ||
| 221 | bio = bio_alloc(GFP_KERNEL, 1); | 221 | bio = bio_alloc(GFP_NOFS, 1); |
| 222 | if (unlikely(!bio)) { | 222 | if (unlikely(!bio)) { |
| 223 | __free_page(page); | 223 | __free_page(page); |
| 224 | return -ENOBUFS; | 224 | return -ENOBUFS; |
| @@ -316,6 +316,7 @@ int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) | |||
| 316 | sdp->sd_heightsize[x] = space; | 316 | sdp->sd_heightsize[x] = space; |
| 317 | } | 317 | } |
| 318 | sdp->sd_max_height = x; | 318 | sdp->sd_max_height = x; |
| 319 | sdp->sd_heightsize[x] = ~0; | ||
| 319 | gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); | 320 | gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); |
| 320 | 321 | ||
| 321 | sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - | 322 | sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - |
| @@ -334,6 +335,7 @@ int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) | |||
| 334 | sdp->sd_jheightsize[x] = space; | 335 | sdp->sd_jheightsize[x] = space; |
| 335 | } | 336 | } |
| 336 | sdp->sd_max_jheight = x; | 337 | sdp->sd_max_jheight = x; |
| 338 | sdp->sd_jheightsize[x] = ~0; | ||
| 337 | gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); | 339 | gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); |
| 338 | 340 | ||
| 339 | return 0; | 341 | return 0; |
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 60a870e430be..44361ecc44f7 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h | |||
| @@ -17,6 +17,7 @@ void gfs2_tune_init(struct gfs2_tune *gt); | |||
| 17 | int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); | 17 | int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); |
| 18 | int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); | 18 | int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); |
| 19 | int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector); | 19 | int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector); |
| 20 | void gfs2_lm_unmount(struct gfs2_sbd *sdp); | ||
| 20 | 21 | ||
| 21 | static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) | 22 | static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) |
| 22 | { | 23 | { |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index eaa3b7b2f99e..9ab9fc85ecd0 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | 20 | ||
| 21 | #include "gfs2.h" | 21 | #include "gfs2.h" |
| 22 | #include "incore.h" | 22 | #include "incore.h" |
| 23 | #include "lm.h" | ||
| 24 | #include "sys.h" | 23 | #include "sys.h" |
| 25 | #include "super.h" | 24 | #include "super.h" |
| 26 | #include "glock.h" | 25 | #include "glock.h" |
| @@ -328,15 +327,9 @@ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ | |||
| 328 | } \ | 327 | } \ |
| 329 | static struct counters_attr counters_attr_##name = __ATTR_RO(name) | 328 | static struct counters_attr counters_attr_##name = __ATTR_RO(name) |
| 330 | 329 | ||
| 331 | COUNTERS_ATTR(glock_count, "%u\n"); | ||
| 332 | COUNTERS_ATTR(glock_held_count, "%u\n"); | ||
| 333 | COUNTERS_ATTR(inode_count, "%u\n"); | ||
| 334 | COUNTERS_ATTR(reclaimed, "%u\n"); | 330 | COUNTERS_ATTR(reclaimed, "%u\n"); |
| 335 | 331 | ||
| 336 | static struct attribute *counters_attrs[] = { | 332 | static struct attribute *counters_attrs[] = { |
| 337 | &counters_attr_glock_count.attr, | ||
| 338 | &counters_attr_glock_held_count.attr, | ||
| 339 | &counters_attr_inode_count.attr, | ||
| 340 | &counters_attr_reclaimed.attr, | 333 | &counters_attr_reclaimed.attr, |
| 341 | NULL, | 334 | NULL, |
| 342 | }; | 335 | }; |
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 73e5d92a657c..f677b8a83f0c 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c | |||
| @@ -146,30 +146,25 @@ void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) | |||
| 146 | lops_add(sdp, &bd->bd_le); | 146 | lops_add(sdp, &bd->bd_le); |
| 147 | } | 147 | } |
| 148 | 148 | ||
| 149 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno) | 149 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) |
| 150 | { | 150 | { |
| 151 | struct gfs2_bufdata *bd; | 151 | struct gfs2_bufdata *bd, *tmp; |
| 152 | int found = 0; | 152 | struct gfs2_trans *tr = current->journal_info; |
| 153 | unsigned int n = len; | ||
| 153 | 154 | ||
| 154 | gfs2_log_lock(sdp); | 155 | gfs2_log_lock(sdp); |
| 155 | 156 | list_for_each_entry_safe(bd, tmp, &sdp->sd_log_le_revoke, bd_le.le_list) { | |
| 156 | list_for_each_entry(bd, &sdp->sd_log_le_revoke, bd_le.le_list) { | 157 | if ((bd->bd_blkno >= blkno) && (bd->bd_blkno < (blkno + len))) { |
| 157 | if (bd->bd_blkno == blkno) { | ||
| 158 | list_del_init(&bd->bd_le.le_list); | 158 | list_del_init(&bd->bd_le.le_list); |
| 159 | gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); | 159 | gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); |
| 160 | sdp->sd_log_num_revoke--; | 160 | sdp->sd_log_num_revoke--; |
| 161 | found = 1; | 161 | kmem_cache_free(gfs2_bufdata_cachep, bd); |
| 162 | break; | 162 | tr->tr_num_revoke_rm++; |
| 163 | if (--n == 0) | ||
| 164 | break; | ||
| 163 | } | 165 | } |
| 164 | } | 166 | } |
| 165 | |||
| 166 | gfs2_log_unlock(sdp); | 167 | gfs2_log_unlock(sdp); |
| 167 | |||
| 168 | if (found) { | ||
| 169 | struct gfs2_trans *tr = current->journal_info; | ||
| 170 | kmem_cache_free(gfs2_bufdata_cachep, bd); | ||
| 171 | tr->tr_num_revoke_rm++; | ||
| 172 | } | ||
| 173 | } | 168 | } |
| 174 | 169 | ||
| 175 | void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd) | 170 | void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd) |
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index e826f0dab80a..edf9d4bd908e 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h | |||
| @@ -32,7 +32,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp); | |||
| 32 | 32 | ||
| 33 | void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); | 33 | void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); |
| 34 | void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); | 34 | void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); |
| 35 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno); | 35 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); |
| 36 | void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd); | 36 | void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd); |
| 37 | 37 | ||
| 38 | #endif /* __TRANS_DOT_H__ */ | 38 | #endif /* __TRANS_DOT_H__ */ |
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 424a0774eda8..d31e355c61fb 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c | |||
| @@ -19,12 +19,12 @@ | |||
| 19 | #include "gfs2.h" | 19 | #include "gfs2.h" |
| 20 | #include "incore.h" | 20 | #include "incore.h" |
| 21 | #include "glock.h" | 21 | #include "glock.h" |
| 22 | #include "lm.h" | ||
| 23 | #include "util.h" | 22 | #include "util.h" |
| 24 | 23 | ||
| 25 | struct kmem_cache *gfs2_glock_cachep __read_mostly; | 24 | struct kmem_cache *gfs2_glock_cachep __read_mostly; |
| 26 | struct kmem_cache *gfs2_inode_cachep __read_mostly; | 25 | struct kmem_cache *gfs2_inode_cachep __read_mostly; |
| 27 | struct kmem_cache *gfs2_bufdata_cachep __read_mostly; | 26 | struct kmem_cache *gfs2_bufdata_cachep __read_mostly; |
| 27 | struct kmem_cache *gfs2_rgrpd_cachep __read_mostly; | ||
| 28 | 28 | ||
| 29 | void gfs2_assert_i(struct gfs2_sbd *sdp) | 29 | void gfs2_assert_i(struct gfs2_sbd *sdp) |
| 30 | { | 30 | { |
| @@ -32,6 +32,28 @@ void gfs2_assert_i(struct gfs2_sbd *sdp) | |||
| 32 | sdp->sd_fsname); | 32 | sdp->sd_fsname); |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) | ||
| 36 | { | ||
| 37 | va_list args; | ||
| 38 | |||
| 39 | if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
| 40 | return 0; | ||
| 41 | |||
| 42 | va_start(args, fmt); | ||
| 43 | vprintk(fmt, args); | ||
| 44 | va_end(args); | ||
| 45 | |||
| 46 | fs_err(sdp, "about to withdraw this file system\n"); | ||
| 47 | BUG_ON(sdp->sd_args.ar_debug); | ||
| 48 | |||
| 49 | fs_err(sdp, "telling LM to withdraw\n"); | ||
| 50 | gfs2_withdraw_lockproto(&sdp->sd_lockstruct); | ||
| 51 | fs_err(sdp, "withdrawn\n"); | ||
| 52 | dump_stack(); | ||
| 53 | |||
| 54 | return -1; | ||
| 55 | } | ||
| 56 | |||
| 35 | /** | 57 | /** |
| 36 | * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false | 58 | * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false |
| 37 | * Returns: -1 if this call withdrew the machine, | 59 | * Returns: -1 if this call withdrew the machine, |
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 28938a46cf47..509c5d60bd80 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h | |||
| @@ -147,6 +147,7 @@ gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__); | |||
| 147 | extern struct kmem_cache *gfs2_glock_cachep; | 147 | extern struct kmem_cache *gfs2_glock_cachep; |
| 148 | extern struct kmem_cache *gfs2_inode_cachep; | 148 | extern struct kmem_cache *gfs2_inode_cachep; |
| 149 | extern struct kmem_cache *gfs2_bufdata_cachep; | 149 | extern struct kmem_cache *gfs2_bufdata_cachep; |
| 150 | extern struct kmem_cache *gfs2_rgrpd_cachep; | ||
| 150 | 151 | ||
| 151 | static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, | 152 | static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, |
| 152 | unsigned int *p) | 153 | unsigned int *p) |
| @@ -163,6 +164,7 @@ gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) | |||
| 163 | 164 | ||
| 164 | void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, | 165 | void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, |
| 165 | unsigned int bit, int new_value); | 166 | unsigned int bit, int new_value); |
| 167 | int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...); | ||
| 166 | 168 | ||
| 167 | #endif /* __UTIL_DOT_H__ */ | 169 | #endif /* __UTIL_DOT_H__ */ |
| 168 | 170 | ||
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index f9c5dd6f4b64..dcc2734e0b5d 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c | |||
| @@ -129,7 +129,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, | |||
| 129 | struct inode *inode = mapping->host; | 129 | struct inode *inode = mapping->host; |
| 130 | struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); | 130 | struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); |
| 131 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | 131 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
| 132 | uint32_t pageofs = pos & (PAGE_CACHE_SIZE - 1); | 132 | uint32_t pageofs = index << PAGE_CACHE_SHIFT; |
| 133 | int ret = 0; | 133 | int ret = 0; |
| 134 | 134 | ||
| 135 | pg = __grab_cache_page(mapping, index); | 135 | pg = __grab_cache_page(mapping, index); |
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index e1985066b1c6..2bc7d8aa5740 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c | |||
| @@ -2172,7 +2172,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
| 2172 | } | 2172 | } |
| 2173 | 2173 | ||
| 2174 | /* update the free count for this dmap */ | 2174 | /* update the free count for this dmap */ |
| 2175 | dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks); | 2175 | le32_add_cpu(&dp->nfree, -nblocks); |
| 2176 | 2176 | ||
| 2177 | BMAP_LOCK(bmp); | 2177 | BMAP_LOCK(bmp); |
| 2178 | 2178 | ||
| @@ -2316,7 +2316,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
| 2316 | 2316 | ||
| 2317 | /* update the free count for this dmap. | 2317 | /* update the free count for this dmap. |
| 2318 | */ | 2318 | */ |
| 2319 | dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks); | 2319 | le32_add_cpu(&dp->nfree, nblocks); |
| 2320 | 2320 | ||
| 2321 | BMAP_LOCK(bmp); | 2321 | BMAP_LOCK(bmp); |
| 2322 | 2322 | ||
| @@ -3226,7 +3226,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, | |||
| 3226 | } | 3226 | } |
| 3227 | 3227 | ||
| 3228 | /* update the free count for this dmap */ | 3228 | /* update the free count for this dmap */ |
| 3229 | dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks); | 3229 | le32_add_cpu(&dp->nfree, -nblocks); |
| 3230 | 3230 | ||
| 3231 | /* reconstruct summary tree */ | 3231 | /* reconstruct summary tree */ |
| 3232 | dbInitDmapTree(dp); | 3232 | dbInitDmapTree(dp); |
| @@ -3660,9 +3660,8 @@ static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks) | |||
| 3660 | goto initTree; | 3660 | goto initTree; |
| 3661 | } | 3661 | } |
| 3662 | } else { | 3662 | } else { |
| 3663 | dp->nblocks = | 3663 | le32_add_cpu(&dp->nblocks, nblocks); |
| 3664 | cpu_to_le32(le32_to_cpu(dp->nblocks) + nblocks); | 3664 | le32_add_cpu(&dp->nfree, nblocks); |
| 3665 | dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks); | ||
| 3666 | } | 3665 | } |
| 3667 | 3666 | ||
| 3668 | /* word number containing start block number */ | 3667 | /* word number containing start block number */ |
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 11e6d471b364..1a6eb41569bc 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h | |||
| @@ -61,7 +61,7 @@ | |||
| 61 | * determine the maximum free string for four (lower level) nodes | 61 | * determine the maximum free string for four (lower level) nodes |
| 62 | * of the tree. | 62 | * of the tree. |
| 63 | */ | 63 | */ |
| 64 | static __inline signed char TREEMAX(signed char *cp) | 64 | static inline signed char TREEMAX(signed char *cp) |
| 65 | { | 65 | { |
| 66 | signed char tmp1, tmp2; | 66 | signed char tmp1, tmp2; |
| 67 | 67 | ||
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 9bf29f771737..734ec916beaf 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c | |||
| @@ -1019,8 +1019,7 @@ int diFree(struct inode *ip) | |||
| 1019 | /* update the free inode counts at the iag, ag and | 1019 | /* update the free inode counts at the iag, ag and |
| 1020 | * map level. | 1020 | * map level. |
| 1021 | */ | 1021 | */ |
| 1022 | iagp->nfreeinos = | 1022 | le32_add_cpu(&iagp->nfreeinos, 1); |
| 1023 | cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 1); | ||
| 1024 | imap->im_agctl[agno].numfree += 1; | 1023 | imap->im_agctl[agno].numfree += 1; |
| 1025 | atomic_inc(&imap->im_numfree); | 1024 | atomic_inc(&imap->im_numfree); |
| 1026 | 1025 | ||
| @@ -1219,9 +1218,8 @@ int diFree(struct inode *ip) | |||
| 1219 | /* update the number of free inodes and number of free extents | 1218 | /* update the number of free inodes and number of free extents |
| 1220 | * for the iag. | 1219 | * for the iag. |
| 1221 | */ | 1220 | */ |
| 1222 | iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - | 1221 | le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1)); |
| 1223 | (INOSPEREXT - 1)); | 1222 | le32_add_cpu(&iagp->nfreeexts, 1); |
| 1224 | iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) + 1); | ||
| 1225 | 1223 | ||
| 1226 | /* update the number of free inodes and backed inodes | 1224 | /* update the number of free inodes and backed inodes |
| 1227 | * at the ag and inode map level. | 1225 | * at the ag and inode map level. |
| @@ -2124,7 +2122,7 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) | |||
| 2124 | /* update the free inode count at the iag, ag, inode | 2122 | /* update the free inode count at the iag, ag, inode |
| 2125 | * map levels. | 2123 | * map levels. |
| 2126 | */ | 2124 | */ |
| 2127 | iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - 1); | 2125 | le32_add_cpu(&iagp->nfreeinos, -1); |
| 2128 | imap->im_agctl[agno].numfree -= 1; | 2126 | imap->im_agctl[agno].numfree -= 1; |
| 2129 | atomic_dec(&imap->im_numfree); | 2127 | atomic_dec(&imap->im_numfree); |
| 2130 | 2128 | ||
| @@ -2378,9 +2376,8 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) | |||
| 2378 | /* update the free inode and free extent counts for the | 2376 | /* update the free inode and free extent counts for the |
| 2379 | * iag. | 2377 | * iag. |
| 2380 | */ | 2378 | */ |
| 2381 | iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + | 2379 | le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1)); |
| 2382 | (INOSPEREXT - 1)); | 2380 | le32_add_cpu(&iagp->nfreeexts, -1); |
| 2383 | iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) - 1); | ||
| 2384 | 2381 | ||
| 2385 | /* update the free and backed inode counts for the ag. | 2382 | /* update the free and backed inode counts for the ag. |
| 2386 | */ | 2383 | */ |
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index a000aaa75136..5a61ebf2cbcc 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c | |||
| @@ -905,8 +905,7 @@ int xtInsert(tid_t tid, /* transaction id */ | |||
| 905 | XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); | 905 | XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); |
| 906 | 906 | ||
| 907 | /* advance next available entry index */ | 907 | /* advance next available entry index */ |
| 908 | p->header.nextindex = | 908 | le16_add_cpu(&p->header.nextindex, 1); |
| 909 | cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); | ||
| 910 | 909 | ||
| 911 | /* Don't log it if there are no links to the file */ | 910 | /* Don't log it if there are no links to the file */ |
| 912 | if (!test_cflag(COMMIT_Nolink, ip)) { | 911 | if (!test_cflag(COMMIT_Nolink, ip)) { |
| @@ -997,8 +996,7 @@ xtSplitUp(tid_t tid, | |||
| 997 | split->addr); | 996 | split->addr); |
| 998 | 997 | ||
| 999 | /* advance next available entry index */ | 998 | /* advance next available entry index */ |
| 1000 | sp->header.nextindex = | 999 | le16_add_cpu(&sp->header.nextindex, 1); |
| 1001 | cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1); | ||
| 1002 | 1000 | ||
| 1003 | /* Don't log it if there are no links to the file */ | 1001 | /* Don't log it if there are no links to the file */ |
| 1004 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1002 | if (!test_cflag(COMMIT_Nolink, ip)) { |
| @@ -1167,9 +1165,7 @@ xtSplitUp(tid_t tid, | |||
| 1167 | JFS_SBI(ip->i_sb)->nbperpage, rcbn); | 1165 | JFS_SBI(ip->i_sb)->nbperpage, rcbn); |
| 1168 | 1166 | ||
| 1169 | /* advance next available entry index. */ | 1167 | /* advance next available entry index. */ |
| 1170 | sp->header.nextindex = | 1168 | le16_add_cpu(&sp->header.nextindex, 1); |
| 1171 | cpu_to_le16(le16_to_cpu(sp->header.nextindex) + | ||
| 1172 | 1); | ||
| 1173 | 1169 | ||
| 1174 | /* Don't log it if there are no links to the file */ | 1170 | /* Don't log it if there are no links to the file */ |
| 1175 | if (!test_cflag(COMMIT_Nolink, ip)) { | 1171 | if (!test_cflag(COMMIT_Nolink, ip)) { |
| @@ -1738,8 +1734,7 @@ int xtExtend(tid_t tid, /* transaction id */ | |||
| 1738 | XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); | 1734 | XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); |
| 1739 | 1735 | ||
| 1740 | /* advance next available entry index */ | 1736 | /* advance next available entry index */ |
| 1741 | p->header.nextindex = | 1737 | le16_add_cpu(&p->header.nextindex, 1); |
| 1742 | cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); | ||
| 1743 | } | 1738 | } |
| 1744 | 1739 | ||
| 1745 | /* get back old entry */ | 1740 | /* get back old entry */ |
| @@ -1905,8 +1900,7 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", | |||
| 1905 | XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); | 1900 | XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); |
| 1906 | 1901 | ||
| 1907 | /* advance next available entry index */ | 1902 | /* advance next available entry index */ |
| 1908 | p->header.nextindex = | 1903 | le16_add_cpu(&p->header.nextindex, 1); |
| 1909 | cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); | ||
| 1910 | } | 1904 | } |
| 1911 | 1905 | ||
| 1912 | /* get back old XAD */ | 1906 | /* get back old XAD */ |
| @@ -2567,8 +2561,7 @@ int xtAppend(tid_t tid, /* transaction id */ | |||
| 2567 | XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); | 2561 | XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); |
| 2568 | 2562 | ||
| 2569 | /* advance next available entry index */ | 2563 | /* advance next available entry index */ |
| 2570 | p->header.nextindex = | 2564 | le16_add_cpu(&p->header.nextindex, 1); |
| 2571 | cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); | ||
| 2572 | 2565 | ||
| 2573 | xtlck->lwm.offset = | 2566 | xtlck->lwm.offset = |
| 2574 | (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; | 2567 | (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; |
| @@ -2631,8 +2624,7 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) | |||
| 2631 | * delete the entry from the leaf page | 2624 | * delete the entry from the leaf page |
| 2632 | */ | 2625 | */ |
| 2633 | nextindex = le16_to_cpu(p->header.nextindex); | 2626 | nextindex = le16_to_cpu(p->header.nextindex); |
| 2634 | p->header.nextindex = | 2627 | le16_add_cpu(&p->header.nextindex, -1); |
| 2635 | cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1); | ||
| 2636 | 2628 | ||
| 2637 | /* | 2629 | /* |
| 2638 | * if the leaf page bocome empty, free the page | 2630 | * if the leaf page bocome empty, free the page |
| @@ -2795,9 +2787,7 @@ xtDeleteUp(tid_t tid, struct inode *ip, | |||
| 2795 | (nextindex - index - | 2787 | (nextindex - index - |
| 2796 | 1) << L2XTSLOTSIZE); | 2788 | 1) << L2XTSLOTSIZE); |
| 2797 | 2789 | ||
| 2798 | p->header.nextindex = | 2790 | le16_add_cpu(&p->header.nextindex, -1); |
| 2799 | cpu_to_le16(le16_to_cpu(p->header.nextindex) - | ||
| 2800 | 1); | ||
| 2801 | jfs_info("xtDeleteUp(entry): 0x%lx[%d]", | 2791 | jfs_info("xtDeleteUp(entry): 0x%lx[%d]", |
| 2802 | (ulong) parent->bn, index); | 2792 | (ulong) parent->bn, index); |
| 2803 | } | 2793 | } |
diff --git a/fs/locks.c b/fs/locks.c index d83fab1b77b5..43c0af21a0c5 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
| @@ -1801,17 +1801,21 @@ again: | |||
| 1801 | if (error) | 1801 | if (error) |
| 1802 | goto out; | 1802 | goto out; |
| 1803 | 1803 | ||
| 1804 | for (;;) { | 1804 | if (filp->f_op && filp->f_op->lock != NULL) |
| 1805 | error = vfs_lock_file(filp, cmd, file_lock, NULL); | 1805 | error = filp->f_op->lock(filp, cmd, file_lock); |
| 1806 | if (error != -EAGAIN || cmd == F_SETLK) | 1806 | else { |
| 1807 | break; | 1807 | for (;;) { |
| 1808 | error = wait_event_interruptible(file_lock->fl_wait, | 1808 | error = posix_lock_file(filp, file_lock, NULL); |
| 1809 | !file_lock->fl_next); | 1809 | if (error != -EAGAIN || cmd == F_SETLK) |
| 1810 | if (!error) | 1810 | break; |
| 1811 | continue; | 1811 | error = wait_event_interruptible(file_lock->fl_wait, |
| 1812 | !file_lock->fl_next); | ||
| 1813 | if (!error) | ||
| 1814 | continue; | ||
| 1812 | 1815 | ||
| 1813 | locks_delete_block(file_lock); | 1816 | locks_delete_block(file_lock); |
| 1814 | break; | 1817 | break; |
| 1818 | } | ||
| 1815 | } | 1819 | } |
| 1816 | 1820 | ||
| 1817 | /* | 1821 | /* |
| @@ -1925,17 +1929,21 @@ again: | |||
| 1925 | if (error) | 1929 | if (error) |
| 1926 | goto out; | 1930 | goto out; |
| 1927 | 1931 | ||
| 1928 | for (;;) { | 1932 | if (filp->f_op && filp->f_op->lock != NULL) |
| 1929 | error = vfs_lock_file(filp, cmd, file_lock, NULL); | 1933 | error = filp->f_op->lock(filp, cmd, file_lock); |
| 1930 | if (error != -EAGAIN || cmd == F_SETLK64) | 1934 | else { |
| 1931 | break; | 1935 | for (;;) { |
| 1932 | error = wait_event_interruptible(file_lock->fl_wait, | 1936 | error = posix_lock_file(filp, file_lock, NULL); |
| 1933 | !file_lock->fl_next); | 1937 | if (error != -EAGAIN || cmd == F_SETLK64) |
| 1934 | if (!error) | 1938 | break; |
| 1935 | continue; | 1939 | error = wait_event_interruptible(file_lock->fl_wait, |
| 1940 | !file_lock->fl_next); | ||
| 1941 | if (!error) | ||
| 1942 | continue; | ||
| 1936 | 1943 | ||
| 1937 | locks_delete_block(file_lock); | 1944 | locks_delete_block(file_lock); |
| 1938 | break; | 1945 | break; |
| 1946 | } | ||
| 1939 | } | 1947 | } |
| 1940 | 1948 | ||
| 1941 | /* | 1949 | /* |
diff --git a/fs/mbcache.c b/fs/mbcache.c index eb31b73e7d69..ec88ff3d04a9 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c | |||
| @@ -399,11 +399,11 @@ mb_cache_destroy(struct mb_cache *cache) | |||
| 399 | * if no more memory was available. | 399 | * if no more memory was available. |
| 400 | */ | 400 | */ |
| 401 | struct mb_cache_entry * | 401 | struct mb_cache_entry * |
| 402 | mb_cache_entry_alloc(struct mb_cache *cache) | 402 | mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) |
| 403 | { | 403 | { |
| 404 | struct mb_cache_entry *ce; | 404 | struct mb_cache_entry *ce; |
| 405 | 405 | ||
| 406 | ce = kmem_cache_alloc(cache->c_entry_cache, GFP_KERNEL); | 406 | ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); |
| 407 | if (ce) { | 407 | if (ce) { |
| 408 | atomic_inc(&cache->c_entry_count); | 408 | atomic_inc(&cache->c_entry_count); |
| 409 | INIT_LIST_HEAD(&ce->e_lru_list); | 409 | INIT_LIST_HEAD(&ce->e_lru_list); |
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 4d4ce48bb42c..f6956de56fdb 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile | |||
| @@ -2,7 +2,12 @@ EXTRA_CFLAGS += -Ifs/ocfs2 | |||
| 2 | 2 | ||
| 3 | EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES | 3 | EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES |
| 4 | 4 | ||
| 5 | obj-$(CONFIG_OCFS2_FS) += ocfs2.o | 5 | obj-$(CONFIG_OCFS2_FS) += \ |
| 6 | ocfs2.o \ | ||
| 7 | ocfs2_stackglue.o | ||
| 8 | |||
| 9 | obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_stack_o2cb.o | ||
| 10 | obj-$(CONFIG_OCFS2_FS_USERSPACE_CLUSTER) += ocfs2_stack_user.o | ||
| 6 | 11 | ||
| 7 | ocfs2-objs := \ | 12 | ocfs2-objs := \ |
| 8 | alloc.o \ | 13 | alloc.o \ |
| @@ -31,5 +36,10 @@ ocfs2-objs := \ | |||
| 31 | uptodate.o \ | 36 | uptodate.o \ |
| 32 | ver.o | 37 | ver.o |
| 33 | 38 | ||
| 39 | ocfs2_stackglue-objs := stackglue.o | ||
| 40 | ocfs2_stack_o2cb-objs := stack_o2cb.o | ||
| 41 | ocfs2_stack_user-objs := stack_user.o | ||
| 42 | |||
| 43 | # cluster/ is always needed when OCFS2_FS for masklog support | ||
| 34 | obj-$(CONFIG_OCFS2_FS) += cluster/ | 44 | obj-$(CONFIG_OCFS2_FS) += cluster/ |
| 35 | obj-$(CONFIG_OCFS2_FS) += dlm/ | 45 | obj-$(CONFIG_OCFS2_FS_O2CB) += dlm/ |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 447206eb5c2e..41f84c92094f 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
| @@ -1029,8 +1029,7 @@ static void ocfs2_rotate_leaf(struct ocfs2_extent_list *el, | |||
| 1029 | BUG_ON(!next_free); | 1029 | BUG_ON(!next_free); |
| 1030 | 1030 | ||
| 1031 | /* The tree code before us didn't allow enough room in the leaf. */ | 1031 | /* The tree code before us didn't allow enough room in the leaf. */ |
| 1032 | if (el->l_next_free_rec == el->l_count && !has_empty) | 1032 | BUG_ON(el->l_next_free_rec == el->l_count && !has_empty); |
| 1033 | BUG(); | ||
| 1034 | 1033 | ||
| 1035 | /* | 1034 | /* |
| 1036 | * The easiest way to approach this is to just remove the | 1035 | * The easiest way to approach this is to just remove the |
| @@ -1450,6 +1449,8 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el, | |||
| 1450 | * - When our insert into the right path leaf is at the leftmost edge | 1449 | * - When our insert into the right path leaf is at the leftmost edge |
| 1451 | * and requires an update of the path immediately to it's left. This | 1450 | * and requires an update of the path immediately to it's left. This |
| 1452 | * can occur at the end of some types of rotation and appending inserts. | 1451 | * can occur at the end of some types of rotation and appending inserts. |
| 1452 | * - When we've adjusted the last extent record in the left path leaf and the | ||
| 1453 | * 1st extent record in the right path leaf during cross extent block merge. | ||
| 1453 | */ | 1454 | */ |
| 1454 | static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, | 1455 | static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, |
| 1455 | struct ocfs2_path *left_path, | 1456 | struct ocfs2_path *left_path, |
| @@ -2712,24 +2713,147 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el, | |||
| 2712 | } | 2713 | } |
| 2713 | } | 2714 | } |
| 2714 | 2715 | ||
| 2716 | static int ocfs2_get_right_path(struct inode *inode, | ||
| 2717 | struct ocfs2_path *left_path, | ||
| 2718 | struct ocfs2_path **ret_right_path) | ||
| 2719 | { | ||
| 2720 | int ret; | ||
| 2721 | u32 right_cpos; | ||
| 2722 | struct ocfs2_path *right_path = NULL; | ||
| 2723 | struct ocfs2_extent_list *left_el; | ||
| 2724 | |||
| 2725 | *ret_right_path = NULL; | ||
| 2726 | |||
| 2727 | /* This function shouldn't be called for non-trees. */ | ||
| 2728 | BUG_ON(left_path->p_tree_depth == 0); | ||
| 2729 | |||
| 2730 | left_el = path_leaf_el(left_path); | ||
| 2731 | BUG_ON(left_el->l_next_free_rec != left_el->l_count); | ||
| 2732 | |||
| 2733 | ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, | ||
| 2734 | &right_cpos); | ||
| 2735 | if (ret) { | ||
| 2736 | mlog_errno(ret); | ||
| 2737 | goto out; | ||
| 2738 | } | ||
| 2739 | |||
| 2740 | /* This function shouldn't be called for the rightmost leaf. */ | ||
| 2741 | BUG_ON(right_cpos == 0); | ||
| 2742 | |||
| 2743 | right_path = ocfs2_new_path(path_root_bh(left_path), | ||
| 2744 | path_root_el(left_path)); | ||
| 2745 | if (!right_path) { | ||
| 2746 | ret = -ENOMEM; | ||
| 2747 | mlog_errno(ret); | ||
| 2748 | goto out; | ||
| 2749 | } | ||
| 2750 | |||
| 2751 | ret = ocfs2_find_path(inode, right_path, right_cpos); | ||
| 2752 | if (ret) { | ||
| 2753 | mlog_errno(ret); | ||
| 2754 | goto out; | ||
| 2755 | } | ||
| 2756 | |||
| 2757 | *ret_right_path = right_path; | ||
| 2758 | out: | ||
| 2759 | if (ret) | ||
| 2760 | ocfs2_free_path(right_path); | ||
| 2761 | return ret; | ||
| 2762 | } | ||
| 2763 | |||
| 2715 | /* | 2764 | /* |
| 2716 | * Remove split_rec clusters from the record at index and merge them | 2765 | * Remove split_rec clusters from the record at index and merge them |
| 2717 | * onto the beginning of the record at index + 1. | 2766 | * onto the beginning of the record "next" to it. |
| 2767 | * For index < l_count - 1, the next means the extent rec at index + 1. | ||
| 2768 | * For index == l_count - 1, the "next" means the 1st extent rec of the | ||
| 2769 | * next extent block. | ||
| 2718 | */ | 2770 | */ |
| 2719 | static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, | 2771 | static int ocfs2_merge_rec_right(struct inode *inode, |
| 2720 | handle_t *handle, | 2772 | struct ocfs2_path *left_path, |
| 2721 | struct ocfs2_extent_rec *split_rec, | 2773 | handle_t *handle, |
| 2722 | struct ocfs2_extent_list *el, int index) | 2774 | struct ocfs2_extent_rec *split_rec, |
| 2775 | int index) | ||
| 2723 | { | 2776 | { |
| 2724 | int ret; | 2777 | int ret, next_free, i; |
| 2725 | unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); | 2778 | unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); |
| 2726 | struct ocfs2_extent_rec *left_rec; | 2779 | struct ocfs2_extent_rec *left_rec; |
| 2727 | struct ocfs2_extent_rec *right_rec; | 2780 | struct ocfs2_extent_rec *right_rec; |
| 2781 | struct ocfs2_extent_list *right_el; | ||
| 2782 | struct ocfs2_path *right_path = NULL; | ||
| 2783 | int subtree_index = 0; | ||
| 2784 | struct ocfs2_extent_list *el = path_leaf_el(left_path); | ||
| 2785 | struct buffer_head *bh = path_leaf_bh(left_path); | ||
| 2786 | struct buffer_head *root_bh = NULL; | ||
| 2728 | 2787 | ||
| 2729 | BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); | 2788 | BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); |
| 2730 | |||
| 2731 | left_rec = &el->l_recs[index]; | 2789 | left_rec = &el->l_recs[index]; |
| 2732 | right_rec = &el->l_recs[index + 1]; | 2790 | |
| 2791 | if (index == le16_to_cpu(el->l_next_free_rec - 1) && | ||
| 2792 | le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) { | ||
| 2793 | /* we meet with a cross extent block merge. */ | ||
| 2794 | ret = ocfs2_get_right_path(inode, left_path, &right_path); | ||
| 2795 | if (ret) { | ||
| 2796 | mlog_errno(ret); | ||
| 2797 | goto out; | ||
| 2798 | } | ||
| 2799 | |||
| 2800 | right_el = path_leaf_el(right_path); | ||
| 2801 | next_free = le16_to_cpu(right_el->l_next_free_rec); | ||
| 2802 | BUG_ON(next_free <= 0); | ||
| 2803 | right_rec = &right_el->l_recs[0]; | ||
| 2804 | if (ocfs2_is_empty_extent(right_rec)) { | ||
| 2805 | BUG_ON(le16_to_cpu(next_free) <= 1); | ||
| 2806 | right_rec = &right_el->l_recs[1]; | ||
| 2807 | } | ||
| 2808 | |||
| 2809 | BUG_ON(le32_to_cpu(left_rec->e_cpos) + | ||
| 2810 | le16_to_cpu(left_rec->e_leaf_clusters) != | ||
| 2811 | le32_to_cpu(right_rec->e_cpos)); | ||
| 2812 | |||
| 2813 | subtree_index = ocfs2_find_subtree_root(inode, | ||
| 2814 | left_path, right_path); | ||
| 2815 | |||
| 2816 | ret = ocfs2_extend_rotate_transaction(handle, subtree_index, | ||
| 2817 | handle->h_buffer_credits, | ||
| 2818 | right_path); | ||
| 2819 | if (ret) { | ||
| 2820 | mlog_errno(ret); | ||
| 2821 | goto out; | ||
| 2822 | } | ||
| 2823 | |||
| 2824 | root_bh = left_path->p_node[subtree_index].bh; | ||
| 2825 | BUG_ON(root_bh != right_path->p_node[subtree_index].bh); | ||
| 2826 | |||
| 2827 | ret = ocfs2_journal_access(handle, inode, root_bh, | ||
| 2828 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2829 | if (ret) { | ||
| 2830 | mlog_errno(ret); | ||
| 2831 | goto out; | ||
| 2832 | } | ||
| 2833 | |||
| 2834 | for (i = subtree_index + 1; | ||
| 2835 | i < path_num_items(right_path); i++) { | ||
| 2836 | ret = ocfs2_journal_access(handle, inode, | ||
| 2837 | right_path->p_node[i].bh, | ||
| 2838 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2839 | if (ret) { | ||
| 2840 | mlog_errno(ret); | ||
| 2841 | goto out; | ||
| 2842 | } | ||
| 2843 | |||
| 2844 | ret = ocfs2_journal_access(handle, inode, | ||
| 2845 | left_path->p_node[i].bh, | ||
| 2846 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2847 | if (ret) { | ||
| 2848 | mlog_errno(ret); | ||
| 2849 | goto out; | ||
| 2850 | } | ||
| 2851 | } | ||
| 2852 | |||
| 2853 | } else { | ||
| 2854 | BUG_ON(index == le16_to_cpu(el->l_next_free_rec) - 1); | ||
| 2855 | right_rec = &el->l_recs[index + 1]; | ||
| 2856 | } | ||
| 2733 | 2857 | ||
| 2734 | ret = ocfs2_journal_access(handle, inode, bh, | 2858 | ret = ocfs2_journal_access(handle, inode, bh, |
| 2735 | OCFS2_JOURNAL_ACCESS_WRITE); | 2859 | OCFS2_JOURNAL_ACCESS_WRITE); |
| @@ -2751,30 +2875,156 @@ static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, | |||
| 2751 | if (ret) | 2875 | if (ret) |
| 2752 | mlog_errno(ret); | 2876 | mlog_errno(ret); |
| 2753 | 2877 | ||
| 2878 | if (right_path) { | ||
| 2879 | ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); | ||
| 2880 | if (ret) | ||
| 2881 | mlog_errno(ret); | ||
| 2882 | |||
| 2883 | ocfs2_complete_edge_insert(inode, handle, left_path, | ||
| 2884 | right_path, subtree_index); | ||
| 2885 | } | ||
| 2886 | out: | ||
| 2887 | if (right_path) | ||
| 2888 | ocfs2_free_path(right_path); | ||
| 2889 | return ret; | ||
| 2890 | } | ||
| 2891 | |||
| 2892 | static int ocfs2_get_left_path(struct inode *inode, | ||
| 2893 | struct ocfs2_path *right_path, | ||
| 2894 | struct ocfs2_path **ret_left_path) | ||
| 2895 | { | ||
| 2896 | int ret; | ||
| 2897 | u32 left_cpos; | ||
| 2898 | struct ocfs2_path *left_path = NULL; | ||
| 2899 | |||
| 2900 | *ret_left_path = NULL; | ||
| 2901 | |||
| 2902 | /* This function shouldn't be called for non-trees. */ | ||
| 2903 | BUG_ON(right_path->p_tree_depth == 0); | ||
| 2904 | |||
| 2905 | ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, | ||
| 2906 | right_path, &left_cpos); | ||
| 2907 | if (ret) { | ||
| 2908 | mlog_errno(ret); | ||
| 2909 | goto out; | ||
| 2910 | } | ||
| 2911 | |||
| 2912 | /* This function shouldn't be called for the leftmost leaf. */ | ||
| 2913 | BUG_ON(left_cpos == 0); | ||
| 2914 | |||
| 2915 | left_path = ocfs2_new_path(path_root_bh(right_path), | ||
| 2916 | path_root_el(right_path)); | ||
| 2917 | if (!left_path) { | ||
| 2918 | ret = -ENOMEM; | ||
| 2919 | mlog_errno(ret); | ||
| 2920 | goto out; | ||
| 2921 | } | ||
| 2922 | |||
| 2923 | ret = ocfs2_find_path(inode, left_path, left_cpos); | ||
| 2924 | if (ret) { | ||
| 2925 | mlog_errno(ret); | ||
| 2926 | goto out; | ||
| 2927 | } | ||
| 2928 | |||
| 2929 | *ret_left_path = left_path; | ||
| 2754 | out: | 2930 | out: |
| 2931 | if (ret) | ||
| 2932 | ocfs2_free_path(left_path); | ||
| 2755 | return ret; | 2933 | return ret; |
| 2756 | } | 2934 | } |
| 2757 | 2935 | ||
| 2758 | /* | 2936 | /* |
| 2759 | * Remove split_rec clusters from the record at index and merge them | 2937 | * Remove split_rec clusters from the record at index and merge them |
| 2760 | * onto the tail of the record at index - 1. | 2938 | * onto the tail of the record "before" it. |
| 2939 | * For index > 0, the "before" means the extent rec at index - 1. | ||
| 2940 | * | ||
| 2941 | * For index == 0, the "before" means the last record of the previous | ||
| 2942 | * extent block. And there is also a situation that we may need to | ||
| 2943 | * remove the rightmost leaf extent block in the right_path and change | ||
| 2944 | * the right path to indicate the new rightmost path. | ||
| 2761 | */ | 2945 | */ |
| 2762 | static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, | 2946 | static int ocfs2_merge_rec_left(struct inode *inode, |
| 2947 | struct ocfs2_path *right_path, | ||
| 2763 | handle_t *handle, | 2948 | handle_t *handle, |
| 2764 | struct ocfs2_extent_rec *split_rec, | 2949 | struct ocfs2_extent_rec *split_rec, |
| 2765 | struct ocfs2_extent_list *el, int index) | 2950 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
| 2951 | int index) | ||
| 2766 | { | 2952 | { |
| 2767 | int ret, has_empty_extent = 0; | 2953 | int ret, i, subtree_index = 0, has_empty_extent = 0; |
| 2768 | unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); | 2954 | unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); |
| 2769 | struct ocfs2_extent_rec *left_rec; | 2955 | struct ocfs2_extent_rec *left_rec; |
| 2770 | struct ocfs2_extent_rec *right_rec; | 2956 | struct ocfs2_extent_rec *right_rec; |
| 2957 | struct ocfs2_extent_list *el = path_leaf_el(right_path); | ||
| 2958 | struct buffer_head *bh = path_leaf_bh(right_path); | ||
| 2959 | struct buffer_head *root_bh = NULL; | ||
| 2960 | struct ocfs2_path *left_path = NULL; | ||
| 2961 | struct ocfs2_extent_list *left_el; | ||
| 2771 | 2962 | ||
| 2772 | BUG_ON(index <= 0); | 2963 | BUG_ON(index < 0); |
| 2773 | 2964 | ||
| 2774 | left_rec = &el->l_recs[index - 1]; | ||
| 2775 | right_rec = &el->l_recs[index]; | 2965 | right_rec = &el->l_recs[index]; |
| 2776 | if (ocfs2_is_empty_extent(&el->l_recs[0])) | 2966 | if (index == 0) { |
| 2777 | has_empty_extent = 1; | 2967 | /* we meet with a cross extent block merge. */ |
| 2968 | ret = ocfs2_get_left_path(inode, right_path, &left_path); | ||
| 2969 | if (ret) { | ||
| 2970 | mlog_errno(ret); | ||
| 2971 | goto out; | ||
| 2972 | } | ||
| 2973 | |||
| 2974 | left_el = path_leaf_el(left_path); | ||
| 2975 | BUG_ON(le16_to_cpu(left_el->l_next_free_rec) != | ||
| 2976 | le16_to_cpu(left_el->l_count)); | ||
| 2977 | |||
| 2978 | left_rec = &left_el->l_recs[ | ||
| 2979 | le16_to_cpu(left_el->l_next_free_rec) - 1]; | ||
| 2980 | BUG_ON(le32_to_cpu(left_rec->e_cpos) + | ||
| 2981 | le16_to_cpu(left_rec->e_leaf_clusters) != | ||
| 2982 | le32_to_cpu(split_rec->e_cpos)); | ||
| 2983 | |||
| 2984 | subtree_index = ocfs2_find_subtree_root(inode, | ||
| 2985 | left_path, right_path); | ||
| 2986 | |||
| 2987 | ret = ocfs2_extend_rotate_transaction(handle, subtree_index, | ||
| 2988 | handle->h_buffer_credits, | ||
| 2989 | left_path); | ||
| 2990 | if (ret) { | ||
| 2991 | mlog_errno(ret); | ||
| 2992 | goto out; | ||
| 2993 | } | ||
| 2994 | |||
| 2995 | root_bh = left_path->p_node[subtree_index].bh; | ||
| 2996 | BUG_ON(root_bh != right_path->p_node[subtree_index].bh); | ||
| 2997 | |||
| 2998 | ret = ocfs2_journal_access(handle, inode, root_bh, | ||
| 2999 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 3000 | if (ret) { | ||
| 3001 | mlog_errno(ret); | ||
| 3002 | goto out; | ||
| 3003 | } | ||
| 3004 | |||
| 3005 | for (i = subtree_index + 1; | ||
| 3006 | i < path_num_items(right_path); i++) { | ||
| 3007 | ret = ocfs2_journal_access(handle, inode, | ||
| 3008 | right_path->p_node[i].bh, | ||
| 3009 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 3010 | if (ret) { | ||
| 3011 | mlog_errno(ret); | ||
| 3012 | goto out; | ||
| 3013 | } | ||
| 3014 | |||
| 3015 | ret = ocfs2_journal_access(handle, inode, | ||
| 3016 | left_path->p_node[i].bh, | ||
| 3017 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 3018 | if (ret) { | ||
| 3019 | mlog_errno(ret); | ||
| 3020 | goto out; | ||
| 3021 | } | ||
| 3022 | } | ||
| 3023 | } else { | ||
| 3024 | left_rec = &el->l_recs[index - 1]; | ||
| 3025 | if (ocfs2_is_empty_extent(&el->l_recs[0])) | ||
| 3026 | has_empty_extent = 1; | ||
| 3027 | } | ||
| 2778 | 3028 | ||
| 2779 | ret = ocfs2_journal_access(handle, inode, bh, | 3029 | ret = ocfs2_journal_access(handle, inode, bh, |
| 2780 | OCFS2_JOURNAL_ACCESS_WRITE); | 3030 | OCFS2_JOURNAL_ACCESS_WRITE); |
| @@ -2790,9 +3040,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, | |||
| 2790 | *left_rec = *split_rec; | 3040 | *left_rec = *split_rec; |
| 2791 | 3041 | ||
| 2792 | has_empty_extent = 0; | 3042 | has_empty_extent = 0; |
| 2793 | } else { | 3043 | } else |
| 2794 | le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters); | 3044 | le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters); |
| 2795 | } | ||
| 2796 | 3045 | ||
| 2797 | le32_add_cpu(&right_rec->e_cpos, split_clusters); | 3046 | le32_add_cpu(&right_rec->e_cpos, split_clusters); |
| 2798 | le64_add_cpu(&right_rec->e_blkno, | 3047 | le64_add_cpu(&right_rec->e_blkno, |
| @@ -2805,13 +3054,44 @@ static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, | |||
| 2805 | if (ret) | 3054 | if (ret) |
| 2806 | mlog_errno(ret); | 3055 | mlog_errno(ret); |
| 2807 | 3056 | ||
| 3057 | if (left_path) { | ||
| 3058 | ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); | ||
| 3059 | if (ret) | ||
| 3060 | mlog_errno(ret); | ||
| 3061 | |||
| 3062 | /* | ||
| 3063 | * In the situation that the right_rec is empty and the extent | ||
| 3064 | * block is empty also, ocfs2_complete_edge_insert can't handle | ||
| 3065 | * it and we need to delete the right extent block. | ||
| 3066 | */ | ||
| 3067 | if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 && | ||
| 3068 | le16_to_cpu(el->l_next_free_rec) == 1) { | ||
| 3069 | |||
| 3070 | ret = ocfs2_remove_rightmost_path(inode, handle, | ||
| 3071 | right_path, dealloc); | ||
| 3072 | if (ret) { | ||
| 3073 | mlog_errno(ret); | ||
| 3074 | goto out; | ||
| 3075 | } | ||
| 3076 | |||
| 3077 | /* Now the rightmost extent block has been deleted. | ||
| 3078 | * So we use the new rightmost path. | ||
| 3079 | */ | ||
| 3080 | ocfs2_mv_path(right_path, left_path); | ||
| 3081 | left_path = NULL; | ||
| 3082 | } else | ||
| 3083 | ocfs2_complete_edge_insert(inode, handle, left_path, | ||
| 3084 | right_path, subtree_index); | ||
| 3085 | } | ||
| 2808 | out: | 3086 | out: |
| 3087 | if (left_path) | ||
| 3088 | ocfs2_free_path(left_path); | ||
| 2809 | return ret; | 3089 | return ret; |
| 2810 | } | 3090 | } |
| 2811 | 3091 | ||
| 2812 | static int ocfs2_try_to_merge_extent(struct inode *inode, | 3092 | static int ocfs2_try_to_merge_extent(struct inode *inode, |
| 2813 | handle_t *handle, | 3093 | handle_t *handle, |
| 2814 | struct ocfs2_path *left_path, | 3094 | struct ocfs2_path *path, |
| 2815 | int split_index, | 3095 | int split_index, |
| 2816 | struct ocfs2_extent_rec *split_rec, | 3096 | struct ocfs2_extent_rec *split_rec, |
| 2817 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 3097 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
| @@ -2819,7 +3099,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 2819 | 3099 | ||
| 2820 | { | 3100 | { |
| 2821 | int ret = 0; | 3101 | int ret = 0; |
| 2822 | struct ocfs2_extent_list *el = path_leaf_el(left_path); | 3102 | struct ocfs2_extent_list *el = path_leaf_el(path); |
| 2823 | struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; | 3103 | struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; |
| 2824 | 3104 | ||
| 2825 | BUG_ON(ctxt->c_contig_type == CONTIG_NONE); | 3105 | BUG_ON(ctxt->c_contig_type == CONTIG_NONE); |
| @@ -2832,7 +3112,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 2832 | * extents - having more than one in a leaf is | 3112 | * extents - having more than one in a leaf is |
| 2833 | * illegal. | 3113 | * illegal. |
| 2834 | */ | 3114 | */ |
| 2835 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, | 3115 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
| 2836 | dealloc); | 3116 | dealloc); |
| 2837 | if (ret) { | 3117 | if (ret) { |
| 2838 | mlog_errno(ret); | 3118 | mlog_errno(ret); |
| @@ -2847,7 +3127,6 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 2847 | * Left-right contig implies this. | 3127 | * Left-right contig implies this. |
| 2848 | */ | 3128 | */ |
| 2849 | BUG_ON(!ctxt->c_split_covers_rec); | 3129 | BUG_ON(!ctxt->c_split_covers_rec); |
| 2850 | BUG_ON(split_index == 0); | ||
| 2851 | 3130 | ||
| 2852 | /* | 3131 | /* |
| 2853 | * Since the leftright insert always covers the entire | 3132 | * Since the leftright insert always covers the entire |
| @@ -2858,9 +3137,14 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 2858 | * Since the adding of an empty extent shifts | 3137 | * Since the adding of an empty extent shifts |
| 2859 | * everything back to the right, there's no need to | 3138 | * everything back to the right, there's no need to |
| 2860 | * update split_index here. | 3139 | * update split_index here. |
| 3140 | * | ||
| 3141 | * When the split_index is zero, we need to merge it to the | ||
| 3142 | * prevoius extent block. It is more efficient and easier | ||
| 3143 | * if we do merge_right first and merge_left later. | ||
| 2861 | */ | 3144 | */ |
| 2862 | ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path), | 3145 | ret = ocfs2_merge_rec_right(inode, path, |
| 2863 | handle, split_rec, el, split_index); | 3146 | handle, split_rec, |
| 3147 | split_index); | ||
| 2864 | if (ret) { | 3148 | if (ret) { |
| 2865 | mlog_errno(ret); | 3149 | mlog_errno(ret); |
| 2866 | goto out; | 3150 | goto out; |
| @@ -2871,32 +3155,30 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 2871 | */ | 3155 | */ |
| 2872 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); | 3156 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); |
| 2873 | 3157 | ||
| 2874 | /* | 3158 | /* The merge left us with an empty extent, remove it. */ |
| 2875 | * The left merge left us with an empty extent, remove | 3159 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); |
| 2876 | * it. | ||
| 2877 | */ | ||
| 2878 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, dealloc); | ||
| 2879 | if (ret) { | 3160 | if (ret) { |
| 2880 | mlog_errno(ret); | 3161 | mlog_errno(ret); |
| 2881 | goto out; | 3162 | goto out; |
| 2882 | } | 3163 | } |
| 2883 | split_index--; | 3164 | |
| 2884 | rec = &el->l_recs[split_index]; | 3165 | rec = &el->l_recs[split_index]; |
| 2885 | 3166 | ||
| 2886 | /* | 3167 | /* |
| 2887 | * Note that we don't pass split_rec here on purpose - | 3168 | * Note that we don't pass split_rec here on purpose - |
| 2888 | * we've merged it into the left side. | 3169 | * we've merged it into the rec already. |
| 2889 | */ | 3170 | */ |
| 2890 | ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path), | 3171 | ret = ocfs2_merge_rec_left(inode, path, |
| 2891 | handle, rec, el, split_index); | 3172 | handle, rec, |
| 3173 | dealloc, | ||
| 3174 | split_index); | ||
| 3175 | |||
| 2892 | if (ret) { | 3176 | if (ret) { |
| 2893 | mlog_errno(ret); | 3177 | mlog_errno(ret); |
| 2894 | goto out; | 3178 | goto out; |
| 2895 | } | 3179 | } |
| 2896 | 3180 | ||
| 2897 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); | 3181 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
| 2898 | |||
| 2899 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, | ||
| 2900 | dealloc); | 3182 | dealloc); |
| 2901 | /* | 3183 | /* |
| 2902 | * Error from this last rotate is not critical, so | 3184 | * Error from this last rotate is not critical, so |
| @@ -2915,8 +3197,9 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 2915 | */ | 3197 | */ |
| 2916 | if (ctxt->c_contig_type == CONTIG_RIGHT) { | 3198 | if (ctxt->c_contig_type == CONTIG_RIGHT) { |
| 2917 | ret = ocfs2_merge_rec_left(inode, | 3199 | ret = ocfs2_merge_rec_left(inode, |
| 2918 | path_leaf_bh(left_path), | 3200 | path, |
| 2919 | handle, split_rec, el, | 3201 | handle, split_rec, |
| 3202 | dealloc, | ||
| 2920 | split_index); | 3203 | split_index); |
| 2921 | if (ret) { | 3204 | if (ret) { |
| 2922 | mlog_errno(ret); | 3205 | mlog_errno(ret); |
| @@ -2924,8 +3207,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 2924 | } | 3207 | } |
| 2925 | } else { | 3208 | } else { |
| 2926 | ret = ocfs2_merge_rec_right(inode, | 3209 | ret = ocfs2_merge_rec_right(inode, |
| 2927 | path_leaf_bh(left_path), | 3210 | path, |
| 2928 | handle, split_rec, el, | 3211 | handle, split_rec, |
| 2929 | split_index); | 3212 | split_index); |
| 2930 | if (ret) { | 3213 | if (ret) { |
| 2931 | mlog_errno(ret); | 3214 | mlog_errno(ret); |
| @@ -2938,7 +3221,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 2938 | * The merge may have left an empty extent in | 3221 | * The merge may have left an empty extent in |
| 2939 | * our leaf. Try to rotate it away. | 3222 | * our leaf. Try to rotate it away. |
| 2940 | */ | 3223 | */ |
| 2941 | ret = ocfs2_rotate_tree_left(inode, handle, left_path, | 3224 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
| 2942 | dealloc); | 3225 | dealloc); |
| 2943 | if (ret) | 3226 | if (ret) |
| 2944 | mlog_errno(ret); | 3227 | mlog_errno(ret); |
| @@ -3498,20 +3781,57 @@ out: | |||
| 3498 | } | 3781 | } |
| 3499 | 3782 | ||
| 3500 | static enum ocfs2_contig_type | 3783 | static enum ocfs2_contig_type |
| 3501 | ocfs2_figure_merge_contig_type(struct inode *inode, | 3784 | ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, |
| 3502 | struct ocfs2_extent_list *el, int index, | 3785 | struct ocfs2_extent_list *el, int index, |
| 3503 | struct ocfs2_extent_rec *split_rec) | 3786 | struct ocfs2_extent_rec *split_rec) |
| 3504 | { | 3787 | { |
| 3505 | struct ocfs2_extent_rec *rec; | 3788 | int status; |
| 3506 | enum ocfs2_contig_type ret = CONTIG_NONE; | 3789 | enum ocfs2_contig_type ret = CONTIG_NONE; |
| 3790 | u32 left_cpos, right_cpos; | ||
| 3791 | struct ocfs2_extent_rec *rec = NULL; | ||
| 3792 | struct ocfs2_extent_list *new_el; | ||
| 3793 | struct ocfs2_path *left_path = NULL, *right_path = NULL; | ||
| 3794 | struct buffer_head *bh; | ||
| 3795 | struct ocfs2_extent_block *eb; | ||
| 3796 | |||
| 3797 | if (index > 0) { | ||
| 3798 | rec = &el->l_recs[index - 1]; | ||
| 3799 | } else if (path->p_tree_depth > 0) { | ||
| 3800 | status = ocfs2_find_cpos_for_left_leaf(inode->i_sb, | ||
| 3801 | path, &left_cpos); | ||
| 3802 | if (status) | ||
| 3803 | goto out; | ||
| 3804 | |||
| 3805 | if (left_cpos != 0) { | ||
| 3806 | left_path = ocfs2_new_path(path_root_bh(path), | ||
| 3807 | path_root_el(path)); | ||
| 3808 | if (!left_path) | ||
| 3809 | goto out; | ||
| 3810 | |||
| 3811 | status = ocfs2_find_path(inode, left_path, left_cpos); | ||
| 3812 | if (status) | ||
| 3813 | goto out; | ||
| 3814 | |||
| 3815 | new_el = path_leaf_el(left_path); | ||
| 3816 | |||
| 3817 | if (le16_to_cpu(new_el->l_next_free_rec) != | ||
| 3818 | le16_to_cpu(new_el->l_count)) { | ||
| 3819 | bh = path_leaf_bh(left_path); | ||
| 3820 | eb = (struct ocfs2_extent_block *)bh->b_data; | ||
| 3821 | OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, | ||
| 3822 | eb); | ||
| 3823 | goto out; | ||
| 3824 | } | ||
| 3825 | rec = &new_el->l_recs[ | ||
| 3826 | le16_to_cpu(new_el->l_next_free_rec) - 1]; | ||
| 3827 | } | ||
| 3828 | } | ||
| 3507 | 3829 | ||
| 3508 | /* | 3830 | /* |
| 3509 | * We're careful to check for an empty extent record here - | 3831 | * We're careful to check for an empty extent record here - |
| 3510 | * the merge code will know what to do if it sees one. | 3832 | * the merge code will know what to do if it sees one. |
| 3511 | */ | 3833 | */ |
| 3512 | 3834 | if (rec) { | |
| 3513 | if (index > 0) { | ||
| 3514 | rec = &el->l_recs[index - 1]; | ||
| 3515 | if (index == 1 && ocfs2_is_empty_extent(rec)) { | 3835 | if (index == 1 && ocfs2_is_empty_extent(rec)) { |
| 3516 | if (split_rec->e_cpos == el->l_recs[index].e_cpos) | 3836 | if (split_rec->e_cpos == el->l_recs[index].e_cpos) |
| 3517 | ret = CONTIG_RIGHT; | 3837 | ret = CONTIG_RIGHT; |
| @@ -3520,10 +3840,45 @@ ocfs2_figure_merge_contig_type(struct inode *inode, | |||
| 3520 | } | 3840 | } |
| 3521 | } | 3841 | } |
| 3522 | 3842 | ||
| 3523 | if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) { | 3843 | rec = NULL; |
| 3844 | if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) | ||
| 3845 | rec = &el->l_recs[index + 1]; | ||
| 3846 | else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) && | ||
| 3847 | path->p_tree_depth > 0) { | ||
| 3848 | status = ocfs2_find_cpos_for_right_leaf(inode->i_sb, | ||
| 3849 | path, &right_cpos); | ||
| 3850 | if (status) | ||
| 3851 | goto out; | ||
| 3852 | |||
| 3853 | if (right_cpos == 0) | ||
| 3854 | goto out; | ||
| 3855 | |||
| 3856 | right_path = ocfs2_new_path(path_root_bh(path), | ||
| 3857 | path_root_el(path)); | ||
| 3858 | if (!right_path) | ||
| 3859 | goto out; | ||
| 3860 | |||
| 3861 | status = ocfs2_find_path(inode, right_path, right_cpos); | ||
| 3862 | if (status) | ||
| 3863 | goto out; | ||
| 3864 | |||
| 3865 | new_el = path_leaf_el(right_path); | ||
| 3866 | rec = &new_el->l_recs[0]; | ||
| 3867 | if (ocfs2_is_empty_extent(rec)) { | ||
| 3868 | if (le16_to_cpu(new_el->l_next_free_rec) <= 1) { | ||
| 3869 | bh = path_leaf_bh(right_path); | ||
| 3870 | eb = (struct ocfs2_extent_block *)bh->b_data; | ||
| 3871 | OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, | ||
| 3872 | eb); | ||
| 3873 | goto out; | ||
| 3874 | } | ||
| 3875 | rec = &new_el->l_recs[1]; | ||
| 3876 | } | ||
| 3877 | } | ||
| 3878 | |||
| 3879 | if (rec) { | ||
| 3524 | enum ocfs2_contig_type contig_type; | 3880 | enum ocfs2_contig_type contig_type; |
| 3525 | 3881 | ||
| 3526 | rec = &el->l_recs[index + 1]; | ||
| 3527 | contig_type = ocfs2_extent_contig(inode, rec, split_rec); | 3882 | contig_type = ocfs2_extent_contig(inode, rec, split_rec); |
| 3528 | 3883 | ||
| 3529 | if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) | 3884 | if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) |
| @@ -3532,6 +3887,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, | |||
| 3532 | ret = contig_type; | 3887 | ret = contig_type; |
| 3533 | } | 3888 | } |
| 3534 | 3889 | ||
| 3890 | out: | ||
| 3891 | if (left_path) | ||
| 3892 | ocfs2_free_path(left_path); | ||
| 3893 | if (right_path) | ||
| 3894 | ocfs2_free_path(right_path); | ||
| 3895 | |||
| 3535 | return ret; | 3896 | return ret; |
| 3536 | } | 3897 | } |
| 3537 | 3898 | ||
| @@ -3994,7 +4355,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
| 3994 | goto out; | 4355 | goto out; |
| 3995 | } | 4356 | } |
| 3996 | 4357 | ||
| 3997 | ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el, | 4358 | ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, path, el, |
| 3998 | split_index, | 4359 | split_index, |
| 3999 | split_rec); | 4360 | split_rec); |
| 4000 | 4361 | ||
| @@ -4788,6 +5149,8 @@ static void ocfs2_truncate_log_worker(struct work_struct *work) | |||
| 4788 | status = ocfs2_flush_truncate_log(osb); | 5149 | status = ocfs2_flush_truncate_log(osb); |
| 4789 | if (status < 0) | 5150 | if (status < 0) |
| 4790 | mlog_errno(status); | 5151 | mlog_errno(status); |
| 5152 | else | ||
| 5153 | ocfs2_init_inode_steal_slot(osb); | ||
| 4791 | 5154 | ||
| 4792 | mlog_exit(status); | 5155 | mlog_exit(status); |
| 4793 | } | 5156 | } |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 90383ed61005..17964c0505a9 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
| @@ -467,11 +467,11 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode, | |||
| 467 | unsigned to) | 467 | unsigned to) |
| 468 | { | 468 | { |
| 469 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 469 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 470 | handle_t *handle = NULL; | 470 | handle_t *handle; |
| 471 | int ret = 0; | 471 | int ret = 0; |
| 472 | 472 | ||
| 473 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 473 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
| 474 | if (!handle) { | 474 | if (IS_ERR(handle)) { |
| 475 | ret = -ENOMEM; | 475 | ret = -ENOMEM; |
| 476 | mlog_errno(ret); | 476 | mlog_errno(ret); |
| 477 | goto out; | 477 | goto out; |
| @@ -487,7 +487,7 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode, | |||
| 487 | } | 487 | } |
| 488 | out: | 488 | out: |
| 489 | if (ret) { | 489 | if (ret) { |
| 490 | if (handle) | 490 | if (!IS_ERR(handle)) |
| 491 | ocfs2_commit_trans(osb, handle); | 491 | ocfs2_commit_trans(osb, handle); |
| 492 | handle = ERR_PTR(ret); | 492 | handle = ERR_PTR(ret); |
| 493 | } | 493 | } |
diff --git a/fs/ocfs2/cluster/Makefile b/fs/ocfs2/cluster/Makefile index cdd162f13650..bc8c5e7d8608 100644 --- a/fs/ocfs2/cluster/Makefile +++ b/fs/ocfs2/cluster/Makefile | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o | 1 | obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o |
| 2 | 2 | ||
| 3 | ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \ | 3 | ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \ |
| 4 | quorum.o tcp.o ver.o | 4 | quorum.o tcp.o netdebug.o ver.o |
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c new file mode 100644 index 000000000000..7bf3c0ea7bd9 --- /dev/null +++ b/fs/ocfs2/cluster/netdebug.c | |||
| @@ -0,0 +1,441 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * netdebug.c | ||
| 5 | * | ||
| 6 | * debug functionality for o2net | ||
| 7 | * | ||
| 8 | * Copyright (C) 2005, 2008 Oracle. All rights reserved. | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or | ||
| 11 | * modify it under the terms of the GNU General Public | ||
| 12 | * License as published by the Free Software Foundation; either | ||
| 13 | * version 2 of the License, or (at your option) any later version. | ||
| 14 | * | ||
| 15 | * This program is distributed in the hope that it will be useful, | ||
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 18 | * General Public License for more details. | ||
| 19 | * | ||
| 20 | * You should have received a copy of the GNU General Public | ||
| 21 | * License along with this program; if not, write to the | ||
| 22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 23 | * Boston, MA 021110-1307, USA. | ||
| 24 | * | ||
| 25 | */ | ||
| 26 | |||
| 27 | #ifdef CONFIG_DEBUG_FS | ||
| 28 | |||
| 29 | #include <linux/module.h> | ||
| 30 | #include <linux/types.h> | ||
| 31 | #include <linux/slab.h> | ||
| 32 | #include <linux/idr.h> | ||
| 33 | #include <linux/kref.h> | ||
| 34 | #include <linux/seq_file.h> | ||
| 35 | #include <linux/debugfs.h> | ||
| 36 | |||
| 37 | #include <linux/uaccess.h> | ||
| 38 | |||
| 39 | #include "tcp.h" | ||
| 40 | #include "nodemanager.h" | ||
| 41 | #define MLOG_MASK_PREFIX ML_TCP | ||
| 42 | #include "masklog.h" | ||
| 43 | |||
| 44 | #include "tcp_internal.h" | ||
| 45 | |||
| 46 | #define O2NET_DEBUG_DIR "o2net" | ||
| 47 | #define SC_DEBUG_NAME "sock_containers" | ||
| 48 | #define NST_DEBUG_NAME "send_tracking" | ||
| 49 | |||
| 50 | static struct dentry *o2net_dentry; | ||
| 51 | static struct dentry *sc_dentry; | ||
| 52 | static struct dentry *nst_dentry; | ||
| 53 | |||
| 54 | static DEFINE_SPINLOCK(o2net_debug_lock); | ||
| 55 | |||
| 56 | static LIST_HEAD(sock_containers); | ||
| 57 | static LIST_HEAD(send_tracking); | ||
| 58 | |||
| 59 | void o2net_debug_add_nst(struct o2net_send_tracking *nst) | ||
| 60 | { | ||
| 61 | spin_lock(&o2net_debug_lock); | ||
| 62 | list_add(&nst->st_net_debug_item, &send_tracking); | ||
| 63 | spin_unlock(&o2net_debug_lock); | ||
| 64 | } | ||
| 65 | |||
| 66 | void o2net_debug_del_nst(struct o2net_send_tracking *nst) | ||
| 67 | { | ||
| 68 | spin_lock(&o2net_debug_lock); | ||
| 69 | if (!list_empty(&nst->st_net_debug_item)) | ||
| 70 | list_del_init(&nst->st_net_debug_item); | ||
| 71 | spin_unlock(&o2net_debug_lock); | ||
| 72 | } | ||
| 73 | |||
| 74 | static struct o2net_send_tracking | ||
| 75 | *next_nst(struct o2net_send_tracking *nst_start) | ||
| 76 | { | ||
| 77 | struct o2net_send_tracking *nst, *ret = NULL; | ||
| 78 | |||
| 79 | assert_spin_locked(&o2net_debug_lock); | ||
| 80 | |||
| 81 | list_for_each_entry(nst, &nst_start->st_net_debug_item, | ||
| 82 | st_net_debug_item) { | ||
| 83 | /* discover the head of the list */ | ||
| 84 | if (&nst->st_net_debug_item == &send_tracking) | ||
| 85 | break; | ||
| 86 | |||
| 87 | /* use st_task to detect real nsts in the list */ | ||
| 88 | if (nst->st_task != NULL) { | ||
| 89 | ret = nst; | ||
| 90 | break; | ||
| 91 | } | ||
| 92 | } | ||
| 93 | |||
| 94 | return ret; | ||
| 95 | } | ||
| 96 | |||
| 97 | static void *nst_seq_start(struct seq_file *seq, loff_t *pos) | ||
| 98 | { | ||
| 99 | struct o2net_send_tracking *nst, *dummy_nst = seq->private; | ||
| 100 | |||
| 101 | spin_lock(&o2net_debug_lock); | ||
| 102 | nst = next_nst(dummy_nst); | ||
| 103 | spin_unlock(&o2net_debug_lock); | ||
| 104 | |||
| 105 | return nst; | ||
| 106 | } | ||
| 107 | |||
| 108 | static void *nst_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
| 109 | { | ||
| 110 | struct o2net_send_tracking *nst, *dummy_nst = seq->private; | ||
| 111 | |||
| 112 | spin_lock(&o2net_debug_lock); | ||
| 113 | nst = next_nst(dummy_nst); | ||
| 114 | list_del_init(&dummy_nst->st_net_debug_item); | ||
| 115 | if (nst) | ||
| 116 | list_add(&dummy_nst->st_net_debug_item, | ||
| 117 | &nst->st_net_debug_item); | ||
| 118 | spin_unlock(&o2net_debug_lock); | ||
| 119 | |||
| 120 | return nst; /* unused, just needs to be null when done */ | ||
| 121 | } | ||
| 122 | |||
| 123 | static int nst_seq_show(struct seq_file *seq, void *v) | ||
| 124 | { | ||
| 125 | struct o2net_send_tracking *nst, *dummy_nst = seq->private; | ||
| 126 | |||
| 127 | spin_lock(&o2net_debug_lock); | ||
| 128 | nst = next_nst(dummy_nst); | ||
| 129 | |||
| 130 | if (nst != NULL) { | ||
| 131 | /* get_task_comm isn't exported. oh well. */ | ||
| 132 | seq_printf(seq, "%p:\n" | ||
| 133 | " pid: %lu\n" | ||
| 134 | " tgid: %lu\n" | ||
| 135 | " process name: %s\n" | ||
| 136 | " node: %u\n" | ||
| 137 | " sc: %p\n" | ||
| 138 | " message id: %d\n" | ||
| 139 | " message type: %u\n" | ||
| 140 | " message key: 0x%08x\n" | ||
| 141 | " sock acquiry: %lu.%lu\n" | ||
| 142 | " send start: %lu.%lu\n" | ||
| 143 | " wait start: %lu.%lu\n", | ||
| 144 | nst, (unsigned long)nst->st_task->pid, | ||
| 145 | (unsigned long)nst->st_task->tgid, | ||
| 146 | nst->st_task->comm, nst->st_node, | ||
| 147 | nst->st_sc, nst->st_id, nst->st_msg_type, | ||
| 148 | nst->st_msg_key, | ||
| 149 | nst->st_sock_time.tv_sec, nst->st_sock_time.tv_usec, | ||
| 150 | nst->st_send_time.tv_sec, nst->st_send_time.tv_usec, | ||
| 151 | nst->st_status_time.tv_sec, | ||
| 152 | nst->st_status_time.tv_usec); | ||
| 153 | } | ||
| 154 | |||
| 155 | spin_unlock(&o2net_debug_lock); | ||
| 156 | |||
| 157 | return 0; | ||
| 158 | } | ||
| 159 | |||
| 160 | static void nst_seq_stop(struct seq_file *seq, void *v) | ||
| 161 | { | ||
| 162 | } | ||
| 163 | |||
| 164 | static struct seq_operations nst_seq_ops = { | ||
| 165 | .start = nst_seq_start, | ||
| 166 | .next = nst_seq_next, | ||
| 167 | .stop = nst_seq_stop, | ||
| 168 | .show = nst_seq_show, | ||
| 169 | }; | ||
| 170 | |||
| 171 | static int nst_fop_open(struct inode *inode, struct file *file) | ||
| 172 | { | ||
| 173 | struct o2net_send_tracking *dummy_nst; | ||
| 174 | struct seq_file *seq; | ||
| 175 | int ret; | ||
| 176 | |||
| 177 | dummy_nst = kmalloc(sizeof(struct o2net_send_tracking), GFP_KERNEL); | ||
| 178 | if (dummy_nst == NULL) { | ||
| 179 | ret = -ENOMEM; | ||
| 180 | goto out; | ||
| 181 | } | ||
| 182 | dummy_nst->st_task = NULL; | ||
| 183 | |||
| 184 | ret = seq_open(file, &nst_seq_ops); | ||
| 185 | if (ret) | ||
| 186 | goto out; | ||
| 187 | |||
| 188 | seq = file->private_data; | ||
| 189 | seq->private = dummy_nst; | ||
| 190 | o2net_debug_add_nst(dummy_nst); | ||
| 191 | |||
| 192 | dummy_nst = NULL; | ||
| 193 | |||
| 194 | out: | ||
| 195 | kfree(dummy_nst); | ||
| 196 | return ret; | ||
| 197 | } | ||
| 198 | |||
| 199 | static int nst_fop_release(struct inode *inode, struct file *file) | ||
| 200 | { | ||
| 201 | struct seq_file *seq = file->private_data; | ||
| 202 | struct o2net_send_tracking *dummy_nst = seq->private; | ||
| 203 | |||
| 204 | o2net_debug_del_nst(dummy_nst); | ||
| 205 | return seq_release_private(inode, file); | ||
| 206 | } | ||
| 207 | |||
| 208 | static struct file_operations nst_seq_fops = { | ||
| 209 | .open = nst_fop_open, | ||
| 210 | .read = seq_read, | ||
| 211 | .llseek = seq_lseek, | ||
| 212 | .release = nst_fop_release, | ||
| 213 | }; | ||
| 214 | |||
| 215 | void o2net_debug_add_sc(struct o2net_sock_container *sc) | ||
| 216 | { | ||
| 217 | spin_lock(&o2net_debug_lock); | ||
| 218 | list_add(&sc->sc_net_debug_item, &sock_containers); | ||
| 219 | spin_unlock(&o2net_debug_lock); | ||
| 220 | } | ||
| 221 | |||
| 222 | void o2net_debug_del_sc(struct o2net_sock_container *sc) | ||
| 223 | { | ||
| 224 | spin_lock(&o2net_debug_lock); | ||
| 225 | list_del_init(&sc->sc_net_debug_item); | ||
| 226 | spin_unlock(&o2net_debug_lock); | ||
| 227 | } | ||
| 228 | |||
| 229 | static struct o2net_sock_container | ||
| 230 | *next_sc(struct o2net_sock_container *sc_start) | ||
| 231 | { | ||
| 232 | struct o2net_sock_container *sc, *ret = NULL; | ||
| 233 | |||
| 234 | assert_spin_locked(&o2net_debug_lock); | ||
| 235 | |||
| 236 | list_for_each_entry(sc, &sc_start->sc_net_debug_item, | ||
| 237 | sc_net_debug_item) { | ||
| 238 | /* discover the head of the list miscast as a sc */ | ||
| 239 | if (&sc->sc_net_debug_item == &sock_containers) | ||
| 240 | break; | ||
| 241 | |||
| 242 | /* use sc_page to detect real scs in the list */ | ||
| 243 | if (sc->sc_page != NULL) { | ||
| 244 | ret = sc; | ||
| 245 | break; | ||
| 246 | } | ||
| 247 | } | ||
| 248 | |||
| 249 | return ret; | ||
| 250 | } | ||
| 251 | |||
| 252 | static void *sc_seq_start(struct seq_file *seq, loff_t *pos) | ||
| 253 | { | ||
| 254 | struct o2net_sock_container *sc, *dummy_sc = seq->private; | ||
| 255 | |||
| 256 | spin_lock(&o2net_debug_lock); | ||
| 257 | sc = next_sc(dummy_sc); | ||
| 258 | spin_unlock(&o2net_debug_lock); | ||
| 259 | |||
| 260 | return sc; | ||
| 261 | } | ||
| 262 | |||
| 263 | static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
| 264 | { | ||
| 265 | struct o2net_sock_container *sc, *dummy_sc = seq->private; | ||
| 266 | |||
| 267 | spin_lock(&o2net_debug_lock); | ||
| 268 | sc = next_sc(dummy_sc); | ||
| 269 | list_del_init(&dummy_sc->sc_net_debug_item); | ||
| 270 | if (sc) | ||
| 271 | list_add(&dummy_sc->sc_net_debug_item, &sc->sc_net_debug_item); | ||
| 272 | spin_unlock(&o2net_debug_lock); | ||
| 273 | |||
| 274 | return sc; /* unused, just needs to be null when done */ | ||
| 275 | } | ||
| 276 | |||
| 277 | #define TV_SEC_USEC(TV) TV.tv_sec, TV.tv_usec | ||
| 278 | |||
| 279 | static int sc_seq_show(struct seq_file *seq, void *v) | ||
| 280 | { | ||
| 281 | struct o2net_sock_container *sc, *dummy_sc = seq->private; | ||
| 282 | |||
| 283 | spin_lock(&o2net_debug_lock); | ||
| 284 | sc = next_sc(dummy_sc); | ||
| 285 | |||
| 286 | if (sc != NULL) { | ||
| 287 | struct inet_sock *inet = NULL; | ||
| 288 | |||
| 289 | __be32 saddr = 0, daddr = 0; | ||
| 290 | __be16 sport = 0, dport = 0; | ||
| 291 | |||
| 292 | if (sc->sc_sock) { | ||
| 293 | inet = inet_sk(sc->sc_sock->sk); | ||
| 294 | /* the stack's structs aren't sparse endian clean */ | ||
| 295 | saddr = (__force __be32)inet->saddr; | ||
| 296 | daddr = (__force __be32)inet->daddr; | ||
| 297 | sport = (__force __be16)inet->sport; | ||
| 298 | dport = (__force __be16)inet->dport; | ||
| 299 | } | ||
| 300 | |||
| 301 | /* XXX sigh, inet-> doesn't have sparse annotation so any | ||
| 302 | * use of it here generates a warning with -Wbitwise */ | ||
| 303 | seq_printf(seq, "%p:\n" | ||
| 304 | " krefs: %d\n" | ||
| 305 | " sock: %u.%u.%u.%u:%u -> " | ||
| 306 | "%u.%u.%u.%u:%u\n" | ||
| 307 | " remote node: %s\n" | ||
| 308 | " page off: %zu\n" | ||
| 309 | " handshake ok: %u\n" | ||
| 310 | " timer: %lu.%lu\n" | ||
| 311 | " data ready: %lu.%lu\n" | ||
| 312 | " advance start: %lu.%lu\n" | ||
| 313 | " advance stop: %lu.%lu\n" | ||
| 314 | " func start: %lu.%lu\n" | ||
| 315 | " func stop: %lu.%lu\n" | ||
| 316 | " func key: %u\n" | ||
| 317 | " func type: %u\n", | ||
| 318 | sc, | ||
| 319 | atomic_read(&sc->sc_kref.refcount), | ||
| 320 | NIPQUAD(saddr), inet ? ntohs(sport) : 0, | ||
| 321 | NIPQUAD(daddr), inet ? ntohs(dport) : 0, | ||
| 322 | sc->sc_node->nd_name, | ||
| 323 | sc->sc_page_off, | ||
| 324 | sc->sc_handshake_ok, | ||
| 325 | TV_SEC_USEC(sc->sc_tv_timer), | ||
| 326 | TV_SEC_USEC(sc->sc_tv_data_ready), | ||
| 327 | TV_SEC_USEC(sc->sc_tv_advance_start), | ||
| 328 | TV_SEC_USEC(sc->sc_tv_advance_stop), | ||
| 329 | TV_SEC_USEC(sc->sc_tv_func_start), | ||
| 330 | TV_SEC_USEC(sc->sc_tv_func_stop), | ||
| 331 | sc->sc_msg_key, | ||
| 332 | sc->sc_msg_type); | ||
| 333 | } | ||
| 334 | |||
| 335 | |||
| 336 | spin_unlock(&o2net_debug_lock); | ||
| 337 | |||
| 338 | return 0; | ||
| 339 | } | ||
| 340 | |||
| 341 | static void sc_seq_stop(struct seq_file *seq, void *v) | ||
| 342 | { | ||
| 343 | } | ||
| 344 | |||
| 345 | static struct seq_operations sc_seq_ops = { | ||
| 346 | .start = sc_seq_start, | ||
| 347 | .next = sc_seq_next, | ||
| 348 | .stop = sc_seq_stop, | ||
| 349 | .show = sc_seq_show, | ||
| 350 | }; | ||
| 351 | |||
| 352 | static int sc_fop_open(struct inode *inode, struct file *file) | ||
| 353 | { | ||
| 354 | struct o2net_sock_container *dummy_sc; | ||
| 355 | struct seq_file *seq; | ||
| 356 | int ret; | ||
| 357 | |||
| 358 | dummy_sc = kmalloc(sizeof(struct o2net_sock_container), GFP_KERNEL); | ||
| 359 | if (dummy_sc == NULL) { | ||
| 360 | ret = -ENOMEM; | ||
| 361 | goto out; | ||
| 362 | } | ||
| 363 | dummy_sc->sc_page = NULL; | ||
| 364 | |||
| 365 | ret = seq_open(file, &sc_seq_ops); | ||
| 366 | if (ret) | ||
| 367 | goto out; | ||
| 368 | |||
| 369 | seq = file->private_data; | ||
| 370 | seq->private = dummy_sc; | ||
| 371 | o2net_debug_add_sc(dummy_sc); | ||
| 372 | |||
| 373 | dummy_sc = NULL; | ||
| 374 | |||
| 375 | out: | ||
| 376 | kfree(dummy_sc); | ||
| 377 | return ret; | ||
| 378 | } | ||
| 379 | |||
| 380 | static int sc_fop_release(struct inode *inode, struct file *file) | ||
| 381 | { | ||
| 382 | struct seq_file *seq = file->private_data; | ||
| 383 | struct o2net_sock_container *dummy_sc = seq->private; | ||
| 384 | |||
| 385 | o2net_debug_del_sc(dummy_sc); | ||
| 386 | return seq_release_private(inode, file); | ||
| 387 | } | ||
| 388 | |||
| 389 | static struct file_operations sc_seq_fops = { | ||
| 390 | .open = sc_fop_open, | ||
| 391 | .read = seq_read, | ||
| 392 | .llseek = seq_lseek, | ||
| 393 | .release = sc_fop_release, | ||
| 394 | }; | ||
| 395 | |||
| 396 | int o2net_debugfs_init(void) | ||
| 397 | { | ||
| 398 | o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL); | ||
| 399 | if (!o2net_dentry) { | ||
| 400 | mlog_errno(-ENOMEM); | ||
| 401 | goto bail; | ||
| 402 | } | ||
| 403 | |||
| 404 | nst_dentry = debugfs_create_file(NST_DEBUG_NAME, S_IFREG|S_IRUSR, | ||
| 405 | o2net_dentry, NULL, | ||
| 406 | &nst_seq_fops); | ||
| 407 | if (!nst_dentry) { | ||
| 408 | mlog_errno(-ENOMEM); | ||
| 409 | goto bail; | ||
| 410 | } | ||
| 411 | |||
| 412 | sc_dentry = debugfs_create_file(SC_DEBUG_NAME, S_IFREG|S_IRUSR, | ||
| 413 | o2net_dentry, NULL, | ||
| 414 | &sc_seq_fops); | ||
| 415 | if (!sc_dentry) { | ||
| 416 | mlog_errno(-ENOMEM); | ||
| 417 | goto bail; | ||
| 418 | } | ||
| 419 | |||
| 420 | return 0; | ||
| 421 | bail: | ||
| 422 | if (sc_dentry) | ||
| 423 | debugfs_remove(sc_dentry); | ||
| 424 | if (nst_dentry) | ||
| 425 | debugfs_remove(nst_dentry); | ||
| 426 | if (o2net_dentry) | ||
| 427 | debugfs_remove(o2net_dentry); | ||
| 428 | return -ENOMEM; | ||
| 429 | } | ||
| 430 | |||
| 431 | void o2net_debugfs_exit(void) | ||
| 432 | { | ||
| 433 | if (sc_dentry) | ||
| 434 | debugfs_remove(sc_dentry); | ||
| 435 | if (nst_dentry) | ||
| 436 | debugfs_remove(nst_dentry); | ||
| 437 | if (o2net_dentry) | ||
| 438 | debugfs_remove(o2net_dentry); | ||
| 439 | } | ||
| 440 | |||
| 441 | #endif /* CONFIG_DEBUG_FS */ | ||
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index 709fba25bf7e..cf9401e8cd0b 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
| @@ -959,7 +959,10 @@ static int __init init_o2nm(void) | |||
| 959 | cluster_print_version(); | 959 | cluster_print_version(); |
| 960 | 960 | ||
| 961 | o2hb_init(); | 961 | o2hb_init(); |
| 962 | o2net_init(); | 962 | |
| 963 | ret = o2net_init(); | ||
| 964 | if (ret) | ||
| 965 | goto out; | ||
| 963 | 966 | ||
| 964 | ocfs2_table_header = register_sysctl_table(ocfs2_root_table); | 967 | ocfs2_table_header = register_sysctl_table(ocfs2_root_table); |
| 965 | if (!ocfs2_table_header) { | 968 | if (!ocfs2_table_header) { |
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c index 0c095ce7723d..98429fd68499 100644 --- a/fs/ocfs2/cluster/sys.c +++ b/fs/ocfs2/cluster/sys.c | |||
| @@ -57,6 +57,7 @@ static struct kset *o2cb_kset; | |||
| 57 | void o2cb_sys_shutdown(void) | 57 | void o2cb_sys_shutdown(void) |
| 58 | { | 58 | { |
| 59 | mlog_sys_shutdown(); | 59 | mlog_sys_shutdown(); |
| 60 | sysfs_remove_link(NULL, "o2cb"); | ||
| 60 | kset_unregister(o2cb_kset); | 61 | kset_unregister(o2cb_kset); |
| 61 | } | 62 | } |
| 62 | 63 | ||
| @@ -68,6 +69,14 @@ int o2cb_sys_init(void) | |||
| 68 | if (!o2cb_kset) | 69 | if (!o2cb_kset) |
| 69 | return -ENOMEM; | 70 | return -ENOMEM; |
| 70 | 71 | ||
| 72 | /* | ||
| 73 | * Create this symlink for backwards compatibility with old | ||
| 74 | * versions of ocfs2-tools which look for things in /sys/o2cb. | ||
| 75 | */ | ||
| 76 | ret = sysfs_create_link(NULL, &o2cb_kset->kobj, "o2cb"); | ||
| 77 | if (ret) | ||
| 78 | goto error; | ||
| 79 | |||
| 71 | ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); | 80 | ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); |
| 72 | if (ret) | 81 | if (ret) |
| 73 | goto error; | 82 | goto error; |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index b8057c51b205..1e44ad14881a 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
| @@ -142,23 +142,65 @@ static void o2net_idle_timer(unsigned long data); | |||
| 142 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); | 142 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); |
| 143 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); | 143 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); |
| 144 | 144 | ||
| 145 | /* | 145 | static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, |
| 146 | * FIXME: These should use to_o2nm_cluster_from_node(), but we end up | 146 | u32 msgkey, struct task_struct *task, u8 node) |
| 147 | * losing our parent link to the cluster during shutdown. This can be | 147 | { |
| 148 | * solved by adding a pre-removal callback to configfs, or passing | 148 | #ifdef CONFIG_DEBUG_FS |
| 149 | * around the cluster with the node. -jeffm | 149 | INIT_LIST_HEAD(&nst->st_net_debug_item); |
| 150 | */ | 150 | nst->st_task = task; |
| 151 | static inline int o2net_reconnect_delay(struct o2nm_node *node) | 151 | nst->st_msg_type = msgtype; |
| 152 | nst->st_msg_key = msgkey; | ||
| 153 | nst->st_node = node; | ||
| 154 | #endif | ||
| 155 | } | ||
| 156 | |||
| 157 | static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) | ||
| 158 | { | ||
| 159 | #ifdef CONFIG_DEBUG_FS | ||
| 160 | do_gettimeofday(&nst->st_sock_time); | ||
| 161 | #endif | ||
| 162 | } | ||
| 163 | |||
| 164 | static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) | ||
| 165 | { | ||
| 166 | #ifdef CONFIG_DEBUG_FS | ||
| 167 | do_gettimeofday(&nst->st_send_time); | ||
| 168 | #endif | ||
| 169 | } | ||
| 170 | |||
| 171 | static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) | ||
| 172 | { | ||
| 173 | #ifdef CONFIG_DEBUG_FS | ||
| 174 | do_gettimeofday(&nst->st_status_time); | ||
| 175 | #endif | ||
| 176 | } | ||
| 177 | |||
| 178 | static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | ||
| 179 | struct o2net_sock_container *sc) | ||
| 180 | { | ||
| 181 | #ifdef CONFIG_DEBUG_FS | ||
| 182 | nst->st_sc = sc; | ||
| 183 | #endif | ||
| 184 | } | ||
| 185 | |||
| 186 | static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) | ||
| 187 | { | ||
| 188 | #ifdef CONFIG_DEBUG_FS | ||
| 189 | nst->st_id = msg_id; | ||
| 190 | #endif | ||
| 191 | } | ||
| 192 | |||
| 193 | static inline int o2net_reconnect_delay(void) | ||
| 152 | { | 194 | { |
| 153 | return o2nm_single_cluster->cl_reconnect_delay_ms; | 195 | return o2nm_single_cluster->cl_reconnect_delay_ms; |
| 154 | } | 196 | } |
| 155 | 197 | ||
| 156 | static inline int o2net_keepalive_delay(struct o2nm_node *node) | 198 | static inline int o2net_keepalive_delay(void) |
| 157 | { | 199 | { |
| 158 | return o2nm_single_cluster->cl_keepalive_delay_ms; | 200 | return o2nm_single_cluster->cl_keepalive_delay_ms; |
| 159 | } | 201 | } |
| 160 | 202 | ||
| 161 | static inline int o2net_idle_timeout(struct o2nm_node *node) | 203 | static inline int o2net_idle_timeout(void) |
| 162 | { | 204 | { |
| 163 | return o2nm_single_cluster->cl_idle_timeout_ms; | 205 | return o2nm_single_cluster->cl_idle_timeout_ms; |
| 164 | } | 206 | } |
| @@ -296,6 +338,7 @@ static void sc_kref_release(struct kref *kref) | |||
| 296 | o2nm_node_put(sc->sc_node); | 338 | o2nm_node_put(sc->sc_node); |
| 297 | sc->sc_node = NULL; | 339 | sc->sc_node = NULL; |
| 298 | 340 | ||
| 341 | o2net_debug_del_sc(sc); | ||
| 299 | kfree(sc); | 342 | kfree(sc); |
| 300 | } | 343 | } |
| 301 | 344 | ||
| @@ -336,6 +379,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node) | |||
| 336 | 379 | ||
| 337 | ret = sc; | 380 | ret = sc; |
| 338 | sc->sc_page = page; | 381 | sc->sc_page = page; |
| 382 | o2net_debug_add_sc(sc); | ||
| 339 | sc = NULL; | 383 | sc = NULL; |
| 340 | page = NULL; | 384 | page = NULL; |
| 341 | 385 | ||
| @@ -399,8 +443,6 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
| 399 | mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid); | 443 | mlog_bug_on_msg(err && valid, "err %d valid %u\n", err, valid); |
| 400 | mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc); | 444 | mlog_bug_on_msg(valid && !sc, "valid %u sc %p\n", valid, sc); |
| 401 | 445 | ||
| 402 | /* we won't reconnect after our valid conn goes away for | ||
| 403 | * this hb iteration.. here so it shows up in the logs */ | ||
| 404 | if (was_valid && !valid && err == 0) | 446 | if (was_valid && !valid && err == 0) |
| 405 | err = -ENOTCONN; | 447 | err = -ENOTCONN; |
| 406 | 448 | ||
| @@ -430,11 +472,6 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
| 430 | 472 | ||
| 431 | if (!was_valid && valid) { | 473 | if (!was_valid && valid) { |
| 432 | o2quo_conn_up(o2net_num_from_nn(nn)); | 474 | o2quo_conn_up(o2net_num_from_nn(nn)); |
| 433 | /* this is a bit of a hack. we only try reconnecting | ||
| 434 | * when heartbeating starts until we get a connection. | ||
| 435 | * if that connection then dies we don't try reconnecting. | ||
| 436 | * the only way to start connecting again is to down | ||
| 437 | * heartbeat and bring it back up. */ | ||
| 438 | cancel_delayed_work(&nn->nn_connect_expired); | 475 | cancel_delayed_work(&nn->nn_connect_expired); |
| 439 | printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", | 476 | printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n", |
| 440 | o2nm_this_node() > sc->sc_node->nd_num ? | 477 | o2nm_this_node() > sc->sc_node->nd_num ? |
| @@ -451,12 +488,24 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
| 451 | /* delay if we're withing a RECONNECT_DELAY of the | 488 | /* delay if we're withing a RECONNECT_DELAY of the |
| 452 | * last attempt */ | 489 | * last attempt */ |
| 453 | delay = (nn->nn_last_connect_attempt + | 490 | delay = (nn->nn_last_connect_attempt + |
| 454 | msecs_to_jiffies(o2net_reconnect_delay(NULL))) | 491 | msecs_to_jiffies(o2net_reconnect_delay())) |
| 455 | - jiffies; | 492 | - jiffies; |
| 456 | if (delay > msecs_to_jiffies(o2net_reconnect_delay(NULL))) | 493 | if (delay > msecs_to_jiffies(o2net_reconnect_delay())) |
| 457 | delay = 0; | 494 | delay = 0; |
| 458 | mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); | 495 | mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay); |
| 459 | queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); | 496 | queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay); |
| 497 | |||
| 498 | /* | ||
| 499 | * Delay the expired work after idle timeout. | ||
| 500 | * | ||
| 501 | * We might have lots of failed connection attempts that run | ||
| 502 | * through here but we only cancel the connect_expired work when | ||
| 503 | * a connection attempt succeeds. So only the first enqueue of | ||
| 504 | * the connect_expired work will do anything. The rest will see | ||
| 505 | * that it's already queued and do nothing. | ||
| 506 | */ | ||
| 507 | delay += msecs_to_jiffies(o2net_idle_timeout()); | ||
| 508 | queue_delayed_work(o2net_wq, &nn->nn_connect_expired, delay); | ||
| 460 | } | 509 | } |
| 461 | 510 | ||
| 462 | /* keep track of the nn's sc ref for the caller */ | 511 | /* keep track of the nn's sc ref for the caller */ |
| @@ -914,6 +963,9 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
| 914 | struct o2net_status_wait nsw = { | 963 | struct o2net_status_wait nsw = { |
| 915 | .ns_node_item = LIST_HEAD_INIT(nsw.ns_node_item), | 964 | .ns_node_item = LIST_HEAD_INIT(nsw.ns_node_item), |
| 916 | }; | 965 | }; |
| 966 | struct o2net_send_tracking nst; | ||
| 967 | |||
| 968 | o2net_init_nst(&nst, msg_type, key, current, target_node); | ||
| 917 | 969 | ||
| 918 | if (o2net_wq == NULL) { | 970 | if (o2net_wq == NULL) { |
| 919 | mlog(0, "attempt to tx without o2netd running\n"); | 971 | mlog(0, "attempt to tx without o2netd running\n"); |
| @@ -939,6 +991,10 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
| 939 | goto out; | 991 | goto out; |
| 940 | } | 992 | } |
| 941 | 993 | ||
| 994 | o2net_debug_add_nst(&nst); | ||
| 995 | |||
| 996 | o2net_set_nst_sock_time(&nst); | ||
| 997 | |||
| 942 | ret = wait_event_interruptible(nn->nn_sc_wq, | 998 | ret = wait_event_interruptible(nn->nn_sc_wq, |
| 943 | o2net_tx_can_proceed(nn, &sc, &error)); | 999 | o2net_tx_can_proceed(nn, &sc, &error)); |
| 944 | if (!ret && error) | 1000 | if (!ret && error) |
| @@ -946,6 +1002,8 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
| 946 | if (ret) | 1002 | if (ret) |
| 947 | goto out; | 1003 | goto out; |
| 948 | 1004 | ||
| 1005 | o2net_set_nst_sock_container(&nst, sc); | ||
| 1006 | |||
| 949 | veclen = caller_veclen + 1; | 1007 | veclen = caller_veclen + 1; |
| 950 | vec = kmalloc(sizeof(struct kvec) * veclen, GFP_ATOMIC); | 1008 | vec = kmalloc(sizeof(struct kvec) * veclen, GFP_ATOMIC); |
| 951 | if (vec == NULL) { | 1009 | if (vec == NULL) { |
| @@ -972,6 +1030,9 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
| 972 | goto out; | 1030 | goto out; |
| 973 | 1031 | ||
| 974 | msg->msg_num = cpu_to_be32(nsw.ns_id); | 1032 | msg->msg_num = cpu_to_be32(nsw.ns_id); |
| 1033 | o2net_set_nst_msg_id(&nst, nsw.ns_id); | ||
| 1034 | |||
| 1035 | o2net_set_nst_send_time(&nst); | ||
| 975 | 1036 | ||
| 976 | /* finally, convert the message header to network byte-order | 1037 | /* finally, convert the message header to network byte-order |
| 977 | * and send */ | 1038 | * and send */ |
| @@ -986,6 +1047,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
| 986 | } | 1047 | } |
| 987 | 1048 | ||
| 988 | /* wait on other node's handler */ | 1049 | /* wait on other node's handler */ |
| 1050 | o2net_set_nst_status_time(&nst); | ||
| 989 | wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw)); | 1051 | wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw)); |
| 990 | 1052 | ||
| 991 | /* Note that we avoid overwriting the callers status return | 1053 | /* Note that we avoid overwriting the callers status return |
| @@ -998,6 +1060,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | |||
| 998 | mlog(0, "woken, returning system status %d, user status %d\n", | 1060 | mlog(0, "woken, returning system status %d, user status %d\n", |
| 999 | ret, nsw.ns_status); | 1061 | ret, nsw.ns_status); |
| 1000 | out: | 1062 | out: |
| 1063 | o2net_debug_del_nst(&nst); /* must be before dropping sc and node */ | ||
| 1001 | if (sc) | 1064 | if (sc) |
| 1002 | sc_put(sc); | 1065 | sc_put(sc); |
| 1003 | if (vec) | 1066 | if (vec) |
| @@ -1154,23 +1217,23 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) | |||
| 1154 | * but isn't. This can ultimately cause corruption. | 1217 | * but isn't. This can ultimately cause corruption. |
| 1155 | */ | 1218 | */ |
| 1156 | if (be32_to_cpu(hand->o2net_idle_timeout_ms) != | 1219 | if (be32_to_cpu(hand->o2net_idle_timeout_ms) != |
| 1157 | o2net_idle_timeout(sc->sc_node)) { | 1220 | o2net_idle_timeout()) { |
| 1158 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " | 1221 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " |
| 1159 | "%u ms, but we use %u ms locally. disconnecting\n", | 1222 | "%u ms, but we use %u ms locally. disconnecting\n", |
| 1160 | SC_NODEF_ARGS(sc), | 1223 | SC_NODEF_ARGS(sc), |
| 1161 | be32_to_cpu(hand->o2net_idle_timeout_ms), | 1224 | be32_to_cpu(hand->o2net_idle_timeout_ms), |
| 1162 | o2net_idle_timeout(sc->sc_node)); | 1225 | o2net_idle_timeout()); |
| 1163 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | 1226 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); |
| 1164 | return -1; | 1227 | return -1; |
| 1165 | } | 1228 | } |
| 1166 | 1229 | ||
| 1167 | if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != | 1230 | if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != |
| 1168 | o2net_keepalive_delay(sc->sc_node)) { | 1231 | o2net_keepalive_delay()) { |
| 1169 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " | 1232 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " |
| 1170 | "%u ms, but we use %u ms locally. disconnecting\n", | 1233 | "%u ms, but we use %u ms locally. disconnecting\n", |
| 1171 | SC_NODEF_ARGS(sc), | 1234 | SC_NODEF_ARGS(sc), |
| 1172 | be32_to_cpu(hand->o2net_keepalive_delay_ms), | 1235 | be32_to_cpu(hand->o2net_keepalive_delay_ms), |
| 1173 | o2net_keepalive_delay(sc->sc_node)); | 1236 | o2net_keepalive_delay()); |
| 1174 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | 1237 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); |
| 1175 | return -1; | 1238 | return -1; |
| 1176 | } | 1239 | } |
| @@ -1193,6 +1256,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) | |||
| 1193 | * shut down already */ | 1256 | * shut down already */ |
| 1194 | if (nn->nn_sc == sc) { | 1257 | if (nn->nn_sc == sc) { |
| 1195 | o2net_sc_reset_idle_timer(sc); | 1258 | o2net_sc_reset_idle_timer(sc); |
| 1259 | atomic_set(&nn->nn_timeout, 0); | ||
| 1196 | o2net_set_nn_state(nn, sc, 1, 0); | 1260 | o2net_set_nn_state(nn, sc, 1, 0); |
| 1197 | } | 1261 | } |
| 1198 | spin_unlock(&nn->nn_lock); | 1262 | spin_unlock(&nn->nn_lock); |
| @@ -1347,12 +1411,11 @@ static void o2net_initialize_handshake(void) | |||
| 1347 | { | 1411 | { |
| 1348 | o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( | 1412 | o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32( |
| 1349 | O2HB_MAX_WRITE_TIMEOUT_MS); | 1413 | O2HB_MAX_WRITE_TIMEOUT_MS); |
| 1350 | o2net_hand->o2net_idle_timeout_ms = cpu_to_be32( | 1414 | o2net_hand->o2net_idle_timeout_ms = cpu_to_be32(o2net_idle_timeout()); |
| 1351 | o2net_idle_timeout(NULL)); | ||
| 1352 | o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32( | 1415 | o2net_hand->o2net_keepalive_delay_ms = cpu_to_be32( |
| 1353 | o2net_keepalive_delay(NULL)); | 1416 | o2net_keepalive_delay()); |
| 1354 | o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32( | 1417 | o2net_hand->o2net_reconnect_delay_ms = cpu_to_be32( |
| 1355 | o2net_reconnect_delay(NULL)); | 1418 | o2net_reconnect_delay()); |
| 1356 | } | 1419 | } |
| 1357 | 1420 | ||
| 1358 | /* ------------------------------------------------------------ */ | 1421 | /* ------------------------------------------------------------ */ |
| @@ -1391,14 +1454,15 @@ static void o2net_sc_send_keep_req(struct work_struct *work) | |||
| 1391 | static void o2net_idle_timer(unsigned long data) | 1454 | static void o2net_idle_timer(unsigned long data) |
| 1392 | { | 1455 | { |
| 1393 | struct o2net_sock_container *sc = (struct o2net_sock_container *)data; | 1456 | struct o2net_sock_container *sc = (struct o2net_sock_container *)data; |
| 1457 | struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); | ||
| 1394 | struct timeval now; | 1458 | struct timeval now; |
| 1395 | 1459 | ||
| 1396 | do_gettimeofday(&now); | 1460 | do_gettimeofday(&now); |
| 1397 | 1461 | ||
| 1398 | printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " | 1462 | printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " |
| 1399 | "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), | 1463 | "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), |
| 1400 | o2net_idle_timeout(sc->sc_node) / 1000, | 1464 | o2net_idle_timeout() / 1000, |
| 1401 | o2net_idle_timeout(sc->sc_node) % 1000); | 1465 | o2net_idle_timeout() % 1000); |
| 1402 | mlog(ML_NOTICE, "here are some times that might help debug the " | 1466 | mlog(ML_NOTICE, "here are some times that might help debug the " |
| 1403 | "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " | 1467 | "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " |
| 1404 | "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", | 1468 | "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", |
| @@ -1413,6 +1477,12 @@ static void o2net_idle_timer(unsigned long data) | |||
| 1413 | sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, | 1477 | sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, |
| 1414 | sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); | 1478 | sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); |
| 1415 | 1479 | ||
| 1480 | /* | ||
| 1481 | * Initialize the nn_timeout so that the next connection attempt | ||
| 1482 | * will continue in o2net_start_connect. | ||
| 1483 | */ | ||
| 1484 | atomic_set(&nn->nn_timeout, 1); | ||
| 1485 | |||
| 1416 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); | 1486 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); |
| 1417 | } | 1487 | } |
| 1418 | 1488 | ||
| @@ -1420,10 +1490,10 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) | |||
| 1420 | { | 1490 | { |
| 1421 | o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); | 1491 | o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); |
| 1422 | o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, | 1492 | o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, |
| 1423 | msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node))); | 1493 | msecs_to_jiffies(o2net_keepalive_delay())); |
| 1424 | do_gettimeofday(&sc->sc_tv_timer); | 1494 | do_gettimeofday(&sc->sc_tv_timer); |
| 1425 | mod_timer(&sc->sc_idle_timeout, | 1495 | mod_timer(&sc->sc_idle_timeout, |
| 1426 | jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node))); | 1496 | jiffies + msecs_to_jiffies(o2net_idle_timeout())); |
| 1427 | } | 1497 | } |
| 1428 | 1498 | ||
| 1429 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) | 1499 | static void o2net_sc_postpone_idle(struct o2net_sock_container *sc) |
| @@ -1447,6 +1517,7 @@ static void o2net_start_connect(struct work_struct *work) | |||
| 1447 | struct socket *sock = NULL; | 1517 | struct socket *sock = NULL; |
| 1448 | struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; | 1518 | struct sockaddr_in myaddr = {0, }, remoteaddr = {0, }; |
| 1449 | int ret = 0, stop; | 1519 | int ret = 0, stop; |
| 1520 | unsigned int timeout; | ||
| 1450 | 1521 | ||
| 1451 | /* if we're greater we initiate tx, otherwise we accept */ | 1522 | /* if we're greater we initiate tx, otherwise we accept */ |
| 1452 | if (o2nm_this_node() <= o2net_num_from_nn(nn)) | 1523 | if (o2nm_this_node() <= o2net_num_from_nn(nn)) |
| @@ -1466,8 +1537,17 @@ static void o2net_start_connect(struct work_struct *work) | |||
| 1466 | } | 1537 | } |
| 1467 | 1538 | ||
| 1468 | spin_lock(&nn->nn_lock); | 1539 | spin_lock(&nn->nn_lock); |
| 1469 | /* see if we already have one pending or have given up */ | 1540 | /* |
| 1470 | stop = (nn->nn_sc || nn->nn_persistent_error); | 1541 | * see if we already have one pending or have given up. |
| 1542 | * For nn_timeout, it is set when we close the connection | ||
| 1543 | * because of the idle time out. So it means that we have | ||
| 1544 | * at least connected to that node successfully once, | ||
| 1545 | * now try to connect to it again. | ||
| 1546 | */ | ||
| 1547 | timeout = atomic_read(&nn->nn_timeout); | ||
| 1548 | stop = (nn->nn_sc || | ||
| 1549 | (nn->nn_persistent_error && | ||
| 1550 | (nn->nn_persistent_error != -ENOTCONN || timeout == 0))); | ||
| 1471 | spin_unlock(&nn->nn_lock); | 1551 | spin_unlock(&nn->nn_lock); |
| 1472 | if (stop) | 1552 | if (stop) |
| 1473 | goto out; | 1553 | goto out; |
| @@ -1555,8 +1635,8 @@ static void o2net_connect_expired(struct work_struct *work) | |||
| 1555 | mlog(ML_ERROR, "no connection established with node %u after " | 1635 | mlog(ML_ERROR, "no connection established with node %u after " |
| 1556 | "%u.%u seconds, giving up and returning errors.\n", | 1636 | "%u.%u seconds, giving up and returning errors.\n", |
| 1557 | o2net_num_from_nn(nn), | 1637 | o2net_num_from_nn(nn), |
| 1558 | o2net_idle_timeout(NULL) / 1000, | 1638 | o2net_idle_timeout() / 1000, |
| 1559 | o2net_idle_timeout(NULL) % 1000); | 1639 | o2net_idle_timeout() % 1000); |
| 1560 | 1640 | ||
| 1561 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); | 1641 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); |
| 1562 | } | 1642 | } |
| @@ -1579,6 +1659,7 @@ void o2net_disconnect_node(struct o2nm_node *node) | |||
| 1579 | 1659 | ||
| 1580 | /* don't reconnect until it's heartbeating again */ | 1660 | /* don't reconnect until it's heartbeating again */ |
| 1581 | spin_lock(&nn->nn_lock); | 1661 | spin_lock(&nn->nn_lock); |
| 1662 | atomic_set(&nn->nn_timeout, 0); | ||
| 1582 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); | 1663 | o2net_set_nn_state(nn, NULL, 0, -ENOTCONN); |
| 1583 | spin_unlock(&nn->nn_lock); | 1664 | spin_unlock(&nn->nn_lock); |
| 1584 | 1665 | ||
| @@ -1610,20 +1691,15 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, | |||
| 1610 | 1691 | ||
| 1611 | /* ensure an immediate connect attempt */ | 1692 | /* ensure an immediate connect attempt */ |
| 1612 | nn->nn_last_connect_attempt = jiffies - | 1693 | nn->nn_last_connect_attempt = jiffies - |
| 1613 | (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1); | 1694 | (msecs_to_jiffies(o2net_reconnect_delay()) + 1); |
| 1614 | 1695 | ||
| 1615 | if (node_num != o2nm_this_node()) { | 1696 | if (node_num != o2nm_this_node()) { |
| 1616 | /* heartbeat doesn't work unless a local node number is | ||
| 1617 | * configured and doing so brings up the o2net_wq, so we can | ||
| 1618 | * use it.. */ | ||
| 1619 | queue_delayed_work(o2net_wq, &nn->nn_connect_expired, | ||
| 1620 | msecs_to_jiffies(o2net_idle_timeout(node))); | ||
| 1621 | |||
| 1622 | /* believe it or not, accept and node hearbeating testing | 1697 | /* believe it or not, accept and node hearbeating testing |
| 1623 | * can succeed for this node before we got here.. so | 1698 | * can succeed for this node before we got here.. so |
| 1624 | * only use set_nn_state to clear the persistent error | 1699 | * only use set_nn_state to clear the persistent error |
| 1625 | * if that hasn't already happened */ | 1700 | * if that hasn't already happened */ |
| 1626 | spin_lock(&nn->nn_lock); | 1701 | spin_lock(&nn->nn_lock); |
| 1702 | atomic_set(&nn->nn_timeout, 0); | ||
| 1627 | if (nn->nn_persistent_error) | 1703 | if (nn->nn_persistent_error) |
| 1628 | o2net_set_nn_state(nn, NULL, 0, 0); | 1704 | o2net_set_nn_state(nn, NULL, 0, 0); |
| 1629 | spin_unlock(&nn->nn_lock); | 1705 | spin_unlock(&nn->nn_lock); |
| @@ -1747,6 +1823,7 @@ static int o2net_accept_one(struct socket *sock) | |||
| 1747 | new_sock = NULL; | 1823 | new_sock = NULL; |
| 1748 | 1824 | ||
| 1749 | spin_lock(&nn->nn_lock); | 1825 | spin_lock(&nn->nn_lock); |
| 1826 | atomic_set(&nn->nn_timeout, 0); | ||
| 1750 | o2net_set_nn_state(nn, sc, 0, 0); | 1827 | o2net_set_nn_state(nn, sc, 0, 0); |
| 1751 | spin_unlock(&nn->nn_lock); | 1828 | spin_unlock(&nn->nn_lock); |
| 1752 | 1829 | ||
| @@ -1922,6 +1999,9 @@ int o2net_init(void) | |||
| 1922 | 1999 | ||
| 1923 | o2quo_init(); | 2000 | o2quo_init(); |
| 1924 | 2001 | ||
| 2002 | if (o2net_debugfs_init()) | ||
| 2003 | return -ENOMEM; | ||
| 2004 | |||
| 1925 | o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); | 2005 | o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); |
| 1926 | o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); | 2006 | o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); |
| 1927 | o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); | 2007 | o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); |
| @@ -1941,6 +2021,7 @@ int o2net_init(void) | |||
| 1941 | for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) { | 2021 | for (i = 0; i < ARRAY_SIZE(o2net_nodes); i++) { |
| 1942 | struct o2net_node *nn = o2net_nn_from_num(i); | 2022 | struct o2net_node *nn = o2net_nn_from_num(i); |
| 1943 | 2023 | ||
| 2024 | atomic_set(&nn->nn_timeout, 0); | ||
| 1944 | spin_lock_init(&nn->nn_lock); | 2025 | spin_lock_init(&nn->nn_lock); |
| 1945 | INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect); | 2026 | INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect); |
| 1946 | INIT_DELAYED_WORK(&nn->nn_connect_expired, | 2027 | INIT_DELAYED_WORK(&nn->nn_connect_expired, |
| @@ -1962,4 +2043,5 @@ void o2net_exit(void) | |||
| 1962 | kfree(o2net_hand); | 2043 | kfree(o2net_hand); |
| 1963 | kfree(o2net_keep_req); | 2044 | kfree(o2net_keep_req); |
| 1964 | kfree(o2net_keep_resp); | 2045 | kfree(o2net_keep_resp); |
| 2046 | o2net_debugfs_exit(); | ||
| 1965 | } | 2047 | } |
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index f36f66aab3dd..a705d5d19036 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h | |||
| @@ -117,4 +117,36 @@ int o2net_num_connected_peers(void); | |||
| 117 | int o2net_init(void); | 117 | int o2net_init(void); |
| 118 | void o2net_exit(void); | 118 | void o2net_exit(void); |
| 119 | 119 | ||
| 120 | struct o2net_send_tracking; | ||
| 121 | struct o2net_sock_container; | ||
| 122 | |||
| 123 | #ifdef CONFIG_DEBUG_FS | ||
| 124 | int o2net_debugfs_init(void); | ||
| 125 | void o2net_debugfs_exit(void); | ||
| 126 | void o2net_debug_add_nst(struct o2net_send_tracking *nst); | ||
| 127 | void o2net_debug_del_nst(struct o2net_send_tracking *nst); | ||
| 128 | void o2net_debug_add_sc(struct o2net_sock_container *sc); | ||
| 129 | void o2net_debug_del_sc(struct o2net_sock_container *sc); | ||
| 130 | #else | ||
| 131 | static int o2net_debugfs_init(void) | ||
| 132 | { | ||
| 133 | return 0; | ||
| 134 | } | ||
| 135 | static void o2net_debugfs_exit(void) | ||
| 136 | { | ||
| 137 | } | ||
| 138 | static void o2net_debug_add_nst(struct o2net_send_tracking *nst) | ||
| 139 | { | ||
| 140 | } | ||
| 141 | static void o2net_debug_del_nst(struct o2net_send_tracking *nst) | ||
| 142 | { | ||
| 143 | } | ||
| 144 | static void o2net_debug_add_sc(struct o2net_sock_container *sc) | ||
| 145 | { | ||
| 146 | } | ||
| 147 | static void o2net_debug_del_sc(struct o2net_sock_container *sc) | ||
| 148 | { | ||
| 149 | } | ||
| 150 | #endif /* CONFIG_DEBUG_FS */ | ||
| 151 | |||
| 120 | #endif /* O2CLUSTER_TCP_H */ | 152 | #endif /* O2CLUSTER_TCP_H */ |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index d25b9af28500..8d58cfe410b1 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
| @@ -95,6 +95,8 @@ struct o2net_node { | |||
| 95 | unsigned nn_sc_valid:1; | 95 | unsigned nn_sc_valid:1; |
| 96 | /* if this is set tx just returns it */ | 96 | /* if this is set tx just returns it */ |
| 97 | int nn_persistent_error; | 97 | int nn_persistent_error; |
| 98 | /* It is only set to 1 after the idle time out. */ | ||
| 99 | atomic_t nn_timeout; | ||
| 98 | 100 | ||
| 99 | /* threads waiting for an sc to arrive wait on the wq for generation | 101 | /* threads waiting for an sc to arrive wait on the wq for generation |
| 100 | * to increase. it is increased when a connecting socket succeeds | 102 | * to increase. it is increased when a connecting socket succeeds |
| @@ -164,7 +166,9 @@ struct o2net_sock_container { | |||
| 164 | /* original handlers for the sockets */ | 166 | /* original handlers for the sockets */ |
| 165 | void (*sc_state_change)(struct sock *sk); | 167 | void (*sc_state_change)(struct sock *sk); |
| 166 | void (*sc_data_ready)(struct sock *sk, int bytes); | 168 | void (*sc_data_ready)(struct sock *sk, int bytes); |
| 167 | 169 | #ifdef CONFIG_DEBUG_FS | |
| 170 | struct list_head sc_net_debug_item; | ||
| 171 | #endif | ||
| 168 | struct timeval sc_tv_timer; | 172 | struct timeval sc_tv_timer; |
| 169 | struct timeval sc_tv_data_ready; | 173 | struct timeval sc_tv_data_ready; |
| 170 | struct timeval sc_tv_advance_start; | 174 | struct timeval sc_tv_advance_start; |
| @@ -206,4 +210,24 @@ struct o2net_status_wait { | |||
| 206 | struct list_head ns_node_item; | 210 | struct list_head ns_node_item; |
| 207 | }; | 211 | }; |
| 208 | 212 | ||
| 213 | #ifdef CONFIG_DEBUG_FS | ||
| 214 | /* just for state dumps */ | ||
| 215 | struct o2net_send_tracking { | ||
| 216 | struct list_head st_net_debug_item; | ||
| 217 | struct task_struct *st_task; | ||
| 218 | struct o2net_sock_container *st_sc; | ||
| 219 | u32 st_id; | ||
| 220 | u32 st_msg_type; | ||
| 221 | u32 st_msg_key; | ||
| 222 | u8 st_node; | ||
| 223 | struct timeval st_sock_time; | ||
| 224 | struct timeval st_send_time; | ||
| 225 | struct timeval st_status_time; | ||
| 226 | }; | ||
| 227 | #else | ||
| 228 | struct o2net_send_tracking { | ||
| 229 | u32 dummy; | ||
| 230 | }; | ||
| 231 | #endif /* CONFIG_DEBUG_FS */ | ||
| 232 | |||
| 209 | #endif /* O2CLUSTER_TCP_INTERNAL_H */ | 233 | #endif /* O2CLUSTER_TCP_INTERNAL_H */ |
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile index ce3f7c29d270..190361375700 100644 --- a/fs/ocfs2/dlm/Makefile +++ b/fs/ocfs2/dlm/Makefile | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | EXTRA_CFLAGS += -Ifs/ocfs2 | 1 | EXTRA_CFLAGS += -Ifs/ocfs2 |
| 2 | 2 | ||
| 3 | obj-$(CONFIG_OCFS2_FS) += ocfs2_dlm.o ocfs2_dlmfs.o | 3 | obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o ocfs2_dlmfs.o |
| 4 | 4 | ||
| 5 | ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \ | 5 | ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \ |
| 6 | dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o | 6 | dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index dc8ea666efdb..d5a86fb81a49 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
| @@ -49,6 +49,41 @@ | |||
| 49 | /* Intended to make it easier for us to switch out hash functions */ | 49 | /* Intended to make it easier for us to switch out hash functions */ |
| 50 | #define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l) | 50 | #define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l) |
| 51 | 51 | ||
| 52 | enum dlm_mle_type { | ||
| 53 | DLM_MLE_BLOCK, | ||
| 54 | DLM_MLE_MASTER, | ||
| 55 | DLM_MLE_MIGRATION | ||
| 56 | }; | ||
| 57 | |||
| 58 | struct dlm_lock_name { | ||
| 59 | u8 len; | ||
| 60 | u8 name[DLM_LOCKID_NAME_MAX]; | ||
| 61 | }; | ||
| 62 | |||
| 63 | struct dlm_master_list_entry { | ||
| 64 | struct list_head list; | ||
| 65 | struct list_head hb_events; | ||
| 66 | struct dlm_ctxt *dlm; | ||
| 67 | spinlock_t spinlock; | ||
| 68 | wait_queue_head_t wq; | ||
| 69 | atomic_t woken; | ||
| 70 | struct kref mle_refs; | ||
| 71 | int inuse; | ||
| 72 | unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 73 | unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 74 | unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 75 | unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 76 | u8 master; | ||
| 77 | u8 new_master; | ||
| 78 | enum dlm_mle_type type; | ||
| 79 | struct o2hb_callback_func mle_hb_up; | ||
| 80 | struct o2hb_callback_func mle_hb_down; | ||
| 81 | union { | ||
| 82 | struct dlm_lock_resource *res; | ||
| 83 | struct dlm_lock_name name; | ||
| 84 | } u; | ||
| 85 | }; | ||
| 86 | |||
| 52 | enum dlm_ast_type { | 87 | enum dlm_ast_type { |
| 53 | DLM_AST = 0, | 88 | DLM_AST = 0, |
| 54 | DLM_BAST, | 89 | DLM_BAST, |
| @@ -101,6 +136,7 @@ struct dlm_ctxt | |||
| 101 | struct list_head purge_list; | 136 | struct list_head purge_list; |
| 102 | struct list_head pending_asts; | 137 | struct list_head pending_asts; |
| 103 | struct list_head pending_basts; | 138 | struct list_head pending_basts; |
| 139 | struct list_head tracking_list; | ||
| 104 | unsigned int purge_count; | 140 | unsigned int purge_count; |
| 105 | spinlock_t spinlock; | 141 | spinlock_t spinlock; |
| 106 | spinlock_t ast_lock; | 142 | spinlock_t ast_lock; |
| @@ -122,6 +158,9 @@ struct dlm_ctxt | |||
| 122 | atomic_t remote_resources; | 158 | atomic_t remote_resources; |
| 123 | atomic_t unknown_resources; | 159 | atomic_t unknown_resources; |
| 124 | 160 | ||
| 161 | struct dlm_debug_ctxt *dlm_debug_ctxt; | ||
| 162 | struct dentry *dlm_debugfs_subroot; | ||
| 163 | |||
| 125 | /* NOTE: Next three are protected by dlm_domain_lock */ | 164 | /* NOTE: Next three are protected by dlm_domain_lock */ |
| 126 | struct kref dlm_refs; | 165 | struct kref dlm_refs; |
| 127 | enum dlm_ctxt_state dlm_state; | 166 | enum dlm_ctxt_state dlm_state; |
| @@ -270,6 +309,9 @@ struct dlm_lock_resource | |||
| 270 | struct list_head dirty; | 309 | struct list_head dirty; |
| 271 | struct list_head recovering; // dlm_recovery_ctxt.resources list | 310 | struct list_head recovering; // dlm_recovery_ctxt.resources list |
| 272 | 311 | ||
| 312 | /* Added during init and removed during release */ | ||
| 313 | struct list_head tracking; /* dlm->tracking_list */ | ||
| 314 | |||
| 273 | /* unused lock resources have their last_used stamped and are | 315 | /* unused lock resources have their last_used stamped and are |
| 274 | * put on a list for the dlm thread to run. */ | 316 | * put on a list for the dlm thread to run. */ |
| 275 | unsigned long last_used; | 317 | unsigned long last_used; |
| @@ -963,9 +1005,16 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res) | |||
| 963 | DLM_LOCK_RES_MIGRATING)); | 1005 | DLM_LOCK_RES_MIGRATING)); |
| 964 | } | 1006 | } |
| 965 | 1007 | ||
| 1008 | /* create/destroy slab caches */ | ||
| 1009 | int dlm_init_master_caches(void); | ||
| 1010 | void dlm_destroy_master_caches(void); | ||
| 1011 | |||
| 1012 | int dlm_init_lock_cache(void); | ||
| 1013 | void dlm_destroy_lock_cache(void); | ||
| 966 | 1014 | ||
| 967 | int dlm_init_mle_cache(void); | 1015 | int dlm_init_mle_cache(void); |
| 968 | void dlm_destroy_mle_cache(void); | 1016 | void dlm_destroy_mle_cache(void); |
| 1017 | |||
| 969 | void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up); | 1018 | void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up); |
| 970 | int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, | 1019 | int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, |
| 971 | struct dlm_lock_resource *res); | 1020 | struct dlm_lock_resource *res); |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 64239b37e5d4..5f6d858770a2 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | * | 5 | * |
| 6 | * debug functionality for the dlm | 6 | * debug functionality for the dlm |
| 7 | * | 7 | * |
| 8 | * Copyright (C) 2004 Oracle. All rights reserved. | 8 | * Copyright (C) 2004, 2008 Oracle. All rights reserved. |
| 9 | * | 9 | * |
| 10 | * This program is free software; you can redistribute it and/or | 10 | * This program is free software; you can redistribute it and/or |
| 11 | * modify it under the terms of the GNU General Public | 11 | * modify it under the terms of the GNU General Public |
| @@ -30,6 +30,7 @@ | |||
| 30 | #include <linux/utsname.h> | 30 | #include <linux/utsname.h> |
| 31 | #include <linux/sysctl.h> | 31 | #include <linux/sysctl.h> |
| 32 | #include <linux/spinlock.h> | 32 | #include <linux/spinlock.h> |
| 33 | #include <linux/debugfs.h> | ||
| 33 | 34 | ||
| 34 | #include "cluster/heartbeat.h" | 35 | #include "cluster/heartbeat.h" |
| 35 | #include "cluster/nodemanager.h" | 36 | #include "cluster/nodemanager.h" |
| @@ -37,17 +38,16 @@ | |||
| 37 | 38 | ||
| 38 | #include "dlmapi.h" | 39 | #include "dlmapi.h" |
| 39 | #include "dlmcommon.h" | 40 | #include "dlmcommon.h" |
| 40 | |||
| 41 | #include "dlmdomain.h" | 41 | #include "dlmdomain.h" |
| 42 | #include "dlmdebug.h" | ||
| 42 | 43 | ||
| 43 | #define MLOG_MASK_PREFIX ML_DLM | 44 | #define MLOG_MASK_PREFIX ML_DLM |
| 44 | #include "cluster/masklog.h" | 45 | #include "cluster/masklog.h" |
| 45 | 46 | ||
| 47 | int stringify_lockname(const char *lockname, int locklen, char *buf, int len); | ||
| 48 | |||
| 46 | void dlm_print_one_lock_resource(struct dlm_lock_resource *res) | 49 | void dlm_print_one_lock_resource(struct dlm_lock_resource *res) |
| 47 | { | 50 | { |
| 48 | mlog(ML_NOTICE, "lockres: %.*s, owner=%u, state=%u\n", | ||
| 49 | res->lockname.len, res->lockname.name, | ||
| 50 | res->owner, res->state); | ||
| 51 | spin_lock(&res->spinlock); | 51 | spin_lock(&res->spinlock); |
| 52 | __dlm_print_one_lock_resource(res); | 52 | __dlm_print_one_lock_resource(res); |
| 53 | spin_unlock(&res->spinlock); | 53 | spin_unlock(&res->spinlock); |
| @@ -58,7 +58,7 @@ static void dlm_print_lockres_refmap(struct dlm_lock_resource *res) | |||
| 58 | int bit; | 58 | int bit; |
| 59 | assert_spin_locked(&res->spinlock); | 59 | assert_spin_locked(&res->spinlock); |
| 60 | 60 | ||
| 61 | mlog(ML_NOTICE, " refmap nodes: [ "); | 61 | printk(" refmap nodes: [ "); |
| 62 | bit = 0; | 62 | bit = 0; |
| 63 | while (1) { | 63 | while (1) { |
| 64 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit); | 64 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit); |
| @@ -70,63 +70,66 @@ static void dlm_print_lockres_refmap(struct dlm_lock_resource *res) | |||
| 70 | printk("], inflight=%u\n", res->inflight_locks); | 70 | printk("], inflight=%u\n", res->inflight_locks); |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | static void __dlm_print_lock(struct dlm_lock *lock) | ||
| 74 | { | ||
| 75 | spin_lock(&lock->spinlock); | ||
| 76 | |||
| 77 | printk(" type=%d, conv=%d, node=%u, cookie=%u:%llu, " | ||
| 78 | "ref=%u, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c), " | ||
| 79 | "pending=(conv=%c,lock=%c,cancel=%c,unlock=%c)\n", | ||
| 80 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | ||
| 81 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | ||
| 82 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | ||
| 83 | atomic_read(&lock->lock_refs.refcount), | ||
| 84 | (list_empty(&lock->ast_list) ? 'y' : 'n'), | ||
| 85 | (lock->ast_pending ? 'y' : 'n'), | ||
| 86 | (list_empty(&lock->bast_list) ? 'y' : 'n'), | ||
| 87 | (lock->bast_pending ? 'y' : 'n'), | ||
| 88 | (lock->convert_pending ? 'y' : 'n'), | ||
| 89 | (lock->lock_pending ? 'y' : 'n'), | ||
| 90 | (lock->cancel_pending ? 'y' : 'n'), | ||
| 91 | (lock->unlock_pending ? 'y' : 'n')); | ||
| 92 | |||
| 93 | spin_unlock(&lock->spinlock); | ||
| 94 | } | ||
| 95 | |||
| 73 | void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) | 96 | void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) |
| 74 | { | 97 | { |
| 75 | struct list_head *iter2; | 98 | struct list_head *iter2; |
| 76 | struct dlm_lock *lock; | 99 | struct dlm_lock *lock; |
| 100 | char buf[DLM_LOCKID_NAME_MAX]; | ||
| 77 | 101 | ||
| 78 | assert_spin_locked(&res->spinlock); | 102 | assert_spin_locked(&res->spinlock); |
| 79 | 103 | ||
| 80 | mlog(ML_NOTICE, "lockres: %.*s, owner=%u, state=%u\n", | 104 | stringify_lockname(res->lockname.name, res->lockname.len, |
| 81 | res->lockname.len, res->lockname.name, | 105 | buf, sizeof(buf) - 1); |
| 82 | res->owner, res->state); | 106 | printk("lockres: %s, owner=%u, state=%u\n", |
| 83 | mlog(ML_NOTICE, " last used: %lu, on purge list: %s\n", | 107 | buf, res->owner, res->state); |
| 84 | res->last_used, list_empty(&res->purge) ? "no" : "yes"); | 108 | printk(" last used: %lu, refcnt: %u, on purge list: %s\n", |
| 109 | res->last_used, atomic_read(&res->refs.refcount), | ||
| 110 | list_empty(&res->purge) ? "no" : "yes"); | ||
| 111 | printk(" on dirty list: %s, on reco list: %s, " | ||
| 112 | "migrating pending: %s\n", | ||
| 113 | list_empty(&res->dirty) ? "no" : "yes", | ||
| 114 | list_empty(&res->recovering) ? "no" : "yes", | ||
| 115 | res->migration_pending ? "yes" : "no"); | ||
| 116 | printk(" inflight locks: %d, asts reserved: %d\n", | ||
| 117 | res->inflight_locks, atomic_read(&res->asts_reserved)); | ||
| 85 | dlm_print_lockres_refmap(res); | 118 | dlm_print_lockres_refmap(res); |
| 86 | mlog(ML_NOTICE, " granted queue: \n"); | 119 | printk(" granted queue:\n"); |
| 87 | list_for_each(iter2, &res->granted) { | 120 | list_for_each(iter2, &res->granted) { |
| 88 | lock = list_entry(iter2, struct dlm_lock, list); | 121 | lock = list_entry(iter2, struct dlm_lock, list); |
| 89 | spin_lock(&lock->spinlock); | 122 | __dlm_print_lock(lock); |
| 90 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " | ||
| 91 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", | ||
| 92 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | ||
| 93 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | ||
| 94 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | ||
| 95 | list_empty(&lock->ast_list) ? 'y' : 'n', | ||
| 96 | lock->ast_pending ? 'y' : 'n', | ||
| 97 | list_empty(&lock->bast_list) ? 'y' : 'n', | ||
| 98 | lock->bast_pending ? 'y' : 'n'); | ||
| 99 | spin_unlock(&lock->spinlock); | ||
| 100 | } | 123 | } |
| 101 | mlog(ML_NOTICE, " converting queue: \n"); | 124 | printk(" converting queue:\n"); |
| 102 | list_for_each(iter2, &res->converting) { | 125 | list_for_each(iter2, &res->converting) { |
| 103 | lock = list_entry(iter2, struct dlm_lock, list); | 126 | lock = list_entry(iter2, struct dlm_lock, list); |
| 104 | spin_lock(&lock->spinlock); | 127 | __dlm_print_lock(lock); |
| 105 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " | ||
| 106 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", | ||
| 107 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | ||
| 108 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | ||
| 109 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | ||
| 110 | list_empty(&lock->ast_list) ? 'y' : 'n', | ||
| 111 | lock->ast_pending ? 'y' : 'n', | ||
| 112 | list_empty(&lock->bast_list) ? 'y' : 'n', | ||
| 113 | lock->bast_pending ? 'y' : 'n'); | ||
| 114 | spin_unlock(&lock->spinlock); | ||
| 115 | } | 128 | } |
| 116 | mlog(ML_NOTICE, " blocked queue: \n"); | 129 | printk(" blocked queue:\n"); |
| 117 | list_for_each(iter2, &res->blocked) { | 130 | list_for_each(iter2, &res->blocked) { |
| 118 | lock = list_entry(iter2, struct dlm_lock, list); | 131 | lock = list_entry(iter2, struct dlm_lock, list); |
| 119 | spin_lock(&lock->spinlock); | 132 | __dlm_print_lock(lock); |
| 120 | mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " | ||
| 121 | "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", | ||
| 122 | lock->ml.type, lock->ml.convert_type, lock->ml.node, | ||
| 123 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | ||
| 124 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | ||
| 125 | list_empty(&lock->ast_list) ? 'y' : 'n', | ||
| 126 | lock->ast_pending ? 'y' : 'n', | ||
| 127 | list_empty(&lock->bast_list) ? 'y' : 'n', | ||
| 128 | lock->bast_pending ? 'y' : 'n'); | ||
| 129 | spin_unlock(&lock->spinlock); | ||
| 130 | } | 133 | } |
| 131 | } | 134 | } |
| 132 | 135 | ||
| @@ -136,31 +139,6 @@ void dlm_print_one_lock(struct dlm_lock *lockid) | |||
| 136 | } | 139 | } |
| 137 | EXPORT_SYMBOL_GPL(dlm_print_one_lock); | 140 | EXPORT_SYMBOL_GPL(dlm_print_one_lock); |
| 138 | 141 | ||
| 139 | #if 0 | ||
| 140 | void dlm_dump_lock_resources(struct dlm_ctxt *dlm) | ||
| 141 | { | ||
| 142 | struct dlm_lock_resource *res; | ||
| 143 | struct hlist_node *iter; | ||
| 144 | struct hlist_head *bucket; | ||
| 145 | int i; | ||
| 146 | |||
| 147 | mlog(ML_NOTICE, "struct dlm_ctxt: %s, node=%u, key=%u\n", | ||
| 148 | dlm->name, dlm->node_num, dlm->key); | ||
| 149 | if (!dlm || !dlm->name) { | ||
| 150 | mlog(ML_ERROR, "dlm=%p\n", dlm); | ||
| 151 | return; | ||
| 152 | } | ||
| 153 | |||
| 154 | spin_lock(&dlm->spinlock); | ||
| 155 | for (i=0; i<DLM_HASH_BUCKETS; i++) { | ||
| 156 | bucket = dlm_lockres_hash(dlm, i); | ||
| 157 | hlist_for_each_entry(res, iter, bucket, hash_node) | ||
| 158 | dlm_print_one_lock_resource(res); | ||
| 159 | } | ||
| 160 | spin_unlock(&dlm->spinlock); | ||
| 161 | } | ||
| 162 | #endif /* 0 */ | ||
| 163 | |||
| 164 | static const char *dlm_errnames[] = { | 142 | static const char *dlm_errnames[] = { |
| 165 | [DLM_NORMAL] = "DLM_NORMAL", | 143 | [DLM_NORMAL] = "DLM_NORMAL", |
| 166 | [DLM_GRANTED] = "DLM_GRANTED", | 144 | [DLM_GRANTED] = "DLM_GRANTED", |
| @@ -266,3 +244,792 @@ const char *dlm_errname(enum dlm_status err) | |||
| 266 | return dlm_errnames[err]; | 244 | return dlm_errnames[err]; |
| 267 | } | 245 | } |
| 268 | EXPORT_SYMBOL_GPL(dlm_errname); | 246 | EXPORT_SYMBOL_GPL(dlm_errname); |
| 247 | |||
| 248 | /* NOTE: This function converts a lockname into a string. It uses knowledge | ||
| 249 | * of the format of the lockname that should be outside the purview of the dlm. | ||
| 250 | * We are adding only to make dlm debugging slightly easier. | ||
| 251 | * | ||
| 252 | * For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h. | ||
| 253 | */ | ||
| 254 | int stringify_lockname(const char *lockname, int locklen, char *buf, int len) | ||
| 255 | { | ||
| 256 | int out = 0; | ||
| 257 | __be64 inode_blkno_be; | ||
| 258 | |||
| 259 | #define OCFS2_DENTRY_LOCK_INO_START 18 | ||
| 260 | if (*lockname == 'N') { | ||
| 261 | memcpy((__be64 *)&inode_blkno_be, | ||
| 262 | (char *)&lockname[OCFS2_DENTRY_LOCK_INO_START], | ||
| 263 | sizeof(__be64)); | ||
| 264 | out += snprintf(buf + out, len - out, "%.*s%08x", | ||
| 265 | OCFS2_DENTRY_LOCK_INO_START - 1, lockname, | ||
| 266 | (unsigned int)be64_to_cpu(inode_blkno_be)); | ||
| 267 | } else | ||
| 268 | out += snprintf(buf + out, len - out, "%.*s", | ||
| 269 | locklen, lockname); | ||
| 270 | return out; | ||
| 271 | } | ||
| 272 | |||
| 273 | static int stringify_nodemap(unsigned long *nodemap, int maxnodes, | ||
| 274 | char *buf, int len) | ||
| 275 | { | ||
| 276 | int out = 0; | ||
| 277 | int i = -1; | ||
| 278 | |||
| 279 | while ((i = find_next_bit(nodemap, maxnodes, i + 1)) < maxnodes) | ||
| 280 | out += snprintf(buf + out, len - out, "%d ", i); | ||
| 281 | |||
| 282 | return out; | ||
| 283 | } | ||
| 284 | |||
| 285 | static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len) | ||
| 286 | { | ||
| 287 | int out = 0; | ||
| 288 | unsigned int namelen; | ||
| 289 | const char *name; | ||
| 290 | char *mle_type; | ||
| 291 | |||
| 292 | if (mle->type != DLM_MLE_MASTER) { | ||
| 293 | namelen = mle->u.name.len; | ||
| 294 | name = mle->u.name.name; | ||
| 295 | } else { | ||
| 296 | namelen = mle->u.res->lockname.len; | ||
| 297 | name = mle->u.res->lockname.name; | ||
| 298 | } | ||
| 299 | |||
| 300 | if (mle->type == DLM_MLE_BLOCK) | ||
| 301 | mle_type = "BLK"; | ||
| 302 | else if (mle->type == DLM_MLE_MASTER) | ||
| 303 | mle_type = "MAS"; | ||
| 304 | else | ||
| 305 | mle_type = "MIG"; | ||
| 306 | |||
| 307 | out += stringify_lockname(name, namelen, buf + out, len - out); | ||
| 308 | out += snprintf(buf + out, len - out, | ||
| 309 | "\t%3s\tmas=%3u\tnew=%3u\tevt=%1d\tuse=%1d\tref=%3d\n", | ||
| 310 | mle_type, mle->master, mle->new_master, | ||
| 311 | !list_empty(&mle->hb_events), | ||
| 312 | !!mle->inuse, | ||
| 313 | atomic_read(&mle->mle_refs.refcount)); | ||
| 314 | |||
| 315 | out += snprintf(buf + out, len - out, "Maybe="); | ||
| 316 | out += stringify_nodemap(mle->maybe_map, O2NM_MAX_NODES, | ||
| 317 | buf + out, len - out); | ||
| 318 | out += snprintf(buf + out, len - out, "\n"); | ||
| 319 | |||
| 320 | out += snprintf(buf + out, len - out, "Vote="); | ||
| 321 | out += stringify_nodemap(mle->vote_map, O2NM_MAX_NODES, | ||
| 322 | buf + out, len - out); | ||
| 323 | out += snprintf(buf + out, len - out, "\n"); | ||
| 324 | |||
| 325 | out += snprintf(buf + out, len - out, "Response="); | ||
| 326 | out += stringify_nodemap(mle->response_map, O2NM_MAX_NODES, | ||
| 327 | buf + out, len - out); | ||
| 328 | out += snprintf(buf + out, len - out, "\n"); | ||
| 329 | |||
| 330 | out += snprintf(buf + out, len - out, "Node="); | ||
| 331 | out += stringify_nodemap(mle->node_map, O2NM_MAX_NODES, | ||
| 332 | buf + out, len - out); | ||
| 333 | out += snprintf(buf + out, len - out, "\n"); | ||
| 334 | |||
| 335 | out += snprintf(buf + out, len - out, "\n"); | ||
| 336 | |||
| 337 | return out; | ||
| 338 | } | ||
| 339 | |||
| 340 | void dlm_print_one_mle(struct dlm_master_list_entry *mle) | ||
| 341 | { | ||
| 342 | char *buf; | ||
| 343 | |||
| 344 | buf = (char *) get_zeroed_page(GFP_NOFS); | ||
| 345 | if (buf) { | ||
| 346 | dump_mle(mle, buf, PAGE_SIZE - 1); | ||
| 347 | free_page((unsigned long)buf); | ||
| 348 | } | ||
| 349 | } | ||
| 350 | |||
| 351 | #ifdef CONFIG_DEBUG_FS | ||
| 352 | |||
| 353 | static struct dentry *dlm_debugfs_root = NULL; | ||
| 354 | |||
| 355 | #define DLM_DEBUGFS_DIR "o2dlm" | ||
| 356 | #define DLM_DEBUGFS_DLM_STATE "dlm_state" | ||
| 357 | #define DLM_DEBUGFS_LOCKING_STATE "locking_state" | ||
| 358 | #define DLM_DEBUGFS_MLE_STATE "mle_state" | ||
| 359 | #define DLM_DEBUGFS_PURGE_LIST "purge_list" | ||
| 360 | |||
| 361 | /* begin - utils funcs */ | ||
| 362 | static void dlm_debug_free(struct kref *kref) | ||
| 363 | { | ||
| 364 | struct dlm_debug_ctxt *dc; | ||
| 365 | |||
| 366 | dc = container_of(kref, struct dlm_debug_ctxt, debug_refcnt); | ||
| 367 | |||
| 368 | kfree(dc); | ||
| 369 | } | ||
| 370 | |||
| 371 | void dlm_debug_put(struct dlm_debug_ctxt *dc) | ||
| 372 | { | ||
| 373 | if (dc) | ||
| 374 | kref_put(&dc->debug_refcnt, dlm_debug_free); | ||
| 375 | } | ||
| 376 | |||
| 377 | static void dlm_debug_get(struct dlm_debug_ctxt *dc) | ||
| 378 | { | ||
| 379 | kref_get(&dc->debug_refcnt); | ||
| 380 | } | ||
| 381 | |||
| 382 | static struct debug_buffer *debug_buffer_allocate(void) | ||
| 383 | { | ||
| 384 | struct debug_buffer *db = NULL; | ||
| 385 | |||
| 386 | db = kzalloc(sizeof(struct debug_buffer), GFP_KERNEL); | ||
| 387 | if (!db) | ||
| 388 | goto bail; | ||
| 389 | |||
| 390 | db->len = PAGE_SIZE; | ||
| 391 | db->buf = kmalloc(db->len, GFP_KERNEL); | ||
| 392 | if (!db->buf) | ||
| 393 | goto bail; | ||
| 394 | |||
| 395 | return db; | ||
| 396 | bail: | ||
| 397 | kfree(db); | ||
| 398 | return NULL; | ||
| 399 | } | ||
| 400 | |||
| 401 | static ssize_t debug_buffer_read(struct file *file, char __user *buf, | ||
| 402 | size_t nbytes, loff_t *ppos) | ||
| 403 | { | ||
| 404 | struct debug_buffer *db = file->private_data; | ||
| 405 | |||
| 406 | return simple_read_from_buffer(buf, nbytes, ppos, db->buf, db->len); | ||
| 407 | } | ||
| 408 | |||
| 409 | static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence) | ||
| 410 | { | ||
| 411 | struct debug_buffer *db = file->private_data; | ||
| 412 | loff_t new = -1; | ||
| 413 | |||
| 414 | switch (whence) { | ||
| 415 | case 0: | ||
| 416 | new = off; | ||
| 417 | break; | ||
| 418 | case 1: | ||
| 419 | new = file->f_pos + off; | ||
| 420 | break; | ||
| 421 | } | ||
| 422 | |||
| 423 | if (new < 0 || new > db->len) | ||
| 424 | return -EINVAL; | ||
| 425 | |||
| 426 | return (file->f_pos = new); | ||
| 427 | } | ||
| 428 | |||
| 429 | static int debug_buffer_release(struct inode *inode, struct file *file) | ||
| 430 | { | ||
| 431 | struct debug_buffer *db = (struct debug_buffer *)file->private_data; | ||
| 432 | |||
| 433 | if (db) | ||
| 434 | kfree(db->buf); | ||
| 435 | kfree(db); | ||
| 436 | |||
| 437 | return 0; | ||
| 438 | } | ||
| 439 | /* end - util funcs */ | ||
| 440 | |||
| 441 | /* begin - purge list funcs */ | ||
| 442 | static int debug_purgelist_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | ||
| 443 | { | ||
| 444 | struct dlm_lock_resource *res; | ||
| 445 | int out = 0; | ||
| 446 | unsigned long total = 0; | ||
| 447 | |||
| 448 | out += snprintf(db->buf + out, db->len - out, | ||
| 449 | "Dumping Purgelist for Domain: %s\n", dlm->name); | ||
| 450 | |||
| 451 | spin_lock(&dlm->spinlock); | ||
| 452 | list_for_each_entry(res, &dlm->purge_list, purge) { | ||
| 453 | ++total; | ||
| 454 | if (db->len - out < 100) | ||
| 455 | continue; | ||
| 456 | spin_lock(&res->spinlock); | ||
| 457 | out += stringify_lockname(res->lockname.name, | ||
| 458 | res->lockname.len, | ||
| 459 | db->buf + out, db->len - out); | ||
| 460 | out += snprintf(db->buf + out, db->len - out, "\t%ld\n", | ||
| 461 | (jiffies - res->last_used)/HZ); | ||
| 462 | spin_unlock(&res->spinlock); | ||
| 463 | } | ||
| 464 | spin_unlock(&dlm->spinlock); | ||
| 465 | |||
| 466 | out += snprintf(db->buf + out, db->len - out, | ||
| 467 | "Total on list: %ld\n", total); | ||
| 468 | |||
| 469 | return out; | ||
| 470 | } | ||
| 471 | |||
| 472 | static int debug_purgelist_open(struct inode *inode, struct file *file) | ||
| 473 | { | ||
| 474 | struct dlm_ctxt *dlm = inode->i_private; | ||
| 475 | struct debug_buffer *db; | ||
| 476 | |||
| 477 | db = debug_buffer_allocate(); | ||
| 478 | if (!db) | ||
| 479 | goto bail; | ||
| 480 | |||
| 481 | db->len = debug_purgelist_print(dlm, db); | ||
| 482 | |||
| 483 | file->private_data = db; | ||
| 484 | |||
| 485 | return 0; | ||
| 486 | bail: | ||
| 487 | return -ENOMEM; | ||
| 488 | } | ||
| 489 | |||
| 490 | static struct file_operations debug_purgelist_fops = { | ||
| 491 | .open = debug_purgelist_open, | ||
| 492 | .release = debug_buffer_release, | ||
| 493 | .read = debug_buffer_read, | ||
| 494 | .llseek = debug_buffer_llseek, | ||
| 495 | }; | ||
| 496 | /* end - purge list funcs */ | ||
| 497 | |||
| 498 | /* begin - debug mle funcs */ | ||
| 499 | static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | ||
| 500 | { | ||
| 501 | struct dlm_master_list_entry *mle; | ||
| 502 | int out = 0; | ||
| 503 | unsigned long total = 0; | ||
| 504 | |||
| 505 | out += snprintf(db->buf + out, db->len - out, | ||
| 506 | "Dumping MLEs for Domain: %s\n", dlm->name); | ||
| 507 | |||
| 508 | spin_lock(&dlm->master_lock); | ||
| 509 | list_for_each_entry(mle, &dlm->master_list, list) { | ||
| 510 | ++total; | ||
| 511 | if (db->len - out < 200) | ||
| 512 | continue; | ||
| 513 | out += dump_mle(mle, db->buf + out, db->len - out); | ||
| 514 | } | ||
| 515 | spin_unlock(&dlm->master_lock); | ||
| 516 | |||
| 517 | out += snprintf(db->buf + out, db->len - out, | ||
| 518 | "Total on list: %ld\n", total); | ||
| 519 | return out; | ||
| 520 | } | ||
| 521 | |||
| 522 | static int debug_mle_open(struct inode *inode, struct file *file) | ||
| 523 | { | ||
| 524 | struct dlm_ctxt *dlm = inode->i_private; | ||
| 525 | struct debug_buffer *db; | ||
| 526 | |||
| 527 | db = debug_buffer_allocate(); | ||
| 528 | if (!db) | ||
| 529 | goto bail; | ||
| 530 | |||
| 531 | db->len = debug_mle_print(dlm, db); | ||
| 532 | |||
| 533 | file->private_data = db; | ||
| 534 | |||
| 535 | return 0; | ||
| 536 | bail: | ||
| 537 | return -ENOMEM; | ||
| 538 | } | ||
| 539 | |||
| 540 | static struct file_operations debug_mle_fops = { | ||
| 541 | .open = debug_mle_open, | ||
| 542 | .release = debug_buffer_release, | ||
| 543 | .read = debug_buffer_read, | ||
| 544 | .llseek = debug_buffer_llseek, | ||
| 545 | }; | ||
| 546 | |||
| 547 | /* end - debug mle funcs */ | ||
| 548 | |||
| 549 | /* begin - debug lockres funcs */ | ||
| 550 | static int dump_lock(struct dlm_lock *lock, int list_type, char *buf, int len) | ||
| 551 | { | ||
| 552 | int out; | ||
| 553 | |||
| 554 | #define DEBUG_LOCK_VERSION 1 | ||
| 555 | spin_lock(&lock->spinlock); | ||
| 556 | out = snprintf(buf, len, "LOCK:%d,%d,%d,%d,%d,%d:%lld,%d,%d,%d,%d,%d," | ||
| 557 | "%d,%d,%d,%d\n", | ||
| 558 | DEBUG_LOCK_VERSION, | ||
| 559 | list_type, lock->ml.type, lock->ml.convert_type, | ||
| 560 | lock->ml.node, | ||
| 561 | dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), | ||
| 562 | dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), | ||
| 563 | !list_empty(&lock->ast_list), | ||
| 564 | !list_empty(&lock->bast_list), | ||
| 565 | lock->ast_pending, lock->bast_pending, | ||
| 566 | lock->convert_pending, lock->lock_pending, | ||
| 567 | lock->cancel_pending, lock->unlock_pending, | ||
| 568 | atomic_read(&lock->lock_refs.refcount)); | ||
| 569 | spin_unlock(&lock->spinlock); | ||
| 570 | |||
| 571 | return out; | ||
| 572 | } | ||
| 573 | |||
| 574 | static int dump_lockres(struct dlm_lock_resource *res, char *buf, int len) | ||
| 575 | { | ||
| 576 | struct dlm_lock *lock; | ||
| 577 | int i; | ||
| 578 | int out = 0; | ||
| 579 | |||
| 580 | out += snprintf(buf + out, len - out, "NAME:"); | ||
| 581 | out += stringify_lockname(res->lockname.name, res->lockname.len, | ||
| 582 | buf + out, len - out); | ||
| 583 | out += snprintf(buf + out, len - out, "\n"); | ||
| 584 | |||
| 585 | #define DEBUG_LRES_VERSION 1 | ||
| 586 | out += snprintf(buf + out, len - out, | ||
| 587 | "LRES:%d,%d,%d,%ld,%d,%d,%d,%d,%d,%d,%d\n", | ||
| 588 | DEBUG_LRES_VERSION, | ||
| 589 | res->owner, res->state, res->last_used, | ||
| 590 | !list_empty(&res->purge), | ||
| 591 | !list_empty(&res->dirty), | ||
| 592 | !list_empty(&res->recovering), | ||
| 593 | res->inflight_locks, res->migration_pending, | ||
| 594 | atomic_read(&res->asts_reserved), | ||
| 595 | atomic_read(&res->refs.refcount)); | ||
| 596 | |||
| 597 | /* refmap */ | ||
| 598 | out += snprintf(buf + out, len - out, "RMAP:"); | ||
| 599 | out += stringify_nodemap(res->refmap, O2NM_MAX_NODES, | ||
| 600 | buf + out, len - out); | ||
| 601 | out += snprintf(buf + out, len - out, "\n"); | ||
| 602 | |||
| 603 | /* lvb */ | ||
| 604 | out += snprintf(buf + out, len - out, "LVBX:"); | ||
| 605 | for (i = 0; i < DLM_LVB_LEN; i++) | ||
| 606 | out += snprintf(buf + out, len - out, | ||
| 607 | "%02x", (unsigned char)res->lvb[i]); | ||
| 608 | out += snprintf(buf + out, len - out, "\n"); | ||
| 609 | |||
| 610 | /* granted */ | ||
| 611 | list_for_each_entry(lock, &res->granted, list) | ||
| 612 | out += dump_lock(lock, 0, buf + out, len - out); | ||
| 613 | |||
| 614 | /* converting */ | ||
| 615 | list_for_each_entry(lock, &res->converting, list) | ||
| 616 | out += dump_lock(lock, 1, buf + out, len - out); | ||
| 617 | |||
| 618 | /* blocked */ | ||
| 619 | list_for_each_entry(lock, &res->blocked, list) | ||
| 620 | out += dump_lock(lock, 2, buf + out, len - out); | ||
| 621 | |||
| 622 | out += snprintf(buf + out, len - out, "\n"); | ||
| 623 | |||
| 624 | return out; | ||
| 625 | } | ||
| 626 | |||
| 627 | static void *lockres_seq_start(struct seq_file *m, loff_t *pos) | ||
| 628 | { | ||
| 629 | struct debug_lockres *dl = m->private; | ||
| 630 | struct dlm_ctxt *dlm = dl->dl_ctxt; | ||
| 631 | struct dlm_lock_resource *res = NULL; | ||
| 632 | |||
| 633 | spin_lock(&dlm->spinlock); | ||
| 634 | |||
| 635 | if (dl->dl_res) { | ||
| 636 | list_for_each_entry(res, &dl->dl_res->tracking, tracking) { | ||
| 637 | if (dl->dl_res) { | ||
| 638 | dlm_lockres_put(dl->dl_res); | ||
| 639 | dl->dl_res = NULL; | ||
| 640 | } | ||
| 641 | if (&res->tracking == &dlm->tracking_list) { | ||
| 642 | mlog(0, "End of list found, %p\n", res); | ||
| 643 | dl = NULL; | ||
| 644 | break; | ||
| 645 | } | ||
| 646 | dlm_lockres_get(res); | ||
| 647 | dl->dl_res = res; | ||
| 648 | break; | ||
| 649 | } | ||
| 650 | } else { | ||
| 651 | if (!list_empty(&dlm->tracking_list)) { | ||
| 652 | list_for_each_entry(res, &dlm->tracking_list, tracking) | ||
| 653 | break; | ||
| 654 | dlm_lockres_get(res); | ||
| 655 | dl->dl_res = res; | ||
| 656 | } else | ||
| 657 | dl = NULL; | ||
| 658 | } | ||
| 659 | |||
| 660 | if (dl) { | ||
| 661 | spin_lock(&dl->dl_res->spinlock); | ||
| 662 | dump_lockres(dl->dl_res, dl->dl_buf, dl->dl_len - 1); | ||
| 663 | spin_unlock(&dl->dl_res->spinlock); | ||
| 664 | } | ||
| 665 | |||
| 666 | spin_unlock(&dlm->spinlock); | ||
| 667 | |||
| 668 | return dl; | ||
| 669 | } | ||
| 670 | |||
| 671 | static void lockres_seq_stop(struct seq_file *m, void *v) | ||
| 672 | { | ||
| 673 | } | ||
| 674 | |||
| 675 | static void *lockres_seq_next(struct seq_file *m, void *v, loff_t *pos) | ||
| 676 | { | ||
| 677 | return NULL; | ||
| 678 | } | ||
| 679 | |||
| 680 | static int lockres_seq_show(struct seq_file *s, void *v) | ||
| 681 | { | ||
| 682 | struct debug_lockres *dl = (struct debug_lockres *)v; | ||
| 683 | |||
| 684 | seq_printf(s, "%s", dl->dl_buf); | ||
| 685 | |||
| 686 | return 0; | ||
| 687 | } | ||
| 688 | |||
| 689 | static struct seq_operations debug_lockres_ops = { | ||
| 690 | .start = lockres_seq_start, | ||
| 691 | .stop = lockres_seq_stop, | ||
| 692 | .next = lockres_seq_next, | ||
| 693 | .show = lockres_seq_show, | ||
| 694 | }; | ||
| 695 | |||
| 696 | static int debug_lockres_open(struct inode *inode, struct file *file) | ||
| 697 | { | ||
| 698 | struct dlm_ctxt *dlm = inode->i_private; | ||
| 699 | int ret = -ENOMEM; | ||
| 700 | struct seq_file *seq; | ||
| 701 | struct debug_lockres *dl = NULL; | ||
| 702 | |||
| 703 | dl = kzalloc(sizeof(struct debug_lockres), GFP_KERNEL); | ||
| 704 | if (!dl) { | ||
| 705 | mlog_errno(ret); | ||
| 706 | goto bail; | ||
| 707 | } | ||
| 708 | |||
| 709 | dl->dl_len = PAGE_SIZE; | ||
| 710 | dl->dl_buf = kmalloc(dl->dl_len, GFP_KERNEL); | ||
| 711 | if (!dl->dl_buf) { | ||
| 712 | mlog_errno(ret); | ||
| 713 | goto bail; | ||
| 714 | } | ||
| 715 | |||
| 716 | ret = seq_open(file, &debug_lockres_ops); | ||
| 717 | if (ret) { | ||
| 718 | mlog_errno(ret); | ||
| 719 | goto bail; | ||
| 720 | } | ||
| 721 | |||
| 722 | seq = (struct seq_file *) file->private_data; | ||
| 723 | seq->private = dl; | ||
| 724 | |||
| 725 | dlm_grab(dlm); | ||
| 726 | dl->dl_ctxt = dlm; | ||
| 727 | |||
| 728 | return 0; | ||
| 729 | bail: | ||
| 730 | if (dl) | ||
| 731 | kfree(dl->dl_buf); | ||
| 732 | kfree(dl); | ||
| 733 | return ret; | ||
| 734 | } | ||
| 735 | |||
| 736 | static int debug_lockres_release(struct inode *inode, struct file *file) | ||
| 737 | { | ||
| 738 | struct seq_file *seq = (struct seq_file *)file->private_data; | ||
| 739 | struct debug_lockres *dl = (struct debug_lockres *)seq->private; | ||
| 740 | |||
| 741 | if (dl->dl_res) | ||
| 742 | dlm_lockres_put(dl->dl_res); | ||
| 743 | dlm_put(dl->dl_ctxt); | ||
| 744 | kfree(dl->dl_buf); | ||
| 745 | return seq_release_private(inode, file); | ||
| 746 | } | ||
| 747 | |||
| 748 | static struct file_operations debug_lockres_fops = { | ||
| 749 | .open = debug_lockres_open, | ||
| 750 | .release = debug_lockres_release, | ||
| 751 | .read = seq_read, | ||
| 752 | .llseek = seq_lseek, | ||
| 753 | }; | ||
| 754 | /* end - debug lockres funcs */ | ||
| 755 | |||
| 756 | /* begin - debug state funcs */ | ||
| 757 | static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | ||
| 758 | { | ||
| 759 | int out = 0; | ||
| 760 | struct dlm_reco_node_data *node; | ||
| 761 | char *state; | ||
| 762 | int lres, rres, ures, tres; | ||
| 763 | |||
| 764 | lres = atomic_read(&dlm->local_resources); | ||
| 765 | rres = atomic_read(&dlm->remote_resources); | ||
| 766 | ures = atomic_read(&dlm->unknown_resources); | ||
| 767 | tres = lres + rres + ures; | ||
| 768 | |||
| 769 | spin_lock(&dlm->spinlock); | ||
| 770 | |||
| 771 | switch (dlm->dlm_state) { | ||
| 772 | case DLM_CTXT_NEW: | ||
| 773 | state = "NEW"; break; | ||
| 774 | case DLM_CTXT_JOINED: | ||
| 775 | state = "JOINED"; break; | ||
| 776 | case DLM_CTXT_IN_SHUTDOWN: | ||
| 777 | state = "SHUTDOWN"; break; | ||
| 778 | case DLM_CTXT_LEAVING: | ||
| 779 | state = "LEAVING"; break; | ||
| 780 | default: | ||
| 781 | state = "UNKNOWN"; break; | ||
| 782 | } | ||
| 783 | |||
| 784 | /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ | ||
| 785 | out += snprintf(db->buf + out, db->len - out, | ||
| 786 | "Domain: %s Key: 0x%08x\n", dlm->name, dlm->key); | ||
| 787 | |||
| 788 | /* Thread Pid: xxx Node: xxx State: xxxxx */ | ||
| 789 | out += snprintf(db->buf + out, db->len - out, | ||
| 790 | "Thread Pid: %d Node: %d State: %s\n", | ||
| 791 | dlm->dlm_thread_task->pid, dlm->node_num, state); | ||
| 792 | |||
| 793 | /* Number of Joins: xxx Joining Node: xxx */ | ||
| 794 | out += snprintf(db->buf + out, db->len - out, | ||
| 795 | "Number of Joins: %d Joining Node: %d\n", | ||
| 796 | dlm->num_joins, dlm->joining_node); | ||
| 797 | |||
| 798 | /* Domain Map: xx xx xx */ | ||
| 799 | out += snprintf(db->buf + out, db->len - out, "Domain Map: "); | ||
| 800 | out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES, | ||
| 801 | db->buf + out, db->len - out); | ||
| 802 | out += snprintf(db->buf + out, db->len - out, "\n"); | ||
| 803 | |||
| 804 | /* Live Map: xx xx xx */ | ||
| 805 | out += snprintf(db->buf + out, db->len - out, "Live Map: "); | ||
| 806 | out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, | ||
| 807 | db->buf + out, db->len - out); | ||
| 808 | out += snprintf(db->buf + out, db->len - out, "\n"); | ||
| 809 | |||
| 810 | /* Mastered Resources Total: xxx Locally: xxx Remotely: ... */ | ||
| 811 | out += snprintf(db->buf + out, db->len - out, | ||
| 812 | "Mastered Resources Total: %d Locally: %d " | ||
| 813 | "Remotely: %d Unknown: %d\n", | ||
| 814 | tres, lres, rres, ures); | ||
| 815 | |||
| 816 | /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */ | ||
| 817 | out += snprintf(db->buf + out, db->len - out, | ||
| 818 | "Lists: Dirty=%s Purge=%s PendingASTs=%s " | ||
| 819 | "PendingBASTs=%s Master=%s\n", | ||
| 820 | (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"), | ||
| 821 | (list_empty(&dlm->purge_list) ? "Empty" : "InUse"), | ||
| 822 | (list_empty(&dlm->pending_asts) ? "Empty" : "InUse"), | ||
| 823 | (list_empty(&dlm->pending_basts) ? "Empty" : "InUse"), | ||
| 824 | (list_empty(&dlm->master_list) ? "Empty" : "InUse")); | ||
| 825 | |||
| 826 | /* Purge Count: xxx Refs: xxx */ | ||
| 827 | out += snprintf(db->buf + out, db->len - out, | ||
| 828 | "Purge Count: %d Refs: %d\n", dlm->purge_count, | ||
| 829 | atomic_read(&dlm->dlm_refs.refcount)); | ||
| 830 | |||
| 831 | /* Dead Node: xxx */ | ||
| 832 | out += snprintf(db->buf + out, db->len - out, | ||
| 833 | "Dead Node: %d\n", dlm->reco.dead_node); | ||
| 834 | |||
| 835 | /* What about DLM_RECO_STATE_FINALIZE? */ | ||
| 836 | if (dlm->reco.state == DLM_RECO_STATE_ACTIVE) | ||
| 837 | state = "ACTIVE"; | ||
| 838 | else | ||
| 839 | state = "INACTIVE"; | ||
| 840 | |||
| 841 | /* Recovery Pid: xxxx Master: xxx State: xxxx */ | ||
| 842 | out += snprintf(db->buf + out, db->len - out, | ||
| 843 | "Recovery Pid: %d Master: %d State: %s\n", | ||
| 844 | dlm->dlm_reco_thread_task->pid, | ||
| 845 | dlm->reco.new_master, state); | ||
| 846 | |||
| 847 | /* Recovery Map: xx xx */ | ||
| 848 | out += snprintf(db->buf + out, db->len - out, "Recovery Map: "); | ||
| 849 | out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES, | ||
| 850 | db->buf + out, db->len - out); | ||
| 851 | out += snprintf(db->buf + out, db->len - out, "\n"); | ||
| 852 | |||
| 853 | /* Recovery Node State: */ | ||
| 854 | out += snprintf(db->buf + out, db->len - out, "Recovery Node State:\n"); | ||
| 855 | list_for_each_entry(node, &dlm->reco.node_data, list) { | ||
| 856 | switch (node->state) { | ||
| 857 | case DLM_RECO_NODE_DATA_INIT: | ||
| 858 | state = "INIT"; | ||
| 859 | break; | ||
| 860 | case DLM_RECO_NODE_DATA_REQUESTING: | ||
| 861 | state = "REQUESTING"; | ||
| 862 | break; | ||
| 863 | case DLM_RECO_NODE_DATA_DEAD: | ||
| 864 | state = "DEAD"; | ||
| 865 | break; | ||
| 866 | case DLM_RECO_NODE_DATA_RECEIVING: | ||
| 867 | state = "RECEIVING"; | ||
| 868 | break; | ||
| 869 | case DLM_RECO_NODE_DATA_REQUESTED: | ||
| 870 | state = "REQUESTED"; | ||
| 871 | break; | ||
| 872 | case DLM_RECO_NODE_DATA_DONE: | ||
| 873 | state = "DONE"; | ||
| 874 | break; | ||
| 875 | case DLM_RECO_NODE_DATA_FINALIZE_SENT: | ||
| 876 | state = "FINALIZE-SENT"; | ||
| 877 | break; | ||
| 878 | default: | ||
| 879 | state = "BAD"; | ||
| 880 | break; | ||
| 881 | } | ||
| 882 | out += snprintf(db->buf + out, db->len - out, "\t%u - %s\n", | ||
| 883 | node->node_num, state); | ||
| 884 | } | ||
| 885 | |||
| 886 | spin_unlock(&dlm->spinlock); | ||
| 887 | |||
| 888 | return out; | ||
| 889 | } | ||
| 890 | |||
| 891 | static int debug_state_open(struct inode *inode, struct file *file) | ||
| 892 | { | ||
| 893 | struct dlm_ctxt *dlm = inode->i_private; | ||
| 894 | struct debug_buffer *db = NULL; | ||
| 895 | |||
| 896 | db = debug_buffer_allocate(); | ||
| 897 | if (!db) | ||
| 898 | goto bail; | ||
| 899 | |||
| 900 | db->len = debug_state_print(dlm, db); | ||
| 901 | |||
| 902 | file->private_data = db; | ||
| 903 | |||
| 904 | return 0; | ||
| 905 | bail: | ||
| 906 | return -ENOMEM; | ||
| 907 | } | ||
| 908 | |||
| 909 | static struct file_operations debug_state_fops = { | ||
| 910 | .open = debug_state_open, | ||
| 911 | .release = debug_buffer_release, | ||
| 912 | .read = debug_buffer_read, | ||
| 913 | .llseek = debug_buffer_llseek, | ||
| 914 | }; | ||
| 915 | /* end - debug state funcs */ | ||
| 916 | |||
| 917 | /* files in subroot */ | ||
| 918 | int dlm_debug_init(struct dlm_ctxt *dlm) | ||
| 919 | { | ||
| 920 | struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; | ||
| 921 | |||
| 922 | /* for dumping dlm_ctxt */ | ||
| 923 | dc->debug_state_dentry = debugfs_create_file(DLM_DEBUGFS_DLM_STATE, | ||
| 924 | S_IFREG|S_IRUSR, | ||
| 925 | dlm->dlm_debugfs_subroot, | ||
| 926 | dlm, &debug_state_fops); | ||
| 927 | if (!dc->debug_state_dentry) { | ||
| 928 | mlog_errno(-ENOMEM); | ||
| 929 | goto bail; | ||
| 930 | } | ||
| 931 | |||
| 932 | /* for dumping lockres */ | ||
| 933 | dc->debug_lockres_dentry = | ||
| 934 | debugfs_create_file(DLM_DEBUGFS_LOCKING_STATE, | ||
| 935 | S_IFREG|S_IRUSR, | ||
| 936 | dlm->dlm_debugfs_subroot, | ||
| 937 | dlm, &debug_lockres_fops); | ||
| 938 | if (!dc->debug_lockres_dentry) { | ||
| 939 | mlog_errno(-ENOMEM); | ||
| 940 | goto bail; | ||
| 941 | } | ||
| 942 | |||
| 943 | /* for dumping mles */ | ||
| 944 | dc->debug_mle_dentry = debugfs_create_file(DLM_DEBUGFS_MLE_STATE, | ||
| 945 | S_IFREG|S_IRUSR, | ||
| 946 | dlm->dlm_debugfs_subroot, | ||
| 947 | dlm, &debug_mle_fops); | ||
| 948 | if (!dc->debug_mle_dentry) { | ||
| 949 | mlog_errno(-ENOMEM); | ||
| 950 | goto bail; | ||
| 951 | } | ||
| 952 | |||
| 953 | /* for dumping lockres on the purge list */ | ||
| 954 | dc->debug_purgelist_dentry = | ||
| 955 | debugfs_create_file(DLM_DEBUGFS_PURGE_LIST, | ||
| 956 | S_IFREG|S_IRUSR, | ||
| 957 | dlm->dlm_debugfs_subroot, | ||
| 958 | dlm, &debug_purgelist_fops); | ||
| 959 | if (!dc->debug_purgelist_dentry) { | ||
| 960 | mlog_errno(-ENOMEM); | ||
| 961 | goto bail; | ||
| 962 | } | ||
| 963 | |||
| 964 | dlm_debug_get(dc); | ||
| 965 | return 0; | ||
| 966 | |||
| 967 | bail: | ||
| 968 | dlm_debug_shutdown(dlm); | ||
| 969 | return -ENOMEM; | ||
| 970 | } | ||
| 971 | |||
| 972 | void dlm_debug_shutdown(struct dlm_ctxt *dlm) | ||
| 973 | { | ||
| 974 | struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; | ||
| 975 | |||
| 976 | if (dc) { | ||
| 977 | if (dc->debug_purgelist_dentry) | ||
| 978 | debugfs_remove(dc->debug_purgelist_dentry); | ||
| 979 | if (dc->debug_mle_dentry) | ||
| 980 | debugfs_remove(dc->debug_mle_dentry); | ||
| 981 | if (dc->debug_lockres_dentry) | ||
| 982 | debugfs_remove(dc->debug_lockres_dentry); | ||
| 983 | if (dc->debug_state_dentry) | ||
| 984 | debugfs_remove(dc->debug_state_dentry); | ||
| 985 | dlm_debug_put(dc); | ||
| 986 | } | ||
| 987 | } | ||
| 988 | |||
| 989 | /* subroot - domain dir */ | ||
| 990 | int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) | ||
| 991 | { | ||
| 992 | dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name, | ||
| 993 | dlm_debugfs_root); | ||
| 994 | if (!dlm->dlm_debugfs_subroot) { | ||
| 995 | mlog_errno(-ENOMEM); | ||
| 996 | goto bail; | ||
| 997 | } | ||
| 998 | |||
| 999 | dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt), | ||
| 1000 | GFP_KERNEL); | ||
| 1001 | if (!dlm->dlm_debug_ctxt) { | ||
| 1002 | mlog_errno(-ENOMEM); | ||
| 1003 | goto bail; | ||
| 1004 | } | ||
| 1005 | kref_init(&dlm->dlm_debug_ctxt->debug_refcnt); | ||
| 1006 | |||
| 1007 | return 0; | ||
| 1008 | bail: | ||
| 1009 | dlm_destroy_debugfs_subroot(dlm); | ||
| 1010 | return -ENOMEM; | ||
| 1011 | } | ||
| 1012 | |||
| 1013 | void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) | ||
| 1014 | { | ||
| 1015 | if (dlm->dlm_debugfs_subroot) | ||
| 1016 | debugfs_remove(dlm->dlm_debugfs_subroot); | ||
| 1017 | } | ||
| 1018 | |||
| 1019 | /* debugfs root */ | ||
| 1020 | int dlm_create_debugfs_root(void) | ||
| 1021 | { | ||
| 1022 | dlm_debugfs_root = debugfs_create_dir(DLM_DEBUGFS_DIR, NULL); | ||
| 1023 | if (!dlm_debugfs_root) { | ||
| 1024 | mlog_errno(-ENOMEM); | ||
| 1025 | return -ENOMEM; | ||
| 1026 | } | ||
| 1027 | return 0; | ||
| 1028 | } | ||
| 1029 | |||
| 1030 | void dlm_destroy_debugfs_root(void) | ||
| 1031 | { | ||
| 1032 | if (dlm_debugfs_root) | ||
| 1033 | debugfs_remove(dlm_debugfs_root); | ||
| 1034 | } | ||
| 1035 | #endif /* CONFIG_DEBUG_FS */ | ||
diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h new file mode 100644 index 000000000000..d34a62a3a625 --- /dev/null +++ b/fs/ocfs2/dlm/dlmdebug.h | |||
| @@ -0,0 +1,86 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * dlmdebug.h | ||
| 5 | * | ||
| 6 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or | ||
| 9 | * modify it under the terms of the GNU General Public | ||
| 10 | * License as published by the Free Software Foundation; either | ||
| 11 | * version 2 of the License, or (at your option) any later version. | ||
| 12 | * | ||
| 13 | * This program is distributed in the hope that it will be useful, | ||
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 16 | * General Public License for more details. | ||
| 17 | * | ||
| 18 | * You should have received a copy of the GNU General Public | ||
| 19 | * License along with this program; if not, write to the | ||
| 20 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 21 | * Boston, MA 021110-1307, USA. | ||
| 22 | * | ||
| 23 | */ | ||
| 24 | |||
| 25 | #ifndef DLMDEBUG_H | ||
| 26 | #define DLMDEBUG_H | ||
| 27 | |||
| 28 | void dlm_print_one_mle(struct dlm_master_list_entry *mle); | ||
| 29 | |||
| 30 | #ifdef CONFIG_DEBUG_FS | ||
| 31 | |||
| 32 | struct dlm_debug_ctxt { | ||
| 33 | struct kref debug_refcnt; | ||
| 34 | struct dentry *debug_state_dentry; | ||
| 35 | struct dentry *debug_lockres_dentry; | ||
| 36 | struct dentry *debug_mle_dentry; | ||
| 37 | struct dentry *debug_purgelist_dentry; | ||
| 38 | }; | ||
| 39 | |||
| 40 | struct debug_buffer { | ||
| 41 | int len; | ||
| 42 | char *buf; | ||
| 43 | }; | ||
| 44 | |||
| 45 | struct debug_lockres { | ||
| 46 | int dl_len; | ||
| 47 | char *dl_buf; | ||
| 48 | struct dlm_ctxt *dl_ctxt; | ||
| 49 | struct dlm_lock_resource *dl_res; | ||
| 50 | }; | ||
| 51 | |||
| 52 | int dlm_debug_init(struct dlm_ctxt *dlm); | ||
| 53 | void dlm_debug_shutdown(struct dlm_ctxt *dlm); | ||
| 54 | |||
| 55 | int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm); | ||
| 56 | void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm); | ||
| 57 | |||
| 58 | int dlm_create_debugfs_root(void); | ||
| 59 | void dlm_destroy_debugfs_root(void); | ||
| 60 | |||
| 61 | #else | ||
| 62 | |||
| 63 | static int dlm_debug_init(struct dlm_ctxt *dlm) | ||
| 64 | { | ||
| 65 | return 0; | ||
| 66 | } | ||
| 67 | static void dlm_debug_shutdown(struct dlm_ctxt *dlm) | ||
| 68 | { | ||
| 69 | } | ||
| 70 | static int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) | ||
| 71 | { | ||
| 72 | return 0; | ||
| 73 | } | ||
| 74 | static void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) | ||
| 75 | { | ||
| 76 | } | ||
| 77 | static int dlm_create_debugfs_root(void) | ||
| 78 | { | ||
| 79 | return 0; | ||
| 80 | } | ||
| 81 | static void dlm_destroy_debugfs_root(void) | ||
| 82 | { | ||
| 83 | } | ||
| 84 | |||
| 85 | #endif /* CONFIG_DEBUG_FS */ | ||
| 86 | #endif /* DLMDEBUG_H */ | ||
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 0879d86113e3..63f8125824e8 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | #include <linux/spinlock.h> | 33 | #include <linux/spinlock.h> |
| 34 | #include <linux/delay.h> | 34 | #include <linux/delay.h> |
| 35 | #include <linux/err.h> | 35 | #include <linux/err.h> |
| 36 | #include <linux/debugfs.h> | ||
| 36 | 37 | ||
| 37 | #include "cluster/heartbeat.h" | 38 | #include "cluster/heartbeat.h" |
| 38 | #include "cluster/nodemanager.h" | 39 | #include "cluster/nodemanager.h" |
| @@ -40,8 +41,8 @@ | |||
| 40 | 41 | ||
| 41 | #include "dlmapi.h" | 42 | #include "dlmapi.h" |
| 42 | #include "dlmcommon.h" | 43 | #include "dlmcommon.h" |
| 43 | |||
| 44 | #include "dlmdomain.h" | 44 | #include "dlmdomain.h" |
| 45 | #include "dlmdebug.h" | ||
| 45 | 46 | ||
| 46 | #include "dlmver.h" | 47 | #include "dlmver.h" |
| 47 | 48 | ||
| @@ -298,6 +299,8 @@ static int dlm_wait_on_domain_helper(const char *domain) | |||
| 298 | 299 | ||
| 299 | static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) | 300 | static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) |
| 300 | { | 301 | { |
| 302 | dlm_destroy_debugfs_subroot(dlm); | ||
| 303 | |||
| 301 | if (dlm->lockres_hash) | 304 | if (dlm->lockres_hash) |
| 302 | dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); | 305 | dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); |
| 303 | 306 | ||
| @@ -395,6 +398,7 @@ static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm) | |||
| 395 | static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm) | 398 | static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm) |
| 396 | { | 399 | { |
| 397 | dlm_unregister_domain_handlers(dlm); | 400 | dlm_unregister_domain_handlers(dlm); |
| 401 | dlm_debug_shutdown(dlm); | ||
| 398 | dlm_complete_thread(dlm); | 402 | dlm_complete_thread(dlm); |
| 399 | dlm_complete_recovery_thread(dlm); | 403 | dlm_complete_recovery_thread(dlm); |
| 400 | dlm_destroy_dlm_worker(dlm); | 404 | dlm_destroy_dlm_worker(dlm); |
| @@ -644,6 +648,7 @@ int dlm_shutting_down(struct dlm_ctxt *dlm) | |||
| 644 | void dlm_unregister_domain(struct dlm_ctxt *dlm) | 648 | void dlm_unregister_domain(struct dlm_ctxt *dlm) |
| 645 | { | 649 | { |
| 646 | int leave = 0; | 650 | int leave = 0; |
| 651 | struct dlm_lock_resource *res; | ||
| 647 | 652 | ||
| 648 | spin_lock(&dlm_domain_lock); | 653 | spin_lock(&dlm_domain_lock); |
| 649 | BUG_ON(dlm->dlm_state != DLM_CTXT_JOINED); | 654 | BUG_ON(dlm->dlm_state != DLM_CTXT_JOINED); |
| @@ -673,6 +678,15 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) | |||
| 673 | msleep(500); | 678 | msleep(500); |
| 674 | mlog(0, "%s: more migration to do\n", dlm->name); | 679 | mlog(0, "%s: more migration to do\n", dlm->name); |
| 675 | } | 680 | } |
| 681 | |||
| 682 | /* This list should be empty. If not, print remaining lockres */ | ||
| 683 | if (!list_empty(&dlm->tracking_list)) { | ||
| 684 | mlog(ML_ERROR, "Following lockres' are still on the " | ||
| 685 | "tracking list:\n"); | ||
| 686 | list_for_each_entry(res, &dlm->tracking_list, tracking) | ||
| 687 | dlm_print_one_lock_resource(res); | ||
| 688 | } | ||
| 689 | |||
| 676 | dlm_mark_domain_leaving(dlm); | 690 | dlm_mark_domain_leaving(dlm); |
| 677 | dlm_leave_domain(dlm); | 691 | dlm_leave_domain(dlm); |
| 678 | dlm_complete_dlm_shutdown(dlm); | 692 | dlm_complete_dlm_shutdown(dlm); |
| @@ -1405,6 +1419,12 @@ static int dlm_join_domain(struct dlm_ctxt *dlm) | |||
| 1405 | goto bail; | 1419 | goto bail; |
| 1406 | } | 1420 | } |
| 1407 | 1421 | ||
| 1422 | status = dlm_debug_init(dlm); | ||
| 1423 | if (status < 0) { | ||
| 1424 | mlog_errno(status); | ||
| 1425 | goto bail; | ||
| 1426 | } | ||
| 1427 | |||
| 1408 | status = dlm_launch_thread(dlm); | 1428 | status = dlm_launch_thread(dlm); |
| 1409 | if (status < 0) { | 1429 | if (status < 0) { |
| 1410 | mlog_errno(status); | 1430 | mlog_errno(status); |
| @@ -1472,6 +1492,7 @@ bail: | |||
| 1472 | 1492 | ||
| 1473 | if (status) { | 1493 | if (status) { |
| 1474 | dlm_unregister_domain_handlers(dlm); | 1494 | dlm_unregister_domain_handlers(dlm); |
| 1495 | dlm_debug_shutdown(dlm); | ||
| 1475 | dlm_complete_thread(dlm); | 1496 | dlm_complete_thread(dlm); |
| 1476 | dlm_complete_recovery_thread(dlm); | 1497 | dlm_complete_recovery_thread(dlm); |
| 1477 | dlm_destroy_dlm_worker(dlm); | 1498 | dlm_destroy_dlm_worker(dlm); |
| @@ -1484,6 +1505,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
| 1484 | u32 key) | 1505 | u32 key) |
| 1485 | { | 1506 | { |
| 1486 | int i; | 1507 | int i; |
| 1508 | int ret; | ||
| 1487 | struct dlm_ctxt *dlm = NULL; | 1509 | struct dlm_ctxt *dlm = NULL; |
| 1488 | 1510 | ||
| 1489 | dlm = kzalloc(sizeof(*dlm), GFP_KERNEL); | 1511 | dlm = kzalloc(sizeof(*dlm), GFP_KERNEL); |
| @@ -1516,6 +1538,15 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
| 1516 | dlm->key = key; | 1538 | dlm->key = key; |
| 1517 | dlm->node_num = o2nm_this_node(); | 1539 | dlm->node_num = o2nm_this_node(); |
| 1518 | 1540 | ||
| 1541 | ret = dlm_create_debugfs_subroot(dlm); | ||
| 1542 | if (ret < 0) { | ||
| 1543 | dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); | ||
| 1544 | kfree(dlm->name); | ||
| 1545 | kfree(dlm); | ||
| 1546 | dlm = NULL; | ||
| 1547 | goto leave; | ||
| 1548 | } | ||
| 1549 | |||
| 1519 | spin_lock_init(&dlm->spinlock); | 1550 | spin_lock_init(&dlm->spinlock); |
| 1520 | spin_lock_init(&dlm->master_lock); | 1551 | spin_lock_init(&dlm->master_lock); |
| 1521 | spin_lock_init(&dlm->ast_lock); | 1552 | spin_lock_init(&dlm->ast_lock); |
| @@ -1526,6 +1557,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, | |||
| 1526 | INIT_LIST_HEAD(&dlm->reco.node_data); | 1557 | INIT_LIST_HEAD(&dlm->reco.node_data); |
| 1527 | INIT_LIST_HEAD(&dlm->purge_list); | 1558 | INIT_LIST_HEAD(&dlm->purge_list); |
| 1528 | INIT_LIST_HEAD(&dlm->dlm_domain_handlers); | 1559 | INIT_LIST_HEAD(&dlm->dlm_domain_handlers); |
| 1560 | INIT_LIST_HEAD(&dlm->tracking_list); | ||
| 1529 | dlm->reco.state = 0; | 1561 | dlm->reco.state = 0; |
| 1530 | 1562 | ||
| 1531 | INIT_LIST_HEAD(&dlm->pending_asts); | 1563 | INIT_LIST_HEAD(&dlm->pending_asts); |
| @@ -1816,21 +1848,49 @@ static int __init dlm_init(void) | |||
| 1816 | dlm_print_version(); | 1848 | dlm_print_version(); |
| 1817 | 1849 | ||
| 1818 | status = dlm_init_mle_cache(); | 1850 | status = dlm_init_mle_cache(); |
| 1819 | if (status) | 1851 | if (status) { |
| 1820 | return -1; | 1852 | mlog(ML_ERROR, "Could not create o2dlm_mle slabcache\n"); |
| 1853 | goto error; | ||
| 1854 | } | ||
| 1855 | |||
| 1856 | status = dlm_init_master_caches(); | ||
| 1857 | if (status) { | ||
| 1858 | mlog(ML_ERROR, "Could not create o2dlm_lockres and " | ||
| 1859 | "o2dlm_lockname slabcaches\n"); | ||
| 1860 | goto error; | ||
| 1861 | } | ||
| 1862 | |||
| 1863 | status = dlm_init_lock_cache(); | ||
| 1864 | if (status) { | ||
| 1865 | mlog(ML_ERROR, "Count not create o2dlm_lock slabcache\n"); | ||
| 1866 | goto error; | ||
| 1867 | } | ||
| 1821 | 1868 | ||
| 1822 | status = dlm_register_net_handlers(); | 1869 | status = dlm_register_net_handlers(); |
| 1823 | if (status) { | 1870 | if (status) { |
| 1824 | dlm_destroy_mle_cache(); | 1871 | mlog(ML_ERROR, "Unable to register network handlers\n"); |
| 1825 | return -1; | 1872 | goto error; |
| 1826 | } | 1873 | } |
| 1827 | 1874 | ||
| 1875 | status = dlm_create_debugfs_root(); | ||
| 1876 | if (status) | ||
| 1877 | goto error; | ||
| 1878 | |||
| 1828 | return 0; | 1879 | return 0; |
| 1880 | error: | ||
| 1881 | dlm_unregister_net_handlers(); | ||
| 1882 | dlm_destroy_lock_cache(); | ||
| 1883 | dlm_destroy_master_caches(); | ||
| 1884 | dlm_destroy_mle_cache(); | ||
| 1885 | return -1; | ||
| 1829 | } | 1886 | } |
| 1830 | 1887 | ||
| 1831 | static void __exit dlm_exit (void) | 1888 | static void __exit dlm_exit (void) |
| 1832 | { | 1889 | { |
| 1890 | dlm_destroy_debugfs_root(); | ||
| 1833 | dlm_unregister_net_handlers(); | 1891 | dlm_unregister_net_handlers(); |
| 1892 | dlm_destroy_lock_cache(); | ||
| 1893 | dlm_destroy_master_caches(); | ||
| 1834 | dlm_destroy_mle_cache(); | 1894 | dlm_destroy_mle_cache(); |
| 1835 | } | 1895 | } |
| 1836 | 1896 | ||
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 52578d907d9a..83a9f2972ac8 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c | |||
| @@ -53,6 +53,8 @@ | |||
| 53 | #define MLOG_MASK_PREFIX ML_DLM | 53 | #define MLOG_MASK_PREFIX ML_DLM |
| 54 | #include "cluster/masklog.h" | 54 | #include "cluster/masklog.h" |
| 55 | 55 | ||
| 56 | static struct kmem_cache *dlm_lock_cache = NULL; | ||
| 57 | |||
| 56 | static DEFINE_SPINLOCK(dlm_cookie_lock); | 58 | static DEFINE_SPINLOCK(dlm_cookie_lock); |
| 57 | static u64 dlm_next_cookie = 1; | 59 | static u64 dlm_next_cookie = 1; |
| 58 | 60 | ||
| @@ -64,6 +66,22 @@ static void dlm_init_lock(struct dlm_lock *newlock, int type, | |||
| 64 | static void dlm_lock_release(struct kref *kref); | 66 | static void dlm_lock_release(struct kref *kref); |
| 65 | static void dlm_lock_detach_lockres(struct dlm_lock *lock); | 67 | static void dlm_lock_detach_lockres(struct dlm_lock *lock); |
| 66 | 68 | ||
| 69 | int dlm_init_lock_cache(void) | ||
| 70 | { | ||
| 71 | dlm_lock_cache = kmem_cache_create("o2dlm_lock", | ||
| 72 | sizeof(struct dlm_lock), | ||
| 73 | 0, SLAB_HWCACHE_ALIGN, NULL); | ||
| 74 | if (dlm_lock_cache == NULL) | ||
| 75 | return -ENOMEM; | ||
| 76 | return 0; | ||
| 77 | } | ||
| 78 | |||
| 79 | void dlm_destroy_lock_cache(void) | ||
| 80 | { | ||
| 81 | if (dlm_lock_cache) | ||
| 82 | kmem_cache_destroy(dlm_lock_cache); | ||
| 83 | } | ||
| 84 | |||
| 67 | /* Tell us whether we can grant a new lock request. | 85 | /* Tell us whether we can grant a new lock request. |
| 68 | * locking: | 86 | * locking: |
| 69 | * caller needs: res->spinlock | 87 | * caller needs: res->spinlock |
| @@ -353,7 +371,7 @@ static void dlm_lock_release(struct kref *kref) | |||
| 353 | mlog(0, "freeing kernel-allocated lksb\n"); | 371 | mlog(0, "freeing kernel-allocated lksb\n"); |
| 354 | kfree(lock->lksb); | 372 | kfree(lock->lksb); |
| 355 | } | 373 | } |
| 356 | kfree(lock); | 374 | kmem_cache_free(dlm_lock_cache, lock); |
| 357 | } | 375 | } |
| 358 | 376 | ||
| 359 | /* associate a lock with it's lockres, getting a ref on the lockres */ | 377 | /* associate a lock with it's lockres, getting a ref on the lockres */ |
| @@ -412,7 +430,7 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie, | |||
| 412 | struct dlm_lock *lock; | 430 | struct dlm_lock *lock; |
| 413 | int kernel_allocated = 0; | 431 | int kernel_allocated = 0; |
| 414 | 432 | ||
| 415 | lock = kzalloc(sizeof(*lock), GFP_NOFS); | 433 | lock = (struct dlm_lock *) kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS); |
| 416 | if (!lock) | 434 | if (!lock) |
| 417 | return NULL; | 435 | return NULL; |
| 418 | 436 | ||
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index ea6b89577860..efc015c6128a 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
| @@ -48,47 +48,11 @@ | |||
| 48 | #include "dlmapi.h" | 48 | #include "dlmapi.h" |
| 49 | #include "dlmcommon.h" | 49 | #include "dlmcommon.h" |
| 50 | #include "dlmdomain.h" | 50 | #include "dlmdomain.h" |
| 51 | #include "dlmdebug.h" | ||
| 51 | 52 | ||
| 52 | #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER) | 53 | #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER) |
| 53 | #include "cluster/masklog.h" | 54 | #include "cluster/masklog.h" |
| 54 | 55 | ||
| 55 | enum dlm_mle_type { | ||
| 56 | DLM_MLE_BLOCK, | ||
| 57 | DLM_MLE_MASTER, | ||
| 58 | DLM_MLE_MIGRATION | ||
| 59 | }; | ||
| 60 | |||
| 61 | struct dlm_lock_name | ||
| 62 | { | ||
| 63 | u8 len; | ||
| 64 | u8 name[DLM_LOCKID_NAME_MAX]; | ||
| 65 | }; | ||
| 66 | |||
| 67 | struct dlm_master_list_entry | ||
| 68 | { | ||
| 69 | struct list_head list; | ||
| 70 | struct list_head hb_events; | ||
| 71 | struct dlm_ctxt *dlm; | ||
| 72 | spinlock_t spinlock; | ||
| 73 | wait_queue_head_t wq; | ||
| 74 | atomic_t woken; | ||
| 75 | struct kref mle_refs; | ||
| 76 | int inuse; | ||
| 77 | unsigned long maybe_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 78 | unsigned long vote_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 79 | unsigned long response_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 80 | unsigned long node_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 81 | u8 master; | ||
| 82 | u8 new_master; | ||
| 83 | enum dlm_mle_type type; | ||
| 84 | struct o2hb_callback_func mle_hb_up; | ||
| 85 | struct o2hb_callback_func mle_hb_down; | ||
| 86 | union { | ||
| 87 | struct dlm_lock_resource *res; | ||
| 88 | struct dlm_lock_name name; | ||
| 89 | } u; | ||
| 90 | }; | ||
| 91 | |||
| 92 | static void dlm_mle_node_down(struct dlm_ctxt *dlm, | 56 | static void dlm_mle_node_down(struct dlm_ctxt *dlm, |
| 93 | struct dlm_master_list_entry *mle, | 57 | struct dlm_master_list_entry *mle, |
| 94 | struct o2nm_node *node, | 58 | struct o2nm_node *node, |
| @@ -128,98 +92,10 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm, | |||
| 128 | return 1; | 92 | return 1; |
| 129 | } | 93 | } |
| 130 | 94 | ||
| 131 | #define dlm_print_nodemap(m) _dlm_print_nodemap(m,#m) | 95 | static struct kmem_cache *dlm_lockres_cache = NULL; |
| 132 | static void _dlm_print_nodemap(unsigned long *map, const char *mapname) | 96 | static struct kmem_cache *dlm_lockname_cache = NULL; |
| 133 | { | ||
| 134 | int i; | ||
| 135 | printk("%s=[ ", mapname); | ||
| 136 | for (i=0; i<O2NM_MAX_NODES; i++) | ||
| 137 | if (test_bit(i, map)) | ||
| 138 | printk("%d ", i); | ||
| 139 | printk("]"); | ||
| 140 | } | ||
| 141 | |||
| 142 | static void dlm_print_one_mle(struct dlm_master_list_entry *mle) | ||
| 143 | { | ||
| 144 | int refs; | ||
| 145 | char *type; | ||
| 146 | char attached; | ||
| 147 | u8 master; | ||
| 148 | unsigned int namelen; | ||
| 149 | const char *name; | ||
| 150 | struct kref *k; | ||
| 151 | unsigned long *maybe = mle->maybe_map, | ||
| 152 | *vote = mle->vote_map, | ||
| 153 | *resp = mle->response_map, | ||
| 154 | *node = mle->node_map; | ||
| 155 | |||
| 156 | k = &mle->mle_refs; | ||
| 157 | if (mle->type == DLM_MLE_BLOCK) | ||
| 158 | type = "BLK"; | ||
| 159 | else if (mle->type == DLM_MLE_MASTER) | ||
| 160 | type = "MAS"; | ||
| 161 | else | ||
| 162 | type = "MIG"; | ||
| 163 | refs = atomic_read(&k->refcount); | ||
| 164 | master = mle->master; | ||
| 165 | attached = (list_empty(&mle->hb_events) ? 'N' : 'Y'); | ||
| 166 | |||
| 167 | if (mle->type != DLM_MLE_MASTER) { | ||
| 168 | namelen = mle->u.name.len; | ||
| 169 | name = mle->u.name.name; | ||
| 170 | } else { | ||
| 171 | namelen = mle->u.res->lockname.len; | ||
| 172 | name = mle->u.res->lockname.name; | ||
| 173 | } | ||
| 174 | |||
| 175 | mlog(ML_NOTICE, "%.*s: %3s refs=%3d mas=%3u new=%3u evt=%c inuse=%d ", | ||
| 176 | namelen, name, type, refs, master, mle->new_master, attached, | ||
| 177 | mle->inuse); | ||
| 178 | dlm_print_nodemap(maybe); | ||
| 179 | printk(", "); | ||
| 180 | dlm_print_nodemap(vote); | ||
| 181 | printk(", "); | ||
| 182 | dlm_print_nodemap(resp); | ||
| 183 | printk(", "); | ||
| 184 | dlm_print_nodemap(node); | ||
| 185 | printk(", "); | ||
| 186 | printk("\n"); | ||
| 187 | } | ||
| 188 | |||
| 189 | #if 0 | ||
| 190 | /* Code here is included but defined out as it aids debugging */ | ||
| 191 | |||
| 192 | static void dlm_dump_mles(struct dlm_ctxt *dlm) | ||
| 193 | { | ||
| 194 | struct dlm_master_list_entry *mle; | ||
| 195 | |||
| 196 | mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name); | ||
| 197 | spin_lock(&dlm->master_lock); | ||
| 198 | list_for_each_entry(mle, &dlm->master_list, list) | ||
| 199 | dlm_print_one_mle(mle); | ||
| 200 | spin_unlock(&dlm->master_lock); | ||
| 201 | } | ||
| 202 | |||
| 203 | int dlm_dump_all_mles(const char __user *data, unsigned int len) | ||
| 204 | { | ||
| 205 | struct dlm_ctxt *dlm; | ||
| 206 | |||
| 207 | spin_lock(&dlm_domain_lock); | ||
| 208 | list_for_each_entry(dlm, &dlm_domains, list) { | ||
| 209 | mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name); | ||
| 210 | dlm_dump_mles(dlm); | ||
| 211 | } | ||
| 212 | spin_unlock(&dlm_domain_lock); | ||
| 213 | return len; | ||
| 214 | } | ||
| 215 | EXPORT_SYMBOL_GPL(dlm_dump_all_mles); | ||
| 216 | |||
| 217 | #endif /* 0 */ | ||
| 218 | |||
| 219 | |||
| 220 | static struct kmem_cache *dlm_mle_cache = NULL; | 97 | static struct kmem_cache *dlm_mle_cache = NULL; |
| 221 | 98 | ||
| 222 | |||
| 223 | static void dlm_mle_release(struct kref *kref); | 99 | static void dlm_mle_release(struct kref *kref); |
| 224 | static void dlm_init_mle(struct dlm_master_list_entry *mle, | 100 | static void dlm_init_mle(struct dlm_master_list_entry *mle, |
| 225 | enum dlm_mle_type type, | 101 | enum dlm_mle_type type, |
| @@ -507,7 +383,7 @@ static void dlm_mle_node_up(struct dlm_ctxt *dlm, | |||
| 507 | 383 | ||
| 508 | int dlm_init_mle_cache(void) | 384 | int dlm_init_mle_cache(void) |
| 509 | { | 385 | { |
| 510 | dlm_mle_cache = kmem_cache_create("dlm_mle_cache", | 386 | dlm_mle_cache = kmem_cache_create("o2dlm_mle", |
| 511 | sizeof(struct dlm_master_list_entry), | 387 | sizeof(struct dlm_master_list_entry), |
| 512 | 0, SLAB_HWCACHE_ALIGN, | 388 | 0, SLAB_HWCACHE_ALIGN, |
| 513 | NULL); | 389 | NULL); |
| @@ -560,6 +436,35 @@ static void dlm_mle_release(struct kref *kref) | |||
| 560 | * LOCK RESOURCE FUNCTIONS | 436 | * LOCK RESOURCE FUNCTIONS |
| 561 | */ | 437 | */ |
| 562 | 438 | ||
| 439 | int dlm_init_master_caches(void) | ||
| 440 | { | ||
| 441 | dlm_lockres_cache = kmem_cache_create("o2dlm_lockres", | ||
| 442 | sizeof(struct dlm_lock_resource), | ||
| 443 | 0, SLAB_HWCACHE_ALIGN, NULL); | ||
| 444 | if (!dlm_lockres_cache) | ||
| 445 | goto bail; | ||
| 446 | |||
| 447 | dlm_lockname_cache = kmem_cache_create("o2dlm_lockname", | ||
| 448 | DLM_LOCKID_NAME_MAX, 0, | ||
| 449 | SLAB_HWCACHE_ALIGN, NULL); | ||
| 450 | if (!dlm_lockname_cache) | ||
| 451 | goto bail; | ||
| 452 | |||
| 453 | return 0; | ||
| 454 | bail: | ||
| 455 | dlm_destroy_master_caches(); | ||
| 456 | return -ENOMEM; | ||
| 457 | } | ||
| 458 | |||
| 459 | void dlm_destroy_master_caches(void) | ||
| 460 | { | ||
| 461 | if (dlm_lockname_cache) | ||
| 462 | kmem_cache_destroy(dlm_lockname_cache); | ||
| 463 | |||
| 464 | if (dlm_lockres_cache) | ||
| 465 | kmem_cache_destroy(dlm_lockres_cache); | ||
| 466 | } | ||
| 467 | |||
| 563 | static void dlm_set_lockres_owner(struct dlm_ctxt *dlm, | 468 | static void dlm_set_lockres_owner(struct dlm_ctxt *dlm, |
| 564 | struct dlm_lock_resource *res, | 469 | struct dlm_lock_resource *res, |
| 565 | u8 owner) | 470 | u8 owner) |
| @@ -610,6 +515,14 @@ static void dlm_lockres_release(struct kref *kref) | |||
| 610 | mlog(0, "destroying lockres %.*s\n", res->lockname.len, | 515 | mlog(0, "destroying lockres %.*s\n", res->lockname.len, |
| 611 | res->lockname.name); | 516 | res->lockname.name); |
| 612 | 517 | ||
| 518 | if (!list_empty(&res->tracking)) | ||
| 519 | list_del_init(&res->tracking); | ||
| 520 | else { | ||
| 521 | mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n", | ||
| 522 | res->lockname.len, res->lockname.name); | ||
| 523 | dlm_print_one_lock_resource(res); | ||
| 524 | } | ||
| 525 | |||
| 613 | if (!hlist_unhashed(&res->hash_node) || | 526 | if (!hlist_unhashed(&res->hash_node) || |
| 614 | !list_empty(&res->granted) || | 527 | !list_empty(&res->granted) || |
| 615 | !list_empty(&res->converting) || | 528 | !list_empty(&res->converting) || |
| @@ -642,9 +555,9 @@ static void dlm_lockres_release(struct kref *kref) | |||
| 642 | BUG_ON(!list_empty(&res->recovering)); | 555 | BUG_ON(!list_empty(&res->recovering)); |
| 643 | BUG_ON(!list_empty(&res->purge)); | 556 | BUG_ON(!list_empty(&res->purge)); |
| 644 | 557 | ||
| 645 | kfree(res->lockname.name); | 558 | kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name); |
| 646 | 559 | ||
| 647 | kfree(res); | 560 | kmem_cache_free(dlm_lockres_cache, res); |
| 648 | } | 561 | } |
| 649 | 562 | ||
| 650 | void dlm_lockres_put(struct dlm_lock_resource *res) | 563 | void dlm_lockres_put(struct dlm_lock_resource *res) |
| @@ -677,6 +590,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
| 677 | INIT_LIST_HEAD(&res->dirty); | 590 | INIT_LIST_HEAD(&res->dirty); |
| 678 | INIT_LIST_HEAD(&res->recovering); | 591 | INIT_LIST_HEAD(&res->recovering); |
| 679 | INIT_LIST_HEAD(&res->purge); | 592 | INIT_LIST_HEAD(&res->purge); |
| 593 | INIT_LIST_HEAD(&res->tracking); | ||
| 680 | atomic_set(&res->asts_reserved, 0); | 594 | atomic_set(&res->asts_reserved, 0); |
| 681 | res->migration_pending = 0; | 595 | res->migration_pending = 0; |
| 682 | res->inflight_locks = 0; | 596 | res->inflight_locks = 0; |
| @@ -692,6 +606,8 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
| 692 | 606 | ||
| 693 | res->last_used = 0; | 607 | res->last_used = 0; |
| 694 | 608 | ||
| 609 | list_add_tail(&res->tracking, &dlm->tracking_list); | ||
| 610 | |||
| 695 | memset(res->lvb, 0, DLM_LVB_LEN); | 611 | memset(res->lvb, 0, DLM_LVB_LEN); |
| 696 | memset(res->refmap, 0, sizeof(res->refmap)); | 612 | memset(res->refmap, 0, sizeof(res->refmap)); |
| 697 | } | 613 | } |
| @@ -700,20 +616,28 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, | |||
| 700 | const char *name, | 616 | const char *name, |
| 701 | unsigned int namelen) | 617 | unsigned int namelen) |
| 702 | { | 618 | { |
| 703 | struct dlm_lock_resource *res; | 619 | struct dlm_lock_resource *res = NULL; |
| 704 | 620 | ||
| 705 | res = kmalloc(sizeof(struct dlm_lock_resource), GFP_NOFS); | 621 | res = (struct dlm_lock_resource *) |
| 622 | kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS); | ||
| 706 | if (!res) | 623 | if (!res) |
| 707 | return NULL; | 624 | goto error; |
| 708 | 625 | ||
| 709 | res->lockname.name = kmalloc(namelen, GFP_NOFS); | 626 | res->lockname.name = (char *) |
| 710 | if (!res->lockname.name) { | 627 | kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS); |
| 711 | kfree(res); | 628 | if (!res->lockname.name) |
| 712 | return NULL; | 629 | goto error; |
| 713 | } | ||
| 714 | 630 | ||
| 715 | dlm_init_lockres(dlm, res, name, namelen); | 631 | dlm_init_lockres(dlm, res, name, namelen); |
| 716 | return res; | 632 | return res; |
| 633 | |||
| 634 | error: | ||
| 635 | if (res && res->lockname.name) | ||
| 636 | kmem_cache_free(dlm_lockname_cache, (void *)res->lockname.name); | ||
| 637 | |||
| 638 | if (res) | ||
| 639 | kmem_cache_free(dlm_lockres_cache, res); | ||
| 640 | return NULL; | ||
| 717 | } | 641 | } |
| 718 | 642 | ||
| 719 | void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, | 643 | void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 1f1873bf41fb..394d25a131a5 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
| @@ -27,18 +27,11 @@ | |||
| 27 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
| 28 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
| 29 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
| 30 | #include <linux/crc32.h> | ||
| 31 | #include <linux/kthread.h> | 30 | #include <linux/kthread.h> |
| 32 | #include <linux/pagemap.h> | 31 | #include <linux/pagemap.h> |
| 33 | #include <linux/debugfs.h> | 32 | #include <linux/debugfs.h> |
| 34 | #include <linux/seq_file.h> | 33 | #include <linux/seq_file.h> |
| 35 | 34 | ||
| 36 | #include <cluster/heartbeat.h> | ||
| 37 | #include <cluster/nodemanager.h> | ||
| 38 | #include <cluster/tcp.h> | ||
| 39 | |||
| 40 | #include <dlm/dlmapi.h> | ||
| 41 | |||
| 42 | #define MLOG_MASK_PREFIX ML_DLM_GLUE | 35 | #define MLOG_MASK_PREFIX ML_DLM_GLUE |
| 43 | #include <cluster/masklog.h> | 36 | #include <cluster/masklog.h> |
| 44 | 37 | ||
| @@ -53,6 +46,7 @@ | |||
| 53 | #include "heartbeat.h" | 46 | #include "heartbeat.h" |
| 54 | #include "inode.h" | 47 | #include "inode.h" |
| 55 | #include "journal.h" | 48 | #include "journal.h" |
| 49 | #include "stackglue.h" | ||
| 56 | #include "slot_map.h" | 50 | #include "slot_map.h" |
| 57 | #include "super.h" | 51 | #include "super.h" |
| 58 | #include "uptodate.h" | 52 | #include "uptodate.h" |
| @@ -113,7 +107,8 @@ static void ocfs2_dump_meta_lvb_info(u64 level, | |||
| 113 | unsigned int line, | 107 | unsigned int line, |
| 114 | struct ocfs2_lock_res *lockres) | 108 | struct ocfs2_lock_res *lockres) |
| 115 | { | 109 | { |
| 116 | struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 110 | struct ocfs2_meta_lvb *lvb = |
| 111 | (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); | ||
| 117 | 112 | ||
| 118 | mlog(level, "LVB information for %s (called from %s:%u):\n", | 113 | mlog(level, "LVB information for %s (called from %s:%u):\n", |
| 119 | lockres->l_name, function, line); | 114 | lockres->l_name, function, line); |
| @@ -259,31 +254,6 @@ static struct ocfs2_lock_res_ops ocfs2_flock_lops = { | |||
| 259 | .flags = 0, | 254 | .flags = 0, |
| 260 | }; | 255 | }; |
| 261 | 256 | ||
| 262 | /* | ||
| 263 | * This is the filesystem locking protocol version. | ||
| 264 | * | ||
| 265 | * Whenever the filesystem does new things with locks (adds or removes a | ||
| 266 | * lock, orders them differently, does different things underneath a lock), | ||
| 267 | * the version must be changed. The protocol is negotiated when joining | ||
| 268 | * the dlm domain. A node may join the domain if its major version is | ||
| 269 | * identical to all other nodes and its minor version is greater than | ||
| 270 | * or equal to all other nodes. When its minor version is greater than | ||
| 271 | * the other nodes, it will run at the minor version specified by the | ||
| 272 | * other nodes. | ||
| 273 | * | ||
| 274 | * If a locking change is made that will not be compatible with older | ||
| 275 | * versions, the major number must be increased and the minor version set | ||
| 276 | * to zero. If a change merely adds a behavior that can be disabled when | ||
| 277 | * speaking to older versions, the minor version must be increased. If a | ||
| 278 | * change adds a fully backwards compatible change (eg, LVB changes that | ||
| 279 | * are just ignored by older versions), the version does not need to be | ||
| 280 | * updated. | ||
| 281 | */ | ||
| 282 | const struct dlm_protocol_version ocfs2_locking_protocol = { | ||
| 283 | .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, | ||
| 284 | .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, | ||
| 285 | }; | ||
| 286 | |||
| 287 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 257 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) |
| 288 | { | 258 | { |
| 289 | return lockres->l_type == OCFS2_LOCK_TYPE_META || | 259 | return lockres->l_type == OCFS2_LOCK_TYPE_META || |
| @@ -316,7 +286,7 @@ static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *l | |||
| 316 | static int ocfs2_lock_create(struct ocfs2_super *osb, | 286 | static int ocfs2_lock_create(struct ocfs2_super *osb, |
| 317 | struct ocfs2_lock_res *lockres, | 287 | struct ocfs2_lock_res *lockres, |
| 318 | int level, | 288 | int level, |
| 319 | int dlm_flags); | 289 | u32 dlm_flags); |
| 320 | static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, | 290 | static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, |
| 321 | int wanted); | 291 | int wanted); |
| 322 | static void ocfs2_cluster_unlock(struct ocfs2_super *osb, | 292 | static void ocfs2_cluster_unlock(struct ocfs2_super *osb, |
| @@ -330,10 +300,9 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, | |||
| 330 | struct ocfs2_lock_res *lockres); | 300 | struct ocfs2_lock_res *lockres); |
| 331 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | 301 | static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, |
| 332 | int convert); | 302 | int convert); |
| 333 | #define ocfs2_log_dlm_error(_func, _stat, _lockres) do { \ | 303 | #define ocfs2_log_dlm_error(_func, _err, _lockres) do { \ |
| 334 | mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \ | 304 | mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \ |
| 335 | "resource %s: %s\n", dlm_errname(_stat), _func, \ | 305 | _err, _func, _lockres->l_name); \ |
| 336 | _lockres->l_name, dlm_errmsg(_stat)); \ | ||
| 337 | } while (0) | 306 | } while (0) |
| 338 | static int ocfs2_downconvert_thread(void *arg); | 307 | static int ocfs2_downconvert_thread(void *arg); |
| 339 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, | 308 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, |
| @@ -342,12 +311,13 @@ static int ocfs2_inode_lock_update(struct inode *inode, | |||
| 342 | struct buffer_head **bh); | 311 | struct buffer_head **bh); |
| 343 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); | 312 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); |
| 344 | static inline int ocfs2_highest_compat_lock_level(int level); | 313 | static inline int ocfs2_highest_compat_lock_level(int level); |
| 345 | static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | 314 | static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, |
| 346 | int new_level); | 315 | int new_level); |
| 347 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | 316 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, |
| 348 | struct ocfs2_lock_res *lockres, | 317 | struct ocfs2_lock_res *lockres, |
| 349 | int new_level, | 318 | int new_level, |
| 350 | int lvb); | 319 | int lvb, |
| 320 | unsigned int generation); | ||
| 351 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | 321 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, |
| 352 | struct ocfs2_lock_res *lockres); | 322 | struct ocfs2_lock_res *lockres); |
| 353 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, | 323 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, |
| @@ -406,9 +376,9 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, | |||
| 406 | res->l_ops = ops; | 376 | res->l_ops = ops; |
| 407 | res->l_priv = priv; | 377 | res->l_priv = priv; |
| 408 | 378 | ||
| 409 | res->l_level = LKM_IVMODE; | 379 | res->l_level = DLM_LOCK_IV; |
| 410 | res->l_requested = LKM_IVMODE; | 380 | res->l_requested = DLM_LOCK_IV; |
| 411 | res->l_blocking = LKM_IVMODE; | 381 | res->l_blocking = DLM_LOCK_IV; |
| 412 | res->l_action = OCFS2_AST_INVALID; | 382 | res->l_action = OCFS2_AST_INVALID; |
| 413 | res->l_unlock_action = OCFS2_UNLOCK_INVALID; | 383 | res->l_unlock_action = OCFS2_UNLOCK_INVALID; |
| 414 | 384 | ||
| @@ -604,10 +574,10 @@ static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres, | |||
| 604 | BUG_ON(!lockres); | 574 | BUG_ON(!lockres); |
| 605 | 575 | ||
| 606 | switch(level) { | 576 | switch(level) { |
| 607 | case LKM_EXMODE: | 577 | case DLM_LOCK_EX: |
| 608 | lockres->l_ex_holders++; | 578 | lockres->l_ex_holders++; |
| 609 | break; | 579 | break; |
| 610 | case LKM_PRMODE: | 580 | case DLM_LOCK_PR: |
| 611 | lockres->l_ro_holders++; | 581 | lockres->l_ro_holders++; |
| 612 | break; | 582 | break; |
| 613 | default: | 583 | default: |
| @@ -625,11 +595,11 @@ static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, | |||
| 625 | BUG_ON(!lockres); | 595 | BUG_ON(!lockres); |
| 626 | 596 | ||
| 627 | switch(level) { | 597 | switch(level) { |
| 628 | case LKM_EXMODE: | 598 | case DLM_LOCK_EX: |
| 629 | BUG_ON(!lockres->l_ex_holders); | 599 | BUG_ON(!lockres->l_ex_holders); |
| 630 | lockres->l_ex_holders--; | 600 | lockres->l_ex_holders--; |
| 631 | break; | 601 | break; |
| 632 | case LKM_PRMODE: | 602 | case DLM_LOCK_PR: |
| 633 | BUG_ON(!lockres->l_ro_holders); | 603 | BUG_ON(!lockres->l_ro_holders); |
| 634 | lockres->l_ro_holders--; | 604 | lockres->l_ro_holders--; |
| 635 | break; | 605 | break; |
| @@ -644,12 +614,12 @@ static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres, | |||
| 644 | * lock types are added. */ | 614 | * lock types are added. */ |
| 645 | static inline int ocfs2_highest_compat_lock_level(int level) | 615 | static inline int ocfs2_highest_compat_lock_level(int level) |
| 646 | { | 616 | { |
| 647 | int new_level = LKM_EXMODE; | 617 | int new_level = DLM_LOCK_EX; |
| 648 | 618 | ||
| 649 | if (level == LKM_EXMODE) | 619 | if (level == DLM_LOCK_EX) |
| 650 | new_level = LKM_NLMODE; | 620 | new_level = DLM_LOCK_NL; |
| 651 | else if (level == LKM_PRMODE) | 621 | else if (level == DLM_LOCK_PR) |
| 652 | new_level = LKM_PRMODE; | 622 | new_level = DLM_LOCK_PR; |
| 653 | return new_level; | 623 | return new_level; |
| 654 | } | 624 | } |
| 655 | 625 | ||
| @@ -688,12 +658,12 @@ static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res | |||
| 688 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); | 658 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); |
| 689 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); | 659 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED)); |
| 690 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); | 660 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED)); |
| 691 | BUG_ON(lockres->l_blocking <= LKM_NLMODE); | 661 | BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); |
| 692 | 662 | ||
| 693 | lockres->l_level = lockres->l_requested; | 663 | lockres->l_level = lockres->l_requested; |
| 694 | if (lockres->l_level <= | 664 | if (lockres->l_level <= |
| 695 | ocfs2_highest_compat_lock_level(lockres->l_blocking)) { | 665 | ocfs2_highest_compat_lock_level(lockres->l_blocking)) { |
| 696 | lockres->l_blocking = LKM_NLMODE; | 666 | lockres->l_blocking = DLM_LOCK_NL; |
| 697 | lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); | 667 | lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED); |
| 698 | } | 668 | } |
| 699 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 669 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); |
| @@ -712,7 +682,7 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo | |||
| 712 | * information is already up to data. Convert from NL to | 682 | * information is already up to data. Convert from NL to |
| 713 | * *anything* however should mark ourselves as needing an | 683 | * *anything* however should mark ourselves as needing an |
| 714 | * update */ | 684 | * update */ |
| 715 | if (lockres->l_level == LKM_NLMODE && | 685 | if (lockres->l_level == DLM_LOCK_NL && |
| 716 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | 686 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) |
| 717 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 687 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); |
| 718 | 688 | ||
| @@ -729,7 +699,7 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc | |||
| 729 | BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); | 699 | BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); |
| 730 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 700 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); |
| 731 | 701 | ||
| 732 | if (lockres->l_requested > LKM_NLMODE && | 702 | if (lockres->l_requested > DLM_LOCK_NL && |
| 733 | !(lockres->l_flags & OCFS2_LOCK_LOCAL) && | 703 | !(lockres->l_flags & OCFS2_LOCK_LOCAL) && |
| 734 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) | 704 | lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH) |
| 735 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); | 705 | lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH); |
| @@ -767,6 +737,113 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, | |||
| 767 | return needs_downconvert; | 737 | return needs_downconvert; |
| 768 | } | 738 | } |
| 769 | 739 | ||
| 740 | /* | ||
| 741 | * OCFS2_LOCK_PENDING and l_pending_gen. | ||
| 742 | * | ||
| 743 | * Why does OCFS2_LOCK_PENDING exist? To close a race between setting | ||
| 744 | * OCFS2_LOCK_BUSY and calling ocfs2_dlm_lock(). See ocfs2_unblock_lock() | ||
| 745 | * for more details on the race. | ||
| 746 | * | ||
| 747 | * OCFS2_LOCK_PENDING closes the race quite nicely. However, it introduces | ||
| 748 | * a race on itself. In o2dlm, we can get the ast before ocfs2_dlm_lock() | ||
| 749 | * returns. The ast clears OCFS2_LOCK_BUSY, and must therefore clear | ||
| 750 | * OCFS2_LOCK_PENDING at the same time. When ocfs2_dlm_lock() returns, | ||
| 751 | * the caller is going to try to clear PENDING again. If nothing else is | ||
| 752 | * happening, __lockres_clear_pending() sees PENDING is unset and does | ||
| 753 | * nothing. | ||
| 754 | * | ||
| 755 | * But what if another path (eg downconvert thread) has just started a | ||
| 756 | * new locking action? The other path has re-set PENDING. Our path | ||
| 757 | * cannot clear PENDING, because that will re-open the original race | ||
| 758 | * window. | ||
| 759 | * | ||
| 760 | * [Example] | ||
| 761 | * | ||
| 762 | * ocfs2_meta_lock() | ||
| 763 | * ocfs2_cluster_lock() | ||
| 764 | * set BUSY | ||
| 765 | * set PENDING | ||
| 766 | * drop l_lock | ||
| 767 | * ocfs2_dlm_lock() | ||
| 768 | * ocfs2_locking_ast() ocfs2_downconvert_thread() | ||
| 769 | * clear PENDING ocfs2_unblock_lock() | ||
| 770 | * take_l_lock | ||
| 771 | * !BUSY | ||
| 772 | * ocfs2_prepare_downconvert() | ||
| 773 | * set BUSY | ||
| 774 | * set PENDING | ||
| 775 | * drop l_lock | ||
| 776 | * take l_lock | ||
| 777 | * clear PENDING | ||
| 778 | * drop l_lock | ||
| 779 | * <window> | ||
| 780 | * ocfs2_dlm_lock() | ||
| 781 | * | ||
| 782 | * So as you can see, we now have a window where l_lock is not held, | ||
| 783 | * PENDING is not set, and ocfs2_dlm_lock() has not been called. | ||
| 784 | * | ||
| 785 | * The core problem is that ocfs2_cluster_lock() has cleared the PENDING | ||
| 786 | * set by ocfs2_prepare_downconvert(). That wasn't nice. | ||
| 787 | * | ||
| 788 | * To solve this we introduce l_pending_gen. A call to | ||
| 789 | * lockres_clear_pending() will only do so when it is passed a generation | ||
| 790 | * number that matches the lockres. lockres_set_pending() will return the | ||
| 791 | * current generation number. When ocfs2_cluster_lock() goes to clear | ||
| 792 | * PENDING, it passes the generation it got from set_pending(). In our | ||
| 793 | * example above, the generation numbers will *not* match. Thus, | ||
| 794 | * ocfs2_cluster_lock() will not clear the PENDING set by | ||
| 795 | * ocfs2_prepare_downconvert(). | ||
| 796 | */ | ||
| 797 | |||
| 798 | /* Unlocked version for ocfs2_locking_ast() */ | ||
| 799 | static void __lockres_clear_pending(struct ocfs2_lock_res *lockres, | ||
| 800 | unsigned int generation, | ||
| 801 | struct ocfs2_super *osb) | ||
| 802 | { | ||
| 803 | assert_spin_locked(&lockres->l_lock); | ||
| 804 | |||
| 805 | /* | ||
| 806 | * The ast and locking functions can race us here. The winner | ||
| 807 | * will clear pending, the loser will not. | ||
| 808 | */ | ||
| 809 | if (!(lockres->l_flags & OCFS2_LOCK_PENDING) || | ||
| 810 | (lockres->l_pending_gen != generation)) | ||
| 811 | return; | ||
| 812 | |||
| 813 | lockres_clear_flags(lockres, OCFS2_LOCK_PENDING); | ||
| 814 | lockres->l_pending_gen++; | ||
| 815 | |||
| 816 | /* | ||
| 817 | * The downconvert thread may have skipped us because we | ||
| 818 | * were PENDING. Wake it up. | ||
| 819 | */ | ||
| 820 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) | ||
| 821 | ocfs2_wake_downconvert_thread(osb); | ||
| 822 | } | ||
| 823 | |||
| 824 | /* Locked version for callers of ocfs2_dlm_lock() */ | ||
| 825 | static void lockres_clear_pending(struct ocfs2_lock_res *lockres, | ||
| 826 | unsigned int generation, | ||
| 827 | struct ocfs2_super *osb) | ||
| 828 | { | ||
| 829 | unsigned long flags; | ||
| 830 | |||
| 831 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
| 832 | __lockres_clear_pending(lockres, generation, osb); | ||
| 833 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
| 834 | } | ||
| 835 | |||
| 836 | static unsigned int lockres_set_pending(struct ocfs2_lock_res *lockres) | ||
| 837 | { | ||
| 838 | assert_spin_locked(&lockres->l_lock); | ||
| 839 | BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY)); | ||
| 840 | |||
| 841 | lockres_or_flags(lockres, OCFS2_LOCK_PENDING); | ||
| 842 | |||
| 843 | return lockres->l_pending_gen; | ||
| 844 | } | ||
| 845 | |||
| 846 | |||
| 770 | static void ocfs2_blocking_ast(void *opaque, int level) | 847 | static void ocfs2_blocking_ast(void *opaque, int level) |
| 771 | { | 848 | { |
| 772 | struct ocfs2_lock_res *lockres = opaque; | 849 | struct ocfs2_lock_res *lockres = opaque; |
| @@ -774,7 +851,7 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
| 774 | int needs_downconvert; | 851 | int needs_downconvert; |
| 775 | unsigned long flags; | 852 | unsigned long flags; |
| 776 | 853 | ||
| 777 | BUG_ON(level <= LKM_NLMODE); | 854 | BUG_ON(level <= DLM_LOCK_NL); |
| 778 | 855 | ||
| 779 | mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", | 856 | mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n", |
| 780 | lockres->l_name, level, lockres->l_level, | 857 | lockres->l_name, level, lockres->l_level, |
| @@ -801,14 +878,22 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
| 801 | static void ocfs2_locking_ast(void *opaque) | 878 | static void ocfs2_locking_ast(void *opaque) |
| 802 | { | 879 | { |
| 803 | struct ocfs2_lock_res *lockres = opaque; | 880 | struct ocfs2_lock_res *lockres = opaque; |
| 804 | struct dlm_lockstatus *lksb = &lockres->l_lksb; | 881 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); |
| 805 | unsigned long flags; | 882 | unsigned long flags; |
| 883 | int status; | ||
| 806 | 884 | ||
| 807 | spin_lock_irqsave(&lockres->l_lock, flags); | 885 | spin_lock_irqsave(&lockres->l_lock, flags); |
| 808 | 886 | ||
| 809 | if (lksb->status != DLM_NORMAL) { | 887 | status = ocfs2_dlm_lock_status(&lockres->l_lksb); |
| 810 | mlog(ML_ERROR, "lockres %s: lksb status value of %u!\n", | 888 | |
| 811 | lockres->l_name, lksb->status); | 889 | if (status == -EAGAIN) { |
| 890 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | ||
| 891 | goto out; | ||
| 892 | } | ||
| 893 | |||
| 894 | if (status) { | ||
| 895 | mlog(ML_ERROR, "lockres %s: lksb status value of %d!\n", | ||
| 896 | lockres->l_name, status); | ||
| 812 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 897 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 813 | return; | 898 | return; |
| 814 | } | 899 | } |
| @@ -831,11 +916,23 @@ static void ocfs2_locking_ast(void *opaque) | |||
| 831 | lockres->l_unlock_action); | 916 | lockres->l_unlock_action); |
| 832 | BUG(); | 917 | BUG(); |
| 833 | } | 918 | } |
| 834 | 919 | out: | |
| 835 | /* set it to something invalid so if we get called again we | 920 | /* set it to something invalid so if we get called again we |
| 836 | * can catch it. */ | 921 | * can catch it. */ |
| 837 | lockres->l_action = OCFS2_AST_INVALID; | 922 | lockres->l_action = OCFS2_AST_INVALID; |
| 838 | 923 | ||
| 924 | /* Did we try to cancel this lock? Clear that state */ | ||
| 925 | if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) | ||
| 926 | lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; | ||
| 927 | |||
| 928 | /* | ||
| 929 | * We may have beaten the locking functions here. We certainly | ||
| 930 | * know that dlm_lock() has been called :-) | ||
| 931 | * Because we can't have two lock calls in flight at once, we | ||
| 932 | * can use lockres->l_pending_gen. | ||
| 933 | */ | ||
| 934 | __lockres_clear_pending(lockres, lockres->l_pending_gen, osb); | ||
| 935 | |||
| 839 | wake_up(&lockres->l_event); | 936 | wake_up(&lockres->l_event); |
| 840 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 937 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 841 | } | 938 | } |
| @@ -865,15 +962,15 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | |||
| 865 | static int ocfs2_lock_create(struct ocfs2_super *osb, | 962 | static int ocfs2_lock_create(struct ocfs2_super *osb, |
| 866 | struct ocfs2_lock_res *lockres, | 963 | struct ocfs2_lock_res *lockres, |
| 867 | int level, | 964 | int level, |
| 868 | int dlm_flags) | 965 | u32 dlm_flags) |
| 869 | { | 966 | { |
| 870 | int ret = 0; | 967 | int ret = 0; |
| 871 | enum dlm_status status = DLM_NORMAL; | ||
| 872 | unsigned long flags; | 968 | unsigned long flags; |
| 969 | unsigned int gen; | ||
| 873 | 970 | ||
| 874 | mlog_entry_void(); | 971 | mlog_entry_void(); |
| 875 | 972 | ||
| 876 | mlog(0, "lock %s, level = %d, flags = %d\n", lockres->l_name, level, | 973 | mlog(0, "lock %s, level = %d, flags = %u\n", lockres->l_name, level, |
| 877 | dlm_flags); | 974 | dlm_flags); |
| 878 | 975 | ||
| 879 | spin_lock_irqsave(&lockres->l_lock, flags); | 976 | spin_lock_irqsave(&lockres->l_lock, flags); |
| @@ -886,24 +983,23 @@ static int ocfs2_lock_create(struct ocfs2_super *osb, | |||
| 886 | lockres->l_action = OCFS2_AST_ATTACH; | 983 | lockres->l_action = OCFS2_AST_ATTACH; |
| 887 | lockres->l_requested = level; | 984 | lockres->l_requested = level; |
| 888 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 985 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); |
| 986 | gen = lockres_set_pending(lockres); | ||
| 889 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 987 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 890 | 988 | ||
| 891 | status = dlmlock(osb->dlm, | 989 | ret = ocfs2_dlm_lock(osb->cconn, |
| 892 | level, | 990 | level, |
| 893 | &lockres->l_lksb, | 991 | &lockres->l_lksb, |
| 894 | dlm_flags, | 992 | dlm_flags, |
| 895 | lockres->l_name, | 993 | lockres->l_name, |
| 896 | OCFS2_LOCK_ID_MAX_LEN - 1, | 994 | OCFS2_LOCK_ID_MAX_LEN - 1, |
| 897 | ocfs2_locking_ast, | 995 | lockres); |
| 898 | lockres, | 996 | lockres_clear_pending(lockres, gen, osb); |
| 899 | ocfs2_blocking_ast); | 997 | if (ret) { |
| 900 | if (status != DLM_NORMAL) { | 998 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); |
| 901 | ocfs2_log_dlm_error("dlmlock", status, lockres); | ||
| 902 | ret = -EINVAL; | ||
| 903 | ocfs2_recover_from_dlm_error(lockres, 1); | 999 | ocfs2_recover_from_dlm_error(lockres, 1); |
| 904 | } | 1000 | } |
| 905 | 1001 | ||
| 906 | mlog(0, "lock %s, successfull return from dlmlock\n", lockres->l_name); | 1002 | mlog(0, "lock %s, return from ocfs2_dlm_lock\n", lockres->l_name); |
| 907 | 1003 | ||
| 908 | bail: | 1004 | bail: |
| 909 | mlog_exit(ret); | 1005 | mlog_exit(ret); |
| @@ -1016,21 +1112,22 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, | |||
| 1016 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, | 1112 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, |
| 1017 | struct ocfs2_lock_res *lockres, | 1113 | struct ocfs2_lock_res *lockres, |
| 1018 | int level, | 1114 | int level, |
| 1019 | int lkm_flags, | 1115 | u32 lkm_flags, |
| 1020 | int arg_flags) | 1116 | int arg_flags) |
| 1021 | { | 1117 | { |
| 1022 | struct ocfs2_mask_waiter mw; | 1118 | struct ocfs2_mask_waiter mw; |
| 1023 | enum dlm_status status; | ||
| 1024 | int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); | 1119 | int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); |
| 1025 | int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ | 1120 | int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */ |
| 1026 | unsigned long flags; | 1121 | unsigned long flags; |
| 1122 | unsigned int gen; | ||
| 1123 | int noqueue_attempted = 0; | ||
| 1027 | 1124 | ||
| 1028 | mlog_entry_void(); | 1125 | mlog_entry_void(); |
| 1029 | 1126 | ||
| 1030 | ocfs2_init_mask_waiter(&mw); | 1127 | ocfs2_init_mask_waiter(&mw); |
| 1031 | 1128 | ||
| 1032 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | 1129 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) |
| 1033 | lkm_flags |= LKM_VALBLK; | 1130 | lkm_flags |= DLM_LKF_VALBLK; |
| 1034 | 1131 | ||
| 1035 | again: | 1132 | again: |
| 1036 | wait = 0; | 1133 | wait = 0; |
| @@ -1068,52 +1165,56 @@ again: | |||
| 1068 | } | 1165 | } |
| 1069 | 1166 | ||
| 1070 | if (level > lockres->l_level) { | 1167 | if (level > lockres->l_level) { |
| 1168 | if (noqueue_attempted > 0) { | ||
| 1169 | ret = -EAGAIN; | ||
| 1170 | goto unlock; | ||
| 1171 | } | ||
| 1172 | if (lkm_flags & DLM_LKF_NOQUEUE) | ||
| 1173 | noqueue_attempted = 1; | ||
| 1174 | |||
| 1071 | if (lockres->l_action != OCFS2_AST_INVALID) | 1175 | if (lockres->l_action != OCFS2_AST_INVALID) |
| 1072 | mlog(ML_ERROR, "lockres %s has action %u pending\n", | 1176 | mlog(ML_ERROR, "lockres %s has action %u pending\n", |
| 1073 | lockres->l_name, lockres->l_action); | 1177 | lockres->l_name, lockres->l_action); |
| 1074 | 1178 | ||
| 1075 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | 1179 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { |
| 1076 | lockres->l_action = OCFS2_AST_ATTACH; | 1180 | lockres->l_action = OCFS2_AST_ATTACH; |
| 1077 | lkm_flags &= ~LKM_CONVERT; | 1181 | lkm_flags &= ~DLM_LKF_CONVERT; |
| 1078 | } else { | 1182 | } else { |
| 1079 | lockres->l_action = OCFS2_AST_CONVERT; | 1183 | lockres->l_action = OCFS2_AST_CONVERT; |
| 1080 | lkm_flags |= LKM_CONVERT; | 1184 | lkm_flags |= DLM_LKF_CONVERT; |
| 1081 | } | 1185 | } |
| 1082 | 1186 | ||
| 1083 | lockres->l_requested = level; | 1187 | lockres->l_requested = level; |
| 1084 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 1188 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); |
| 1189 | gen = lockres_set_pending(lockres); | ||
| 1085 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1190 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 1086 | 1191 | ||
| 1087 | BUG_ON(level == LKM_IVMODE); | 1192 | BUG_ON(level == DLM_LOCK_IV); |
| 1088 | BUG_ON(level == LKM_NLMODE); | 1193 | BUG_ON(level == DLM_LOCK_NL); |
| 1089 | 1194 | ||
| 1090 | mlog(0, "lock %s, convert from %d to level = %d\n", | 1195 | mlog(0, "lock %s, convert from %d to level = %d\n", |
| 1091 | lockres->l_name, lockres->l_level, level); | 1196 | lockres->l_name, lockres->l_level, level); |
| 1092 | 1197 | ||
| 1093 | /* call dlm_lock to upgrade lock now */ | 1198 | /* call dlm_lock to upgrade lock now */ |
| 1094 | status = dlmlock(osb->dlm, | 1199 | ret = ocfs2_dlm_lock(osb->cconn, |
| 1095 | level, | 1200 | level, |
| 1096 | &lockres->l_lksb, | 1201 | &lockres->l_lksb, |
| 1097 | lkm_flags, | 1202 | lkm_flags, |
| 1098 | lockres->l_name, | 1203 | lockres->l_name, |
| 1099 | OCFS2_LOCK_ID_MAX_LEN - 1, | 1204 | OCFS2_LOCK_ID_MAX_LEN - 1, |
| 1100 | ocfs2_locking_ast, | 1205 | lockres); |
| 1101 | lockres, | 1206 | lockres_clear_pending(lockres, gen, osb); |
| 1102 | ocfs2_blocking_ast); | 1207 | if (ret) { |
| 1103 | if (status != DLM_NORMAL) { | 1208 | if (!(lkm_flags & DLM_LKF_NOQUEUE) || |
| 1104 | if ((lkm_flags & LKM_NOQUEUE) && | 1209 | (ret != -EAGAIN)) { |
| 1105 | (status == DLM_NOTQUEUED)) | 1210 | ocfs2_log_dlm_error("ocfs2_dlm_lock", |
| 1106 | ret = -EAGAIN; | 1211 | ret, lockres); |
| 1107 | else { | ||
| 1108 | ocfs2_log_dlm_error("dlmlock", status, | ||
| 1109 | lockres); | ||
| 1110 | ret = -EINVAL; | ||
| 1111 | } | 1212 | } |
| 1112 | ocfs2_recover_from_dlm_error(lockres, 1); | 1213 | ocfs2_recover_from_dlm_error(lockres, 1); |
| 1113 | goto out; | 1214 | goto out; |
| 1114 | } | 1215 | } |
| 1115 | 1216 | ||
| 1116 | mlog(0, "lock %s, successfull return from dlmlock\n", | 1217 | mlog(0, "lock %s, successfull return from ocfs2_dlm_lock\n", |
| 1117 | lockres->l_name); | 1218 | lockres->l_name); |
| 1118 | 1219 | ||
| 1119 | /* At this point we've gone inside the dlm and need to | 1220 | /* At this point we've gone inside the dlm and need to |
| @@ -1177,9 +1278,9 @@ static int ocfs2_create_new_lock(struct ocfs2_super *osb, | |||
| 1177 | int ex, | 1278 | int ex, |
| 1178 | int local) | 1279 | int local) |
| 1179 | { | 1280 | { |
| 1180 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 1281 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 1181 | unsigned long flags; | 1282 | unsigned long flags; |
| 1182 | int lkm_flags = local ? LKM_LOCAL : 0; | 1283 | u32 lkm_flags = local ? DLM_LKF_LOCAL : 0; |
| 1183 | 1284 | ||
| 1184 | spin_lock_irqsave(&lockres->l_lock, flags); | 1285 | spin_lock_irqsave(&lockres->l_lock, flags); |
| 1185 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 1286 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); |
| @@ -1222,7 +1323,7 @@ int ocfs2_create_new_inode_locks(struct inode *inode) | |||
| 1222 | } | 1323 | } |
| 1223 | 1324 | ||
| 1224 | /* | 1325 | /* |
| 1225 | * We don't want to use LKM_LOCAL on a meta data lock as they | 1326 | * We don't want to use DLM_LKF_LOCAL on a meta data lock as they |
| 1226 | * don't use a generation in their lock names. | 1327 | * don't use a generation in their lock names. |
| 1227 | */ | 1328 | */ |
| 1228 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); | 1329 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); |
| @@ -1261,7 +1362,7 @@ int ocfs2_rw_lock(struct inode *inode, int write) | |||
| 1261 | 1362 | ||
| 1262 | lockres = &OCFS2_I(inode)->ip_rw_lockres; | 1363 | lockres = &OCFS2_I(inode)->ip_rw_lockres; |
| 1263 | 1364 | ||
| 1264 | level = write ? LKM_EXMODE : LKM_PRMODE; | 1365 | level = write ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 1265 | 1366 | ||
| 1266 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, | 1367 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0, |
| 1267 | 0); | 1368 | 0); |
| @@ -1274,7 +1375,7 @@ int ocfs2_rw_lock(struct inode *inode, int write) | |||
| 1274 | 1375 | ||
| 1275 | void ocfs2_rw_unlock(struct inode *inode, int write) | 1376 | void ocfs2_rw_unlock(struct inode *inode, int write) |
| 1276 | { | 1377 | { |
| 1277 | int level = write ? LKM_EXMODE : LKM_PRMODE; | 1378 | int level = write ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 1278 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; | 1379 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres; |
| 1279 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1380 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 1280 | 1381 | ||
| @@ -1312,7 +1413,7 @@ int ocfs2_open_lock(struct inode *inode) | |||
| 1312 | lockres = &OCFS2_I(inode)->ip_open_lockres; | 1413 | lockres = &OCFS2_I(inode)->ip_open_lockres; |
| 1313 | 1414 | ||
| 1314 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, | 1415 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, |
| 1315 | LKM_PRMODE, 0, 0); | 1416 | DLM_LOCK_PR, 0, 0); |
| 1316 | if (status < 0) | 1417 | if (status < 0) |
| 1317 | mlog_errno(status); | 1418 | mlog_errno(status); |
| 1318 | 1419 | ||
| @@ -1340,16 +1441,16 @@ int ocfs2_try_open_lock(struct inode *inode, int write) | |||
| 1340 | 1441 | ||
| 1341 | lockres = &OCFS2_I(inode)->ip_open_lockres; | 1442 | lockres = &OCFS2_I(inode)->ip_open_lockres; |
| 1342 | 1443 | ||
| 1343 | level = write ? LKM_EXMODE : LKM_PRMODE; | 1444 | level = write ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 1344 | 1445 | ||
| 1345 | /* | 1446 | /* |
| 1346 | * The file system may already holding a PRMODE/EXMODE open lock. | 1447 | * The file system may already holding a PRMODE/EXMODE open lock. |
| 1347 | * Since we pass LKM_NOQUEUE, the request won't block waiting on | 1448 | * Since we pass DLM_LKF_NOQUEUE, the request won't block waiting on |
| 1348 | * other nodes and the -EAGAIN will indicate to the caller that | 1449 | * other nodes and the -EAGAIN will indicate to the caller that |
| 1349 | * this inode is still in use. | 1450 | * this inode is still in use. |
| 1350 | */ | 1451 | */ |
| 1351 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, | 1452 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, |
| 1352 | level, LKM_NOQUEUE, 0); | 1453 | level, DLM_LKF_NOQUEUE, 0); |
| 1353 | 1454 | ||
| 1354 | out: | 1455 | out: |
| 1355 | mlog_exit(status); | 1456 | mlog_exit(status); |
| @@ -1374,10 +1475,10 @@ void ocfs2_open_unlock(struct inode *inode) | |||
| 1374 | 1475 | ||
| 1375 | if(lockres->l_ro_holders) | 1476 | if(lockres->l_ro_holders) |
| 1376 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, | 1477 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, |
| 1377 | LKM_PRMODE); | 1478 | DLM_LOCK_PR); |
| 1378 | if(lockres->l_ex_holders) | 1479 | if(lockres->l_ex_holders) |
| 1379 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, | 1480 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, |
| 1380 | LKM_EXMODE); | 1481 | DLM_LOCK_EX); |
| 1381 | 1482 | ||
| 1382 | out: | 1483 | out: |
| 1383 | mlog_exit_void(); | 1484 | mlog_exit_void(); |
| @@ -1464,7 +1565,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
| 1464 | ocfs2_init_mask_waiter(&mw); | 1565 | ocfs2_init_mask_waiter(&mw); |
| 1465 | 1566 | ||
| 1466 | if ((lockres->l_flags & OCFS2_LOCK_BUSY) || | 1567 | if ((lockres->l_flags & OCFS2_LOCK_BUSY) || |
| 1467 | (lockres->l_level > LKM_NLMODE)) { | 1568 | (lockres->l_level > DLM_LOCK_NL)) { |
| 1468 | mlog(ML_ERROR, | 1569 | mlog(ML_ERROR, |
| 1469 | "File lock \"%s\" has busy or locked state: flags: 0x%lx, " | 1570 | "File lock \"%s\" has busy or locked state: flags: 0x%lx, " |
| 1470 | "level: %u\n", lockres->l_name, lockres->l_flags, | 1571 | "level: %u\n", lockres->l_name, lockres->l_flags, |
| @@ -1503,14 +1604,12 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
| 1503 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 1604 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
| 1504 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1605 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 1505 | 1606 | ||
| 1506 | ret = dlmlock(osb->dlm, level, &lockres->l_lksb, lkm_flags, | 1607 | ret = ocfs2_dlm_lock(osb->cconn, level, &lockres->l_lksb, lkm_flags, |
| 1507 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, | 1608 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, |
| 1508 | ocfs2_locking_ast, lockres, ocfs2_blocking_ast); | 1609 | lockres); |
| 1509 | if (ret != DLM_NORMAL) { | 1610 | if (ret) { |
| 1510 | if (trylock && ret == DLM_NOTQUEUED) | 1611 | if (!trylock || (ret != -EAGAIN)) { |
| 1511 | ret = -EAGAIN; | 1612 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); |
| 1512 | else { | ||
| 1513 | ocfs2_log_dlm_error("dlmlock", ret, lockres); | ||
| 1514 | ret = -EINVAL; | 1613 | ret = -EINVAL; |
| 1515 | } | 1614 | } |
| 1516 | 1615 | ||
| @@ -1537,6 +1636,10 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock) | |||
| 1537 | * to just bubble sucess back up to the user. | 1636 | * to just bubble sucess back up to the user. |
| 1538 | */ | 1637 | */ |
| 1539 | ret = ocfs2_flock_handle_signal(lockres, level); | 1638 | ret = ocfs2_flock_handle_signal(lockres, level); |
| 1639 | } else if (!ret && (level > lockres->l_level)) { | ||
| 1640 | /* Trylock failed asynchronously */ | ||
| 1641 | BUG_ON(!trylock); | ||
| 1642 | ret = -EAGAIN; | ||
| 1540 | } | 1643 | } |
| 1541 | 1644 | ||
| 1542 | out: | 1645 | out: |
| @@ -1549,6 +1652,7 @@ out: | |||
| 1549 | void ocfs2_file_unlock(struct file *file) | 1652 | void ocfs2_file_unlock(struct file *file) |
| 1550 | { | 1653 | { |
| 1551 | int ret; | 1654 | int ret; |
| 1655 | unsigned int gen; | ||
| 1552 | unsigned long flags; | 1656 | unsigned long flags; |
| 1553 | struct ocfs2_file_private *fp = file->private_data; | 1657 | struct ocfs2_file_private *fp = file->private_data; |
| 1554 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | 1658 | struct ocfs2_lock_res *lockres = &fp->fp_flock; |
| @@ -1572,13 +1676,13 @@ void ocfs2_file_unlock(struct file *file) | |||
| 1572 | * Fake a blocking ast for the downconvert code. | 1676 | * Fake a blocking ast for the downconvert code. |
| 1573 | */ | 1677 | */ |
| 1574 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | 1678 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); |
| 1575 | lockres->l_blocking = LKM_EXMODE; | 1679 | lockres->l_blocking = DLM_LOCK_EX; |
| 1576 | 1680 | ||
| 1577 | ocfs2_prepare_downconvert(lockres, LKM_NLMODE); | 1681 | gen = ocfs2_prepare_downconvert(lockres, LKM_NLMODE); |
| 1578 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | 1682 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
| 1579 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1683 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 1580 | 1684 | ||
| 1581 | ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0); | 1685 | ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0, gen); |
| 1582 | if (ret) { | 1686 | if (ret) { |
| 1583 | mlog_errno(ret); | 1687 | mlog_errno(ret); |
| 1584 | return; | 1688 | return; |
| @@ -1601,11 +1705,11 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, | |||
| 1601 | * condition. */ | 1705 | * condition. */ |
| 1602 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { | 1706 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { |
| 1603 | switch(lockres->l_blocking) { | 1707 | switch(lockres->l_blocking) { |
| 1604 | case LKM_EXMODE: | 1708 | case DLM_LOCK_EX: |
| 1605 | if (!lockres->l_ex_holders && !lockres->l_ro_holders) | 1709 | if (!lockres->l_ex_holders && !lockres->l_ro_holders) |
| 1606 | kick = 1; | 1710 | kick = 1; |
| 1607 | break; | 1711 | break; |
| 1608 | case LKM_PRMODE: | 1712 | case DLM_LOCK_PR: |
| 1609 | if (!lockres->l_ex_holders) | 1713 | if (!lockres->l_ex_holders) |
| 1610 | kick = 1; | 1714 | kick = 1; |
| 1611 | break; | 1715 | break; |
| @@ -1648,7 +1752,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode) | |||
| 1648 | 1752 | ||
| 1649 | mlog_entry_void(); | 1753 | mlog_entry_void(); |
| 1650 | 1754 | ||
| 1651 | lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 1755 | lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); |
| 1652 | 1756 | ||
| 1653 | /* | 1757 | /* |
| 1654 | * Invalidate the LVB of a deleted inode - this way other | 1758 | * Invalidate the LVB of a deleted inode - this way other |
| @@ -1700,7 +1804,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | |||
| 1700 | 1804 | ||
| 1701 | mlog_meta_lvb(0, lockres); | 1805 | mlog_meta_lvb(0, lockres); |
| 1702 | 1806 | ||
| 1703 | lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 1807 | lvb = (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); |
| 1704 | 1808 | ||
| 1705 | /* We're safe here without the lockres lock... */ | 1809 | /* We're safe here without the lockres lock... */ |
| 1706 | spin_lock(&oi->ip_lock); | 1810 | spin_lock(&oi->ip_lock); |
| @@ -1735,7 +1839,8 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | |||
| 1735 | static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, | 1839 | static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, |
| 1736 | struct ocfs2_lock_res *lockres) | 1840 | struct ocfs2_lock_res *lockres) |
| 1737 | { | 1841 | { |
| 1738 | struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb; | 1842 | struct ocfs2_meta_lvb *lvb = |
| 1843 | (struct ocfs2_meta_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); | ||
| 1739 | 1844 | ||
| 1740 | if (lvb->lvb_version == OCFS2_LVB_VERSION | 1845 | if (lvb->lvb_version == OCFS2_LVB_VERSION |
| 1741 | && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) | 1846 | && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) |
| @@ -1923,7 +2028,8 @@ int ocfs2_inode_lock_full(struct inode *inode, | |||
| 1923 | int ex, | 2028 | int ex, |
| 1924 | int arg_flags) | 2029 | int arg_flags) |
| 1925 | { | 2030 | { |
| 1926 | int status, level, dlm_flags, acquired; | 2031 | int status, level, acquired; |
| 2032 | u32 dlm_flags; | ||
| 1927 | struct ocfs2_lock_res *lockres = NULL; | 2033 | struct ocfs2_lock_res *lockres = NULL; |
| 1928 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2034 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 1929 | struct buffer_head *local_bh = NULL; | 2035 | struct buffer_head *local_bh = NULL; |
| @@ -1950,14 +2056,13 @@ int ocfs2_inode_lock_full(struct inode *inode, | |||
| 1950 | goto local; | 2056 | goto local; |
| 1951 | 2057 | ||
| 1952 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) | 2058 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) |
| 1953 | wait_event(osb->recovery_event, | 2059 | ocfs2_wait_for_recovery(osb); |
| 1954 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | ||
| 1955 | 2060 | ||
| 1956 | lockres = &OCFS2_I(inode)->ip_inode_lockres; | 2061 | lockres = &OCFS2_I(inode)->ip_inode_lockres; |
| 1957 | level = ex ? LKM_EXMODE : LKM_PRMODE; | 2062 | level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 1958 | dlm_flags = 0; | 2063 | dlm_flags = 0; |
| 1959 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) | 2064 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) |
| 1960 | dlm_flags |= LKM_NOQUEUE; | 2065 | dlm_flags |= DLM_LKF_NOQUEUE; |
| 1961 | 2066 | ||
| 1962 | status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); | 2067 | status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); |
| 1963 | if (status < 0) { | 2068 | if (status < 0) { |
| @@ -1974,8 +2079,7 @@ int ocfs2_inode_lock_full(struct inode *inode, | |||
| 1974 | * committed to owning this lock so we don't allow signals to | 2079 | * committed to owning this lock so we don't allow signals to |
| 1975 | * abort the operation. */ | 2080 | * abort the operation. */ |
| 1976 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) | 2081 | if (!(arg_flags & OCFS2_META_LOCK_RECOVERY)) |
| 1977 | wait_event(osb->recovery_event, | 2082 | ocfs2_wait_for_recovery(osb); |
| 1978 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | ||
| 1979 | 2083 | ||
| 1980 | local: | 2084 | local: |
| 1981 | /* | 2085 | /* |
| @@ -2109,7 +2213,7 @@ int ocfs2_inode_lock_atime(struct inode *inode, | |||
| 2109 | void ocfs2_inode_unlock(struct inode *inode, | 2213 | void ocfs2_inode_unlock(struct inode *inode, |
| 2110 | int ex) | 2214 | int ex) |
| 2111 | { | 2215 | { |
| 2112 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2216 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 2113 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; | 2217 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; |
| 2114 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2218 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 2115 | 2219 | ||
| @@ -2130,10 +2234,8 @@ int ocfs2_super_lock(struct ocfs2_super *osb, | |||
| 2130 | int ex) | 2234 | int ex) |
| 2131 | { | 2235 | { |
| 2132 | int status = 0; | 2236 | int status = 0; |
| 2133 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2237 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 2134 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; | 2238 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; |
| 2135 | struct buffer_head *bh; | ||
| 2136 | struct ocfs2_slot_info *si = osb->slot_info; | ||
| 2137 | 2239 | ||
| 2138 | mlog_entry_void(); | 2240 | mlog_entry_void(); |
| 2139 | 2241 | ||
| @@ -2159,11 +2261,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb, | |||
| 2159 | goto bail; | 2261 | goto bail; |
| 2160 | } | 2262 | } |
| 2161 | if (status) { | 2263 | if (status) { |
| 2162 | bh = si->si_bh; | 2264 | status = ocfs2_refresh_slot_info(osb); |
| 2163 | status = ocfs2_read_block(osb, bh->b_blocknr, &bh, 0, | ||
| 2164 | si->si_inode); | ||
| 2165 | if (status == 0) | ||
| 2166 | ocfs2_update_slot_info(si); | ||
| 2167 | 2265 | ||
| 2168 | ocfs2_complete_lock_res_refresh(lockres, status); | 2266 | ocfs2_complete_lock_res_refresh(lockres, status); |
| 2169 | 2267 | ||
| @@ -2178,7 +2276,7 @@ bail: | |||
| 2178 | void ocfs2_super_unlock(struct ocfs2_super *osb, | 2276 | void ocfs2_super_unlock(struct ocfs2_super *osb, |
| 2179 | int ex) | 2277 | int ex) |
| 2180 | { | 2278 | { |
| 2181 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2279 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 2182 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; | 2280 | struct ocfs2_lock_res *lockres = &osb->osb_super_lockres; |
| 2183 | 2281 | ||
| 2184 | if (!ocfs2_mount_local(osb)) | 2282 | if (!ocfs2_mount_local(osb)) |
| @@ -2196,7 +2294,7 @@ int ocfs2_rename_lock(struct ocfs2_super *osb) | |||
| 2196 | if (ocfs2_mount_local(osb)) | 2294 | if (ocfs2_mount_local(osb)) |
| 2197 | return 0; | 2295 | return 0; |
| 2198 | 2296 | ||
| 2199 | status = ocfs2_cluster_lock(osb, lockres, LKM_EXMODE, 0, 0); | 2297 | status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); |
| 2200 | if (status < 0) | 2298 | if (status < 0) |
| 2201 | mlog_errno(status); | 2299 | mlog_errno(status); |
| 2202 | 2300 | ||
| @@ -2208,13 +2306,13 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb) | |||
| 2208 | struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; | 2306 | struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres; |
| 2209 | 2307 | ||
| 2210 | if (!ocfs2_mount_local(osb)) | 2308 | if (!ocfs2_mount_local(osb)) |
| 2211 | ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE); | 2309 | ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); |
| 2212 | } | 2310 | } |
| 2213 | 2311 | ||
| 2214 | int ocfs2_dentry_lock(struct dentry *dentry, int ex) | 2312 | int ocfs2_dentry_lock(struct dentry *dentry, int ex) |
| 2215 | { | 2313 | { |
| 2216 | int ret; | 2314 | int ret; |
| 2217 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2315 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 2218 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | 2316 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; |
| 2219 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | 2317 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
| 2220 | 2318 | ||
| @@ -2235,7 +2333,7 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex) | |||
| 2235 | 2333 | ||
| 2236 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex) | 2334 | void ocfs2_dentry_unlock(struct dentry *dentry, int ex) |
| 2237 | { | 2335 | { |
| 2238 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2336 | int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; |
| 2239 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | 2337 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; |
| 2240 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | 2338 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
| 2241 | 2339 | ||
| @@ -2400,7 +2498,7 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) | |||
| 2400 | lockres->l_blocking); | 2498 | lockres->l_blocking); |
| 2401 | 2499 | ||
| 2402 | /* Dump the raw LVB */ | 2500 | /* Dump the raw LVB */ |
| 2403 | lvb = lockres->l_lksb.lvb; | 2501 | lvb = ocfs2_dlm_lvb(&lockres->l_lksb); |
| 2404 | for(i = 0; i < DLM_LVB_LEN; i++) | 2502 | for(i = 0; i < DLM_LVB_LEN; i++) |
| 2405 | seq_printf(m, "0x%x\t", lvb[i]); | 2503 | seq_printf(m, "0x%x\t", lvb[i]); |
| 2406 | 2504 | ||
| @@ -2504,13 +2602,14 @@ static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) | |||
| 2504 | int ocfs2_dlm_init(struct ocfs2_super *osb) | 2602 | int ocfs2_dlm_init(struct ocfs2_super *osb) |
| 2505 | { | 2603 | { |
| 2506 | int status = 0; | 2604 | int status = 0; |
| 2507 | u32 dlm_key; | 2605 | struct ocfs2_cluster_connection *conn = NULL; |
| 2508 | struct dlm_ctxt *dlm = NULL; | ||
| 2509 | 2606 | ||
| 2510 | mlog_entry_void(); | 2607 | mlog_entry_void(); |
| 2511 | 2608 | ||
| 2512 | if (ocfs2_mount_local(osb)) | 2609 | if (ocfs2_mount_local(osb)) { |
| 2610 | osb->node_num = 0; | ||
| 2513 | goto local; | 2611 | goto local; |
| 2612 | } | ||
| 2514 | 2613 | ||
| 2515 | status = ocfs2_dlm_init_debug(osb); | 2614 | status = ocfs2_dlm_init_debug(osb); |
| 2516 | if (status < 0) { | 2615 | if (status < 0) { |
| @@ -2527,26 +2626,31 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) | |||
| 2527 | goto bail; | 2626 | goto bail; |
| 2528 | } | 2627 | } |
| 2529 | 2628 | ||
| 2530 | /* used by the dlm code to make message headers unique, each | ||
| 2531 | * node in this domain must agree on this. */ | ||
| 2532 | dlm_key = crc32_le(0, osb->uuid_str, strlen(osb->uuid_str)); | ||
| 2533 | |||
| 2534 | /* for now, uuid == domain */ | 2629 | /* for now, uuid == domain */ |
| 2535 | dlm = dlm_register_domain(osb->uuid_str, dlm_key, | 2630 | status = ocfs2_cluster_connect(osb->osb_cluster_stack, |
| 2536 | &osb->osb_locking_proto); | 2631 | osb->uuid_str, |
| 2537 | if (IS_ERR(dlm)) { | 2632 | strlen(osb->uuid_str), |
| 2538 | status = PTR_ERR(dlm); | 2633 | ocfs2_do_node_down, osb, |
| 2634 | &conn); | ||
| 2635 | if (status) { | ||
| 2539 | mlog_errno(status); | 2636 | mlog_errno(status); |
| 2540 | goto bail; | 2637 | goto bail; |
| 2541 | } | 2638 | } |
| 2542 | 2639 | ||
| 2543 | dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb); | 2640 | status = ocfs2_cluster_this_node(&osb->node_num); |
| 2641 | if (status < 0) { | ||
| 2642 | mlog_errno(status); | ||
| 2643 | mlog(ML_ERROR, | ||
| 2644 | "could not find this host's node number\n"); | ||
| 2645 | ocfs2_cluster_disconnect(conn, 0); | ||
| 2646 | goto bail; | ||
| 2647 | } | ||
| 2544 | 2648 | ||
| 2545 | local: | 2649 | local: |
| 2546 | ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); | 2650 | ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb); |
| 2547 | ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); | 2651 | ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb); |
| 2548 | 2652 | ||
| 2549 | osb->dlm = dlm; | 2653 | osb->cconn = conn; |
| 2550 | 2654 | ||
| 2551 | status = 0; | 2655 | status = 0; |
| 2552 | bail: | 2656 | bail: |
| @@ -2560,14 +2664,19 @@ bail: | |||
| 2560 | return status; | 2664 | return status; |
| 2561 | } | 2665 | } |
| 2562 | 2666 | ||
| 2563 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb) | 2667 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb, |
| 2668 | int hangup_pending) | ||
| 2564 | { | 2669 | { |
| 2565 | mlog_entry_void(); | 2670 | mlog_entry_void(); |
| 2566 | 2671 | ||
| 2567 | dlm_unregister_eviction_cb(&osb->osb_eviction_cb); | ||
| 2568 | |||
| 2569 | ocfs2_drop_osb_locks(osb); | 2672 | ocfs2_drop_osb_locks(osb); |
| 2570 | 2673 | ||
| 2674 | /* | ||
| 2675 | * Now that we have dropped all locks and ocfs2_dismount_volume() | ||
| 2676 | * has disabled recovery, the DLM won't be talking to us. It's | ||
| 2677 | * safe to tear things down before disconnecting the cluster. | ||
| 2678 | */ | ||
| 2679 | |||
| 2571 | if (osb->dc_task) { | 2680 | if (osb->dc_task) { |
| 2572 | kthread_stop(osb->dc_task); | 2681 | kthread_stop(osb->dc_task); |
| 2573 | osb->dc_task = NULL; | 2682 | osb->dc_task = NULL; |
| @@ -2576,15 +2685,15 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb) | |||
| 2576 | ocfs2_lock_res_free(&osb->osb_super_lockres); | 2685 | ocfs2_lock_res_free(&osb->osb_super_lockres); |
| 2577 | ocfs2_lock_res_free(&osb->osb_rename_lockres); | 2686 | ocfs2_lock_res_free(&osb->osb_rename_lockres); |
| 2578 | 2687 | ||
| 2579 | dlm_unregister_domain(osb->dlm); | 2688 | ocfs2_cluster_disconnect(osb->cconn, hangup_pending); |
| 2580 | osb->dlm = NULL; | 2689 | osb->cconn = NULL; |
| 2581 | 2690 | ||
| 2582 | ocfs2_dlm_shutdown_debug(osb); | 2691 | ocfs2_dlm_shutdown_debug(osb); |
| 2583 | 2692 | ||
| 2584 | mlog_exit_void(); | 2693 | mlog_exit_void(); |
| 2585 | } | 2694 | } |
| 2586 | 2695 | ||
| 2587 | static void ocfs2_unlock_ast(void *opaque, enum dlm_status status) | 2696 | static void ocfs2_unlock_ast(void *opaque, int error) |
| 2588 | { | 2697 | { |
| 2589 | struct ocfs2_lock_res *lockres = opaque; | 2698 | struct ocfs2_lock_res *lockres = opaque; |
| 2590 | unsigned long flags; | 2699 | unsigned long flags; |
| @@ -2595,24 +2704,9 @@ static void ocfs2_unlock_ast(void *opaque, enum dlm_status status) | |||
| 2595 | lockres->l_unlock_action); | 2704 | lockres->l_unlock_action); |
| 2596 | 2705 | ||
| 2597 | spin_lock_irqsave(&lockres->l_lock, flags); | 2706 | spin_lock_irqsave(&lockres->l_lock, flags); |
| 2598 | /* We tried to cancel a convert request, but it was already | 2707 | if (error) { |
| 2599 | * granted. All we want to do here is clear our unlock | 2708 | mlog(ML_ERROR, "Dlm passes error %d for lock %s, " |
| 2600 | * state. The wake_up call done at the bottom is redundant | 2709 | "unlock_action %d\n", error, lockres->l_name, |
| 2601 | * (ocfs2_prepare_cancel_convert doesn't sleep on this) but doesn't | ||
| 2602 | * hurt anything anyway */ | ||
| 2603 | if (status == DLM_CANCELGRANT && | ||
| 2604 | lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) { | ||
| 2605 | mlog(0, "Got cancelgrant for %s\n", lockres->l_name); | ||
| 2606 | |||
| 2607 | /* We don't clear the busy flag in this case as it | ||
| 2608 | * should have been cleared by the ast which the dlm | ||
| 2609 | * has called. */ | ||
| 2610 | goto complete_unlock; | ||
| 2611 | } | ||
| 2612 | |||
| 2613 | if (status != DLM_NORMAL) { | ||
| 2614 | mlog(ML_ERROR, "Dlm passes status %d for lock %s, " | ||
| 2615 | "unlock_action %d\n", status, lockres->l_name, | ||
| 2616 | lockres->l_unlock_action); | 2710 | lockres->l_unlock_action); |
| 2617 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2711 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 2618 | return; | 2712 | return; |
| @@ -2624,14 +2718,13 @@ static void ocfs2_unlock_ast(void *opaque, enum dlm_status status) | |||
| 2624 | lockres->l_action = OCFS2_AST_INVALID; | 2718 | lockres->l_action = OCFS2_AST_INVALID; |
| 2625 | break; | 2719 | break; |
| 2626 | case OCFS2_UNLOCK_DROP_LOCK: | 2720 | case OCFS2_UNLOCK_DROP_LOCK: |
| 2627 | lockres->l_level = LKM_IVMODE; | 2721 | lockres->l_level = DLM_LOCK_IV; |
| 2628 | break; | 2722 | break; |
| 2629 | default: | 2723 | default: |
| 2630 | BUG(); | 2724 | BUG(); |
| 2631 | } | 2725 | } |
| 2632 | 2726 | ||
| 2633 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); | 2727 | lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); |
| 2634 | complete_unlock: | ||
| 2635 | lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; | 2728 | lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; |
| 2636 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 2729 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 2637 | 2730 | ||
| @@ -2643,16 +2736,16 @@ complete_unlock: | |||
| 2643 | static int ocfs2_drop_lock(struct ocfs2_super *osb, | 2736 | static int ocfs2_drop_lock(struct ocfs2_super *osb, |
| 2644 | struct ocfs2_lock_res *lockres) | 2737 | struct ocfs2_lock_res *lockres) |
| 2645 | { | 2738 | { |
| 2646 | enum dlm_status status; | 2739 | int ret; |
| 2647 | unsigned long flags; | 2740 | unsigned long flags; |
| 2648 | int lkm_flags = 0; | 2741 | u32 lkm_flags = 0; |
| 2649 | 2742 | ||
| 2650 | /* We didn't get anywhere near actually using this lockres. */ | 2743 | /* We didn't get anywhere near actually using this lockres. */ |
| 2651 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) | 2744 | if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) |
| 2652 | goto out; | 2745 | goto out; |
| 2653 | 2746 | ||
| 2654 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) | 2747 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) |
| 2655 | lkm_flags |= LKM_VALBLK; | 2748 | lkm_flags |= DLM_LKF_VALBLK; |
| 2656 | 2749 | ||
| 2657 | spin_lock_irqsave(&lockres->l_lock, flags); | 2750 | spin_lock_irqsave(&lockres->l_lock, flags); |
| 2658 | 2751 | ||
| @@ -2678,7 +2771,7 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb, | |||
| 2678 | 2771 | ||
| 2679 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { | 2772 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { |
| 2680 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && | 2773 | if (lockres->l_flags & OCFS2_LOCK_ATTACHED && |
| 2681 | lockres->l_level == LKM_EXMODE && | 2774 | lockres->l_level == DLM_LOCK_EX && |
| 2682 | !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) | 2775 | !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) |
| 2683 | lockres->l_ops->set_lvb(lockres); | 2776 | lockres->l_ops->set_lvb(lockres); |
| 2684 | } | 2777 | } |
| @@ -2707,15 +2800,15 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb, | |||
| 2707 | 2800 | ||
| 2708 | mlog(0, "lock %s\n", lockres->l_name); | 2801 | mlog(0, "lock %s\n", lockres->l_name); |
| 2709 | 2802 | ||
| 2710 | status = dlmunlock(osb->dlm, &lockres->l_lksb, lkm_flags, | 2803 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, lkm_flags, |
| 2711 | ocfs2_unlock_ast, lockres); | 2804 | lockres); |
| 2712 | if (status != DLM_NORMAL) { | 2805 | if (ret) { |
| 2713 | ocfs2_log_dlm_error("dlmunlock", status, lockres); | 2806 | ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); |
| 2714 | mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); | 2807 | mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags); |
| 2715 | dlm_print_one_lock(lockres->l_lksb.lockid); | 2808 | ocfs2_dlm_dump_lksb(&lockres->l_lksb); |
| 2716 | BUG(); | 2809 | BUG(); |
| 2717 | } | 2810 | } |
| 2718 | mlog(0, "lock %s, successfull return from dlmunlock\n", | 2811 | mlog(0, "lock %s, successfull return from ocfs2_dlm_unlock\n", |
| 2719 | lockres->l_name); | 2812 | lockres->l_name); |
| 2720 | 2813 | ||
| 2721 | ocfs2_wait_on_busy_lock(lockres); | 2814 | ocfs2_wait_on_busy_lock(lockres); |
| @@ -2806,15 +2899,15 @@ int ocfs2_drop_inode_locks(struct inode *inode) | |||
| 2806 | return status; | 2899 | return status; |
| 2807 | } | 2900 | } |
| 2808 | 2901 | ||
| 2809 | static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | 2902 | static unsigned int ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, |
| 2810 | int new_level) | 2903 | int new_level) |
| 2811 | { | 2904 | { |
| 2812 | assert_spin_locked(&lockres->l_lock); | 2905 | assert_spin_locked(&lockres->l_lock); |
| 2813 | 2906 | ||
| 2814 | BUG_ON(lockres->l_blocking <= LKM_NLMODE); | 2907 | BUG_ON(lockres->l_blocking <= DLM_LOCK_NL); |
| 2815 | 2908 | ||
| 2816 | if (lockres->l_level <= new_level) { | 2909 | if (lockres->l_level <= new_level) { |
| 2817 | mlog(ML_ERROR, "lockres->l_level (%u) <= new_level (%u)\n", | 2910 | mlog(ML_ERROR, "lockres->l_level (%d) <= new_level (%d)\n", |
| 2818 | lockres->l_level, new_level); | 2911 | lockres->l_level, new_level); |
| 2819 | BUG(); | 2912 | BUG(); |
| 2820 | } | 2913 | } |
| @@ -2825,33 +2918,33 @@ static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | |||
| 2825 | lockres->l_action = OCFS2_AST_DOWNCONVERT; | 2918 | lockres->l_action = OCFS2_AST_DOWNCONVERT; |
| 2826 | lockres->l_requested = new_level; | 2919 | lockres->l_requested = new_level; |
| 2827 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 2920 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); |
| 2921 | return lockres_set_pending(lockres); | ||
| 2828 | } | 2922 | } |
| 2829 | 2923 | ||
| 2830 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | 2924 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, |
| 2831 | struct ocfs2_lock_res *lockres, | 2925 | struct ocfs2_lock_res *lockres, |
| 2832 | int new_level, | 2926 | int new_level, |
| 2833 | int lvb) | 2927 | int lvb, |
| 2928 | unsigned int generation) | ||
| 2834 | { | 2929 | { |
| 2835 | int ret, dlm_flags = LKM_CONVERT; | 2930 | int ret; |
| 2836 | enum dlm_status status; | 2931 | u32 dlm_flags = DLM_LKF_CONVERT; |
| 2837 | 2932 | ||
| 2838 | mlog_entry_void(); | 2933 | mlog_entry_void(); |
| 2839 | 2934 | ||
| 2840 | if (lvb) | 2935 | if (lvb) |
| 2841 | dlm_flags |= LKM_VALBLK; | 2936 | dlm_flags |= DLM_LKF_VALBLK; |
| 2842 | 2937 | ||
| 2843 | status = dlmlock(osb->dlm, | 2938 | ret = ocfs2_dlm_lock(osb->cconn, |
| 2844 | new_level, | 2939 | new_level, |
| 2845 | &lockres->l_lksb, | 2940 | &lockres->l_lksb, |
| 2846 | dlm_flags, | 2941 | dlm_flags, |
| 2847 | lockres->l_name, | 2942 | lockres->l_name, |
| 2848 | OCFS2_LOCK_ID_MAX_LEN - 1, | 2943 | OCFS2_LOCK_ID_MAX_LEN - 1, |
| 2849 | ocfs2_locking_ast, | 2944 | lockres); |
| 2850 | lockres, | 2945 | lockres_clear_pending(lockres, generation, osb); |
| 2851 | ocfs2_blocking_ast); | 2946 | if (ret) { |
| 2852 | if (status != DLM_NORMAL) { | 2947 | ocfs2_log_dlm_error("ocfs2_dlm_lock", ret, lockres); |
| 2853 | ocfs2_log_dlm_error("dlmlock", status, lockres); | ||
| 2854 | ret = -EINVAL; | ||
| 2855 | ocfs2_recover_from_dlm_error(lockres, 1); | 2948 | ocfs2_recover_from_dlm_error(lockres, 1); |
| 2856 | goto bail; | 2949 | goto bail; |
| 2857 | } | 2950 | } |
| @@ -2862,7 +2955,7 @@ bail: | |||
| 2862 | return ret; | 2955 | return ret; |
| 2863 | } | 2956 | } |
| 2864 | 2957 | ||
| 2865 | /* returns 1 when the caller should unlock and call dlmunlock */ | 2958 | /* returns 1 when the caller should unlock and call ocfs2_dlm_unlock */ |
| 2866 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | 2959 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, |
| 2867 | struct ocfs2_lock_res *lockres) | 2960 | struct ocfs2_lock_res *lockres) |
| 2868 | { | 2961 | { |
| @@ -2898,24 +2991,18 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb, | |||
| 2898 | struct ocfs2_lock_res *lockres) | 2991 | struct ocfs2_lock_res *lockres) |
| 2899 | { | 2992 | { |
| 2900 | int ret; | 2993 | int ret; |
| 2901 | enum dlm_status status; | ||
| 2902 | 2994 | ||
| 2903 | mlog_entry_void(); | 2995 | mlog_entry_void(); |
| 2904 | mlog(0, "lock %s\n", lockres->l_name); | 2996 | mlog(0, "lock %s\n", lockres->l_name); |
| 2905 | 2997 | ||
| 2906 | ret = 0; | 2998 | ret = ocfs2_dlm_unlock(osb->cconn, &lockres->l_lksb, |
| 2907 | status = dlmunlock(osb->dlm, | 2999 | DLM_LKF_CANCEL, lockres); |
| 2908 | &lockres->l_lksb, | 3000 | if (ret) { |
| 2909 | LKM_CANCEL, | 3001 | ocfs2_log_dlm_error("ocfs2_dlm_unlock", ret, lockres); |
| 2910 | ocfs2_unlock_ast, | ||
| 2911 | lockres); | ||
| 2912 | if (status != DLM_NORMAL) { | ||
| 2913 | ocfs2_log_dlm_error("dlmunlock", status, lockres); | ||
| 2914 | ret = -EINVAL; | ||
| 2915 | ocfs2_recover_from_dlm_error(lockres, 0); | 3002 | ocfs2_recover_from_dlm_error(lockres, 0); |
| 2916 | } | 3003 | } |
| 2917 | 3004 | ||
| 2918 | mlog(0, "lock %s return from dlmunlock\n", lockres->l_name); | 3005 | mlog(0, "lock %s return from ocfs2_dlm_unlock\n", lockres->l_name); |
| 2919 | 3006 | ||
| 2920 | mlog_exit(ret); | 3007 | mlog_exit(ret); |
| 2921 | return ret; | 3008 | return ret; |
| @@ -2930,6 +3017,7 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb, | |||
| 2930 | int new_level; | 3017 | int new_level; |
| 2931 | int ret = 0; | 3018 | int ret = 0; |
| 2932 | int set_lvb = 0; | 3019 | int set_lvb = 0; |
| 3020 | unsigned int gen; | ||
| 2933 | 3021 | ||
| 2934 | mlog_entry_void(); | 3022 | mlog_entry_void(); |
| 2935 | 3023 | ||
| @@ -2939,6 +3027,32 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb, | |||
| 2939 | 3027 | ||
| 2940 | recheck: | 3028 | recheck: |
| 2941 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | 3029 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { |
| 3030 | /* XXX | ||
| 3031 | * This is a *big* race. The OCFS2_LOCK_PENDING flag | ||
| 3032 | * exists entirely for one reason - another thread has set | ||
| 3033 | * OCFS2_LOCK_BUSY, but has *NOT* yet called dlm_lock(). | ||
| 3034 | * | ||
| 3035 | * If we do ocfs2_cancel_convert() before the other thread | ||
| 3036 | * calls dlm_lock(), our cancel will do nothing. We will | ||
| 3037 | * get no ast, and we will have no way of knowing the | ||
| 3038 | * cancel failed. Meanwhile, the other thread will call | ||
| 3039 | * into dlm_lock() and wait...forever. | ||
| 3040 | * | ||
| 3041 | * Why forever? Because another node has asked for the | ||
| 3042 | * lock first; that's why we're here in unblock_lock(). | ||
| 3043 | * | ||
| 3044 | * The solution is OCFS2_LOCK_PENDING. When PENDING is | ||
| 3045 | * set, we just requeue the unblock. Only when the other | ||
| 3046 | * thread has called dlm_lock() and cleared PENDING will | ||
| 3047 | * we then cancel their request. | ||
| 3048 | * | ||
| 3049 | * All callers of dlm_lock() must set OCFS2_DLM_PENDING | ||
| 3050 | * at the same time they set OCFS2_DLM_BUSY. They must | ||
| 3051 | * clear OCFS2_DLM_PENDING after dlm_lock() returns. | ||
| 3052 | */ | ||
| 3053 | if (lockres->l_flags & OCFS2_LOCK_PENDING) | ||
| 3054 | goto leave_requeue; | ||
| 3055 | |||
| 2942 | ctl->requeue = 1; | 3056 | ctl->requeue = 1; |
| 2943 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | 3057 | ret = ocfs2_prepare_cancel_convert(osb, lockres); |
| 2944 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 3058 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| @@ -2952,13 +3066,13 @@ recheck: | |||
| 2952 | 3066 | ||
| 2953 | /* if we're blocking an exclusive and we have *any* holders, | 3067 | /* if we're blocking an exclusive and we have *any* holders, |
| 2954 | * then requeue. */ | 3068 | * then requeue. */ |
| 2955 | if ((lockres->l_blocking == LKM_EXMODE) | 3069 | if ((lockres->l_blocking == DLM_LOCK_EX) |
| 2956 | && (lockres->l_ex_holders || lockres->l_ro_holders)) | 3070 | && (lockres->l_ex_holders || lockres->l_ro_holders)) |
| 2957 | goto leave_requeue; | 3071 | goto leave_requeue; |
| 2958 | 3072 | ||
| 2959 | /* If it's a PR we're blocking, then only | 3073 | /* If it's a PR we're blocking, then only |
| 2960 | * requeue if we've got any EX holders */ | 3074 | * requeue if we've got any EX holders */ |
| 2961 | if (lockres->l_blocking == LKM_PRMODE && | 3075 | if (lockres->l_blocking == DLM_LOCK_PR && |
| 2962 | lockres->l_ex_holders) | 3076 | lockres->l_ex_holders) |
| 2963 | goto leave_requeue; | 3077 | goto leave_requeue; |
| 2964 | 3078 | ||
| @@ -3005,7 +3119,7 @@ downconvert: | |||
| 3005 | ctl->requeue = 0; | 3119 | ctl->requeue = 0; |
| 3006 | 3120 | ||
| 3007 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { | 3121 | if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) { |
| 3008 | if (lockres->l_level == LKM_EXMODE) | 3122 | if (lockres->l_level == DLM_LOCK_EX) |
| 3009 | set_lvb = 1; | 3123 | set_lvb = 1; |
| 3010 | 3124 | ||
| 3011 | /* | 3125 | /* |
| @@ -3018,9 +3132,11 @@ downconvert: | |||
| 3018 | lockres->l_ops->set_lvb(lockres); | 3132 | lockres->l_ops->set_lvb(lockres); |
| 3019 | } | 3133 | } |
| 3020 | 3134 | ||
| 3021 | ocfs2_prepare_downconvert(lockres, new_level); | 3135 | gen = ocfs2_prepare_downconvert(lockres, new_level); |
| 3022 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 3136 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
| 3023 | ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb); | 3137 | ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb, |
| 3138 | gen); | ||
| 3139 | |||
| 3024 | leave: | 3140 | leave: |
| 3025 | mlog_exit(ret); | 3141 | mlog_exit(ret); |
| 3026 | return ret; | 3142 | return ret; |
| @@ -3059,7 +3175,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
| 3059 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 3175 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
| 3060 | } | 3176 | } |
| 3061 | sync_mapping_buffers(mapping); | 3177 | sync_mapping_buffers(mapping); |
| 3062 | if (blocking == LKM_EXMODE) { | 3178 | if (blocking == DLM_LOCK_EX) { |
| 3063 | truncate_inode_pages(mapping, 0); | 3179 | truncate_inode_pages(mapping, 0); |
| 3064 | } else { | 3180 | } else { |
| 3065 | /* We only need to wait on the I/O if we're not also | 3181 | /* We only need to wait on the I/O if we're not also |
| @@ -3080,8 +3196,8 @@ static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, | |||
| 3080 | struct inode *inode = ocfs2_lock_res_inode(lockres); | 3196 | struct inode *inode = ocfs2_lock_res_inode(lockres); |
| 3081 | int checkpointed = ocfs2_inode_fully_checkpointed(inode); | 3197 | int checkpointed = ocfs2_inode_fully_checkpointed(inode); |
| 3082 | 3198 | ||
| 3083 | BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE); | 3199 | BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); |
| 3084 | BUG_ON(lockres->l_level != LKM_EXMODE && !checkpointed); | 3200 | BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); |
| 3085 | 3201 | ||
| 3086 | if (checkpointed) | 3202 | if (checkpointed) |
| 3087 | return 1; | 3203 | return 1; |
| @@ -3145,7 +3261,7 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, | |||
| 3145 | * valid. The downconvert code will retain a PR for this node, | 3261 | * valid. The downconvert code will retain a PR for this node, |
| 3146 | * so there's no further work to do. | 3262 | * so there's no further work to do. |
| 3147 | */ | 3263 | */ |
| 3148 | if (blocking == LKM_PRMODE) | 3264 | if (blocking == DLM_LOCK_PR) |
| 3149 | return UNBLOCK_CONTINUE; | 3265 | return UNBLOCK_CONTINUE; |
| 3150 | 3266 | ||
| 3151 | /* | 3267 | /* |
| @@ -3219,6 +3335,45 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, | |||
| 3219 | return UNBLOCK_CONTINUE_POST; | 3335 | return UNBLOCK_CONTINUE_POST; |
| 3220 | } | 3336 | } |
| 3221 | 3337 | ||
| 3338 | /* | ||
| 3339 | * This is the filesystem locking protocol. It provides the lock handling | ||
| 3340 | * hooks for the underlying DLM. It has a maximum version number. | ||
| 3341 | * The version number allows interoperability with systems running at | ||
| 3342 | * the same major number and an equal or smaller minor number. | ||
| 3343 | * | ||
| 3344 | * Whenever the filesystem does new things with locks (adds or removes a | ||
| 3345 | * lock, orders them differently, does different things underneath a lock), | ||
| 3346 | * the version must be changed. The protocol is negotiated when joining | ||
| 3347 | * the dlm domain. A node may join the domain if its major version is | ||
| 3348 | * identical to all other nodes and its minor version is greater than | ||
| 3349 | * or equal to all other nodes. When its minor version is greater than | ||
| 3350 | * the other nodes, it will run at the minor version specified by the | ||
| 3351 | * other nodes. | ||
| 3352 | * | ||
| 3353 | * If a locking change is made that will not be compatible with older | ||
| 3354 | * versions, the major number must be increased and the minor version set | ||
| 3355 | * to zero. If a change merely adds a behavior that can be disabled when | ||
| 3356 | * speaking to older versions, the minor version must be increased. If a | ||
| 3357 | * change adds a fully backwards compatible change (eg, LVB changes that | ||
| 3358 | * are just ignored by older versions), the version does not need to be | ||
| 3359 | * updated. | ||
| 3360 | */ | ||
| 3361 | static struct ocfs2_locking_protocol lproto = { | ||
| 3362 | .lp_max_version = { | ||
| 3363 | .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, | ||
| 3364 | .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, | ||
| 3365 | }, | ||
| 3366 | .lp_lock_ast = ocfs2_locking_ast, | ||
| 3367 | .lp_blocking_ast = ocfs2_blocking_ast, | ||
| 3368 | .lp_unlock_ast = ocfs2_unlock_ast, | ||
| 3369 | }; | ||
| 3370 | |||
| 3371 | void ocfs2_set_locking_protocol(void) | ||
| 3372 | { | ||
| 3373 | ocfs2_stack_glue_set_locking_protocol(&lproto); | ||
| 3374 | } | ||
| 3375 | |||
| 3376 | |||
| 3222 | static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | 3377 | static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, |
| 3223 | struct ocfs2_lock_res *lockres) | 3378 | struct ocfs2_lock_res *lockres) |
| 3224 | { | 3379 | { |
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index e3cf902404b4..2bb01f09c1b1 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
| @@ -58,7 +58,7 @@ struct ocfs2_meta_lvb { | |||
| 58 | #define OCFS2_LOCK_NONBLOCK (0x04) | 58 | #define OCFS2_LOCK_NONBLOCK (0x04) |
| 59 | 59 | ||
| 60 | int ocfs2_dlm_init(struct ocfs2_super *osb); | 60 | int ocfs2_dlm_init(struct ocfs2_super *osb); |
| 61 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb); | 61 | void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending); |
| 62 | void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); | 62 | void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); |
| 63 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | 63 | void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, |
| 64 | enum ocfs2_lock_type type, | 64 | enum ocfs2_lock_type type, |
| @@ -114,5 +114,6 @@ void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb); | |||
| 114 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); | 114 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); |
| 115 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); | 115 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); |
| 116 | 116 | ||
| 117 | extern const struct dlm_protocol_version ocfs2_locking_protocol; | 117 | /* To set the locking protocol on module initialization */ |
| 118 | void ocfs2_set_locking_protocol(void); | ||
| 118 | #endif /* DLMGLUE_H */ | 119 | #endif /* DLMGLUE_H */ |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ed5d5232e85d..9154c82d3258 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -2242,7 +2242,7 @@ const struct file_operations ocfs2_fops = { | |||
| 2242 | .open = ocfs2_file_open, | 2242 | .open = ocfs2_file_open, |
| 2243 | .aio_read = ocfs2_file_aio_read, | 2243 | .aio_read = ocfs2_file_aio_read, |
| 2244 | .aio_write = ocfs2_file_aio_write, | 2244 | .aio_write = ocfs2_file_aio_write, |
| 2245 | .ioctl = ocfs2_ioctl, | 2245 | .unlocked_ioctl = ocfs2_ioctl, |
| 2246 | #ifdef CONFIG_COMPAT | 2246 | #ifdef CONFIG_COMPAT |
| 2247 | .compat_ioctl = ocfs2_compat_ioctl, | 2247 | .compat_ioctl = ocfs2_compat_ioctl, |
| 2248 | #endif | 2248 | #endif |
| @@ -2258,7 +2258,7 @@ const struct file_operations ocfs2_dops = { | |||
| 2258 | .fsync = ocfs2_sync_file, | 2258 | .fsync = ocfs2_sync_file, |
| 2259 | .release = ocfs2_dir_release, | 2259 | .release = ocfs2_dir_release, |
| 2260 | .open = ocfs2_dir_open, | 2260 | .open = ocfs2_dir_open, |
| 2261 | .ioctl = ocfs2_ioctl, | 2261 | .unlocked_ioctl = ocfs2_ioctl, |
| 2262 | #ifdef CONFIG_COMPAT | 2262 | #ifdef CONFIG_COMPAT |
| 2263 | .compat_ioctl = ocfs2_compat_ioctl, | 2263 | .compat_ioctl = ocfs2_compat_ioctl, |
| 2264 | #endif | 2264 | #endif |
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index 0758daf64da0..c6e7213db868 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c | |||
| @@ -28,9 +28,6 @@ | |||
| 28 | #include <linux/types.h> | 28 | #include <linux/types.h> |
| 29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
| 30 | #include <linux/highmem.h> | 30 | #include <linux/highmem.h> |
| 31 | #include <linux/kmod.h> | ||
| 32 | |||
| 33 | #include <dlm/dlmapi.h> | ||
| 34 | 31 | ||
| 35 | #define MLOG_MASK_PREFIX ML_SUPER | 32 | #define MLOG_MASK_PREFIX ML_SUPER |
| 36 | #include <cluster/masklog.h> | 33 | #include <cluster/masklog.h> |
| @@ -48,7 +45,6 @@ static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, | |||
| 48 | int bit); | 45 | int bit); |
| 49 | static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, | 46 | static inline void __ocfs2_node_map_clear_bit(struct ocfs2_node_map *map, |
| 50 | int bit); | 47 | int bit); |
| 51 | static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map); | ||
| 52 | 48 | ||
| 53 | /* special case -1 for now | 49 | /* special case -1 for now |
| 54 | * TODO: should *really* make sure the calling func never passes -1!! */ | 50 | * TODO: should *really* make sure the calling func never passes -1!! */ |
| @@ -62,23 +58,23 @@ static void ocfs2_node_map_init(struct ocfs2_node_map *map) | |||
| 62 | void ocfs2_init_node_maps(struct ocfs2_super *osb) | 58 | void ocfs2_init_node_maps(struct ocfs2_super *osb) |
| 63 | { | 59 | { |
| 64 | spin_lock_init(&osb->node_map_lock); | 60 | spin_lock_init(&osb->node_map_lock); |
| 65 | ocfs2_node_map_init(&osb->recovery_map); | ||
| 66 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); | 61 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); |
| 67 | } | 62 | } |
| 68 | 63 | ||
| 69 | static void ocfs2_do_node_down(int node_num, | 64 | void ocfs2_do_node_down(int node_num, void *data) |
| 70 | struct ocfs2_super *osb) | ||
| 71 | { | 65 | { |
| 66 | struct ocfs2_super *osb = data; | ||
| 67 | |||
| 72 | BUG_ON(osb->node_num == node_num); | 68 | BUG_ON(osb->node_num == node_num); |
| 73 | 69 | ||
| 74 | mlog(0, "ocfs2: node down event for %d\n", node_num); | 70 | mlog(0, "ocfs2: node down event for %d\n", node_num); |
| 75 | 71 | ||
| 76 | if (!osb->dlm) { | 72 | if (!osb->cconn) { |
| 77 | /* | 73 | /* |
| 78 | * No DLM means we're not even ready to participate yet. | 74 | * No cluster connection means we're not even ready to |
| 79 | * We check the slots after the DLM comes up, so we will | 75 | * participate yet. We check the slots after the cluster |
| 80 | * notice the node death then. We can safely ignore it | 76 | * comes up, so we will notice the node death then. We |
| 81 | * here. | 77 | * can safely ignore it here. |
| 82 | */ | 78 | */ |
| 83 | return; | 79 | return; |
| 84 | } | 80 | } |
| @@ -86,61 +82,6 @@ static void ocfs2_do_node_down(int node_num, | |||
| 86 | ocfs2_recovery_thread(osb, node_num); | 82 | ocfs2_recovery_thread(osb, node_num); |
| 87 | } | 83 | } |
| 88 | 84 | ||
| 89 | /* Called from the dlm when it's about to evict a node. We may also | ||
| 90 | * get a heartbeat callback later. */ | ||
| 91 | static void ocfs2_dlm_eviction_cb(int node_num, | ||
| 92 | void *data) | ||
| 93 | { | ||
| 94 | struct ocfs2_super *osb = (struct ocfs2_super *) data; | ||
| 95 | struct super_block *sb = osb->sb; | ||
| 96 | |||
| 97 | mlog(ML_NOTICE, "device (%u,%u): dlm has evicted node %d\n", | ||
| 98 | MAJOR(sb->s_dev), MINOR(sb->s_dev), node_num); | ||
| 99 | |||
| 100 | ocfs2_do_node_down(node_num, osb); | ||
| 101 | } | ||
| 102 | |||
| 103 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb) | ||
| 104 | { | ||
| 105 | /* Not exactly a heartbeat callback, but leads to essentially | ||
| 106 | * the same path so we set it up here. */ | ||
| 107 | dlm_setup_eviction_cb(&osb->osb_eviction_cb, | ||
| 108 | ocfs2_dlm_eviction_cb, | ||
| 109 | osb); | ||
| 110 | } | ||
| 111 | |||
| 112 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb) | ||
| 113 | { | ||
| 114 | int ret; | ||
| 115 | char *argv[5], *envp[3]; | ||
| 116 | |||
| 117 | if (ocfs2_mount_local(osb)) | ||
| 118 | return; | ||
| 119 | |||
| 120 | if (!osb->uuid_str) { | ||
| 121 | /* This can happen if we don't get far enough in mount... */ | ||
| 122 | mlog(0, "No UUID with which to stop heartbeat!\n\n"); | ||
| 123 | return; | ||
| 124 | } | ||
| 125 | |||
| 126 | argv[0] = (char *)o2nm_get_hb_ctl_path(); | ||
| 127 | argv[1] = "-K"; | ||
| 128 | argv[2] = "-u"; | ||
| 129 | argv[3] = osb->uuid_str; | ||
| 130 | argv[4] = NULL; | ||
| 131 | |||
| 132 | mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]); | ||
| 133 | |||
| 134 | /* minimal command environment taken from cpu_run_sbin_hotplug */ | ||
| 135 | envp[0] = "HOME=/"; | ||
| 136 | envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; | ||
| 137 | envp[2] = NULL; | ||
| 138 | |||
| 139 | ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); | ||
| 140 | if (ret < 0) | ||
| 141 | mlog_errno(ret); | ||
| 142 | } | ||
| 143 | |||
| 144 | static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, | 85 | static inline void __ocfs2_node_map_set_bit(struct ocfs2_node_map *map, |
| 145 | int bit) | 86 | int bit) |
| 146 | { | 87 | { |
| @@ -192,112 +133,3 @@ int ocfs2_node_map_test_bit(struct ocfs2_super *osb, | |||
| 192 | return ret; | 133 | return ret; |
| 193 | } | 134 | } |
| 194 | 135 | ||
| 195 | static inline int __ocfs2_node_map_is_empty(struct ocfs2_node_map *map) | ||
| 196 | { | ||
| 197 | int bit; | ||
| 198 | bit = find_next_bit(map->map, map->num_nodes, 0); | ||
| 199 | if (bit < map->num_nodes) | ||
| 200 | return 0; | ||
| 201 | return 1; | ||
| 202 | } | ||
| 203 | |||
| 204 | int ocfs2_node_map_is_empty(struct ocfs2_super *osb, | ||
| 205 | struct ocfs2_node_map *map) | ||
| 206 | { | ||
| 207 | int ret; | ||
| 208 | BUG_ON(map->num_nodes == 0); | ||
| 209 | spin_lock(&osb->node_map_lock); | ||
| 210 | ret = __ocfs2_node_map_is_empty(map); | ||
| 211 | spin_unlock(&osb->node_map_lock); | ||
| 212 | return ret; | ||
| 213 | } | ||
| 214 | |||
| 215 | #if 0 | ||
| 216 | |||
| 217 | static void __ocfs2_node_map_dup(struct ocfs2_node_map *target, | ||
| 218 | struct ocfs2_node_map *from) | ||
| 219 | { | ||
| 220 | BUG_ON(from->num_nodes == 0); | ||
| 221 | ocfs2_node_map_init(target); | ||
| 222 | __ocfs2_node_map_set(target, from); | ||
| 223 | } | ||
| 224 | |||
| 225 | /* returns 1 if bit is the only bit set in target, 0 otherwise */ | ||
| 226 | int ocfs2_node_map_is_only(struct ocfs2_super *osb, | ||
| 227 | struct ocfs2_node_map *target, | ||
| 228 | int bit) | ||
| 229 | { | ||
| 230 | struct ocfs2_node_map temp; | ||
| 231 | int ret; | ||
| 232 | |||
| 233 | spin_lock(&osb->node_map_lock); | ||
| 234 | __ocfs2_node_map_dup(&temp, target); | ||
| 235 | __ocfs2_node_map_clear_bit(&temp, bit); | ||
| 236 | ret = __ocfs2_node_map_is_empty(&temp); | ||
| 237 | spin_unlock(&osb->node_map_lock); | ||
| 238 | |||
| 239 | return ret; | ||
| 240 | } | ||
| 241 | |||
| 242 | static void __ocfs2_node_map_set(struct ocfs2_node_map *target, | ||
| 243 | struct ocfs2_node_map *from) | ||
| 244 | { | ||
| 245 | int num_longs, i; | ||
| 246 | |||
| 247 | BUG_ON(target->num_nodes != from->num_nodes); | ||
| 248 | BUG_ON(target->num_nodes == 0); | ||
| 249 | |||
| 250 | num_longs = BITS_TO_LONGS(target->num_nodes); | ||
| 251 | for (i = 0; i < num_longs; i++) | ||
| 252 | target->map[i] = from->map[i]; | ||
| 253 | } | ||
| 254 | |||
| 255 | #endif /* 0 */ | ||
| 256 | |||
| 257 | /* Returns whether the recovery bit was actually set - it may not be | ||
| 258 | * if a node is still marked as needing recovery */ | ||
| 259 | int ocfs2_recovery_map_set(struct ocfs2_super *osb, | ||
| 260 | int num) | ||
| 261 | { | ||
| 262 | int set = 0; | ||
| 263 | |||
| 264 | spin_lock(&osb->node_map_lock); | ||
| 265 | |||
| 266 | if (!test_bit(num, osb->recovery_map.map)) { | ||
| 267 | __ocfs2_node_map_set_bit(&osb->recovery_map, num); | ||
| 268 | set = 1; | ||
| 269 | } | ||
| 270 | |||
| 271 | spin_unlock(&osb->node_map_lock); | ||
| 272 | |||
| 273 | return set; | ||
| 274 | } | ||
| 275 | |||
| 276 | void ocfs2_recovery_map_clear(struct ocfs2_super *osb, | ||
| 277 | int num) | ||
| 278 | { | ||
| 279 | ocfs2_node_map_clear_bit(osb, &osb->recovery_map, num); | ||
| 280 | } | ||
| 281 | |||
| 282 | int ocfs2_node_map_iterate(struct ocfs2_super *osb, | ||
| 283 | struct ocfs2_node_map *map, | ||
| 284 | int idx) | ||
| 285 | { | ||
| 286 | int i = idx; | ||
| 287 | |||
| 288 | idx = O2NM_INVALID_NODE_NUM; | ||
| 289 | spin_lock(&osb->node_map_lock); | ||
| 290 | if ((i != O2NM_INVALID_NODE_NUM) && | ||
| 291 | (i >= 0) && | ||
| 292 | (i < map->num_nodes)) { | ||
| 293 | while(i < map->num_nodes) { | ||
| 294 | if (test_bit(i, map->map)) { | ||
| 295 | idx = i; | ||
| 296 | break; | ||
| 297 | } | ||
| 298 | i++; | ||
| 299 | } | ||
| 300 | } | ||
| 301 | spin_unlock(&osb->node_map_lock); | ||
| 302 | return idx; | ||
| 303 | } | ||
diff --git a/fs/ocfs2/heartbeat.h b/fs/ocfs2/heartbeat.h index eac63aed7611..74b9c5dda28d 100644 --- a/fs/ocfs2/heartbeat.h +++ b/fs/ocfs2/heartbeat.h | |||
| @@ -28,13 +28,10 @@ | |||
| 28 | 28 | ||
| 29 | void ocfs2_init_node_maps(struct ocfs2_super *osb); | 29 | void ocfs2_init_node_maps(struct ocfs2_super *osb); |
| 30 | 30 | ||
| 31 | void ocfs2_setup_hb_callbacks(struct ocfs2_super *osb); | 31 | void ocfs2_do_node_down(int node_num, void *data); |
| 32 | void ocfs2_stop_heartbeat(struct ocfs2_super *osb); | ||
| 33 | 32 | ||
| 34 | /* node map functions - used to keep track of mounted and in-recovery | 33 | /* node map functions - used to keep track of mounted and in-recovery |
| 35 | * nodes. */ | 34 | * nodes. */ |
| 36 | int ocfs2_node_map_is_empty(struct ocfs2_super *osb, | ||
| 37 | struct ocfs2_node_map *map); | ||
| 38 | void ocfs2_node_map_set_bit(struct ocfs2_super *osb, | 35 | void ocfs2_node_map_set_bit(struct ocfs2_super *osb, |
| 39 | struct ocfs2_node_map *map, | 36 | struct ocfs2_node_map *map, |
| 40 | int bit); | 37 | int bit); |
| @@ -44,17 +41,5 @@ void ocfs2_node_map_clear_bit(struct ocfs2_super *osb, | |||
| 44 | int ocfs2_node_map_test_bit(struct ocfs2_super *osb, | 41 | int ocfs2_node_map_test_bit(struct ocfs2_super *osb, |
| 45 | struct ocfs2_node_map *map, | 42 | struct ocfs2_node_map *map, |
| 46 | int bit); | 43 | int bit); |
| 47 | int ocfs2_node_map_iterate(struct ocfs2_super *osb, | ||
| 48 | struct ocfs2_node_map *map, | ||
| 49 | int idx); | ||
| 50 | static inline int ocfs2_node_map_first_set_bit(struct ocfs2_super *osb, | ||
| 51 | struct ocfs2_node_map *map) | ||
| 52 | { | ||
| 53 | return ocfs2_node_map_iterate(osb, map, 0); | ||
| 54 | } | ||
| 55 | int ocfs2_recovery_map_set(struct ocfs2_super *osb, | ||
| 56 | int num); | ||
| 57 | void ocfs2_recovery_map_clear(struct ocfs2_super *osb, | ||
| 58 | int num); | ||
| 59 | 44 | ||
| 60 | #endif /* OCFS2_HEARTBEAT_H */ | 45 | #endif /* OCFS2_HEARTBEAT_H */ |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 5177fba5162b..b413166dd163 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | 7 | ||
| 8 | #include <linux/fs.h> | 8 | #include <linux/fs.h> |
| 9 | #include <linux/mount.h> | 9 | #include <linux/mount.h> |
| 10 | #include <linux/smp_lock.h> | ||
| 10 | 11 | ||
| 11 | #define MLOG_MASK_PREFIX ML_INODE | 12 | #define MLOG_MASK_PREFIX ML_INODE |
| 12 | #include <cluster/masklog.h> | 13 | #include <cluster/masklog.h> |
| @@ -112,9 +113,9 @@ bail: | |||
| 112 | return status; | 113 | return status; |
| 113 | } | 114 | } |
| 114 | 115 | ||
| 115 | int ocfs2_ioctl(struct inode * inode, struct file * filp, | 116 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
| 116 | unsigned int cmd, unsigned long arg) | ||
| 117 | { | 117 | { |
| 118 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
| 118 | unsigned int flags; | 119 | unsigned int flags; |
| 119 | int new_clusters; | 120 | int new_clusters; |
| 120 | int status; | 121 | int status; |
| @@ -168,9 +169,6 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp, | |||
| 168 | #ifdef CONFIG_COMPAT | 169 | #ifdef CONFIG_COMPAT |
| 169 | long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | 170 | long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) |
| 170 | { | 171 | { |
| 171 | struct inode *inode = file->f_path.dentry->d_inode; | ||
| 172 | int ret; | ||
| 173 | |||
| 174 | switch (cmd) { | 172 | switch (cmd) { |
| 175 | case OCFS2_IOC32_GETFLAGS: | 173 | case OCFS2_IOC32_GETFLAGS: |
| 176 | cmd = OCFS2_IOC_GETFLAGS; | 174 | cmd = OCFS2_IOC_GETFLAGS; |
| @@ -190,9 +188,6 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
| 190 | return -ENOIOCTLCMD; | 188 | return -ENOIOCTLCMD; |
| 191 | } | 189 | } |
| 192 | 190 | ||
| 193 | lock_kernel(); | 191 | return ocfs2_ioctl(file, cmd, arg); |
| 194 | ret = ocfs2_ioctl(inode, file, cmd, arg); | ||
| 195 | unlock_kernel(); | ||
| 196 | return ret; | ||
| 197 | } | 192 | } |
| 198 | #endif | 193 | #endif |
diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h index 4d6c4f430d0d..cf9a5ee30fef 100644 --- a/fs/ocfs2/ioctl.h +++ b/fs/ocfs2/ioctl.h | |||
| @@ -10,8 +10,7 @@ | |||
| 10 | #ifndef OCFS2_IOCTL_H | 10 | #ifndef OCFS2_IOCTL_H |
| 11 | #define OCFS2_IOCTL_H | 11 | #define OCFS2_IOCTL_H |
| 12 | 12 | ||
| 13 | int ocfs2_ioctl(struct inode * inode, struct file * filp, | 13 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); |
| 14 | unsigned int cmd, unsigned long arg); | ||
| 15 | long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg); | 14 | long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg); |
| 16 | 15 | ||
| 17 | #endif /* OCFS2_IOCTL_H */ | 16 | #endif /* OCFS2_IOCTL_H */ |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index f31c7e8c19c3..9698338adc39 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
| @@ -64,6 +64,137 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
| 64 | int slot); | 64 | int slot); |
| 65 | static int ocfs2_commit_thread(void *arg); | 65 | static int ocfs2_commit_thread(void *arg); |
| 66 | 66 | ||
| 67 | |||
| 68 | /* | ||
| 69 | * The recovery_list is a simple linked list of node numbers to recover. | ||
| 70 | * It is protected by the recovery_lock. | ||
| 71 | */ | ||
| 72 | |||
| 73 | struct ocfs2_recovery_map { | ||
| 74 | unsigned int rm_used; | ||
| 75 | unsigned int *rm_entries; | ||
| 76 | }; | ||
| 77 | |||
| 78 | int ocfs2_recovery_init(struct ocfs2_super *osb) | ||
| 79 | { | ||
| 80 | struct ocfs2_recovery_map *rm; | ||
| 81 | |||
| 82 | mutex_init(&osb->recovery_lock); | ||
| 83 | osb->disable_recovery = 0; | ||
| 84 | osb->recovery_thread_task = NULL; | ||
| 85 | init_waitqueue_head(&osb->recovery_event); | ||
| 86 | |||
| 87 | rm = kzalloc(sizeof(struct ocfs2_recovery_map) + | ||
| 88 | osb->max_slots * sizeof(unsigned int), | ||
| 89 | GFP_KERNEL); | ||
| 90 | if (!rm) { | ||
| 91 | mlog_errno(-ENOMEM); | ||
| 92 | return -ENOMEM; | ||
| 93 | } | ||
| 94 | |||
| 95 | rm->rm_entries = (unsigned int *)((char *)rm + | ||
| 96 | sizeof(struct ocfs2_recovery_map)); | ||
| 97 | osb->recovery_map = rm; | ||
| 98 | |||
| 99 | return 0; | ||
| 100 | } | ||
| 101 | |||
| 102 | /* we can't grab the goofy sem lock from inside wait_event, so we use | ||
| 103 | * memory barriers to make sure that we'll see the null task before | ||
| 104 | * being woken up */ | ||
| 105 | static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) | ||
| 106 | { | ||
| 107 | mb(); | ||
| 108 | return osb->recovery_thread_task != NULL; | ||
| 109 | } | ||
| 110 | |||
| 111 | void ocfs2_recovery_exit(struct ocfs2_super *osb) | ||
| 112 | { | ||
| 113 | struct ocfs2_recovery_map *rm; | ||
| 114 | |||
| 115 | /* disable any new recovery threads and wait for any currently | ||
| 116 | * running ones to exit. Do this before setting the vol_state. */ | ||
| 117 | mutex_lock(&osb->recovery_lock); | ||
| 118 | osb->disable_recovery = 1; | ||
| 119 | mutex_unlock(&osb->recovery_lock); | ||
| 120 | wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); | ||
| 121 | |||
| 122 | /* At this point, we know that no more recovery threads can be | ||
| 123 | * launched, so wait for any recovery completion work to | ||
| 124 | * complete. */ | ||
| 125 | flush_workqueue(ocfs2_wq); | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Now that recovery is shut down, and the osb is about to be | ||
| 129 | * freed, the osb_lock is not taken here. | ||
| 130 | */ | ||
| 131 | rm = osb->recovery_map; | ||
| 132 | /* XXX: Should we bug if there are dirty entries? */ | ||
| 133 | |||
| 134 | kfree(rm); | ||
| 135 | } | ||
| 136 | |||
| 137 | static int __ocfs2_recovery_map_test(struct ocfs2_super *osb, | ||
| 138 | unsigned int node_num) | ||
| 139 | { | ||
| 140 | int i; | ||
| 141 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
| 142 | |||
| 143 | assert_spin_locked(&osb->osb_lock); | ||
| 144 | |||
| 145 | for (i = 0; i < rm->rm_used; i++) { | ||
| 146 | if (rm->rm_entries[i] == node_num) | ||
| 147 | return 1; | ||
| 148 | } | ||
| 149 | |||
| 150 | return 0; | ||
| 151 | } | ||
| 152 | |||
| 153 | /* Behaves like test-and-set. Returns the previous value */ | ||
| 154 | static int ocfs2_recovery_map_set(struct ocfs2_super *osb, | ||
| 155 | unsigned int node_num) | ||
| 156 | { | ||
| 157 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
| 158 | |||
| 159 | spin_lock(&osb->osb_lock); | ||
| 160 | if (__ocfs2_recovery_map_test(osb, node_num)) { | ||
| 161 | spin_unlock(&osb->osb_lock); | ||
| 162 | return 1; | ||
| 163 | } | ||
| 164 | |||
| 165 | /* XXX: Can this be exploited? Not from o2dlm... */ | ||
| 166 | BUG_ON(rm->rm_used >= osb->max_slots); | ||
| 167 | |||
| 168 | rm->rm_entries[rm->rm_used] = node_num; | ||
| 169 | rm->rm_used++; | ||
| 170 | spin_unlock(&osb->osb_lock); | ||
| 171 | |||
| 172 | return 0; | ||
| 173 | } | ||
| 174 | |||
| 175 | static void ocfs2_recovery_map_clear(struct ocfs2_super *osb, | ||
| 176 | unsigned int node_num) | ||
| 177 | { | ||
| 178 | int i; | ||
| 179 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
| 180 | |||
| 181 | spin_lock(&osb->osb_lock); | ||
| 182 | |||
| 183 | for (i = 0; i < rm->rm_used; i++) { | ||
| 184 | if (rm->rm_entries[i] == node_num) | ||
| 185 | break; | ||
| 186 | } | ||
| 187 | |||
| 188 | if (i < rm->rm_used) { | ||
| 189 | /* XXX: be careful with the pointer math */ | ||
| 190 | memmove(&(rm->rm_entries[i]), &(rm->rm_entries[i + 1]), | ||
| 191 | (rm->rm_used - i - 1) * sizeof(unsigned int)); | ||
| 192 | rm->rm_used--; | ||
| 193 | } | ||
| 194 | |||
| 195 | spin_unlock(&osb->osb_lock); | ||
| 196 | } | ||
| 197 | |||
| 67 | static int ocfs2_commit_cache(struct ocfs2_super *osb) | 198 | static int ocfs2_commit_cache(struct ocfs2_super *osb) |
| 68 | { | 199 | { |
| 69 | int status = 0; | 200 | int status = 0; |
| @@ -586,8 +717,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local) | |||
| 586 | 717 | ||
| 587 | mlog_entry_void(); | 718 | mlog_entry_void(); |
| 588 | 719 | ||
| 589 | if (!journal) | 720 | BUG_ON(!journal); |
| 590 | BUG(); | ||
| 591 | 721 | ||
| 592 | osb = journal->j_osb; | 722 | osb = journal->j_osb; |
| 593 | 723 | ||
| @@ -650,6 +780,23 @@ bail: | |||
| 650 | return status; | 780 | return status; |
| 651 | } | 781 | } |
| 652 | 782 | ||
| 783 | static int ocfs2_recovery_completed(struct ocfs2_super *osb) | ||
| 784 | { | ||
| 785 | int empty; | ||
| 786 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
| 787 | |||
| 788 | spin_lock(&osb->osb_lock); | ||
| 789 | empty = (rm->rm_used == 0); | ||
| 790 | spin_unlock(&osb->osb_lock); | ||
| 791 | |||
| 792 | return empty; | ||
| 793 | } | ||
| 794 | |||
| 795 | void ocfs2_wait_for_recovery(struct ocfs2_super *osb) | ||
| 796 | { | ||
| 797 | wait_event(osb->recovery_event, ocfs2_recovery_completed(osb)); | ||
| 798 | } | ||
| 799 | |||
| 653 | /* | 800 | /* |
| 654 | * JBD Might read a cached version of another nodes journal file. We | 801 | * JBD Might read a cached version of another nodes journal file. We |
| 655 | * don't want this as this file changes often and we get no | 802 | * don't want this as this file changes often and we get no |
| @@ -848,6 +995,7 @@ static int __ocfs2_recovery_thread(void *arg) | |||
| 848 | { | 995 | { |
| 849 | int status, node_num; | 996 | int status, node_num; |
| 850 | struct ocfs2_super *osb = arg; | 997 | struct ocfs2_super *osb = arg; |
| 998 | struct ocfs2_recovery_map *rm = osb->recovery_map; | ||
| 851 | 999 | ||
| 852 | mlog_entry_void(); | 1000 | mlog_entry_void(); |
| 853 | 1001 | ||
| @@ -863,26 +1011,29 @@ restart: | |||
| 863 | goto bail; | 1011 | goto bail; |
| 864 | } | 1012 | } |
| 865 | 1013 | ||
| 866 | while(!ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { | 1014 | spin_lock(&osb->osb_lock); |
| 867 | node_num = ocfs2_node_map_first_set_bit(osb, | 1015 | while (rm->rm_used) { |
| 868 | &osb->recovery_map); | 1016 | /* It's always safe to remove entry zero, as we won't |
| 869 | if (node_num == O2NM_INVALID_NODE_NUM) { | 1017 | * clear it until ocfs2_recover_node() has succeeded. */ |
| 870 | mlog(0, "Out of nodes to recover.\n"); | 1018 | node_num = rm->rm_entries[0]; |
| 871 | break; | 1019 | spin_unlock(&osb->osb_lock); |
| 872 | } | ||
| 873 | 1020 | ||
| 874 | status = ocfs2_recover_node(osb, node_num); | 1021 | status = ocfs2_recover_node(osb, node_num); |
| 875 | if (status < 0) { | 1022 | if (!status) { |
| 1023 | ocfs2_recovery_map_clear(osb, node_num); | ||
| 1024 | } else { | ||
| 876 | mlog(ML_ERROR, | 1025 | mlog(ML_ERROR, |
| 877 | "Error %d recovering node %d on device (%u,%u)!\n", | 1026 | "Error %d recovering node %d on device (%u,%u)!\n", |
| 878 | status, node_num, | 1027 | status, node_num, |
| 879 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 1028 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); |
| 880 | mlog(ML_ERROR, "Volume requires unmount.\n"); | 1029 | mlog(ML_ERROR, "Volume requires unmount.\n"); |
| 881 | continue; | ||
| 882 | } | 1030 | } |
| 883 | 1031 | ||
| 884 | ocfs2_recovery_map_clear(osb, node_num); | 1032 | spin_lock(&osb->osb_lock); |
| 885 | } | 1033 | } |
| 1034 | spin_unlock(&osb->osb_lock); | ||
| 1035 | mlog(0, "All nodes recovered\n"); | ||
| 1036 | |||
| 886 | ocfs2_super_unlock(osb, 1); | 1037 | ocfs2_super_unlock(osb, 1); |
| 887 | 1038 | ||
| 888 | /* We always run recovery on our own orphan dir - the dead | 1039 | /* We always run recovery on our own orphan dir - the dead |
| @@ -893,8 +1044,7 @@ restart: | |||
| 893 | 1044 | ||
| 894 | bail: | 1045 | bail: |
| 895 | mutex_lock(&osb->recovery_lock); | 1046 | mutex_lock(&osb->recovery_lock); |
| 896 | if (!status && | 1047 | if (!status && !ocfs2_recovery_completed(osb)) { |
| 897 | !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { | ||
| 898 | mutex_unlock(&osb->recovery_lock); | 1048 | mutex_unlock(&osb->recovery_lock); |
| 899 | goto restart; | 1049 | goto restart; |
| 900 | } | 1050 | } |
| @@ -924,8 +1074,8 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) | |||
| 924 | 1074 | ||
| 925 | /* People waiting on recovery will wait on | 1075 | /* People waiting on recovery will wait on |
| 926 | * the recovery map to empty. */ | 1076 | * the recovery map to empty. */ |
| 927 | if (!ocfs2_recovery_map_set(osb, node_num)) | 1077 | if (ocfs2_recovery_map_set(osb, node_num)) |
| 928 | mlog(0, "node %d already be in recovery.\n", node_num); | 1078 | mlog(0, "node %d already in recovery map.\n", node_num); |
| 929 | 1079 | ||
| 930 | mlog(0, "starting recovery thread...\n"); | 1080 | mlog(0, "starting recovery thread...\n"); |
| 931 | 1081 | ||
| @@ -1079,7 +1229,6 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, | |||
| 1079 | { | 1229 | { |
| 1080 | int status = 0; | 1230 | int status = 0; |
| 1081 | int slot_num; | 1231 | int slot_num; |
| 1082 | struct ocfs2_slot_info *si = osb->slot_info; | ||
| 1083 | struct ocfs2_dinode *la_copy = NULL; | 1232 | struct ocfs2_dinode *la_copy = NULL; |
| 1084 | struct ocfs2_dinode *tl_copy = NULL; | 1233 | struct ocfs2_dinode *tl_copy = NULL; |
| 1085 | 1234 | ||
| @@ -1092,8 +1241,8 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, | |||
| 1092 | * case we should've called ocfs2_journal_load instead. */ | 1241 | * case we should've called ocfs2_journal_load instead. */ |
| 1093 | BUG_ON(osb->node_num == node_num); | 1242 | BUG_ON(osb->node_num == node_num); |
| 1094 | 1243 | ||
| 1095 | slot_num = ocfs2_node_num_to_slot(si, node_num); | 1244 | slot_num = ocfs2_node_num_to_slot(osb, node_num); |
| 1096 | if (slot_num == OCFS2_INVALID_SLOT) { | 1245 | if (slot_num == -ENOENT) { |
| 1097 | status = 0; | 1246 | status = 0; |
| 1098 | mlog(0, "no slot for this node, so no recovery required.\n"); | 1247 | mlog(0, "no slot for this node, so no recovery required.\n"); |
| 1099 | goto done; | 1248 | goto done; |
| @@ -1123,8 +1272,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, | |||
| 1123 | 1272 | ||
| 1124 | /* Likewise, this would be a strange but ultimately not so | 1273 | /* Likewise, this would be a strange but ultimately not so |
| 1125 | * harmful place to get an error... */ | 1274 | * harmful place to get an error... */ |
| 1126 | ocfs2_clear_slot(si, slot_num); | 1275 | status = ocfs2_clear_slot(osb, slot_num); |
| 1127 | status = ocfs2_update_disk_slots(osb, si); | ||
| 1128 | if (status < 0) | 1276 | if (status < 0) |
| 1129 | mlog_errno(status); | 1277 | mlog_errno(status); |
| 1130 | 1278 | ||
| @@ -1184,23 +1332,24 @@ bail: | |||
| 1184 | * slot info struct has been updated from disk. */ | 1332 | * slot info struct has been updated from disk. */ |
| 1185 | int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | 1333 | int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) |
| 1186 | { | 1334 | { |
| 1187 | int status, i, node_num; | 1335 | unsigned int node_num; |
| 1188 | struct ocfs2_slot_info *si = osb->slot_info; | 1336 | int status, i; |
| 1189 | 1337 | ||
| 1190 | /* This is called with the super block cluster lock, so we | 1338 | /* This is called with the super block cluster lock, so we |
| 1191 | * know that the slot map can't change underneath us. */ | 1339 | * know that the slot map can't change underneath us. */ |
| 1192 | 1340 | ||
| 1193 | spin_lock(&si->si_lock); | 1341 | spin_lock(&osb->osb_lock); |
| 1194 | for(i = 0; i < si->si_num_slots; i++) { | 1342 | for (i = 0; i < osb->max_slots; i++) { |
| 1195 | if (i == osb->slot_num) | 1343 | if (i == osb->slot_num) |
| 1196 | continue; | 1344 | continue; |
| 1197 | if (ocfs2_is_empty_slot(si, i)) | 1345 | |
| 1346 | status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); | ||
| 1347 | if (status == -ENOENT) | ||
| 1198 | continue; | 1348 | continue; |
| 1199 | 1349 | ||
| 1200 | node_num = si->si_global_node_nums[i]; | 1350 | if (__ocfs2_recovery_map_test(osb, node_num)) |
| 1201 | if (ocfs2_node_map_test_bit(osb, &osb->recovery_map, node_num)) | ||
| 1202 | continue; | 1351 | continue; |
| 1203 | spin_unlock(&si->si_lock); | 1352 | spin_unlock(&osb->osb_lock); |
| 1204 | 1353 | ||
| 1205 | /* Ok, we have a slot occupied by another node which | 1354 | /* Ok, we have a slot occupied by another node which |
| 1206 | * is not in the recovery map. We trylock his journal | 1355 | * is not in the recovery map. We trylock his journal |
| @@ -1216,9 +1365,9 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
| 1216 | goto bail; | 1365 | goto bail; |
| 1217 | } | 1366 | } |
| 1218 | 1367 | ||
| 1219 | spin_lock(&si->si_lock); | 1368 | spin_lock(&osb->osb_lock); |
| 1220 | } | 1369 | } |
| 1221 | spin_unlock(&si->si_lock); | 1370 | spin_unlock(&osb->osb_lock); |
| 1222 | 1371 | ||
| 1223 | status = 0; | 1372 | status = 0; |
| 1224 | bail: | 1373 | bail: |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 220f3e818e78..db82be2532ed 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
| @@ -134,6 +134,10 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb, | |||
| 134 | 134 | ||
| 135 | /* Exported only for the journal struct init code in super.c. Do not call. */ | 135 | /* Exported only for the journal struct init code in super.c. Do not call. */ |
| 136 | void ocfs2_complete_recovery(struct work_struct *work); | 136 | void ocfs2_complete_recovery(struct work_struct *work); |
| 137 | void ocfs2_wait_for_recovery(struct ocfs2_super *osb); | ||
| 138 | |||
| 139 | int ocfs2_recovery_init(struct ocfs2_super *osb); | ||
| 140 | void ocfs2_recovery_exit(struct ocfs2_super *osb); | ||
| 137 | 141 | ||
| 138 | /* | 142 | /* |
| 139 | * Journal Control: | 143 | * Journal Control: |
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index ab83fd562429..ce0dc147602a 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
| @@ -447,6 +447,8 @@ out_mutex: | |||
| 447 | iput(main_bm_inode); | 447 | iput(main_bm_inode); |
| 448 | 448 | ||
| 449 | out: | 449 | out: |
| 450 | if (!status) | ||
| 451 | ocfs2_init_inode_steal_slot(osb); | ||
| 450 | mlog_exit(status); | 452 | mlog_exit(status); |
| 451 | return status; | 453 | return status; |
| 452 | } | 454 | } |
| @@ -523,6 +525,8 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, | |||
| 523 | } | 525 | } |
| 524 | 526 | ||
| 525 | ac->ac_inode = local_alloc_inode; | 527 | ac->ac_inode = local_alloc_inode; |
| 528 | /* We should never use localalloc from another slot */ | ||
| 529 | ac->ac_alloc_slot = osb->slot_num; | ||
| 526 | ac->ac_which = OCFS2_AC_USE_LOCAL; | 530 | ac->ac_which = OCFS2_AC_USE_LOCAL; |
| 527 | get_bh(osb->local_alloc_bh); | 531 | get_bh(osb->local_alloc_bh); |
| 528 | ac->ac_bh = osb->local_alloc_bh; | 532 | ac->ac_bh = osb->local_alloc_bh; |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index ae9ad9587516..d5d808fe0140 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
| @@ -424,7 +424,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, | |||
| 424 | fe->i_fs_generation = cpu_to_le32(osb->fs_generation); | 424 | fe->i_fs_generation = cpu_to_le32(osb->fs_generation); |
| 425 | fe->i_blkno = cpu_to_le64(fe_blkno); | 425 | fe->i_blkno = cpu_to_le64(fe_blkno); |
| 426 | fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); | 426 | fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); |
| 427 | fe->i_suballoc_slot = cpu_to_le16(osb->slot_num); | 427 | fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot); |
| 428 | fe->i_uid = cpu_to_le32(current->fsuid); | 428 | fe->i_uid = cpu_to_le32(current->fsuid); |
| 429 | if (dir->i_mode & S_ISGID) { | 429 | if (dir->i_mode & S_ISGID) { |
| 430 | fe->i_gid = cpu_to_le32(dir->i_gid); | 430 | fe->i_gid = cpu_to_le32(dir->i_gid); |
| @@ -997,7 +997,7 @@ static int ocfs2_rename(struct inode *old_dir, | |||
| 997 | * | 997 | * |
| 998 | * And that's why, just like the VFS, we need a file system | 998 | * And that's why, just like the VFS, we need a file system |
| 999 | * rename lock. */ | 999 | * rename lock. */ |
| 1000 | if (old_dentry != new_dentry) { | 1000 | if (old_dir != new_dir && S_ISDIR(old_inode->i_mode)) { |
| 1001 | status = ocfs2_rename_lock(osb); | 1001 | status = ocfs2_rename_lock(osb); |
| 1002 | if (status < 0) { | 1002 | if (status < 0) { |
| 1003 | mlog_errno(status); | 1003 | mlog_errno(status); |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 6546cef212e3..31692379c170 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
| @@ -36,11 +36,8 @@ | |||
| 36 | #include <linux/mutex.h> | 36 | #include <linux/mutex.h> |
| 37 | #include <linux/jbd.h> | 37 | #include <linux/jbd.h> |
| 38 | 38 | ||
| 39 | #include "cluster/nodemanager.h" | 39 | /* For union ocfs2_dlm_lksb */ |
| 40 | #include "cluster/heartbeat.h" | 40 | #include "stackglue.h" |
| 41 | #include "cluster/tcp.h" | ||
| 42 | |||
| 43 | #include "dlm/dlmapi.h" | ||
| 44 | 41 | ||
| 45 | #include "ocfs2_fs.h" | 42 | #include "ocfs2_fs.h" |
| 46 | #include "ocfs2_lockid.h" | 43 | #include "ocfs2_lockid.h" |
| @@ -101,6 +98,9 @@ enum ocfs2_unlock_action { | |||
| 101 | * dropped. */ | 98 | * dropped. */ |
| 102 | #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ | 99 | #define OCFS2_LOCK_QUEUED (0x00000100) /* queued for downconvert */ |
| 103 | #define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */ | 100 | #define OCFS2_LOCK_NOCACHE (0x00000200) /* don't use a holder count */ |
| 101 | #define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a | ||
| 102 | call to dlm_lock. Only | ||
| 103 | exists with BUSY set. */ | ||
| 104 | 104 | ||
| 105 | struct ocfs2_lock_res_ops; | 105 | struct ocfs2_lock_res_ops; |
| 106 | 106 | ||
| @@ -120,13 +120,14 @@ struct ocfs2_lock_res { | |||
| 120 | int l_level; | 120 | int l_level; |
| 121 | unsigned int l_ro_holders; | 121 | unsigned int l_ro_holders; |
| 122 | unsigned int l_ex_holders; | 122 | unsigned int l_ex_holders; |
| 123 | struct dlm_lockstatus l_lksb; | 123 | union ocfs2_dlm_lksb l_lksb; |
| 124 | 124 | ||
| 125 | /* used from AST/BAST funcs. */ | 125 | /* used from AST/BAST funcs. */ |
| 126 | enum ocfs2_ast_action l_action; | 126 | enum ocfs2_ast_action l_action; |
| 127 | enum ocfs2_unlock_action l_unlock_action; | 127 | enum ocfs2_unlock_action l_unlock_action; |
| 128 | int l_requested; | 128 | int l_requested; |
| 129 | int l_blocking; | 129 | int l_blocking; |
| 130 | unsigned int l_pending_gen; | ||
| 130 | 131 | ||
| 131 | wait_queue_head_t l_event; | 132 | wait_queue_head_t l_event; |
| 132 | 133 | ||
| @@ -179,6 +180,8 @@ enum ocfs2_mount_options | |||
| 179 | #define OCFS2_DEFAULT_ATIME_QUANTUM 60 | 180 | #define OCFS2_DEFAULT_ATIME_QUANTUM 60 |
| 180 | 181 | ||
| 181 | struct ocfs2_journal; | 182 | struct ocfs2_journal; |
| 183 | struct ocfs2_slot_info; | ||
| 184 | struct ocfs2_recovery_map; | ||
| 182 | struct ocfs2_super | 185 | struct ocfs2_super |
| 183 | { | 186 | { |
| 184 | struct task_struct *commit_task; | 187 | struct task_struct *commit_task; |
| @@ -190,7 +193,6 @@ struct ocfs2_super | |||
| 190 | struct ocfs2_slot_info *slot_info; | 193 | struct ocfs2_slot_info *slot_info; |
| 191 | 194 | ||
| 192 | spinlock_t node_map_lock; | 195 | spinlock_t node_map_lock; |
| 193 | struct ocfs2_node_map recovery_map; | ||
| 194 | 196 | ||
| 195 | u64 root_blkno; | 197 | u64 root_blkno; |
| 196 | u64 system_dir_blkno; | 198 | u64 system_dir_blkno; |
| @@ -206,25 +208,29 @@ struct ocfs2_super | |||
| 206 | u32 s_feature_incompat; | 208 | u32 s_feature_incompat; |
| 207 | u32 s_feature_ro_compat; | 209 | u32 s_feature_ro_compat; |
| 208 | 210 | ||
| 209 | /* Protects s_next_generaion, osb_flags. Could protect more on | 211 | /* Protects s_next_generation, osb_flags and s_inode_steal_slot. |
| 210 | * osb as it's very short lived. */ | 212 | * Could protect more on osb as it's very short lived. |
| 213 | */ | ||
| 211 | spinlock_t osb_lock; | 214 | spinlock_t osb_lock; |
| 212 | u32 s_next_generation; | 215 | u32 s_next_generation; |
| 213 | unsigned long osb_flags; | 216 | unsigned long osb_flags; |
| 217 | s16 s_inode_steal_slot; | ||
| 218 | atomic_t s_num_inodes_stolen; | ||
| 214 | 219 | ||
| 215 | unsigned long s_mount_opt; | 220 | unsigned long s_mount_opt; |
| 216 | unsigned int s_atime_quantum; | 221 | unsigned int s_atime_quantum; |
| 217 | 222 | ||
| 218 | u16 max_slots; | 223 | unsigned int max_slots; |
| 219 | s16 node_num; | 224 | unsigned int node_num; |
| 220 | s16 slot_num; | 225 | int slot_num; |
| 221 | s16 preferred_slot; | 226 | int preferred_slot; |
| 222 | int s_sectsize_bits; | 227 | int s_sectsize_bits; |
| 223 | int s_clustersize; | 228 | int s_clustersize; |
| 224 | int s_clustersize_bits; | 229 | int s_clustersize_bits; |
| 225 | 230 | ||
| 226 | atomic_t vol_state; | 231 | atomic_t vol_state; |
| 227 | struct mutex recovery_lock; | 232 | struct mutex recovery_lock; |
| 233 | struct ocfs2_recovery_map *recovery_map; | ||
| 228 | struct task_struct *recovery_thread_task; | 234 | struct task_struct *recovery_thread_task; |
| 229 | int disable_recovery; | 235 | int disable_recovery; |
| 230 | wait_queue_head_t checkpoint_event; | 236 | wait_queue_head_t checkpoint_event; |
| @@ -245,12 +251,11 @@ struct ocfs2_super | |||
| 245 | struct ocfs2_alloc_stats alloc_stats; | 251 | struct ocfs2_alloc_stats alloc_stats; |
| 246 | char dev_str[20]; /* "major,minor" of the device */ | 252 | char dev_str[20]; /* "major,minor" of the device */ |
| 247 | 253 | ||
| 248 | struct dlm_ctxt *dlm; | 254 | char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; |
| 255 | struct ocfs2_cluster_connection *cconn; | ||
| 249 | struct ocfs2_lock_res osb_super_lockres; | 256 | struct ocfs2_lock_res osb_super_lockres; |
| 250 | struct ocfs2_lock_res osb_rename_lockres; | 257 | struct ocfs2_lock_res osb_rename_lockres; |
| 251 | struct dlm_eviction_cb osb_eviction_cb; | ||
| 252 | struct ocfs2_dlm_debug *osb_dlm_debug; | 258 | struct ocfs2_dlm_debug *osb_dlm_debug; |
| 253 | struct dlm_protocol_version osb_locking_proto; | ||
| 254 | 259 | ||
| 255 | struct dentry *osb_debug_root; | 260 | struct dentry *osb_debug_root; |
| 256 | 261 | ||
| @@ -367,11 +372,24 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) | |||
| 367 | return ret; | 372 | return ret; |
| 368 | } | 373 | } |
| 369 | 374 | ||
| 375 | static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) | ||
| 376 | { | ||
| 377 | return (osb->s_feature_incompat & | ||
| 378 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK); | ||
| 379 | } | ||
| 380 | |||
| 370 | static inline int ocfs2_mount_local(struct ocfs2_super *osb) | 381 | static inline int ocfs2_mount_local(struct ocfs2_super *osb) |
| 371 | { | 382 | { |
| 372 | return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); | 383 | return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); |
| 373 | } | 384 | } |
| 374 | 385 | ||
| 386 | static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) | ||
| 387 | { | ||
| 388 | return (osb->s_feature_incompat & | ||
| 389 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP); | ||
| 390 | } | ||
| 391 | |||
| 392 | |||
| 375 | #define OCFS2_IS_VALID_DINODE(ptr) \ | 393 | #define OCFS2_IS_VALID_DINODE(ptr) \ |
| 376 | (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) | 394 | (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) |
| 377 | 395 | ||
| @@ -522,6 +540,33 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb) | |||
| 522 | return pages_per_cluster; | 540 | return pages_per_cluster; |
| 523 | } | 541 | } |
| 524 | 542 | ||
| 543 | static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) | ||
| 544 | { | ||
| 545 | spin_lock(&osb->osb_lock); | ||
| 546 | osb->s_inode_steal_slot = OCFS2_INVALID_SLOT; | ||
| 547 | spin_unlock(&osb->osb_lock); | ||
| 548 | atomic_set(&osb->s_num_inodes_stolen, 0); | ||
| 549 | } | ||
| 550 | |||
| 551 | static inline void ocfs2_set_inode_steal_slot(struct ocfs2_super *osb, | ||
| 552 | s16 slot) | ||
| 553 | { | ||
| 554 | spin_lock(&osb->osb_lock); | ||
| 555 | osb->s_inode_steal_slot = slot; | ||
| 556 | spin_unlock(&osb->osb_lock); | ||
| 557 | } | ||
| 558 | |||
| 559 | static inline s16 ocfs2_get_inode_steal_slot(struct ocfs2_super *osb) | ||
| 560 | { | ||
| 561 | s16 slot; | ||
| 562 | |||
| 563 | spin_lock(&osb->osb_lock); | ||
| 564 | slot = osb->s_inode_steal_slot; | ||
| 565 | spin_unlock(&osb->osb_lock); | ||
| 566 | |||
| 567 | return slot; | ||
| 568 | } | ||
| 569 | |||
| 525 | #define ocfs2_set_bit ext2_set_bit | 570 | #define ocfs2_set_bit ext2_set_bit |
| 526 | #define ocfs2_clear_bit ext2_clear_bit | 571 | #define ocfs2_clear_bit ext2_clear_bit |
| 527 | #define ocfs2_test_bit ext2_test_bit | 572 | #define ocfs2_test_bit ext2_test_bit |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 3633edd3982f..52c426665154 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
| @@ -88,7 +88,9 @@ | |||
| 88 | #define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB | 88 | #define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB |
| 89 | #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ | 89 | #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ |
| 90 | | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ | 90 | | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ |
| 91 | | OCFS2_FEATURE_INCOMPAT_INLINE_DATA) | 91 | | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ |
| 92 | | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ | ||
| 93 | | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK) | ||
| 92 | #define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN | 94 | #define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN |
| 93 | 95 | ||
| 94 | /* | 96 | /* |
| @@ -125,6 +127,21 @@ | |||
| 125 | /* Support for data packed into inode blocks */ | 127 | /* Support for data packed into inode blocks */ |
| 126 | #define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040 | 128 | #define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040 |
| 127 | 129 | ||
| 130 | /* Support for the extended slot map */ | ||
| 131 | #define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100 | ||
| 132 | |||
| 133 | |||
| 134 | /* | ||
| 135 | * Support for alternate, userspace cluster stacks. If set, the superblock | ||
| 136 | * field s_cluster_info contains a tag for the alternate stack in use as | ||
| 137 | * well as the name of the cluster being joined. | ||
| 138 | * mount.ocfs2 must pass in a matching stack name. | ||
| 139 | * | ||
| 140 | * If not set, the classic stack will be used. This is compatbile with | ||
| 141 | * all older versions. | ||
| 142 | */ | ||
| 143 | #define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080 | ||
| 144 | |||
| 128 | /* | 145 | /* |
| 129 | * backup superblock flag is used to indicate that this volume | 146 | * backup superblock flag is used to indicate that this volume |
| 130 | * has backup superblocks. | 147 | * has backup superblocks. |
| @@ -267,6 +284,10 @@ struct ocfs2_new_group_input { | |||
| 267 | #define OCFS2_VOL_UUID_LEN 16 | 284 | #define OCFS2_VOL_UUID_LEN 16 |
| 268 | #define OCFS2_MAX_VOL_LABEL_LEN 64 | 285 | #define OCFS2_MAX_VOL_LABEL_LEN 64 |
| 269 | 286 | ||
| 287 | /* The alternate, userspace stack fields */ | ||
| 288 | #define OCFS2_STACK_LABEL_LEN 4 | ||
| 289 | #define OCFS2_CLUSTER_NAME_LEN 16 | ||
| 290 | |||
| 270 | /* Journal limits (in bytes) */ | 291 | /* Journal limits (in bytes) */ |
| 271 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) | 292 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) |
| 272 | 293 | ||
| @@ -475,6 +496,47 @@ struct ocfs2_extent_block | |||
| 475 | }; | 496 | }; |
| 476 | 497 | ||
| 477 | /* | 498 | /* |
| 499 | * On disk slot map for OCFS2. This defines the contents of the "slot_map" | ||
| 500 | * system file. A slot is valid if it contains a node number >= 0. The | ||
| 501 | * value -1 (0xFFFF) is OCFS2_INVALID_SLOT. This marks a slot empty. | ||
| 502 | */ | ||
| 503 | struct ocfs2_slot_map { | ||
| 504 | /*00*/ __le16 sm_slots[0]; | ||
| 505 | /* | ||
| 506 | * Actual on-disk size is one block. OCFS2_MAX_SLOTS is 255, | ||
| 507 | * 255 * sizeof(__le16) == 512B, within the 512B block minimum blocksize. | ||
| 508 | */ | ||
| 509 | }; | ||
| 510 | |||
| 511 | struct ocfs2_extended_slot { | ||
| 512 | /*00*/ __u8 es_valid; | ||
| 513 | __u8 es_reserved1[3]; | ||
| 514 | __le32 es_node_num; | ||
| 515 | /*10*/ | ||
| 516 | }; | ||
| 517 | |||
| 518 | /* | ||
| 519 | * The extended slot map, used when OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP | ||
| 520 | * is set. It separates out the valid marker from the node number, and | ||
| 521 | * has room to grow. Unlike the old slot map, this format is defined by | ||
| 522 | * i_size. | ||
| 523 | */ | ||
| 524 | struct ocfs2_slot_map_extended { | ||
| 525 | /*00*/ struct ocfs2_extended_slot se_slots[0]; | ||
| 526 | /* | ||
| 527 | * Actual size is i_size of the slot_map system file. It should | ||
| 528 | * match s_max_slots * sizeof(struct ocfs2_extended_slot) | ||
| 529 | */ | ||
| 530 | }; | ||
| 531 | |||
| 532 | struct ocfs2_cluster_info { | ||
| 533 | /*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN]; | ||
| 534 | __le32 ci_reserved; | ||
| 535 | /*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN]; | ||
| 536 | /*18*/ | ||
| 537 | }; | ||
| 538 | |||
| 539 | /* | ||
| 478 | * On disk superblock for OCFS2 | 540 | * On disk superblock for OCFS2 |
| 479 | * Note that it is contained inside an ocfs2_dinode, so all offsets | 541 | * Note that it is contained inside an ocfs2_dinode, so all offsets |
| 480 | * are relative to the start of ocfs2_dinode.id2. | 542 | * are relative to the start of ocfs2_dinode.id2. |
| @@ -506,7 +568,20 @@ struct ocfs2_super_block { | |||
| 506 | * group header */ | 568 | * group header */ |
| 507 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ | 569 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ |
| 508 | /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ | 570 | /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ |
| 509 | /*A0*/ | 571 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace |
| 572 | stack. Only valid | ||
| 573 | with INCOMPAT flag. */ | ||
| 574 | /*B8*/ __le64 s_reserved2[17]; /* Fill out superblock */ | ||
| 575 | /*140*/ | ||
| 576 | |||
| 577 | /* | ||
| 578 | * NOTE: As stated above, all offsets are relative to | ||
| 579 | * ocfs2_dinode.id2, which is at 0xC0 in the inode. | ||
| 580 | * 0xC0 + 0x140 = 0x200 or 512 bytes. A superblock must fit within | ||
| 581 | * our smallest blocksize, which is 512 bytes. To ensure this, | ||
| 582 | * we reserve the space in s_reserved2. Anything past s_reserved2 | ||
| 583 | * will not be available on the smallest blocksize. | ||
| 584 | */ | ||
| 510 | }; | 585 | }; |
| 511 | 586 | ||
| 512 | /* | 587 | /* |
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index 86f3e3799c2b..82c200f7a8f1 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h | |||
| @@ -100,7 +100,7 @@ static char *ocfs2_lock_type_strings[] = { | |||
| 100 | static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) | 100 | static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) |
| 101 | { | 101 | { |
| 102 | #ifdef __KERNEL__ | 102 | #ifdef __KERNEL__ |
| 103 | mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type); | 103 | BUG_ON(type >= OCFS2_NUM_LOCK_TYPES); |
| 104 | #endif | 104 | #endif |
| 105 | return ocfs2_lock_type_strings[type]; | 105 | return ocfs2_lock_type_strings[type]; |
| 106 | } | 106 | } |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index 3a50ce555e64..bb5ff8939bf1 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
| @@ -42,81 +42,244 @@ | |||
| 42 | 42 | ||
| 43 | #include "buffer_head_io.h" | 43 | #include "buffer_head_io.h" |
| 44 | 44 | ||
| 45 | static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | 45 | |
| 46 | s16 global); | 46 | struct ocfs2_slot { |
| 47 | static void __ocfs2_fill_slot(struct ocfs2_slot_info *si, | 47 | int sl_valid; |
| 48 | s16 slot_num, | 48 | unsigned int sl_node_num; |
| 49 | s16 node_num); | 49 | }; |
| 50 | 50 | ||
| 51 | /* post the slot information on disk into our slot_info struct. */ | 51 | struct ocfs2_slot_info { |
| 52 | void ocfs2_update_slot_info(struct ocfs2_slot_info *si) | 52 | int si_extended; |
| 53 | int si_slots_per_block; | ||
| 54 | struct inode *si_inode; | ||
| 55 | unsigned int si_blocks; | ||
| 56 | struct buffer_head **si_bh; | ||
| 57 | unsigned int si_num_slots; | ||
| 58 | struct ocfs2_slot *si_slots; | ||
| 59 | }; | ||
| 60 | |||
| 61 | |||
| 62 | static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | ||
| 63 | unsigned int node_num); | ||
| 64 | |||
| 65 | static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si, | ||
| 66 | int slot_num) | ||
| 67 | { | ||
| 68 | BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots)); | ||
| 69 | si->si_slots[slot_num].sl_valid = 0; | ||
| 70 | } | ||
| 71 | |||
| 72 | static void ocfs2_set_slot(struct ocfs2_slot_info *si, | ||
| 73 | int slot_num, unsigned int node_num) | ||
| 74 | { | ||
| 75 | BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots)); | ||
| 76 | |||
| 77 | si->si_slots[slot_num].sl_valid = 1; | ||
| 78 | si->si_slots[slot_num].sl_node_num = node_num; | ||
| 79 | } | ||
| 80 | |||
| 81 | /* This version is for the extended slot map */ | ||
| 82 | static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si) | ||
| 83 | { | ||
| 84 | int b, i, slotno; | ||
| 85 | struct ocfs2_slot_map_extended *se; | ||
| 86 | |||
| 87 | slotno = 0; | ||
| 88 | for (b = 0; b < si->si_blocks; b++) { | ||
| 89 | se = (struct ocfs2_slot_map_extended *)si->si_bh[b]->b_data; | ||
| 90 | for (i = 0; | ||
| 91 | (i < si->si_slots_per_block) && | ||
| 92 | (slotno < si->si_num_slots); | ||
| 93 | i++, slotno++) { | ||
| 94 | if (se->se_slots[i].es_valid) | ||
| 95 | ocfs2_set_slot(si, slotno, | ||
| 96 | le32_to_cpu(se->se_slots[i].es_node_num)); | ||
| 97 | else | ||
| 98 | ocfs2_invalidate_slot(si, slotno); | ||
| 99 | } | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | /* | ||
| 104 | * Post the slot information on disk into our slot_info struct. | ||
| 105 | * Must be protected by osb_lock. | ||
| 106 | */ | ||
| 107 | static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si) | ||
| 53 | { | 108 | { |
| 54 | int i; | 109 | int i; |
| 55 | __le16 *disk_info; | 110 | struct ocfs2_slot_map *sm; |
| 56 | 111 | ||
| 57 | /* we don't read the slot block here as ocfs2_super_lock | 112 | sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data; |
| 58 | * should've made sure we have the most recent copy. */ | ||
| 59 | spin_lock(&si->si_lock); | ||
| 60 | disk_info = (__le16 *) si->si_bh->b_data; | ||
| 61 | 113 | ||
| 62 | for (i = 0; i < si->si_size; i++) | 114 | for (i = 0; i < si->si_num_slots; i++) { |
| 63 | si->si_global_node_nums[i] = le16_to_cpu(disk_info[i]); | 115 | if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT) |
| 116 | ocfs2_invalidate_slot(si, i); | ||
| 117 | else | ||
| 118 | ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i])); | ||
| 119 | } | ||
| 120 | } | ||
| 64 | 121 | ||
| 65 | spin_unlock(&si->si_lock); | 122 | static void ocfs2_update_slot_info(struct ocfs2_slot_info *si) |
| 123 | { | ||
| 124 | /* | ||
| 125 | * The slot data will have been refreshed when ocfs2_super_lock | ||
| 126 | * was taken. | ||
| 127 | */ | ||
| 128 | if (si->si_extended) | ||
| 129 | ocfs2_update_slot_info_extended(si); | ||
| 130 | else | ||
| 131 | ocfs2_update_slot_info_old(si); | ||
| 132 | } | ||
| 133 | |||
| 134 | int ocfs2_refresh_slot_info(struct ocfs2_super *osb) | ||
| 135 | { | ||
| 136 | int ret; | ||
| 137 | struct ocfs2_slot_info *si = osb->slot_info; | ||
| 138 | |||
| 139 | if (si == NULL) | ||
| 140 | return 0; | ||
| 141 | |||
| 142 | BUG_ON(si->si_blocks == 0); | ||
| 143 | BUG_ON(si->si_bh == NULL); | ||
| 144 | |||
| 145 | mlog(0, "Refreshing slot map, reading %u block(s)\n", | ||
| 146 | si->si_blocks); | ||
| 147 | |||
| 148 | /* | ||
| 149 | * We pass -1 as blocknr because we expect all of si->si_bh to | ||
| 150 | * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If | ||
| 151 | * this is not true, the read of -1 (UINT64_MAX) will fail. | ||
| 152 | */ | ||
| 153 | ret = ocfs2_read_blocks(osb, -1, si->si_blocks, si->si_bh, 0, | ||
| 154 | si->si_inode); | ||
| 155 | if (ret == 0) { | ||
| 156 | spin_lock(&osb->osb_lock); | ||
| 157 | ocfs2_update_slot_info(si); | ||
| 158 | spin_unlock(&osb->osb_lock); | ||
| 159 | } | ||
| 160 | |||
| 161 | return ret; | ||
| 66 | } | 162 | } |
| 67 | 163 | ||
| 68 | /* post the our slot info stuff into it's destination bh and write it | 164 | /* post the our slot info stuff into it's destination bh and write it |
| 69 | * out. */ | 165 | * out. */ |
| 70 | int ocfs2_update_disk_slots(struct ocfs2_super *osb, | 166 | static void ocfs2_update_disk_slot_extended(struct ocfs2_slot_info *si, |
| 71 | struct ocfs2_slot_info *si) | 167 | int slot_num, |
| 168 | struct buffer_head **bh) | ||
| 72 | { | 169 | { |
| 73 | int status, i; | 170 | int blkind = slot_num / si->si_slots_per_block; |
| 74 | __le16 *disk_info = (__le16 *) si->si_bh->b_data; | 171 | int slotno = slot_num % si->si_slots_per_block; |
| 172 | struct ocfs2_slot_map_extended *se; | ||
| 173 | |||
| 174 | BUG_ON(blkind >= si->si_blocks); | ||
| 175 | |||
| 176 | se = (struct ocfs2_slot_map_extended *)si->si_bh[blkind]->b_data; | ||
| 177 | se->se_slots[slotno].es_valid = si->si_slots[slot_num].sl_valid; | ||
| 178 | if (si->si_slots[slot_num].sl_valid) | ||
| 179 | se->se_slots[slotno].es_node_num = | ||
| 180 | cpu_to_le32(si->si_slots[slot_num].sl_node_num); | ||
| 181 | *bh = si->si_bh[blkind]; | ||
| 182 | } | ||
| 75 | 183 | ||
| 76 | spin_lock(&si->si_lock); | 184 | static void ocfs2_update_disk_slot_old(struct ocfs2_slot_info *si, |
| 77 | for (i = 0; i < si->si_size; i++) | 185 | int slot_num, |
| 78 | disk_info[i] = cpu_to_le16(si->si_global_node_nums[i]); | 186 | struct buffer_head **bh) |
| 79 | spin_unlock(&si->si_lock); | 187 | { |
| 188 | int i; | ||
| 189 | struct ocfs2_slot_map *sm; | ||
| 190 | |||
| 191 | sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data; | ||
| 192 | for (i = 0; i < si->si_num_slots; i++) { | ||
| 193 | if (si->si_slots[i].sl_valid) | ||
| 194 | sm->sm_slots[i] = | ||
| 195 | cpu_to_le16(si->si_slots[i].sl_node_num); | ||
| 196 | else | ||
| 197 | sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT); | ||
| 198 | } | ||
| 199 | *bh = si->si_bh[0]; | ||
| 200 | } | ||
| 201 | |||
| 202 | static int ocfs2_update_disk_slot(struct ocfs2_super *osb, | ||
| 203 | struct ocfs2_slot_info *si, | ||
| 204 | int slot_num) | ||
| 205 | { | ||
| 206 | int status; | ||
| 207 | struct buffer_head *bh; | ||
| 208 | |||
| 209 | spin_lock(&osb->osb_lock); | ||
| 210 | if (si->si_extended) | ||
| 211 | ocfs2_update_disk_slot_extended(si, slot_num, &bh); | ||
| 212 | else | ||
| 213 | ocfs2_update_disk_slot_old(si, slot_num, &bh); | ||
| 214 | spin_unlock(&osb->osb_lock); | ||
| 80 | 215 | ||
| 81 | status = ocfs2_write_block(osb, si->si_bh, si->si_inode); | 216 | status = ocfs2_write_block(osb, bh, si->si_inode); |
| 82 | if (status < 0) | 217 | if (status < 0) |
| 83 | mlog_errno(status); | 218 | mlog_errno(status); |
| 84 | 219 | ||
| 85 | return status; | 220 | return status; |
| 86 | } | 221 | } |
| 87 | 222 | ||
| 88 | /* try to find global node in the slot info. Returns | 223 | /* |
| 89 | * OCFS2_INVALID_SLOT if nothing is found. */ | 224 | * Calculate how many bytes are needed by the slot map. Returns |
| 90 | static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | 225 | * an error if the slot map file is too small. |
| 91 | s16 global) | 226 | */ |
| 227 | static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb, | ||
| 228 | struct inode *inode, | ||
| 229 | unsigned long long *bytes) | ||
| 92 | { | 230 | { |
| 93 | int i; | 231 | unsigned long long bytes_needed; |
| 94 | s16 ret = OCFS2_INVALID_SLOT; | 232 | |
| 233 | if (ocfs2_uses_extended_slot_map(osb)) { | ||
| 234 | bytes_needed = osb->max_slots * | ||
| 235 | sizeof(struct ocfs2_extended_slot); | ||
| 236 | } else { | ||
| 237 | bytes_needed = osb->max_slots * sizeof(__le16); | ||
| 238 | } | ||
| 239 | if (bytes_needed > i_size_read(inode)) { | ||
| 240 | mlog(ML_ERROR, | ||
| 241 | "Slot map file is too small! (size %llu, needed %llu)\n", | ||
| 242 | i_size_read(inode), bytes_needed); | ||
| 243 | return -ENOSPC; | ||
| 244 | } | ||
| 245 | |||
| 246 | *bytes = bytes_needed; | ||
| 247 | return 0; | ||
| 248 | } | ||
| 249 | |||
| 250 | /* try to find global node in the slot info. Returns -ENOENT | ||
| 251 | * if nothing is found. */ | ||
| 252 | static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | ||
| 253 | unsigned int node_num) | ||
| 254 | { | ||
| 255 | int i, ret = -ENOENT; | ||
| 95 | 256 | ||
| 96 | for(i = 0; i < si->si_num_slots; i++) { | 257 | for(i = 0; i < si->si_num_slots; i++) { |
| 97 | if (global == si->si_global_node_nums[i]) { | 258 | if (si->si_slots[i].sl_valid && |
| 98 | ret = (s16) i; | 259 | (node_num == si->si_slots[i].sl_node_num)) { |
| 260 | ret = i; | ||
| 99 | break; | 261 | break; |
| 100 | } | 262 | } |
| 101 | } | 263 | } |
| 264 | |||
| 102 | return ret; | 265 | return ret; |
| 103 | } | 266 | } |
| 104 | 267 | ||
| 105 | static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, s16 preferred) | 268 | static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, |
| 269 | int preferred) | ||
| 106 | { | 270 | { |
| 107 | int i; | 271 | int i, ret = -ENOSPC; |
| 108 | s16 ret = OCFS2_INVALID_SLOT; | ||
| 109 | 272 | ||
| 110 | if (preferred >= 0 && preferred < si->si_num_slots) { | 273 | if ((preferred >= 0) && (preferred < si->si_num_slots)) { |
| 111 | if (OCFS2_INVALID_SLOT == si->si_global_node_nums[preferred]) { | 274 | if (!si->si_slots[preferred].sl_valid) { |
| 112 | ret = preferred; | 275 | ret = preferred; |
| 113 | goto out; | 276 | goto out; |
| 114 | } | 277 | } |
| 115 | } | 278 | } |
| 116 | 279 | ||
| 117 | for(i = 0; i < si->si_num_slots; i++) { | 280 | for(i = 0; i < si->si_num_slots; i++) { |
| 118 | if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) { | 281 | if (!si->si_slots[i].sl_valid) { |
| 119 | ret = (s16) i; | 282 | ret = i; |
| 120 | break; | 283 | break; |
| 121 | } | 284 | } |
| 122 | } | 285 | } |
| @@ -124,58 +287,155 @@ out: | |||
| 124 | return ret; | 287 | return ret; |
| 125 | } | 288 | } |
| 126 | 289 | ||
| 127 | s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | 290 | int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num) |
| 128 | s16 global) | ||
| 129 | { | 291 | { |
| 130 | s16 ret; | 292 | int slot; |
| 293 | struct ocfs2_slot_info *si = osb->slot_info; | ||
| 131 | 294 | ||
| 132 | spin_lock(&si->si_lock); | 295 | spin_lock(&osb->osb_lock); |
| 133 | ret = __ocfs2_node_num_to_slot(si, global); | 296 | slot = __ocfs2_node_num_to_slot(si, node_num); |
| 134 | spin_unlock(&si->si_lock); | 297 | spin_unlock(&osb->osb_lock); |
| 135 | return ret; | 298 | |
| 299 | return slot; | ||
| 300 | } | ||
| 301 | |||
| 302 | int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num, | ||
| 303 | unsigned int *node_num) | ||
| 304 | { | ||
| 305 | struct ocfs2_slot_info *si = osb->slot_info; | ||
| 306 | |||
| 307 | assert_spin_locked(&osb->osb_lock); | ||
| 308 | |||
| 309 | BUG_ON(slot_num < 0); | ||
| 310 | BUG_ON(slot_num > osb->max_slots); | ||
| 311 | |||
| 312 | if (!si->si_slots[slot_num].sl_valid) | ||
| 313 | return -ENOENT; | ||
| 314 | |||
| 315 | *node_num = si->si_slots[slot_num].sl_node_num; | ||
| 316 | return 0; | ||
| 136 | } | 317 | } |
| 137 | 318 | ||
| 138 | static void __ocfs2_fill_slot(struct ocfs2_slot_info *si, | 319 | static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si) |
| 139 | s16 slot_num, | ||
| 140 | s16 node_num) | ||
| 141 | { | 320 | { |
| 142 | BUG_ON(slot_num == OCFS2_INVALID_SLOT); | 321 | unsigned int i; |
| 143 | BUG_ON(slot_num >= si->si_num_slots); | 322 | |
| 144 | BUG_ON((node_num != O2NM_INVALID_NODE_NUM) && | 323 | if (si == NULL) |
| 145 | (node_num >= O2NM_MAX_NODES)); | 324 | return; |
| 325 | |||
| 326 | if (si->si_inode) | ||
| 327 | iput(si->si_inode); | ||
| 328 | if (si->si_bh) { | ||
| 329 | for (i = 0; i < si->si_blocks; i++) { | ||
| 330 | if (si->si_bh[i]) { | ||
| 331 | brelse(si->si_bh[i]); | ||
| 332 | si->si_bh[i] = NULL; | ||
| 333 | } | ||
| 334 | } | ||
| 335 | kfree(si->si_bh); | ||
| 336 | } | ||
| 146 | 337 | ||
| 147 | si->si_global_node_nums[slot_num] = node_num; | 338 | kfree(si); |
| 148 | } | 339 | } |
| 149 | 340 | ||
| 150 | void ocfs2_clear_slot(struct ocfs2_slot_info *si, | 341 | int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num) |
| 151 | s16 slot_num) | ||
| 152 | { | 342 | { |
| 153 | spin_lock(&si->si_lock); | 343 | struct ocfs2_slot_info *si = osb->slot_info; |
| 154 | __ocfs2_fill_slot(si, slot_num, OCFS2_INVALID_SLOT); | 344 | |
| 155 | spin_unlock(&si->si_lock); | 345 | if (si == NULL) |
| 346 | return 0; | ||
| 347 | |||
| 348 | spin_lock(&osb->osb_lock); | ||
| 349 | ocfs2_invalidate_slot(si, slot_num); | ||
| 350 | spin_unlock(&osb->osb_lock); | ||
| 351 | |||
| 352 | return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num); | ||
| 156 | } | 353 | } |
| 157 | 354 | ||
| 158 | int ocfs2_init_slot_info(struct ocfs2_super *osb) | 355 | static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, |
| 356 | struct ocfs2_slot_info *si) | ||
| 159 | { | 357 | { |
| 160 | int status, i; | 358 | int status = 0; |
| 161 | u64 blkno; | 359 | u64 blkno; |
| 360 | unsigned long long blocks, bytes; | ||
| 361 | unsigned int i; | ||
| 362 | struct buffer_head *bh; | ||
| 363 | |||
| 364 | status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes); | ||
| 365 | if (status) | ||
| 366 | goto bail; | ||
| 367 | |||
| 368 | blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes); | ||
| 369 | BUG_ON(blocks > UINT_MAX); | ||
| 370 | si->si_blocks = blocks; | ||
| 371 | if (!si->si_blocks) | ||
| 372 | goto bail; | ||
| 373 | |||
| 374 | if (si->si_extended) | ||
| 375 | si->si_slots_per_block = | ||
| 376 | (osb->sb->s_blocksize / | ||
| 377 | sizeof(struct ocfs2_extended_slot)); | ||
| 378 | else | ||
| 379 | si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16); | ||
| 380 | |||
| 381 | /* The size checks above should ensure this */ | ||
| 382 | BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks); | ||
| 383 | |||
| 384 | mlog(0, "Slot map needs %u buffers for %llu bytes\n", | ||
| 385 | si->si_blocks, bytes); | ||
| 386 | |||
| 387 | si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks, | ||
| 388 | GFP_KERNEL); | ||
| 389 | if (!si->si_bh) { | ||
| 390 | status = -ENOMEM; | ||
| 391 | mlog_errno(status); | ||
| 392 | goto bail; | ||
| 393 | } | ||
| 394 | |||
| 395 | for (i = 0; i < si->si_blocks; i++) { | ||
| 396 | status = ocfs2_extent_map_get_blocks(si->si_inode, i, | ||
| 397 | &blkno, NULL, NULL); | ||
| 398 | if (status < 0) { | ||
| 399 | mlog_errno(status); | ||
| 400 | goto bail; | ||
| 401 | } | ||
| 402 | |||
| 403 | mlog(0, "Reading slot map block %u at %llu\n", i, | ||
| 404 | (unsigned long long)blkno); | ||
| 405 | |||
| 406 | bh = NULL; /* Acquire a fresh bh */ | ||
| 407 | status = ocfs2_read_block(osb, blkno, &bh, 0, si->si_inode); | ||
| 408 | if (status < 0) { | ||
| 409 | mlog_errno(status); | ||
| 410 | goto bail; | ||
| 411 | } | ||
| 412 | |||
| 413 | si->si_bh[i] = bh; | ||
| 414 | } | ||
| 415 | |||
| 416 | bail: | ||
| 417 | return status; | ||
| 418 | } | ||
| 419 | |||
| 420 | int ocfs2_init_slot_info(struct ocfs2_super *osb) | ||
| 421 | { | ||
| 422 | int status; | ||
| 162 | struct inode *inode = NULL; | 423 | struct inode *inode = NULL; |
| 163 | struct buffer_head *bh = NULL; | ||
| 164 | struct ocfs2_slot_info *si; | 424 | struct ocfs2_slot_info *si; |
| 165 | 425 | ||
| 166 | si = kzalloc(sizeof(struct ocfs2_slot_info), GFP_KERNEL); | 426 | si = kzalloc(sizeof(struct ocfs2_slot_info) + |
| 427 | (sizeof(struct ocfs2_slot) * osb->max_slots), | ||
| 428 | GFP_KERNEL); | ||
| 167 | if (!si) { | 429 | if (!si) { |
| 168 | status = -ENOMEM; | 430 | status = -ENOMEM; |
| 169 | mlog_errno(status); | 431 | mlog_errno(status); |
| 170 | goto bail; | 432 | goto bail; |
| 171 | } | 433 | } |
| 172 | 434 | ||
| 173 | spin_lock_init(&si->si_lock); | 435 | si->si_extended = ocfs2_uses_extended_slot_map(osb); |
| 174 | si->si_num_slots = osb->max_slots; | 436 | si->si_num_slots = osb->max_slots; |
| 175 | si->si_size = OCFS2_MAX_SLOTS; | 437 | si->si_slots = (struct ocfs2_slot *)((char *)si + |
| 176 | 438 | sizeof(struct ocfs2_slot_info)); | |
| 177 | for(i = 0; i < si->si_num_slots; i++) | ||
| 178 | si->si_global_node_nums[i] = OCFS2_INVALID_SLOT; | ||
| 179 | 439 | ||
| 180 | inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE, | 440 | inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE, |
| 181 | OCFS2_INVALID_SLOT); | 441 | OCFS2_INVALID_SLOT); |
| @@ -185,61 +445,53 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb) | |||
| 185 | goto bail; | 445 | goto bail; |
| 186 | } | 446 | } |
| 187 | 447 | ||
| 188 | status = ocfs2_extent_map_get_blocks(inode, 0ULL, &blkno, NULL, NULL); | 448 | si->si_inode = inode; |
| 189 | if (status < 0) { | 449 | status = ocfs2_map_slot_buffers(osb, si); |
| 190 | mlog_errno(status); | ||
| 191 | goto bail; | ||
| 192 | } | ||
| 193 | |||
| 194 | status = ocfs2_read_block(osb, blkno, &bh, 0, inode); | ||
| 195 | if (status < 0) { | 450 | if (status < 0) { |
| 196 | mlog_errno(status); | 451 | mlog_errno(status); |
| 197 | goto bail; | 452 | goto bail; |
| 198 | } | 453 | } |
| 199 | 454 | ||
| 200 | si->si_inode = inode; | 455 | osb->slot_info = (struct ocfs2_slot_info *)si; |
| 201 | si->si_bh = bh; | ||
| 202 | osb->slot_info = si; | ||
| 203 | bail: | 456 | bail: |
| 204 | if (status < 0 && si) | 457 | if (status < 0 && si) |
| 205 | ocfs2_free_slot_info(si); | 458 | __ocfs2_free_slot_info(si); |
| 206 | 459 | ||
| 207 | return status; | 460 | return status; |
| 208 | } | 461 | } |
| 209 | 462 | ||
| 210 | void ocfs2_free_slot_info(struct ocfs2_slot_info *si) | 463 | void ocfs2_free_slot_info(struct ocfs2_super *osb) |
| 211 | { | 464 | { |
| 212 | if (si->si_inode) | 465 | struct ocfs2_slot_info *si = osb->slot_info; |
| 213 | iput(si->si_inode); | 466 | |
| 214 | if (si->si_bh) | 467 | osb->slot_info = NULL; |
| 215 | brelse(si->si_bh); | 468 | __ocfs2_free_slot_info(si); |
| 216 | kfree(si); | ||
| 217 | } | 469 | } |
| 218 | 470 | ||
| 219 | int ocfs2_find_slot(struct ocfs2_super *osb) | 471 | int ocfs2_find_slot(struct ocfs2_super *osb) |
| 220 | { | 472 | { |
| 221 | int status; | 473 | int status; |
| 222 | s16 slot; | 474 | int slot; |
| 223 | struct ocfs2_slot_info *si; | 475 | struct ocfs2_slot_info *si; |
| 224 | 476 | ||
| 225 | mlog_entry_void(); | 477 | mlog_entry_void(); |
| 226 | 478 | ||
| 227 | si = osb->slot_info; | 479 | si = osb->slot_info; |
| 228 | 480 | ||
| 481 | spin_lock(&osb->osb_lock); | ||
| 229 | ocfs2_update_slot_info(si); | 482 | ocfs2_update_slot_info(si); |
| 230 | 483 | ||
| 231 | spin_lock(&si->si_lock); | ||
| 232 | /* search for ourselves first and take the slot if it already | 484 | /* search for ourselves first and take the slot if it already |
| 233 | * exists. Perhaps we need to mark this in a variable for our | 485 | * exists. Perhaps we need to mark this in a variable for our |
| 234 | * own journal recovery? Possibly not, though we certainly | 486 | * own journal recovery? Possibly not, though we certainly |
| 235 | * need to warn to the user */ | 487 | * need to warn to the user */ |
| 236 | slot = __ocfs2_node_num_to_slot(si, osb->node_num); | 488 | slot = __ocfs2_node_num_to_slot(si, osb->node_num); |
| 237 | if (slot == OCFS2_INVALID_SLOT) { | 489 | if (slot < 0) { |
| 238 | /* if no slot yet, then just take 1st available | 490 | /* if no slot yet, then just take 1st available |
| 239 | * one. */ | 491 | * one. */ |
| 240 | slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); | 492 | slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); |
| 241 | if (slot == OCFS2_INVALID_SLOT) { | 493 | if (slot < 0) { |
| 242 | spin_unlock(&si->si_lock); | 494 | spin_unlock(&osb->osb_lock); |
| 243 | mlog(ML_ERROR, "no free slots available!\n"); | 495 | mlog(ML_ERROR, "no free slots available!\n"); |
| 244 | status = -EINVAL; | 496 | status = -EINVAL; |
| 245 | goto bail; | 497 | goto bail; |
| @@ -248,13 +500,13 @@ int ocfs2_find_slot(struct ocfs2_super *osb) | |||
| 248 | mlog(ML_NOTICE, "slot %d is already allocated to this node!\n", | 500 | mlog(ML_NOTICE, "slot %d is already allocated to this node!\n", |
| 249 | slot); | 501 | slot); |
| 250 | 502 | ||
| 251 | __ocfs2_fill_slot(si, slot, osb->node_num); | 503 | ocfs2_set_slot(si, slot, osb->node_num); |
| 252 | osb->slot_num = slot; | 504 | osb->slot_num = slot; |
| 253 | spin_unlock(&si->si_lock); | 505 | spin_unlock(&osb->osb_lock); |
| 254 | 506 | ||
| 255 | mlog(0, "taking node slot %d\n", osb->slot_num); | 507 | mlog(0, "taking node slot %d\n", osb->slot_num); |
| 256 | 508 | ||
| 257 | status = ocfs2_update_disk_slots(osb, si); | 509 | status = ocfs2_update_disk_slot(osb, si, osb->slot_num); |
| 258 | if (status < 0) | 510 | if (status < 0) |
| 259 | mlog_errno(status); | 511 | mlog_errno(status); |
| 260 | 512 | ||
| @@ -265,27 +517,27 @@ bail: | |||
| 265 | 517 | ||
| 266 | void ocfs2_put_slot(struct ocfs2_super *osb) | 518 | void ocfs2_put_slot(struct ocfs2_super *osb) |
| 267 | { | 519 | { |
| 268 | int status; | 520 | int status, slot_num; |
| 269 | struct ocfs2_slot_info *si = osb->slot_info; | 521 | struct ocfs2_slot_info *si = osb->slot_info; |
| 270 | 522 | ||
| 271 | if (!si) | 523 | if (!si) |
| 272 | return; | 524 | return; |
| 273 | 525 | ||
| 526 | spin_lock(&osb->osb_lock); | ||
| 274 | ocfs2_update_slot_info(si); | 527 | ocfs2_update_slot_info(si); |
| 275 | 528 | ||
| 276 | spin_lock(&si->si_lock); | 529 | slot_num = osb->slot_num; |
| 277 | __ocfs2_fill_slot(si, osb->slot_num, OCFS2_INVALID_SLOT); | 530 | ocfs2_invalidate_slot(si, osb->slot_num); |
| 278 | osb->slot_num = OCFS2_INVALID_SLOT; | 531 | osb->slot_num = OCFS2_INVALID_SLOT; |
| 279 | spin_unlock(&si->si_lock); | 532 | spin_unlock(&osb->osb_lock); |
| 280 | 533 | ||
| 281 | status = ocfs2_update_disk_slots(osb, si); | 534 | status = ocfs2_update_disk_slot(osb, si, slot_num); |
| 282 | if (status < 0) { | 535 | if (status < 0) { |
| 283 | mlog_errno(status); | 536 | mlog_errno(status); |
| 284 | goto bail; | 537 | goto bail; |
| 285 | } | 538 | } |
| 286 | 539 | ||
| 287 | bail: | 540 | bail: |
| 288 | osb->slot_info = NULL; | 541 | ocfs2_free_slot_info(osb); |
| 289 | ocfs2_free_slot_info(si); | ||
| 290 | } | 542 | } |
| 291 | 543 | ||
diff --git a/fs/ocfs2/slot_map.h b/fs/ocfs2/slot_map.h index 1025872aaade..601c95fd7003 100644 --- a/fs/ocfs2/slot_map.h +++ b/fs/ocfs2/slot_map.h | |||
| @@ -27,38 +27,18 @@ | |||
| 27 | #ifndef SLOTMAP_H | 27 | #ifndef SLOTMAP_H |
| 28 | #define SLOTMAP_H | 28 | #define SLOTMAP_H |
| 29 | 29 | ||
| 30 | struct ocfs2_slot_info { | ||
| 31 | spinlock_t si_lock; | ||
| 32 | |||
| 33 | struct inode *si_inode; | ||
| 34 | struct buffer_head *si_bh; | ||
| 35 | unsigned int si_num_slots; | ||
| 36 | unsigned int si_size; | ||
| 37 | s16 si_global_node_nums[OCFS2_MAX_SLOTS]; | ||
| 38 | }; | ||
| 39 | |||
| 40 | int ocfs2_init_slot_info(struct ocfs2_super *osb); | 30 | int ocfs2_init_slot_info(struct ocfs2_super *osb); |
| 41 | void ocfs2_free_slot_info(struct ocfs2_slot_info *si); | 31 | void ocfs2_free_slot_info(struct ocfs2_super *osb); |
| 42 | 32 | ||
| 43 | int ocfs2_find_slot(struct ocfs2_super *osb); | 33 | int ocfs2_find_slot(struct ocfs2_super *osb); |
| 44 | void ocfs2_put_slot(struct ocfs2_super *osb); | 34 | void ocfs2_put_slot(struct ocfs2_super *osb); |
| 45 | 35 | ||
| 46 | void ocfs2_update_slot_info(struct ocfs2_slot_info *si); | 36 | int ocfs2_refresh_slot_info(struct ocfs2_super *osb); |
| 47 | int ocfs2_update_disk_slots(struct ocfs2_super *osb, | ||
| 48 | struct ocfs2_slot_info *si); | ||
| 49 | |||
| 50 | s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | ||
| 51 | s16 global); | ||
| 52 | void ocfs2_clear_slot(struct ocfs2_slot_info *si, | ||
| 53 | s16 slot_num); | ||
| 54 | 37 | ||
| 55 | static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si, | 38 | int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num); |
| 56 | int slot_num) | 39 | int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num, |
| 57 | { | 40 | unsigned int *node_num); |
| 58 | BUG_ON(slot_num == OCFS2_INVALID_SLOT); | ||
| 59 | assert_spin_locked(&si->si_lock); | ||
| 60 | 41 | ||
| 61 | return si->si_global_node_nums[slot_num] == OCFS2_INVALID_SLOT; | 42 | int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num); |
| 62 | } | ||
| 63 | 43 | ||
| 64 | #endif | 44 | #endif |
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c new file mode 100644 index 000000000000..ac1d74c63bf5 --- /dev/null +++ b/fs/ocfs2/stack_o2cb.c | |||
| @@ -0,0 +1,420 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * stack_o2cb.c | ||
| 5 | * | ||
| 6 | * Code which interfaces ocfs2 with the o2cb stack. | ||
| 7 | * | ||
| 8 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or | ||
| 11 | * modify it under the terms of the GNU General Public | ||
| 12 | * License as published by the Free Software Foundation, version 2. | ||
| 13 | * | ||
| 14 | * This program is distributed in the hope that it will be useful, | ||
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 17 | * General Public License for more details. | ||
| 18 | */ | ||
| 19 | |||
| 20 | #include <linux/crc32.h> | ||
| 21 | #include <linux/module.h> | ||
| 22 | |||
| 23 | /* Needed for AOP_TRUNCATED_PAGE in mlog_errno() */ | ||
| 24 | #include <linux/fs.h> | ||
| 25 | |||
| 26 | #include "cluster/masklog.h" | ||
| 27 | #include "cluster/nodemanager.h" | ||
| 28 | #include "cluster/heartbeat.h" | ||
| 29 | |||
| 30 | #include "stackglue.h" | ||
| 31 | |||
| 32 | struct o2dlm_private { | ||
| 33 | struct dlm_eviction_cb op_eviction_cb; | ||
| 34 | }; | ||
| 35 | |||
| 36 | static struct ocfs2_stack_plugin o2cb_stack; | ||
| 37 | |||
| 38 | /* These should be identical */ | ||
| 39 | #if (DLM_LOCK_IV != LKM_IVMODE) | ||
| 40 | # error Lock modes do not match | ||
| 41 | #endif | ||
| 42 | #if (DLM_LOCK_NL != LKM_NLMODE) | ||
| 43 | # error Lock modes do not match | ||
| 44 | #endif | ||
| 45 | #if (DLM_LOCK_CR != LKM_CRMODE) | ||
| 46 | # error Lock modes do not match | ||
| 47 | #endif | ||
| 48 | #if (DLM_LOCK_CW != LKM_CWMODE) | ||
| 49 | # error Lock modes do not match | ||
| 50 | #endif | ||
| 51 | #if (DLM_LOCK_PR != LKM_PRMODE) | ||
| 52 | # error Lock modes do not match | ||
| 53 | #endif | ||
| 54 | #if (DLM_LOCK_PW != LKM_PWMODE) | ||
| 55 | # error Lock modes do not match | ||
| 56 | #endif | ||
| 57 | #if (DLM_LOCK_EX != LKM_EXMODE) | ||
| 58 | # error Lock modes do not match | ||
| 59 | #endif | ||
| 60 | static inline int mode_to_o2dlm(int mode) | ||
| 61 | { | ||
| 62 | BUG_ON(mode > LKM_MAXMODE); | ||
| 63 | |||
| 64 | return mode; | ||
| 65 | } | ||
| 66 | |||
| 67 | #define map_flag(_generic, _o2dlm) \ | ||
| 68 | if (flags & (_generic)) { \ | ||
| 69 | flags &= ~(_generic); \ | ||
| 70 | o2dlm_flags |= (_o2dlm); \ | ||
| 71 | } | ||
| 72 | static int flags_to_o2dlm(u32 flags) | ||
| 73 | { | ||
| 74 | int o2dlm_flags = 0; | ||
| 75 | |||
| 76 | map_flag(DLM_LKF_NOQUEUE, LKM_NOQUEUE); | ||
| 77 | map_flag(DLM_LKF_CANCEL, LKM_CANCEL); | ||
| 78 | map_flag(DLM_LKF_CONVERT, LKM_CONVERT); | ||
| 79 | map_flag(DLM_LKF_VALBLK, LKM_VALBLK); | ||
| 80 | map_flag(DLM_LKF_IVVALBLK, LKM_INVVALBLK); | ||
| 81 | map_flag(DLM_LKF_ORPHAN, LKM_ORPHAN); | ||
| 82 | map_flag(DLM_LKF_FORCEUNLOCK, LKM_FORCE); | ||
| 83 | map_flag(DLM_LKF_TIMEOUT, LKM_TIMEOUT); | ||
| 84 | map_flag(DLM_LKF_LOCAL, LKM_LOCAL); | ||
| 85 | |||
| 86 | /* map_flag() should have cleared every flag passed in */ | ||
| 87 | BUG_ON(flags != 0); | ||
| 88 | |||
| 89 | return o2dlm_flags; | ||
| 90 | } | ||
| 91 | #undef map_flag | ||
| 92 | |||
/*
 * Map an o2dlm status to standard errno values.
 *
 * o2dlm only uses a handful of these, and returns even fewer to the
 * caller. Still, we try to assign sane values to each error.
 *
 * The following value pairs have special meanings to dlmglue, thus
 * the right hand side needs to stay unique - never duplicate the
 * mapping elsewhere in the table!
 *
 * DLM_NORMAL:		0
 * DLM_NOTQUEUED:	-EAGAIN
 * DLM_CANCELGRANT:	-EBUSY
 * DLM_CANCEL:		-DLM_ECANCEL
 */
/* Keep in sync with dlmapi.h */
/* Indexed by enum dlm_status; consumed only by dlm_status_to_errno(). */
static int status_map[] = {
	[DLM_NORMAL]			= 0,		/* Success */
	[DLM_GRANTED]			= -EINVAL,
	[DLM_DENIED]			= -EACCES,
	[DLM_DENIED_NOLOCKS]		= -EACCES,
	[DLM_WORKING]			= -EACCES,
	[DLM_BLOCKED]			= -EINVAL,
	[DLM_BLOCKED_ORPHAN]		= -EINVAL,
	[DLM_DENIED_GRACE_PERIOD]	= -EACCES,
	[DLM_SYSERR]			= -ENOMEM,	/* It is what it is */
	[DLM_NOSUPPORT]			= -EPROTO,
	[DLM_CANCELGRANT]		= -EBUSY,	/* Cancel after grant */
	[DLM_IVLOCKID]			= -EINVAL,
	[DLM_SYNC]			= -EINVAL,
	[DLM_BADTYPE]			= -EINVAL,
	[DLM_BADRESOURCE]		= -EINVAL,
	[DLM_MAXHANDLES]		= -ENOMEM,
	[DLM_NOCLINFO]			= -EINVAL,
	[DLM_NOLOCKMGR]			= -EINVAL,
	[DLM_NOPURGED]			= -EINVAL,
	[DLM_BADARGS]			= -EINVAL,
	[DLM_VOID]			= -EINVAL,
	[DLM_NOTQUEUED]			= -EAGAIN,	/* Trylock failed */
	[DLM_IVBUFLEN]			= -EINVAL,
	[DLM_CVTUNGRANT]		= -EPERM,
	[DLM_BADPARAM]			= -EINVAL,
	[DLM_VALNOTVALID]		= -EINVAL,
	[DLM_REJECTED]			= -EPERM,
	[DLM_ABORT]			= -EINVAL,
	[DLM_CANCEL]			= -DLM_ECANCEL,	/* Successful cancel */
	[DLM_IVRESHANDLE]		= -EINVAL,
	[DLM_DEADLOCK]			= -EDEADLK,
	[DLM_DENIED_NOASTS]		= -EINVAL,
	[DLM_FORWARD]			= -EINVAL,
	[DLM_TIMEOUT]			= -ETIMEDOUT,
	[DLM_IVGROUPID]			= -EINVAL,
	[DLM_VERS_CONFLICT]		= -EOPNOTSUPP,
	[DLM_BAD_DEVICE_PATH]		= -ENOENT,
	[DLM_NO_DEVICE_PERMISSION]	= -EPERM,
	[DLM_NO_CONTROL_DEVICE]		= -ENOENT,
	[DLM_RECOVERING]		= -ENOTCONN,
	[DLM_MIGRATING]			= -ERESTART,
	[DLM_MAXSTATS]			= -EINVAL,
};
| 153 | |||
| 154 | static int dlm_status_to_errno(enum dlm_status status) | ||
| 155 | { | ||
| 156 | BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0]))); | ||
| 157 | |||
| 158 | return status_map[status]; | ||
| 159 | } | ||
| 160 | |||
| 161 | static void o2dlm_lock_ast_wrapper(void *astarg) | ||
| 162 | { | ||
| 163 | BUG_ON(o2cb_stack.sp_proto == NULL); | ||
| 164 | |||
| 165 | o2cb_stack.sp_proto->lp_lock_ast(astarg); | ||
| 166 | } | ||
| 167 | |||
| 168 | static void o2dlm_blocking_ast_wrapper(void *astarg, int level) | ||
| 169 | { | ||
| 170 | BUG_ON(o2cb_stack.sp_proto == NULL); | ||
| 171 | |||
| 172 | o2cb_stack.sp_proto->lp_blocking_ast(astarg, level); | ||
| 173 | } | ||
| 174 | |||
| 175 | static void o2dlm_unlock_ast_wrapper(void *astarg, enum dlm_status status) | ||
| 176 | { | ||
| 177 | int error = dlm_status_to_errno(status); | ||
| 178 | |||
| 179 | BUG_ON(o2cb_stack.sp_proto == NULL); | ||
| 180 | |||
| 181 | /* | ||
| 182 | * In o2dlm, you can get both the lock_ast() for the lock being | ||
| 183 | * granted and the unlock_ast() for the CANCEL failing. A | ||
| 184 | * successful cancel sends DLM_NORMAL here. If the | ||
| 185 | * lock grant happened before the cancel arrived, you get | ||
| 186 | * DLM_CANCELGRANT. | ||
| 187 | * | ||
| 188 | * There's no need for the double-ast. If we see DLM_CANCELGRANT, | ||
| 189 | * we just ignore it. We expect the lock_ast() to handle the | ||
| 190 | * granted lock. | ||
| 191 | */ | ||
| 192 | if (status == DLM_CANCELGRANT) | ||
| 193 | return; | ||
| 194 | |||
| 195 | o2cb_stack.sp_proto->lp_unlock_ast(astarg, error); | ||
| 196 | } | ||
| 197 | |||
| 198 | static int o2cb_dlm_lock(struct ocfs2_cluster_connection *conn, | ||
| 199 | int mode, | ||
| 200 | union ocfs2_dlm_lksb *lksb, | ||
| 201 | u32 flags, | ||
| 202 | void *name, | ||
| 203 | unsigned int namelen, | ||
| 204 | void *astarg) | ||
| 205 | { | ||
| 206 | enum dlm_status status; | ||
| 207 | int o2dlm_mode = mode_to_o2dlm(mode); | ||
| 208 | int o2dlm_flags = flags_to_o2dlm(flags); | ||
| 209 | int ret; | ||
| 210 | |||
| 211 | status = dlmlock(conn->cc_lockspace, o2dlm_mode, &lksb->lksb_o2dlm, | ||
| 212 | o2dlm_flags, name, namelen, | ||
| 213 | o2dlm_lock_ast_wrapper, astarg, | ||
| 214 | o2dlm_blocking_ast_wrapper); | ||
| 215 | ret = dlm_status_to_errno(status); | ||
| 216 | return ret; | ||
| 217 | } | ||
| 218 | |||
| 219 | static int o2cb_dlm_unlock(struct ocfs2_cluster_connection *conn, | ||
| 220 | union ocfs2_dlm_lksb *lksb, | ||
| 221 | u32 flags, | ||
| 222 | void *astarg) | ||
| 223 | { | ||
| 224 | enum dlm_status status; | ||
| 225 | int o2dlm_flags = flags_to_o2dlm(flags); | ||
| 226 | int ret; | ||
| 227 | |||
| 228 | status = dlmunlock(conn->cc_lockspace, &lksb->lksb_o2dlm, | ||
| 229 | o2dlm_flags, o2dlm_unlock_ast_wrapper, astarg); | ||
| 230 | ret = dlm_status_to_errno(status); | ||
| 231 | return ret; | ||
| 232 | } | ||
| 233 | |||
| 234 | static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb) | ||
| 235 | { | ||
| 236 | return dlm_status_to_errno(lksb->lksb_o2dlm.status); | ||
| 237 | } | ||
| 238 | |||
| 239 | static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb) | ||
| 240 | { | ||
| 241 | return (void *)(lksb->lksb_o2dlm.lvb); | ||
| 242 | } | ||
| 243 | |||
/* Ask o2dlm to dump the state of the lock identified by this lksb. */
static void o2cb_dump_lksb(union ocfs2_dlm_lksb *lksb)
{
	dlm_print_one_lock(lksb->lksb_o2dlm.lockid);
}
| 248 | |||
/*
 * Called from the dlm when it's about to evict a node. This is how the
 * classic stack signals node death.
 */
static void o2dlm_eviction_cb(int node_num, void *data)
{
	/* data is the connection passed to dlm_setup_eviction_cb() in
	 * o2cb_cluster_connect() */
	struct ocfs2_cluster_connection *conn = data;

	mlog(ML_NOTICE, "o2dlm has evicted node %d from group %.*s\n",
	     node_num, conn->cc_namelen, conn->cc_name);

	/* Hand the dead node to the filesystem's recovery handler */
	conn->cc_recovery_handler(node_num, conn->cc_recovery_data);
}
| 262 | |||
| 263 | static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) | ||
| 264 | { | ||
| 265 | int rc = 0; | ||
| 266 | u32 dlm_key; | ||
| 267 | struct dlm_ctxt *dlm; | ||
| 268 | struct o2dlm_private *priv; | ||
| 269 | struct dlm_protocol_version dlm_version; | ||
| 270 | |||
| 271 | BUG_ON(conn == NULL); | ||
| 272 | BUG_ON(o2cb_stack.sp_proto == NULL); | ||
| 273 | |||
| 274 | /* for now we only have one cluster/node, make sure we see it | ||
| 275 | * in the heartbeat universe */ | ||
| 276 | if (!o2hb_check_local_node_heartbeating()) { | ||
| 277 | rc = -EINVAL; | ||
| 278 | goto out; | ||
| 279 | } | ||
| 280 | |||
| 281 | priv = kzalloc(sizeof(struct o2dlm_private), GFP_KERNEL); | ||
| 282 | if (!priv) { | ||
| 283 | rc = -ENOMEM; | ||
| 284 | goto out_free; | ||
| 285 | } | ||
| 286 | |||
| 287 | /* This just fills the structure in. It is safe to pass conn. */ | ||
| 288 | dlm_setup_eviction_cb(&priv->op_eviction_cb, o2dlm_eviction_cb, | ||
| 289 | conn); | ||
| 290 | |||
| 291 | conn->cc_private = priv; | ||
| 292 | |||
| 293 | /* used by the dlm code to make message headers unique, each | ||
| 294 | * node in this domain must agree on this. */ | ||
| 295 | dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen); | ||
| 296 | dlm_version.pv_major = conn->cc_version.pv_major; | ||
| 297 | dlm_version.pv_minor = conn->cc_version.pv_minor; | ||
| 298 | |||
| 299 | dlm = dlm_register_domain(conn->cc_name, dlm_key, &dlm_version); | ||
| 300 | if (IS_ERR(dlm)) { | ||
| 301 | rc = PTR_ERR(dlm); | ||
| 302 | mlog_errno(rc); | ||
| 303 | goto out_free; | ||
| 304 | } | ||
| 305 | |||
| 306 | conn->cc_version.pv_major = dlm_version.pv_major; | ||
| 307 | conn->cc_version.pv_minor = dlm_version.pv_minor; | ||
| 308 | conn->cc_lockspace = dlm; | ||
| 309 | |||
| 310 | dlm_register_eviction_cb(dlm, &priv->op_eviction_cb); | ||
| 311 | |||
| 312 | out_free: | ||
| 313 | if (rc && conn->cc_private) | ||
| 314 | kfree(conn->cc_private); | ||
| 315 | |||
| 316 | out: | ||
| 317 | return rc; | ||
| 318 | } | ||
| 319 | |||
| 320 | static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn, | ||
| 321 | int hangup_pending) | ||
| 322 | { | ||
| 323 | struct dlm_ctxt *dlm = conn->cc_lockspace; | ||
| 324 | struct o2dlm_private *priv = conn->cc_private; | ||
| 325 | |||
| 326 | dlm_unregister_eviction_cb(&priv->op_eviction_cb); | ||
| 327 | conn->cc_private = NULL; | ||
| 328 | kfree(priv); | ||
| 329 | |||
| 330 | dlm_unregister_domain(dlm); | ||
| 331 | conn->cc_lockspace = NULL; | ||
| 332 | |||
| 333 | return 0; | ||
| 334 | } | ||
| 335 | |||
| 336 | static void o2hb_stop(const char *group) | ||
| 337 | { | ||
| 338 | int ret; | ||
| 339 | char *argv[5], *envp[3]; | ||
| 340 | |||
| 341 | argv[0] = (char *)o2nm_get_hb_ctl_path(); | ||
| 342 | argv[1] = "-K"; | ||
| 343 | argv[2] = "-u"; | ||
| 344 | argv[3] = (char *)group; | ||
| 345 | argv[4] = NULL; | ||
| 346 | |||
| 347 | mlog(0, "Run: %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3]); | ||
| 348 | |||
| 349 | /* minimal command environment taken from cpu_run_sbin_hotplug */ | ||
| 350 | envp[0] = "HOME=/"; | ||
| 351 | envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; | ||
| 352 | envp[2] = NULL; | ||
| 353 | |||
| 354 | ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); | ||
| 355 | if (ret < 0) | ||
| 356 | mlog_errno(ret); | ||
| 357 | } | ||
| 358 | |||
/*
 * Hangup is a hack for tools compatibility.  Older ocfs2-tools software
 * expects the filesystem to call "ocfs2_hb_ctl" during unmount.  This
 * happens regardless of whether the DLM got started, so we can't do it
 * in ocfs2_cluster_disconnect().  We bring the o2hb_stop() function into
 * the glue and provide a "hangup" API for super.c to call.
 *
 * Other stacks will eventually provide a NULL ->hangup() pointer.
 */
static void o2cb_cluster_hangup(const char *group, int grouplen)
{
	/* grouplen is unused; o2hb_stop() takes the NUL-terminated name */
	o2hb_stop(group);
}
| 372 | |||
| 373 | static int o2cb_cluster_this_node(unsigned int *node) | ||
| 374 | { | ||
| 375 | int node_num; | ||
| 376 | |||
| 377 | node_num = o2nm_this_node(); | ||
| 378 | if (node_num == O2NM_INVALID_NODE_NUM) | ||
| 379 | return -ENOENT; | ||
| 380 | |||
| 381 | if (node_num >= O2NM_MAX_NODES) | ||
| 382 | return -EOVERFLOW; | ||
| 383 | |||
| 384 | *node = node_num; | ||
| 385 | return 0; | ||
| 386 | } | ||
| 387 | |||
/*
 * Operations exported to the ocfs2 stack glue.  The glue dispatches
 * through this table once the "o2cb" plugin is selected.
 */
struct ocfs2_stack_operations o2cb_stack_ops = {
	.connect	= o2cb_cluster_connect,
	.disconnect	= o2cb_cluster_disconnect,
	.hangup		= o2cb_cluster_hangup,
	.this_node	= o2cb_cluster_this_node,
	.dlm_lock	= o2cb_dlm_lock,
	.dlm_unlock	= o2cb_dlm_unlock,
	.lock_status	= o2cb_dlm_lock_status,
	.lock_lvb	= o2cb_dlm_lvb,
	.dump_lksb	= o2cb_dump_lksb,
};
| 399 | |||
/* The plugin descriptor registered with the stack glue as "o2cb". */
static struct ocfs2_stack_plugin o2cb_stack = {
	.sp_name	= "o2cb",
	.sp_ops		= &o2cb_stack_ops,
	.sp_owner	= THIS_MODULE,
};
| 405 | |||
/* Register the o2cb plugin with the ocfs2 stack glue on module load. */
static int __init o2cb_stack_init(void)
{
	return ocfs2_stack_glue_register(&o2cb_stack);
}

/* Unregister the plugin on module unload. */
static void __exit o2cb_stack_exit(void)
{
	ocfs2_stack_glue_unregister(&o2cb_stack);
}

MODULE_AUTHOR("Oracle");
MODULE_DESCRIPTION("ocfs2 driver for the classic o2cb stack");
MODULE_LICENSE("GPL");
module_init(o2cb_stack_init);
module_exit(o2cb_stack_exit);
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c new file mode 100644 index 000000000000..7428663f9cbb --- /dev/null +++ b/fs/ocfs2/stack_user.c | |||
| @@ -0,0 +1,883 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * stack_user.c | ||
| 5 | * | ||
| 6 | * Code which interfaces ocfs2 with fs/dlm and a userspace stack. | ||
| 7 | * | ||
| 8 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or | ||
| 11 | * modify it under the terms of the GNU General Public | ||
| 12 | * License as published by the Free Software Foundation, version 2. | ||
| 13 | * | ||
| 14 | * This program is distributed in the hope that it will be useful, | ||
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 17 | * General Public License for more details. | ||
| 18 | */ | ||
| 19 | |||
| 20 | #include <linux/module.h> | ||
| 21 | #include <linux/fs.h> | ||
| 22 | #include <linux/miscdevice.h> | ||
| 23 | #include <linux/mutex.h> | ||
| 24 | #include <linux/reboot.h> | ||
| 25 | #include <asm/uaccess.h> | ||
| 26 | |||
| 27 | #include "ocfs2.h" /* For struct ocfs2_lock_res */ | ||
| 28 | #include "stackglue.h" | ||
| 29 | |||
| 30 | |||
| 31 | /* | ||
| 32 | * The control protocol starts with a handshake. Until the handshake | ||
| 33 | * is complete, the control device will fail all write(2)s. | ||
| 34 | * | ||
| 35 | * The handshake is simple. First, the client reads until EOF. Each line | ||
| 36 | * of output is a supported protocol tag. All protocol tags are a single | ||
| 37 | * character followed by a two hex digit version number. Currently the | ||
 * only thing supported is T01, for "Text-based version 0x01".  Next, the
| 39 | * client writes the version they would like to use, including the newline. | ||
| 40 | * Thus, the protocol tag is 'T01\n'. If the version tag written is | ||
| 41 | * unknown, -EINVAL is returned. Once the negotiation is complete, the | ||
| 42 | * client can start sending messages. | ||
| 43 | * | ||
| 44 | * The T01 protocol has three messages. First is the "SETN" message. | ||
| 45 | * It has the following syntax: | ||
| 46 | * | ||
| 47 | * SETN<space><8-char-hex-nodenum><newline> | ||
| 48 | * | ||
| 49 | * This is 14 characters. | ||
| 50 | * | ||
| 51 | * The "SETN" message must be the first message following the protocol. | ||
| 52 | * It tells ocfs2_control the local node number. | ||
| 53 | * | ||
| 54 | * Next comes the "SETV" message. It has the following syntax: | ||
| 55 | * | ||
| 56 | * SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> | ||
| 57 | * | ||
| 58 | * This is 11 characters. | ||
| 59 | * | ||
| 60 | * The "SETV" message sets the filesystem locking protocol version as | ||
| 61 | * negotiated by the client. The client negotiates based on the maximum | ||
| 62 | * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major | ||
| 63 | * number from the "SETV" message must match | ||
| 64 | * user_stack.sp_proto->lp_max_version.pv_major, and the minor number | ||
| 65 | * must be less than or equal to ...->lp_max_version.pv_minor. | ||
| 66 | * | ||
| 67 | * Once this information has been set, mounts will be allowed. From this | ||
| 68 | * point on, the "DOWN" message can be sent for node down notification. | ||
| 69 | * It has the following syntax: | ||
| 70 | * | ||
| 71 | * DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> | ||
| 72 | * | ||
| 73 | * eg: | ||
| 74 | * | ||
| 75 | * DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n | ||
| 76 | * | ||
| 77 | * This is 47 characters. | ||
| 78 | */ | ||
| 79 | |||
/*
 * Whether or not the client has done the handshake.
 * For now, we have just one protocol version.
 */
#define OCFS2_CONTROL_PROTO			"T01\n"
#define OCFS2_CONTROL_PROTO_LEN			4

/* Handshake states */
#define OCFS2_CONTROL_HANDSHAKE_INVALID		(0)
#define OCFS2_CONTROL_HANDSHAKE_READ		(1)
#define OCFS2_CONTROL_HANDSHAKE_PROTOCOL	(2)
#define OCFS2_CONTROL_HANDSHAKE_VALID		(3)

/* Messages.  The *_TOTAL_LEN values count the entire fixed-size
 * message, including separators and the trailing newline - they must
 * match the corresponding struct ocfs2_control_message_* layouts. */
#define OCFS2_CONTROL_MESSAGE_OP_LEN		4
#define OCFS2_CONTROL_MESSAGE_SETNODE_OP	"SETN"
#define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN	14
#define OCFS2_CONTROL_MESSAGE_SETVERSION_OP	"SETV"
#define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN	11
#define OCFS2_CONTROL_MESSAGE_DOWN_OP		"DOWN"
#define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN	47
#define OCFS2_TEXT_UUID_LEN			32
#define OCFS2_CONTROL_MESSAGE_VERNUM_LEN	2
#define OCFS2_CONTROL_MESSAGE_NODENUM_LEN	8
| 104 | |||
/*
 * ocfs2_live_connection is refcounted because the filesystem and
 * miscdevice sides can detach in different order.  Let's just be safe.
 */
struct ocfs2_live_connection {
	struct list_head		oc_list;  /* on ocfs2_live_connection_list */
	struct ocfs2_cluster_connection	*oc_conn; /* the fs-side connection */
};

/* Per-open-file state for the ocfs2_control device. */
struct ocfs2_control_private {
	struct list_head	op_list;
	int			op_state;      /* OCFS2_CONTROL_HANDSHAKE_* */
	int			op_this_node;  /* from SETN; negative until set */
	struct ocfs2_protocol_version op_proto;	/* from SETV */
};

/* SETN<space><8-char-hex-nodenum><newline> */
struct ocfs2_control_message_setn {
	char	tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
	char	space;
	char	nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
	char	newline;
};

/* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */
struct ocfs2_control_message_setv {
	char	tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
	char	space1;
	char	major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
	char	space2;
	char	minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
	char	newline;
};

/* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */
struct ocfs2_control_message_down {
	char	tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
	char	space1;
	char	uuid[OCFS2_TEXT_UUID_LEN];
	char	space2;
	char	nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
	char	newline;
};

/* All messages share the 4-byte tag, used to dispatch on receipt. */
union ocfs2_control_message {
	char					tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
	struct ocfs2_control_message_setn	u_setn;
	struct ocfs2_control_message_setv	u_setv;
	struct ocfs2_control_message_down	u_down;
};
| 155 | |||
/* Tentative declaration; presumably initialized later in this file. */
static struct ocfs2_stack_plugin user_stack;

/* Count of control connections that completed the full handshake
 * (bumped in ocfs2_control_install_private()). */
static atomic_t ocfs2_control_opened;
/* Local node number; -1 until a daemon sends SETN. */
static int ocfs2_control_this_node = -1;
/* Locking protocol version negotiated via SETV. */
static struct ocfs2_protocol_version running_proto;

static LIST_HEAD(ocfs2_live_connection_list);
static LIST_HEAD(ocfs2_control_private_list);
/* Protects the lists and globals above. */
static DEFINE_MUTEX(ocfs2_control_lock);
| 165 | |||
/* Record the handshake state in the per-file private data. */
static inline void ocfs2_control_set_handshake_state(struct file *file,
						     int state)
{
	struct ocfs2_control_private *p = file->private_data;
	p->op_state = state;
}

/* Return this file's handshake state (OCFS2_CONTROL_HANDSHAKE_*). */
static inline int ocfs2_control_get_handshake_state(struct file *file)
{
	struct ocfs2_control_private *p = file->private_data;
	return p->op_state;
}
| 178 | |||
| 179 | static struct ocfs2_live_connection *ocfs2_connection_find(const char *name) | ||
| 180 | { | ||
| 181 | size_t len = strlen(name); | ||
| 182 | struct ocfs2_live_connection *c; | ||
| 183 | |||
| 184 | BUG_ON(!mutex_is_locked(&ocfs2_control_lock)); | ||
| 185 | |||
| 186 | list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) { | ||
| 187 | if ((c->oc_conn->cc_namelen == len) && | ||
| 188 | !strncmp(c->oc_conn->cc_name, name, len)) | ||
| 189 | return c; | ||
| 190 | } | ||
| 191 | |||
| 192 | return c; | ||
| 193 | } | ||
| 194 | |||
| 195 | /* | ||
| 196 | * ocfs2_live_connection structures are created underneath the ocfs2 | ||
| 197 | * mount path. Since the VFS prevents multiple calls to | ||
| 198 | * fill_super(), we can't get dupes here. | ||
| 199 | */ | ||
| 200 | static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn, | ||
| 201 | struct ocfs2_live_connection **c_ret) | ||
| 202 | { | ||
| 203 | int rc = 0; | ||
| 204 | struct ocfs2_live_connection *c; | ||
| 205 | |||
| 206 | c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); | ||
| 207 | if (!c) | ||
| 208 | return -ENOMEM; | ||
| 209 | |||
| 210 | mutex_lock(&ocfs2_control_lock); | ||
| 211 | c->oc_conn = conn; | ||
| 212 | |||
| 213 | if (atomic_read(&ocfs2_control_opened)) | ||
| 214 | list_add(&c->oc_list, &ocfs2_live_connection_list); | ||
| 215 | else { | ||
| 216 | printk(KERN_ERR | ||
| 217 | "ocfs2: Userspace control daemon is not present\n"); | ||
| 218 | rc = -ESRCH; | ||
| 219 | } | ||
| 220 | |||
| 221 | mutex_unlock(&ocfs2_control_lock); | ||
| 222 | |||
| 223 | if (!rc) | ||
| 224 | *c_ret = c; | ||
| 225 | else | ||
| 226 | kfree(c); | ||
| 227 | |||
| 228 | return rc; | ||
| 229 | } | ||
| 230 | |||
| 231 | /* | ||
| 232 | * This function disconnects the cluster connection from ocfs2_control. | ||
| 233 | * Afterwards, userspace can't affect the cluster connection. | ||
| 234 | */ | ||
| 235 | static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c) | ||
| 236 | { | ||
| 237 | mutex_lock(&ocfs2_control_lock); | ||
| 238 | list_del_init(&c->oc_list); | ||
| 239 | c->oc_conn = NULL; | ||
| 240 | mutex_unlock(&ocfs2_control_lock); | ||
| 241 | |||
| 242 | kfree(c); | ||
| 243 | } | ||
| 244 | |||
| 245 | static int ocfs2_control_cfu(void *target, size_t target_len, | ||
| 246 | const char __user *buf, size_t count) | ||
| 247 | { | ||
| 248 | /* The T01 expects write(2) calls to have exactly one command */ | ||
| 249 | if ((count != target_len) || | ||
| 250 | (count > sizeof(union ocfs2_control_message))) | ||
| 251 | return -EINVAL; | ||
| 252 | |||
| 253 | if (copy_from_user(target, buf, target_len)) | ||
| 254 | return -EFAULT; | ||
| 255 | |||
| 256 | return 0; | ||
| 257 | } | ||
| 258 | |||
| 259 | static ssize_t ocfs2_control_validate_protocol(struct file *file, | ||
| 260 | const char __user *buf, | ||
| 261 | size_t count) | ||
| 262 | { | ||
| 263 | ssize_t ret; | ||
| 264 | char kbuf[OCFS2_CONTROL_PROTO_LEN]; | ||
| 265 | |||
| 266 | ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN, | ||
| 267 | buf, count); | ||
| 268 | if (ret) | ||
| 269 | return ret; | ||
| 270 | |||
| 271 | if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN)) | ||
| 272 | return -EINVAL; | ||
| 273 | |||
| 274 | ocfs2_control_set_handshake_state(file, | ||
| 275 | OCFS2_CONTROL_HANDSHAKE_PROTOCOL); | ||
| 276 | |||
| 277 | return count; | ||
| 278 | } | ||
| 279 | |||
| 280 | static void ocfs2_control_send_down(const char *uuid, | ||
| 281 | int nodenum) | ||
| 282 | { | ||
| 283 | struct ocfs2_live_connection *c; | ||
| 284 | |||
| 285 | mutex_lock(&ocfs2_control_lock); | ||
| 286 | |||
| 287 | c = ocfs2_connection_find(uuid); | ||
| 288 | if (c) { | ||
| 289 | BUG_ON(c->oc_conn == NULL); | ||
| 290 | c->oc_conn->cc_recovery_handler(nodenum, | ||
| 291 | c->oc_conn->cc_recovery_data); | ||
| 292 | } | ||
| 293 | |||
| 294 | mutex_unlock(&ocfs2_control_lock); | ||
| 295 | } | ||
| 296 | |||
/*
 * Called whenever configuration elements are sent to /dev/ocfs2_control.
 * If all configuration elements are present, try to set the global
 * values.  If there is a problem, return an error.  Skip any missing
 * elements, and only bump ocfs2_control_opened when we have all elements
 * and are successful.
 */
static int ocfs2_control_install_private(struct file *file)
{
	int rc = 0;
	int set_p = 1;	/* assume complete until a missing element clears it */
	struct ocfs2_control_private *p = file->private_data;

	BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL);

	mutex_lock(&ocfs2_control_lock);

	/* Node number: still missing means "wait for SETN"; a mismatch
	 * with an already-published global node number is an error. */
	if (p->op_this_node < 0) {
		set_p = 0;
	} else if ((ocfs2_control_this_node >= 0) &&
		   (ocfs2_control_this_node != p->op_this_node)) {
		rc = -EINVAL;
		goto out_unlock;
	}

	/* Protocol version: pv_major of 0 means SETV hasn't arrived yet
	 * (SETV rejects major < 1).  Once live connections exist, every
	 * daemon must agree with the running version. */
	if (!p->op_proto.pv_major) {
		set_p = 0;
	} else if (!list_empty(&ocfs2_live_connection_list) &&
		   ((running_proto.pv_major != p->op_proto.pv_major) ||
		    (running_proto.pv_minor != p->op_proto.pv_minor))) {
		rc = -EINVAL;
		goto out_unlock;
	}

	/* Both elements present and consistent - publish them */
	if (set_p) {
		ocfs2_control_this_node = p->op_this_node;
		running_proto.pv_major = p->op_proto.pv_major;
		running_proto.pv_minor = p->op_proto.pv_minor;
	}

out_unlock:
	mutex_unlock(&ocfs2_control_lock);

	if (!rc && set_p) {
		/* We set the global values successfully */
		atomic_inc(&ocfs2_control_opened);
		ocfs2_control_set_handshake_state(file,
					OCFS2_CONTROL_HANDSHAKE_VALID);
	}

	return rc;
}
| 349 | |||
| 350 | static int ocfs2_control_get_this_node(void) | ||
| 351 | { | ||
| 352 | int rc; | ||
| 353 | |||
| 354 | mutex_lock(&ocfs2_control_lock); | ||
| 355 | if (ocfs2_control_this_node < 0) | ||
| 356 | rc = -EINVAL; | ||
| 357 | else | ||
| 358 | rc = ocfs2_control_this_node; | ||
| 359 | mutex_unlock(&ocfs2_control_lock); | ||
| 360 | |||
| 361 | return rc; | ||
| 362 | } | ||
| 363 | |||
/*
 * Handle a "SETN" message: parse the hex node number and record it in
 * this file's private data, then try to install the global values.
 * Only valid while the handshake is in the PROTOCOL state.
 */
static int ocfs2_control_do_setnode_msg(struct file *file,
					struct ocfs2_control_message_setn *msg)
{
	long nodenum;
	char *ptr = NULL;
	struct ocfs2_control_private *p = file->private_data;

	if (ocfs2_control_get_handshake_state(file) !=
	    OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
		return -EINVAL;

	if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP,
		    OCFS2_CONTROL_MESSAGE_OP_LEN))
		return -EINVAL;

	if ((msg->space != ' ') || (msg->newline != '\n'))
		return -EINVAL;
	/* NUL the separators in place; the newline slot becomes the
	 * terminator of nodestr for simple_strtol() */
	msg->space = msg->newline = '\0';

	nodenum = simple_strtol(msg->nodestr, &ptr, 16);
	if (!ptr || *ptr)
		return -EINVAL;

	/* Reject strtol overflow sentinels and values outside int range */
	if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
	    (nodenum > INT_MAX) || (nodenum < 0))
		return -ERANGE;
	p->op_this_node = nodenum;

	return ocfs2_control_install_private(file);
}
| 394 | |||
/*
 * Handle a "SETV" message: parse the hex major/minor locking protocol
 * version, validate it against the filesystem's maximum, and record it
 * in this file's private data before trying to install the globals.
 * Only valid while the handshake is in the PROTOCOL state.
 */
static int ocfs2_control_do_setversion_msg(struct file *file,
					   struct ocfs2_control_message_setv *msg)
{
	long major, minor;
	char *ptr = NULL;
	struct ocfs2_control_private *p = file->private_data;
	struct ocfs2_protocol_version *max =
		&user_stack.sp_proto->lp_max_version;

	if (ocfs2_control_get_handshake_state(file) !=
	    OCFS2_CONTROL_HANDSHAKE_PROTOCOL)
		return -EINVAL;

	if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP,
		    OCFS2_CONTROL_MESSAGE_OP_LEN))
		return -EINVAL;

	if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
	    (msg->newline != '\n'))
		return -EINVAL;
	/* NUL the separators so major and minor are terminated in place */
	msg->space1 = msg->space2 = msg->newline = '\0';

	major = simple_strtol(msg->major, &ptr, 16);
	if (!ptr || *ptr)
		return -EINVAL;
	minor = simple_strtol(msg->minor, &ptr, 16);
	if (!ptr || *ptr)
		return -EINVAL;

	/*
	 * The major must be between 1 and 255, inclusive.  The minor
	 * must be between 0 and 255, inclusive.  The version passed in
	 * must be within the maximum version supported by the filesystem.
	 */
	if ((major == LONG_MIN) || (major == LONG_MAX) ||
	    (major > (u8)-1) || (major < 1))
		return -ERANGE;
	if ((minor == LONG_MIN) || (minor == LONG_MAX) ||
	    (minor > (u8)-1) || (minor < 0))
		return -ERANGE;
	if ((major != max->pv_major) ||
	    (minor > max->pv_minor))
		return -EINVAL;

	p->op_proto.pv_major = major;
	p->op_proto.pv_minor = minor;

	return ocfs2_control_install_private(file);
}
| 444 | |||
/*
 * Handle a "DOWN" message: userspace reports that a node has died.
 * Parses the uuid and hex node number and forwards them to the
 * matching live connection.  Requires a completed handshake (VALID).
 */
static int ocfs2_control_do_down_msg(struct file *file,
				     struct ocfs2_control_message_down *msg)
{
	long nodenum;
	char *p = NULL;

	if (ocfs2_control_get_handshake_state(file) !=
	    OCFS2_CONTROL_HANDSHAKE_VALID)
		return -EINVAL;

	if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP,
		    OCFS2_CONTROL_MESSAGE_OP_LEN))
		return -EINVAL;

	if ((msg->space1 != ' ') || (msg->space2 != ' ') ||
	    (msg->newline != '\n'))
		return -EINVAL;
	/* NUL the separators in place: space2 terminates uuid, newline
	 * terminates nodestr */
	msg->space1 = msg->space2 = msg->newline = '\0';

	nodenum = simple_strtol(msg->nodestr, &p, 16);
	if (!p || *p)
		return -EINVAL;

	/* Reject strtol overflow sentinels and values outside int range */
	if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
	    (nodenum > INT_MAX) || (nodenum < 0))
		return -ERANGE;

	ocfs2_control_send_down(msg->uuid, nodenum);

	return 0;
}
| 476 | |||
| 477 | static ssize_t ocfs2_control_message(struct file *file, | ||
| 478 | const char __user *buf, | ||
| 479 | size_t count) | ||
| 480 | { | ||
| 481 | ssize_t ret; | ||
| 482 | union ocfs2_control_message msg; | ||
| 483 | |||
| 484 | /* Try to catch padding issues */ | ||
| 485 | WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) != | ||
| 486 | (sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1))); | ||
| 487 | |||
| 488 | memset(&msg, 0, sizeof(union ocfs2_control_message)); | ||
| 489 | ret = ocfs2_control_cfu(&msg, count, buf, count); | ||
| 490 | if (ret) | ||
| 491 | goto out; | ||
| 492 | |||
| 493 | if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) && | ||
| 494 | !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP, | ||
| 495 | OCFS2_CONTROL_MESSAGE_OP_LEN)) | ||
| 496 | ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn); | ||
| 497 | else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) && | ||
| 498 | !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP, | ||
| 499 | OCFS2_CONTROL_MESSAGE_OP_LEN)) | ||
| 500 | ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv); | ||
| 501 | else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) && | ||
| 502 | !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP, | ||
| 503 | OCFS2_CONTROL_MESSAGE_OP_LEN)) | ||
| 504 | ret = ocfs2_control_do_down_msg(file, &msg.u_down); | ||
| 505 | else | ||
| 506 | ret = -EINVAL; | ||
| 507 | |||
| 508 | out: | ||
| 509 | return ret ? ret : count; | ||
| 510 | } | ||
| 511 | |||
| 512 | static ssize_t ocfs2_control_write(struct file *file, | ||
| 513 | const char __user *buf, | ||
| 514 | size_t count, | ||
| 515 | loff_t *ppos) | ||
| 516 | { | ||
| 517 | ssize_t ret; | ||
| 518 | |||
| 519 | switch (ocfs2_control_get_handshake_state(file)) { | ||
| 520 | case OCFS2_CONTROL_HANDSHAKE_INVALID: | ||
| 521 | ret = -EINVAL; | ||
| 522 | break; | ||
| 523 | |||
| 524 | case OCFS2_CONTROL_HANDSHAKE_READ: | ||
| 525 | ret = ocfs2_control_validate_protocol(file, buf, | ||
| 526 | count); | ||
| 527 | break; | ||
| 528 | |||
| 529 | case OCFS2_CONTROL_HANDSHAKE_PROTOCOL: | ||
| 530 | case OCFS2_CONTROL_HANDSHAKE_VALID: | ||
| 531 | ret = ocfs2_control_message(file, buf, count); | ||
| 532 | break; | ||
| 533 | |||
| 534 | default: | ||
| 535 | BUG(); | ||
| 536 | ret = -EIO; | ||
| 537 | break; | ||
| 538 | } | ||
| 539 | |||
| 540 | return ret; | ||
| 541 | } | ||
| 542 | |||
| 543 | /* | ||
| 544 | * This is a naive version. If we ever have a new protocol, we'll expand | ||
| 545 | * it. Probably using seq_file. | ||
| 546 | */ | ||
| 547 | static ssize_t ocfs2_control_read(struct file *file, | ||
| 548 | char __user *buf, | ||
| 549 | size_t count, | ||
| 550 | loff_t *ppos) | ||
| 551 | { | ||
| 552 | char *proto_string = OCFS2_CONTROL_PROTO; | ||
| 553 | size_t to_write = 0; | ||
| 554 | |||
| 555 | if (*ppos >= OCFS2_CONTROL_PROTO_LEN) | ||
| 556 | return 0; | ||
| 557 | |||
| 558 | to_write = OCFS2_CONTROL_PROTO_LEN - *ppos; | ||
| 559 | if (to_write > count) | ||
| 560 | to_write = count; | ||
| 561 | if (copy_to_user(buf, proto_string + *ppos, to_write)) | ||
| 562 | return -EFAULT; | ||
| 563 | |||
| 564 | *ppos += to_write; | ||
| 565 | |||
| 566 | /* Have we read the whole protocol list? */ | ||
| 567 | if (*ppos >= OCFS2_CONTROL_PROTO_LEN) | ||
| 568 | ocfs2_control_set_handshake_state(file, | ||
| 569 | OCFS2_CONTROL_HANDSHAKE_READ); | ||
| 570 | |||
| 571 | return to_write; | ||
| 572 | } | ||
| 573 | |||
| 574 | static int ocfs2_control_release(struct inode *inode, struct file *file) | ||
| 575 | { | ||
| 576 | struct ocfs2_control_private *p = file->private_data; | ||
| 577 | |||
| 578 | mutex_lock(&ocfs2_control_lock); | ||
| 579 | |||
| 580 | if (ocfs2_control_get_handshake_state(file) != | ||
| 581 | OCFS2_CONTROL_HANDSHAKE_VALID) | ||
| 582 | goto out; | ||
| 583 | |||
| 584 | if (atomic_dec_and_test(&ocfs2_control_opened)) { | ||
| 585 | if (!list_empty(&ocfs2_live_connection_list)) { | ||
| 586 | /* XXX: Do bad things! */ | ||
| 587 | printk(KERN_ERR | ||
| 588 | "ocfs2: Unexpected release of ocfs2_control!\n" | ||
| 589 | " Loss of cluster connection requires " | ||
| 590 | "an emergency restart!\n"); | ||
| 591 | emergency_restart(); | ||
| 592 | } | ||
| 593 | /* | ||
| 594 | * Last valid close clears the node number and resets | ||
| 595 | * the locking protocol version | ||
| 596 | */ | ||
| 597 | ocfs2_control_this_node = -1; | ||
| 598 | running_proto.pv_major = 0; | ||
| 599 | running_proto.pv_major = 0; | ||
| 600 | } | ||
| 601 | |||
| 602 | out: | ||
| 603 | list_del_init(&p->op_list); | ||
| 604 | file->private_data = NULL; | ||
| 605 | |||
| 606 | mutex_unlock(&ocfs2_control_lock); | ||
| 607 | |||
| 608 | kfree(p); | ||
| 609 | |||
| 610 | return 0; | ||
| 611 | } | ||
| 612 | |||
| 613 | static int ocfs2_control_open(struct inode *inode, struct file *file) | ||
| 614 | { | ||
| 615 | struct ocfs2_control_private *p; | ||
| 616 | |||
| 617 | p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL); | ||
| 618 | if (!p) | ||
| 619 | return -ENOMEM; | ||
| 620 | p->op_this_node = -1; | ||
| 621 | |||
| 622 | mutex_lock(&ocfs2_control_lock); | ||
| 623 | file->private_data = p; | ||
| 624 | list_add(&p->op_list, &ocfs2_control_private_list); | ||
| 625 | mutex_unlock(&ocfs2_control_lock); | ||
| 626 | |||
| 627 | return 0; | ||
| 628 | } | ||
| 629 | |||
/* File operations for the ocfs2_control misc character device. */
static const struct file_operations ocfs2_control_fops = {
	.open    = ocfs2_control_open,
	.release = ocfs2_control_release,
	.read    = ocfs2_control_read,
	.write   = ocfs2_control_write,
	.owner   = THIS_MODULE,
};
| 637 | |||
/* The /dev/ocfs2_control device the userspace cluster daemon talks to. */
struct miscdevice ocfs2_control_device = {
	.minor = MISC_DYNAMIC_MINOR,
	.name  = "ocfs2_control",
	.fops  = &ocfs2_control_fops,
};
| 643 | |||
| 644 | static int ocfs2_control_init(void) | ||
| 645 | { | ||
| 646 | int rc; | ||
| 647 | |||
| 648 | atomic_set(&ocfs2_control_opened, 0); | ||
| 649 | |||
| 650 | rc = misc_register(&ocfs2_control_device); | ||
| 651 | if (rc) | ||
| 652 | printk(KERN_ERR | ||
| 653 | "ocfs2: Unable to register ocfs2_control device " | ||
| 654 | "(errno %d)\n", | ||
| 655 | -rc); | ||
| 656 | |||
| 657 | return rc; | ||
| 658 | } | ||
| 659 | |||
| 660 | static void ocfs2_control_exit(void) | ||
| 661 | { | ||
| 662 | int rc; | ||
| 663 | |||
| 664 | rc = misc_deregister(&ocfs2_control_device); | ||
| 665 | if (rc) | ||
| 666 | printk(KERN_ERR | ||
| 667 | "ocfs2: Unable to deregister ocfs2_control device " | ||
| 668 | "(errno %d)\n", | ||
| 669 | -rc); | ||
| 670 | } | ||
| 671 | |||
| 672 | static struct dlm_lksb *fsdlm_astarg_to_lksb(void *astarg) | ||
| 673 | { | ||
| 674 | struct ocfs2_lock_res *res = astarg; | ||
| 675 | return &res->l_lksb.lksb_fsdlm; | ||
| 676 | } | ||
| 677 | |||
| 678 | static void fsdlm_lock_ast_wrapper(void *astarg) | ||
| 679 | { | ||
| 680 | struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg); | ||
| 681 | int status = lksb->sb_status; | ||
| 682 | |||
| 683 | BUG_ON(user_stack.sp_proto == NULL); | ||
| 684 | |||
| 685 | /* | ||
| 686 | * For now we're punting on the issue of other non-standard errors | ||
| 687 | * where we can't tell if the unlock_ast or lock_ast should be called. | ||
| 688 | * The main "other error" that's possible is EINVAL which means the | ||
| 689 | * function was called with invalid args, which shouldn't be possible | ||
| 690 | * since the caller here is under our control. Other non-standard | ||
| 691 | * errors probably fall into the same category, or otherwise are fatal | ||
| 692 | * which means we can't carry on anyway. | ||
| 693 | */ | ||
| 694 | |||
| 695 | if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL) | ||
| 696 | user_stack.sp_proto->lp_unlock_ast(astarg, 0); | ||
| 697 | else | ||
| 698 | user_stack.sp_proto->lp_lock_ast(astarg); | ||
| 699 | } | ||
| 700 | |||
| 701 | static void fsdlm_blocking_ast_wrapper(void *astarg, int level) | ||
| 702 | { | ||
| 703 | BUG_ON(user_stack.sp_proto == NULL); | ||
| 704 | |||
| 705 | user_stack.sp_proto->lp_blocking_ast(astarg, level); | ||
| 706 | } | ||
| 707 | |||
| 708 | static int user_dlm_lock(struct ocfs2_cluster_connection *conn, | ||
| 709 | int mode, | ||
| 710 | union ocfs2_dlm_lksb *lksb, | ||
| 711 | u32 flags, | ||
| 712 | void *name, | ||
| 713 | unsigned int namelen, | ||
| 714 | void *astarg) | ||
| 715 | { | ||
| 716 | int ret; | ||
| 717 | |||
| 718 | if (!lksb->lksb_fsdlm.sb_lvbptr) | ||
| 719 | lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + | ||
| 720 | sizeof(struct dlm_lksb); | ||
| 721 | |||
| 722 | ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm, | ||
| 723 | flags|DLM_LKF_NODLCKWT, name, namelen, 0, | ||
| 724 | fsdlm_lock_ast_wrapper, astarg, | ||
| 725 | fsdlm_blocking_ast_wrapper); | ||
| 726 | return ret; | ||
| 727 | } | ||
| 728 | |||
| 729 | static int user_dlm_unlock(struct ocfs2_cluster_connection *conn, | ||
| 730 | union ocfs2_dlm_lksb *lksb, | ||
| 731 | u32 flags, | ||
| 732 | void *astarg) | ||
| 733 | { | ||
| 734 | int ret; | ||
| 735 | |||
| 736 | ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid, | ||
| 737 | flags, &lksb->lksb_fsdlm, astarg); | ||
| 738 | return ret; | ||
| 739 | } | ||
| 740 | |||
/* Return the fs/dlm status of the last operation on this lksb. */
static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
{
	return lksb->lksb_fsdlm.sb_status;
}
| 745 | |||
| 746 | static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) | ||
| 747 | { | ||
| 748 | return (void *)(lksb->lksb_fsdlm.sb_lvbptr); | ||
| 749 | } | ||
| 750 | |||
/* Intentionally a no-op: the user stack has nothing extra to dump. */
static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
{
}
| 754 | |||
| 755 | /* | ||
| 756 | * Compare a requested locking protocol version against the current one. | ||
| 757 | * | ||
| 758 | * If the major numbers are different, they are incompatible. | ||
| 759 | * If the current minor is greater than the request, they are incompatible. | ||
| 760 | * If the current minor is less than or equal to the request, they are | ||
| 761 | * compatible, and the requester should run at the current minor version. | ||
| 762 | */ | ||
| 763 | static int fs_protocol_compare(struct ocfs2_protocol_version *existing, | ||
| 764 | struct ocfs2_protocol_version *request) | ||
| 765 | { | ||
| 766 | if (existing->pv_major != request->pv_major) | ||
| 767 | return 1; | ||
| 768 | |||
| 769 | if (existing->pv_minor > request->pv_minor) | ||
| 770 | return 1; | ||
| 771 | |||
| 772 | if (existing->pv_minor < request->pv_minor) | ||
| 773 | request->pv_minor = existing->pv_minor; | ||
| 774 | |||
| 775 | return 0; | ||
| 776 | } | ||
| 777 | |||
/*
 * Connect a filesystem mount to the userspace cluster stack: register the
 * connection with the control device, validate the locking protocol the
 * daemon negotiated, then join the fs/dlm lockspace for this mount.
 * On any failure after registration, the live connection is dropped again.
 */
static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
{
	dlm_lockspace_t *fsdlm;
	struct ocfs2_live_connection *control;
	int rc = 0;

	BUG_ON(conn == NULL);

	/* Registers the connection so the control daemon can see it. */
	rc = ocfs2_live_connection_new(conn, &control);
	if (rc)
		goto out;

	/*
	 * running_proto must have been set before we allowed any mounts
	 * to proceed.
	 */
	if (fs_protocol_compare(&running_proto, &conn->cc_version)) {
		printk(KERN_ERR
		       "Unable to mount with fs locking protocol version "
		       "%u.%u because the userspace control daemon has "
		       "negotiated %u.%u\n",
		       conn->cc_version.pv_major, conn->cc_version.pv_minor,
		       running_proto.pv_major, running_proto.pv_minor);
		rc = -EPROTO;
		ocfs2_live_connection_drop(control);
		goto out;
	}

	/* Join the DLM lockspace named by cc_name. */
	rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name),
			       &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN);
	if (rc) {
		ocfs2_live_connection_drop(control);
		goto out;
	}

	conn->cc_private = control;
	conn->cc_lockspace = fsdlm;
out:
	return rc;
}
| 818 | |||
| 819 | static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn, | ||
| 820 | int hangup_pending) | ||
| 821 | { | ||
| 822 | dlm_release_lockspace(conn->cc_lockspace, 2); | ||
| 823 | conn->cc_lockspace = NULL; | ||
| 824 | ocfs2_live_connection_drop(conn->cc_private); | ||
| 825 | conn->cc_private = NULL; | ||
| 826 | return 0; | ||
| 827 | } | ||
| 828 | |||
/* Fetch our node number from the control device; negative rc on error. */
static int user_cluster_this_node(unsigned int *this_node)
{
	int node = ocfs2_control_get_this_node();

	if (node < 0)
		return node;

	*this_node = node;
	return 0;
}
| 840 | |||
/* Operations vector the stack glue calls into for the "user" stack. */
static struct ocfs2_stack_operations user_stack_ops = {
	.connect	= user_cluster_connect,
	.disconnect	= user_cluster_disconnect,
	.this_node	= user_cluster_this_node,
	.dlm_lock	= user_dlm_lock,
	.dlm_unlock	= user_dlm_unlock,
	.lock_status	= user_dlm_lock_status,
	.lock_lvb	= user_dlm_lvb,
	.dump_lksb	= user_dlm_dump_lksb,
};
| 851 | |||
/* Plugin descriptor registered with the ocfs2 stack glue. */
static struct ocfs2_stack_plugin user_stack = {
	.sp_name	= "user",
	.sp_ops		= &user_stack_ops,
	.sp_owner	= THIS_MODULE,
};
| 857 | |||
| 858 | |||
| 859 | static int __init user_stack_init(void) | ||
| 860 | { | ||
| 861 | int rc; | ||
| 862 | |||
| 863 | rc = ocfs2_control_init(); | ||
| 864 | if (!rc) { | ||
| 865 | rc = ocfs2_stack_glue_register(&user_stack); | ||
| 866 | if (rc) | ||
| 867 | ocfs2_control_exit(); | ||
| 868 | } | ||
| 869 | |||
| 870 | return rc; | ||
| 871 | } | ||
| 872 | |||
| 873 | static void __exit user_stack_exit(void) | ||
| 874 | { | ||
| 875 | ocfs2_stack_glue_unregister(&user_stack); | ||
| 876 | ocfs2_control_exit(); | ||
| 877 | } | ||
| 878 | |||
/* Module metadata and entry points for the "user" cluster stack plugin. */
MODULE_AUTHOR("Oracle");
MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks");
MODULE_LICENSE("GPL");
module_init(user_stack_init);
module_exit(user_stack_exit);
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c new file mode 100644 index 000000000000..119f60cea9cc --- /dev/null +++ b/fs/ocfs2/stackglue.c | |||
| @@ -0,0 +1,568 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * stackglue.c | ||
| 5 | * | ||
| 6 | * Code which implements an OCFS2 specific interface to underlying | ||
| 7 | * cluster stacks. | ||
| 8 | * | ||
| 9 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 10 | * | ||
| 11 | * This program is free software; you can redistribute it and/or | ||
| 12 | * modify it under the terms of the GNU General Public | ||
| 13 | * License as published by the Free Software Foundation, version 2. | ||
| 14 | * | ||
| 15 | * This program is distributed in the hope that it will be useful, | ||
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 18 | * General Public License for more details. | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include <linux/list.h> | ||
| 22 | #include <linux/spinlock.h> | ||
| 23 | #include <linux/module.h> | ||
| 24 | #include <linux/slab.h> | ||
| 25 | #include <linux/kmod.h> | ||
| 26 | #include <linux/fs.h> | ||
| 27 | #include <linux/kobject.h> | ||
| 28 | #include <linux/sysfs.h> | ||
| 29 | |||
| 30 | #include "ocfs2_fs.h" | ||
| 31 | |||
| 32 | #include "stackglue.h" | ||
| 33 | |||
#define OCFS2_STACK_PLUGIN_O2CB "o2cb"
#define OCFS2_STACK_PLUGIN_USER "user"

/* Locking protocol registered by the filesystem; handed to all plugins. */
static struct ocfs2_locking_protocol *lproto;
/* Protects the plugin list, active_stack, lproto, and cluster_stack_name. */
static DEFINE_SPINLOCK(ocfs2_stack_lock);
/* All currently registered cluster stack plugins. */
static LIST_HEAD(ocfs2_stack_list);
/* Stack selected via sysfs; initialized to "o2cb" at module init. */
static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1];

/*
 * The stack currently in use.  If not null, active_stack->sp_count > 0,
 * the module is pinned, and the locking protocol cannot be changed.
 */
static struct ocfs2_stack_plugin *active_stack;
| 47 | |||
| 48 | static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) | ||
| 49 | { | ||
| 50 | struct ocfs2_stack_plugin *p; | ||
| 51 | |||
| 52 | assert_spin_locked(&ocfs2_stack_lock); | ||
| 53 | |||
| 54 | list_for_each_entry(p, &ocfs2_stack_list, sp_list) { | ||
| 55 | if (!strcmp(p->sp_name, name)) | ||
| 56 | return p; | ||
| 57 | } | ||
| 58 | |||
| 59 | return NULL; | ||
| 60 | } | ||
| 61 | |||
| 62 | static int ocfs2_stack_driver_request(const char *stack_name, | ||
| 63 | const char *plugin_name) | ||
| 64 | { | ||
| 65 | int rc; | ||
| 66 | struct ocfs2_stack_plugin *p; | ||
| 67 | |||
| 68 | spin_lock(&ocfs2_stack_lock); | ||
| 69 | |||
| 70 | /* | ||
| 71 | * If the stack passed by the filesystem isn't the selected one, | ||
| 72 | * we can't continue. | ||
| 73 | */ | ||
| 74 | if (strcmp(stack_name, cluster_stack_name)) { | ||
| 75 | rc = -EBUSY; | ||
| 76 | goto out; | ||
| 77 | } | ||
| 78 | |||
| 79 | if (active_stack) { | ||
| 80 | /* | ||
| 81 | * If the active stack isn't the one we want, it cannot | ||
| 82 | * be selected right now. | ||
| 83 | */ | ||
| 84 | if (!strcmp(active_stack->sp_name, plugin_name)) | ||
| 85 | rc = 0; | ||
| 86 | else | ||
| 87 | rc = -EBUSY; | ||
| 88 | goto out; | ||
| 89 | } | ||
| 90 | |||
| 91 | p = ocfs2_stack_lookup(plugin_name); | ||
| 92 | if (!p || !try_module_get(p->sp_owner)) { | ||
| 93 | rc = -ENOENT; | ||
| 94 | goto out; | ||
| 95 | } | ||
| 96 | |||
| 97 | /* Ok, the stack is pinned */ | ||
| 98 | p->sp_count++; | ||
| 99 | active_stack = p; | ||
| 100 | |||
| 101 | rc = 0; | ||
| 102 | |||
| 103 | out: | ||
| 104 | spin_unlock(&ocfs2_stack_lock); | ||
| 105 | return rc; | ||
| 106 | } | ||
| 107 | |||
| 108 | /* | ||
| 109 | * This function looks up the appropriate stack and makes it active. If | ||
| 110 | * there is no stack, it tries to load it. It will fail if the stack still | ||
| 111 | * cannot be found. It will also fail if a different stack is in use. | ||
| 112 | */ | ||
| 113 | static int ocfs2_stack_driver_get(const char *stack_name) | ||
| 114 | { | ||
| 115 | int rc; | ||
| 116 | char *plugin_name = OCFS2_STACK_PLUGIN_O2CB; | ||
| 117 | |||
| 118 | /* | ||
| 119 | * Classic stack does not pass in a stack name. This is | ||
| 120 | * compatible with older tools as well. | ||
| 121 | */ | ||
| 122 | if (!stack_name || !*stack_name) | ||
| 123 | stack_name = OCFS2_STACK_PLUGIN_O2CB; | ||
| 124 | |||
| 125 | if (strlen(stack_name) != OCFS2_STACK_LABEL_LEN) { | ||
| 126 | printk(KERN_ERR | ||
| 127 | "ocfs2 passed an invalid cluster stack label: \"%s\"\n", | ||
| 128 | stack_name); | ||
| 129 | return -EINVAL; | ||
| 130 | } | ||
| 131 | |||
| 132 | /* Anything that isn't the classic stack is a user stack */ | ||
| 133 | if (strcmp(stack_name, OCFS2_STACK_PLUGIN_O2CB)) | ||
| 134 | plugin_name = OCFS2_STACK_PLUGIN_USER; | ||
| 135 | |||
| 136 | rc = ocfs2_stack_driver_request(stack_name, plugin_name); | ||
| 137 | if (rc == -ENOENT) { | ||
| 138 | request_module("ocfs2_stack_%s", plugin_name); | ||
| 139 | rc = ocfs2_stack_driver_request(stack_name, plugin_name); | ||
| 140 | } | ||
| 141 | |||
| 142 | if (rc == -ENOENT) { | ||
| 143 | printk(KERN_ERR | ||
| 144 | "ocfs2: Cluster stack driver \"%s\" cannot be found\n", | ||
| 145 | plugin_name); | ||
| 146 | } else if (rc == -EBUSY) { | ||
| 147 | printk(KERN_ERR | ||
| 148 | "ocfs2: A different cluster stack is in use\n"); | ||
| 149 | } | ||
| 150 | |||
| 151 | return rc; | ||
| 152 | } | ||
| 153 | |||
| 154 | static void ocfs2_stack_driver_put(void) | ||
| 155 | { | ||
| 156 | spin_lock(&ocfs2_stack_lock); | ||
| 157 | BUG_ON(active_stack == NULL); | ||
| 158 | BUG_ON(active_stack->sp_count == 0); | ||
| 159 | |||
| 160 | active_stack->sp_count--; | ||
| 161 | if (!active_stack->sp_count) { | ||
| 162 | module_put(active_stack->sp_owner); | ||
| 163 | active_stack = NULL; | ||
| 164 | } | ||
| 165 | spin_unlock(&ocfs2_stack_lock); | ||
| 166 | } | ||
| 167 | |||
| 168 | int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin) | ||
| 169 | { | ||
| 170 | int rc; | ||
| 171 | |||
| 172 | spin_lock(&ocfs2_stack_lock); | ||
| 173 | if (!ocfs2_stack_lookup(plugin->sp_name)) { | ||
| 174 | plugin->sp_count = 0; | ||
| 175 | plugin->sp_proto = lproto; | ||
| 176 | list_add(&plugin->sp_list, &ocfs2_stack_list); | ||
| 177 | printk(KERN_INFO "ocfs2: Registered cluster interface %s\n", | ||
| 178 | plugin->sp_name); | ||
| 179 | rc = 0; | ||
| 180 | } else { | ||
| 181 | printk(KERN_ERR "ocfs2: Stack \"%s\" already registered\n", | ||
| 182 | plugin->sp_name); | ||
| 183 | rc = -EEXIST; | ||
| 184 | } | ||
| 185 | spin_unlock(&ocfs2_stack_lock); | ||
| 186 | |||
| 187 | return rc; | ||
| 188 | } | ||
| 189 | EXPORT_SYMBOL_GPL(ocfs2_stack_glue_register); | ||
| 190 | |||
| 191 | void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin) | ||
| 192 | { | ||
| 193 | struct ocfs2_stack_plugin *p; | ||
| 194 | |||
| 195 | spin_lock(&ocfs2_stack_lock); | ||
| 196 | p = ocfs2_stack_lookup(plugin->sp_name); | ||
| 197 | if (p) { | ||
| 198 | BUG_ON(p != plugin); | ||
| 199 | BUG_ON(plugin == active_stack); | ||
| 200 | BUG_ON(plugin->sp_count != 0); | ||
| 201 | list_del_init(&plugin->sp_list); | ||
| 202 | printk(KERN_INFO "ocfs2: Unregistered cluster interface %s\n", | ||
| 203 | plugin->sp_name); | ||
| 204 | } else { | ||
| 205 | printk(KERN_ERR "Stack \"%s\" is not registered\n", | ||
| 206 | plugin->sp_name); | ||
| 207 | } | ||
| 208 | spin_unlock(&ocfs2_stack_lock); | ||
| 209 | } | ||
| 210 | EXPORT_SYMBOL_GPL(ocfs2_stack_glue_unregister); | ||
| 211 | |||
| 212 | void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) | ||
| 213 | { | ||
| 214 | struct ocfs2_stack_plugin *p; | ||
| 215 | |||
| 216 | BUG_ON(proto == NULL); | ||
| 217 | |||
| 218 | spin_lock(&ocfs2_stack_lock); | ||
| 219 | BUG_ON(active_stack != NULL); | ||
| 220 | |||
| 221 | lproto = proto; | ||
| 222 | list_for_each_entry(p, &ocfs2_stack_list, sp_list) { | ||
| 223 | p->sp_proto = lproto; | ||
| 224 | } | ||
| 225 | |||
| 226 | spin_unlock(&ocfs2_stack_lock); | ||
| 227 | } | ||
| 228 | EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_locking_protocol); | ||
| 229 | |||
| 230 | |||
/*
 * The ocfs2_dlm_lock() and ocfs2_dlm_unlock() functions take
 * "struct ocfs2_lock_res *astarg" instead of "void *astarg" because the
 * underlying stack plugins need to pilfer the lksb off of the lock_res.
 * If some other structure needs to be passed as an astarg, the plugins
 * will need to be given a different avenue to the lksb.
 */
int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn,
		   int mode,
		   union ocfs2_dlm_lksb *lksb,
		   u32 flags,
		   void *name,
		   unsigned int namelen,
		   struct ocfs2_lock_res *astarg)
{
	/* A locking protocol must be registered before taking any locks. */
	BUG_ON(lproto == NULL);

	return active_stack->sp_ops->dlm_lock(conn, mode, lksb, flags,
					      name, namelen, astarg);
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_lock);
| 252 | |||
/* Release a lock through the active stack; see ocfs2_dlm_lock() above. */
int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn,
		     union ocfs2_dlm_lksb *lksb,
		     u32 flags,
		     struct ocfs2_lock_res *astarg)
{
	/* A locking protocol must be registered before unlocking. */
	BUG_ON(lproto == NULL);

	return active_stack->sp_ops->dlm_unlock(conn, lksb, flags, astarg);
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_unlock);
| 263 | |||
/* Return the stack-specific status of the last operation on this lksb. */
int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
{
	return active_stack->sp_ops->lock_status(lksb);
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status);
| 269 | |||
/*
 * Why don't we cast to ocfs2_meta_lvb?  The "clean" answer is that we
 * don't cast at the glue level.  The real answer is that the header
 * ordering is nigh impossible.
 */
void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb)
{
	return active_stack->sp_ops->lock_lvb(lksb);
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb);
| 280 | |||
/* Ask the active stack to log whatever it knows about this lksb. */
void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
{
	active_stack->sp_ops->dump_lksb(lksb);
}
EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb);
| 286 | |||
/*
 * Create a cluster connection for the group (mount) named by 'group',
 * pinning the configured stack driver and asking it to connect.  On
 * success *conn points at the new connection; on failure everything
 * allocated or pinned here is released again.
 */
int ocfs2_cluster_connect(const char *stack_name,
			  const char *group,
			  int grouplen,
			  void (*recovery_handler)(int node_num,
						   void *recovery_data),
			  void *recovery_data,
			  struct ocfs2_cluster_connection **conn)
{
	int rc = 0;
	struct ocfs2_cluster_connection *new_conn;

	BUG_ON(group == NULL);
	BUG_ON(conn == NULL);
	BUG_ON(recovery_handler == NULL);

	/* cc_name can hold at most GROUP_NAME_MAX characters. */
	if (grouplen > GROUP_NAME_MAX) {
		rc = -EINVAL;
		goto out;
	}

	new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection),
			   GFP_KERNEL);
	if (!new_conn) {
		rc = -ENOMEM;
		goto out;
	}

	memcpy(new_conn->cc_name, group, grouplen);
	new_conn->cc_namelen = grouplen;
	new_conn->cc_recovery_handler = recovery_handler;
	new_conn->cc_recovery_data = recovery_data;

	/* Start the new connection at our maximum compatibility level */
	new_conn->cc_version = lproto->lp_max_version;

	/* This will pin the stack driver if successful */
	rc = ocfs2_stack_driver_get(stack_name);
	if (rc)
		goto out_free;

	rc = active_stack->sp_ops->connect(new_conn);
	if (rc) {
		/* Undo the pin taken by ocfs2_stack_driver_get(). */
		ocfs2_stack_driver_put();
		goto out_free;
	}

	*conn = new_conn;

out_free:
	if (rc)
		kfree(new_conn);

out:
	return rc;
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_connect);
| 343 | |||
| 344 | /* If hangup_pending is 0, the stack driver will be dropped */ | ||
| 345 | int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, | ||
| 346 | int hangup_pending) | ||
| 347 | { | ||
| 348 | int ret; | ||
| 349 | |||
| 350 | BUG_ON(conn == NULL); | ||
| 351 | |||
| 352 | ret = active_stack->sp_ops->disconnect(conn, hangup_pending); | ||
| 353 | |||
| 354 | /* XXX Should we free it anyway? */ | ||
| 355 | if (!ret) { | ||
| 356 | kfree(conn); | ||
| 357 | if (!hangup_pending) | ||
| 358 | ocfs2_stack_driver_put(); | ||
| 359 | } | ||
| 360 | |||
| 361 | return ret; | ||
| 362 | } | ||
| 363 | EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect); | ||
| 364 | |||
| 365 | void ocfs2_cluster_hangup(const char *group, int grouplen) | ||
| 366 | { | ||
| 367 | BUG_ON(group == NULL); | ||
| 368 | BUG_ON(group[grouplen] != '\0'); | ||
| 369 | |||
| 370 | if (active_stack->sp_ops->hangup) | ||
| 371 | active_stack->sp_ops->hangup(group, grouplen); | ||
| 372 | |||
| 373 | /* cluster_disconnect() was called with hangup_pending==1 */ | ||
| 374 | ocfs2_stack_driver_put(); | ||
| 375 | } | ||
| 376 | EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup); | ||
| 377 | |||
/* Ask the active stack for our node number; fills *node on success. */
int ocfs2_cluster_this_node(unsigned int *node)
{
	return active_stack->sp_ops->this_node(node);
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node);
| 383 | |||
| 384 | |||
| 385 | /* | ||
| 386 | * Sysfs bits | ||
| 387 | */ | ||
| 388 | |||
| 389 | static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj, | ||
| 390 | struct kobj_attribute *attr, | ||
| 391 | char *buf) | ||
| 392 | { | ||
| 393 | ssize_t ret = 0; | ||
| 394 | |||
| 395 | spin_lock(&ocfs2_stack_lock); | ||
| 396 | if (lproto) | ||
| 397 | ret = snprintf(buf, PAGE_SIZE, "%u.%u\n", | ||
| 398 | lproto->lp_max_version.pv_major, | ||
| 399 | lproto->lp_max_version.pv_minor); | ||
| 400 | spin_unlock(&ocfs2_stack_lock); | ||
| 401 | |||
| 402 | return ret; | ||
| 403 | } | ||
| 404 | |||
| 405 | static struct kobj_attribute ocfs2_attr_max_locking_protocol = | ||
| 406 | __ATTR(max_locking_protocol, S_IFREG | S_IRUGO, | ||
| 407 | ocfs2_max_locking_protocol_show, NULL); | ||
| 408 | |||
| 409 | static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, | ||
| 410 | struct kobj_attribute *attr, | ||
| 411 | char *buf) | ||
| 412 | { | ||
| 413 | ssize_t ret = 0, total = 0, remain = PAGE_SIZE; | ||
| 414 | struct ocfs2_stack_plugin *p; | ||
| 415 | |||
| 416 | spin_lock(&ocfs2_stack_lock); | ||
| 417 | list_for_each_entry(p, &ocfs2_stack_list, sp_list) { | ||
| 418 | ret = snprintf(buf, remain, "%s\n", | ||
| 419 | p->sp_name); | ||
| 420 | if (ret < 0) { | ||
| 421 | total = ret; | ||
| 422 | break; | ||
| 423 | } | ||
| 424 | if (ret == remain) { | ||
| 425 | /* snprintf() didn't fit */ | ||
| 426 | total = -E2BIG; | ||
| 427 | break; | ||
| 428 | } | ||
| 429 | total += ret; | ||
| 430 | remain -= ret; | ||
| 431 | } | ||
| 432 | spin_unlock(&ocfs2_stack_lock); | ||
| 433 | |||
| 434 | return total; | ||
| 435 | } | ||
| 436 | |||
| 437 | static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = | ||
| 438 | __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO, | ||
| 439 | ocfs2_loaded_cluster_plugins_show, NULL); | ||
| 440 | |||
| 441 | static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, | ||
| 442 | struct kobj_attribute *attr, | ||
| 443 | char *buf) | ||
| 444 | { | ||
| 445 | ssize_t ret = 0; | ||
| 446 | |||
| 447 | spin_lock(&ocfs2_stack_lock); | ||
| 448 | if (active_stack) { | ||
| 449 | ret = snprintf(buf, PAGE_SIZE, "%s\n", | ||
| 450 | active_stack->sp_name); | ||
| 451 | if (ret == PAGE_SIZE) | ||
| 452 | ret = -E2BIG; | ||
| 453 | } | ||
| 454 | spin_unlock(&ocfs2_stack_lock); | ||
| 455 | |||
| 456 | return ret; | ||
| 457 | } | ||
| 458 | |||
| 459 | static struct kobj_attribute ocfs2_attr_active_cluster_plugin = | ||
| 460 | __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO, | ||
| 461 | ocfs2_active_cluster_plugin_show, NULL); | ||
| 462 | |||
| 463 | static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, | ||
| 464 | struct kobj_attribute *attr, | ||
| 465 | char *buf) | ||
| 466 | { | ||
| 467 | ssize_t ret; | ||
| 468 | spin_lock(&ocfs2_stack_lock); | ||
| 469 | ret = snprintf(buf, PAGE_SIZE, "%s\n", cluster_stack_name); | ||
| 470 | spin_unlock(&ocfs2_stack_lock); | ||
| 471 | |||
| 472 | return ret; | ||
| 473 | } | ||
| 474 | |||
/*
 * sysfs: select the cluster stack by writing its exact 4-character label.
 * Once a stack is active the name may only be re-written unchanged.
 */
static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj,
					 struct kobj_attribute *attr,
					 const char *buf, size_t count)
{
	size_t len = count;
	ssize_t ret;

	if (len == 0)
		return len;

	/* Tolerate a trailing newline (e.g. from "echo"). */
	if (buf[len - 1] == '\n')
		len--;

	/* Labels are exactly OCFS2_STACK_LABEL_LEN chars, no embedded NULs. */
	if ((len != OCFS2_STACK_LABEL_LEN) ||
	    (strnlen(buf, len) != len))
		return -EINVAL;

	spin_lock(&ocfs2_stack_lock);
	if (active_stack) {
		/* A pinned stack cannot be changed, only confirmed. */
		if (!strncmp(buf, cluster_stack_name, len))
			ret = count;
		else
			ret = -EBUSY;
	} else {
		/*
		 * NOTE(review): no NUL is written here.  This is safe only
		 * because len always equals OCFS2_STACK_LABEL_LEN, so the
		 * terminator placed at init time is never overwritten.
		 */
		memcpy(cluster_stack_name, buf, len);
		ret = count;
	}
	spin_unlock(&ocfs2_stack_lock);

	return ret;
}


static struct kobj_attribute ocfs2_attr_cluster_stack =
	__ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR,
	       ocfs2_cluster_stack_show,
	       ocfs2_cluster_stack_store);
| 512 | |||
/* All attributes published by the stack glue under the ocfs2 kset. */
static struct attribute *ocfs2_attrs[] = {
	&ocfs2_attr_max_locking_protocol.attr,
	&ocfs2_attr_loaded_cluster_plugins.attr,
	&ocfs2_attr_active_cluster_plugin.attr,
	&ocfs2_attr_cluster_stack.attr,
	NULL,
};

static struct attribute_group ocfs2_attr_group = {
	.attrs = ocfs2_attrs,
};
| 524 | |||
/* Parent kset for the glue's sysfs files, created under fs_kobj. */
static struct kset *ocfs2_kset;

static void ocfs2_sysfs_exit(void)
{
	/* Unregistering the kset removes the attribute files with it. */
	kset_unregister(ocfs2_kset);
}
| 531 | |||
| 532 | static int ocfs2_sysfs_init(void) | ||
| 533 | { | ||
| 534 | int ret; | ||
| 535 | |||
| 536 | ocfs2_kset = kset_create_and_add("ocfs2", NULL, fs_kobj); | ||
| 537 | if (!ocfs2_kset) | ||
| 538 | return -ENOMEM; | ||
| 539 | |||
| 540 | ret = sysfs_create_group(&ocfs2_kset->kobj, &ocfs2_attr_group); | ||
| 541 | if (ret) | ||
| 542 | goto error; | ||
| 543 | |||
| 544 | return 0; | ||
| 545 | |||
| 546 | error: | ||
| 547 | kset_unregister(ocfs2_kset); | ||
| 548 | return ret; | ||
| 549 | } | ||
| 550 | |||
| 551 | static int __init ocfs2_stack_glue_init(void) | ||
| 552 | { | ||
| 553 | strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); | ||
| 554 | |||
| 555 | return ocfs2_sysfs_init(); | ||
| 556 | } | ||
| 557 | |||
| 558 | static void __exit ocfs2_stack_glue_exit(void) | ||
| 559 | { | ||
| 560 | lproto = NULL; | ||
| 561 | ocfs2_sysfs_exit(); | ||
| 562 | } | ||
| 563 | |||
| 564 | MODULE_AUTHOR("Oracle"); | ||
| 565 | MODULE_DESCRIPTION("ocfs2 cluster stack glue layer"); | ||
| 566 | MODULE_LICENSE("GPL"); | ||
| 567 | module_init(ocfs2_stack_glue_init); | ||
| 568 | module_exit(ocfs2_stack_glue_exit); | ||
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h new file mode 100644 index 000000000000..005e4f170e0f --- /dev/null +++ b/fs/ocfs2/stackglue.h | |||
| @@ -0,0 +1,261 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * stackglue.h | ||
| 5 | * | ||
| 6 | * Glue to the underlying cluster stack. | ||
| 7 | * | ||
| 8 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or | ||
| 11 | * modify it under the terms of the GNU General Public | ||
| 12 | * License as published by the Free Software Foundation, version 2. | ||
| 13 | * | ||
| 14 | * This program is distributed in the hope that it will be useful, | ||
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 17 | * General Public License for more details. | ||
| 18 | */ | ||
| 19 | |||
| 20 | |||
| 21 | #ifndef STACKGLUE_H | ||
| 22 | #define STACKGLUE_H | ||
| 23 | |||
| 24 | #include <linux/types.h> | ||
| 25 | #include <linux/list.h> | ||
| 26 | #include <linux/dlmconstants.h> | ||
| 27 | |||
| 28 | #include "dlm/dlmapi.h" | ||
| 29 | #include <linux/dlm.h> | ||
| 30 | |||
| 31 | /* | ||
| 32 | * dlmconstants.h does not have a LOCAL flag. We hope to remove it | ||
| 33 | * some day, but right now we need it. Let's fake it. This value is larger | ||
| 34 | * than any flag in dlmconstants.h. | ||
| 35 | */ | ||
| 36 | #define DLM_LKF_LOCAL 0x00100000 | ||
| 37 | |||
| 38 | /* | ||
| 39 | * This shadows DLM_LOCKSPACE_LEN in fs/dlm/dlm_internal.h. That probably | ||
| 40 | * wants to be in a public header. | ||
| 41 | */ | ||
| 42 | #define GROUP_NAME_MAX 64 | ||
| 43 | |||
| 44 | |||
| 45 | /* | ||
| 46 | * ocfs2_protocol_version changes when ocfs2 does something different in | ||
| 47 | * its inter-node behavior. See dlmglue.c for more information. | ||
| 48 | */ | ||
| 49 | struct ocfs2_protocol_version { | ||
| 50 | u8 pv_major; | ||
| 51 | u8 pv_minor; | ||
| 52 | }; | ||
| 53 | |||
| 54 | /* | ||
| 55 | * The ocfs2_locking_protocol defines the handlers called on ocfs2's behalf. | ||
| 56 | */ | ||
| 57 | struct ocfs2_locking_protocol { | ||
| 58 | struct ocfs2_protocol_version lp_max_version; | ||
| 59 | void (*lp_lock_ast)(void *astarg); | ||
| 60 | void (*lp_blocking_ast)(void *astarg, int level); | ||
| 61 | void (*lp_unlock_ast)(void *astarg, int error); | ||
| 62 | }; | ||
| 63 | |||
| 64 | |||
| 65 | /* | ||
| 66 | * The dlm_lockstatus struct includes lvb space, but the dlm_lksb struct only | ||
| 67 | * has a pointer to separately allocated lvb space. This struct exists only to | ||
| 68 | * include in the lksb union to make space for a combined dlm_lksb and lvb. | ||
| 69 | */ | ||
| 70 | struct fsdlm_lksb_plus_lvb { | ||
| 71 | struct dlm_lksb lksb; | ||
| 72 | char lvb[DLM_LVB_LEN]; | ||
| 73 | }; | ||
| 74 | |||
| 75 | /* | ||
| 76 | * A union of all lock status structures. We define it here so that the | ||
| 77 | * size of the union is known. Lock status structures are embedded in | ||
| 78 | * ocfs2 inodes. | ||
| 79 | */ | ||
| 80 | union ocfs2_dlm_lksb { | ||
| 81 | struct dlm_lockstatus lksb_o2dlm; | ||
| 82 | struct dlm_lksb lksb_fsdlm; | ||
| 83 | struct fsdlm_lksb_plus_lvb padding; | ||
| 84 | }; | ||
| 85 | |||
| 86 | /* | ||
| 87 | * A cluster connection. Mostly opaque to ocfs2, the connection holds | ||
| 88 | * state for the underlying stack. ocfs2 does use cc_version to determine | ||
| 89 | * locking compatibility. | ||
| 90 | */ | ||
| 91 | struct ocfs2_cluster_connection { | ||
| 92 | char cc_name[GROUP_NAME_MAX]; | ||
| 93 | int cc_namelen; | ||
| 94 | struct ocfs2_protocol_version cc_version; | ||
| 95 | void (*cc_recovery_handler)(int node_num, void *recovery_data); | ||
| 96 | void *cc_recovery_data; | ||
| 97 | void *cc_lockspace; | ||
| 98 | void *cc_private; | ||
| 99 | }; | ||
| 100 | |||
| 101 | /* | ||
| 102 | * Each cluster stack implements the stack operations structure. Not used | ||
| 103 | * in the ocfs2 code, the stackglue code translates generic cluster calls | ||
| 104 | * into stack operations. | ||
| 105 | */ | ||
| 106 | struct ocfs2_stack_operations { | ||
| 107 | /* | ||
| 108 | * The fs code calls ocfs2_cluster_connect() to attach a new | ||
| 109 | * filesystem to the cluster stack. The ->connect() op is passed | ||
| 110 | * an ocfs2_cluster_connection with the name and recovery field | ||
| 111 | * filled in. | ||
| 112 | * | ||
| 113 | * The stack must set up any notification mechanisms and create | ||
| 114 | * the filesystem lockspace in the DLM. The lockspace should be | ||
| 115 | * stored on cc_lockspace. Any other information can be stored on | ||
| 116 | * cc_private. | ||
| 117 | * | ||
| 118 | * ->connect() must not return until it is guaranteed that | ||
| 119 | * | ||
| 120 | * - Node down notifications for the filesystem will be received | ||
| 121 | * and passed to conn->cc_recovery_handler(). | ||
| 122 | * - Locking requests for the filesystem will be processed. | ||
| 123 | */ | ||
| 124 | int (*connect)(struct ocfs2_cluster_connection *conn); | ||
| 125 | |||
| 126 | /* | ||
| 127 | * The fs code calls ocfs2_cluster_disconnect() when a filesystem | ||
| 128 | * no longer needs cluster services. All DLM locks have been | ||
| 129 | * dropped, and recovery notification is being ignored by the | ||
| 130 | * fs code. The stack must disengage from the DLM and discontinue | ||
| 131 | * recovery notification. | ||
| 132 | * | ||
| 133 | * Once ->disconnect() has returned, the connection structure will | ||
| 134 | * be freed. Thus, a stack must not return from ->disconnect() | ||
| 135 | * until it will no longer reference the conn pointer. | ||
| 136 | * | ||
| 137 | * If hangup_pending is zero, ocfs2_cluster_disconnect() will also | ||
| 138 | * be dropping the reference on the module. | ||
| 139 | */ | ||
| 140 | int (*disconnect)(struct ocfs2_cluster_connection *conn, | ||
| 141 | int hangup_pending); | ||
| 142 | |||
| 143 | /* | ||
| 144 | * ocfs2_cluster_hangup() exists for compatibility with older | ||
| 145 | * ocfs2 tools. Only the classic stack really needs it. As such | ||
| 146 | * ->hangup() is not required of all stacks. See the comment by | ||
| 147 | * ocfs2_cluster_hangup() for more details. | ||
| 148 | * | ||
| 149 | * Note that ocfs2_cluster_hangup() can only be called if | ||
| 150 | * hangup_pending was passed to ocfs2_cluster_disconnect(). | ||
| 151 | */ | ||
| 152 | void (*hangup)(const char *group, int grouplen); | ||
| 153 | |||
| 154 | /* | ||
| 155 | * ->this_node() returns the cluster's unique identifier for the | ||
| 156 | * local node. | ||
| 157 | */ | ||
| 158 | int (*this_node)(unsigned int *node); | ||
| 159 | |||
| 160 | /* | ||
| 161 | * Call the underlying dlm lock function. The ->dlm_lock() | ||
| 162 | * callback should convert the flags and mode as appropriate. | ||
| 163 | * | ||
| 164 | * ast and bast functions are not part of the call because the | ||
| 165 | * stack will likely want to wrap ast and bast calls before passing | ||
| 166 | * them to stack->sp_proto. | ||
| 167 | */ | ||
| 168 | int (*dlm_lock)(struct ocfs2_cluster_connection *conn, | ||
| 169 | int mode, | ||
| 170 | union ocfs2_dlm_lksb *lksb, | ||
| 171 | u32 flags, | ||
| 172 | void *name, | ||
| 173 | unsigned int namelen, | ||
| 174 | void *astarg); | ||
| 175 | |||
| 176 | /* | ||
| 177 | * Call the underlying dlm unlock function. The ->dlm_unlock() | ||
| 178 | * function should convert the flags as appropriate. | ||
| 179 | * | ||
| 180 | * The unlock ast is not passed, as the stack will want to wrap | ||
| 181 | * it before calling stack->sp_proto->lp_unlock_ast(). | ||
| 182 | */ | ||
| 183 | int (*dlm_unlock)(struct ocfs2_cluster_connection *conn, | ||
| 184 | union ocfs2_dlm_lksb *lksb, | ||
| 185 | u32 flags, | ||
| 186 | void *astarg); | ||
| 187 | |||
| 188 | /* | ||
| 189 | * Return the status of the current lock status block. The fs | ||
| 190 | * code should never dereference the union. The ->lock_status() | ||
| 191 | * callback pulls out the stack-specific lksb, converts the status | ||
| 192 | * to a proper errno, and returns it. | ||
| 193 | */ | ||
| 194 | int (*lock_status)(union ocfs2_dlm_lksb *lksb); | ||
| 195 | |||
| 196 | /* | ||
| 197 | * Pull the lvb pointer off of the stack-specific lksb. | ||
| 198 | */ | ||
| 199 | void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); | ||
| 200 | |||
| 201 | /* | ||
| 202 | * This is an optional debugging hook. If provided, the | ||
| 203 | * stack can dump debugging information about this lock. | ||
| 204 | */ | ||
| 205 | void (*dump_lksb)(union ocfs2_dlm_lksb *lksb); | ||
| 206 | }; | ||
| 207 | |||
| 208 | /* | ||
| 209 | * Each stack plugin must describe itself by registering a | ||
| 210 | * ocfs2_stack_plugin structure. This is only seen by stackglue and the | ||
| 211 | * stack driver. | ||
| 212 | */ | ||
| 213 | struct ocfs2_stack_plugin { | ||
| 214 | char *sp_name; | ||
| 215 | struct ocfs2_stack_operations *sp_ops; | ||
| 216 | struct module *sp_owner; | ||
| 217 | |||
| 218 | /* These are managed by the stackglue code. */ | ||
| 219 | struct list_head sp_list; | ||
| 220 | unsigned int sp_count; | ||
| 221 | struct ocfs2_locking_protocol *sp_proto; | ||
| 222 | }; | ||
| 223 | |||
| 224 | |||
| 225 | /* Used by the filesystem */ | ||
| 226 | int ocfs2_cluster_connect(const char *stack_name, | ||
| 227 | const char *group, | ||
| 228 | int grouplen, | ||
| 229 | void (*recovery_handler)(int node_num, | ||
| 230 | void *recovery_data), | ||
| 231 | void *recovery_data, | ||
| 232 | struct ocfs2_cluster_connection **conn); | ||
| 233 | int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, | ||
| 234 | int hangup_pending); | ||
| 235 | void ocfs2_cluster_hangup(const char *group, int grouplen); | ||
| 236 | int ocfs2_cluster_this_node(unsigned int *node); | ||
| 237 | |||
| 238 | struct ocfs2_lock_res; | ||
| 239 | int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, | ||
| 240 | int mode, | ||
| 241 | union ocfs2_dlm_lksb *lksb, | ||
| 242 | u32 flags, | ||
| 243 | void *name, | ||
| 244 | unsigned int namelen, | ||
| 245 | struct ocfs2_lock_res *astarg); | ||
| 246 | int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, | ||
| 247 | union ocfs2_dlm_lksb *lksb, | ||
| 248 | u32 flags, | ||
| 249 | struct ocfs2_lock_res *astarg); | ||
| 250 | |||
| 251 | int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb); | ||
| 252 | void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); | ||
| 253 | void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); | ||
| 254 | |||
| 255 | void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto); | ||
| 256 | |||
| 257 | |||
| 258 | /* Used by stack plugins */ | ||
| 259 | int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); | ||
| 260 | void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); | ||
| 261 | #endif /* STACKGLUE_H */ | ||
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 72c198a004df..d2d278fb9819 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
| @@ -46,6 +46,11 @@ | |||
| 46 | 46 | ||
| 47 | #include "buffer_head_io.h" | 47 | #include "buffer_head_io.h" |
| 48 | 48 | ||
| 49 | #define NOT_ALLOC_NEW_GROUP 0 | ||
| 50 | #define ALLOC_NEW_GROUP 1 | ||
| 51 | |||
| 52 | #define OCFS2_MAX_INODES_TO_STEAL 1024 | ||
| 53 | |||
| 49 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); | 54 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); |
| 50 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); | 55 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); |
| 51 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); | 56 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); |
| @@ -106,7 +111,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode, | |||
| 106 | u64 *bg_blkno, | 111 | u64 *bg_blkno, |
| 107 | u16 *bg_bit_off); | 112 | u16 *bg_bit_off); |
| 108 | 113 | ||
| 109 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | 114 | static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) |
| 110 | { | 115 | { |
| 111 | struct inode *inode = ac->ac_inode; | 116 | struct inode *inode = ac->ac_inode; |
| 112 | 117 | ||
| @@ -117,9 +122,17 @@ void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | |||
| 117 | mutex_unlock(&inode->i_mutex); | 122 | mutex_unlock(&inode->i_mutex); |
| 118 | 123 | ||
| 119 | iput(inode); | 124 | iput(inode); |
| 125 | ac->ac_inode = NULL; | ||
| 120 | } | 126 | } |
| 121 | if (ac->ac_bh) | 127 | if (ac->ac_bh) { |
| 122 | brelse(ac->ac_bh); | 128 | brelse(ac->ac_bh); |
| 129 | ac->ac_bh = NULL; | ||
| 130 | } | ||
| 131 | } | ||
| 132 | |||
| 133 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | ||
| 134 | { | ||
| 135 | ocfs2_free_ac_resource(ac); | ||
| 123 | kfree(ac); | 136 | kfree(ac); |
| 124 | } | 137 | } |
| 125 | 138 | ||
| @@ -391,7 +404,8 @@ bail: | |||
| 391 | static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | 404 | static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, |
| 392 | struct ocfs2_alloc_context *ac, | 405 | struct ocfs2_alloc_context *ac, |
| 393 | int type, | 406 | int type, |
| 394 | u32 slot) | 407 | u32 slot, |
| 408 | int alloc_new_group) | ||
| 395 | { | 409 | { |
| 396 | int status; | 410 | int status; |
| 397 | u32 bits_wanted = ac->ac_bits_wanted; | 411 | u32 bits_wanted = ac->ac_bits_wanted; |
| @@ -420,6 +434,7 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
| 420 | } | 434 | } |
| 421 | 435 | ||
| 422 | ac->ac_inode = alloc_inode; | 436 | ac->ac_inode = alloc_inode; |
| 437 | ac->ac_alloc_slot = slot; | ||
| 423 | 438 | ||
| 424 | fe = (struct ocfs2_dinode *) bh->b_data; | 439 | fe = (struct ocfs2_dinode *) bh->b_data; |
| 425 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 440 | if (!OCFS2_IS_VALID_DINODE(fe)) { |
| @@ -446,6 +461,14 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
| 446 | goto bail; | 461 | goto bail; |
| 447 | } | 462 | } |
| 448 | 463 | ||
| 464 | if (alloc_new_group != ALLOC_NEW_GROUP) { | ||
| 465 | mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, " | ||
| 466 | "and we don't alloc a new group for it.\n", | ||
| 467 | slot, bits_wanted, free_bits); | ||
| 468 | status = -ENOSPC; | ||
| 469 | goto bail; | ||
| 470 | } | ||
| 471 | |||
| 449 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh); | 472 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh); |
| 450 | if (status < 0) { | 473 | if (status < 0) { |
| 451 | if (status != -ENOSPC) | 474 | if (status != -ENOSPC) |
| @@ -490,7 +513,8 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | |||
| 490 | (*ac)->ac_group_search = ocfs2_block_group_search; | 513 | (*ac)->ac_group_search = ocfs2_block_group_search; |
| 491 | 514 | ||
| 492 | status = ocfs2_reserve_suballoc_bits(osb, (*ac), | 515 | status = ocfs2_reserve_suballoc_bits(osb, (*ac), |
| 493 | EXTENT_ALLOC_SYSTEM_INODE, slot); | 516 | EXTENT_ALLOC_SYSTEM_INODE, |
| 517 | slot, ALLOC_NEW_GROUP); | ||
| 494 | if (status < 0) { | 518 | if (status < 0) { |
| 495 | if (status != -ENOSPC) | 519 | if (status != -ENOSPC) |
| 496 | mlog_errno(status); | 520 | mlog_errno(status); |
| @@ -508,10 +532,42 @@ bail: | |||
| 508 | return status; | 532 | return status; |
| 509 | } | 533 | } |
| 510 | 534 | ||
| 535 | static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, | ||
| 536 | struct ocfs2_alloc_context *ac) | ||
| 537 | { | ||
| 538 | int i, status = -ENOSPC; | ||
| 539 | s16 slot = ocfs2_get_inode_steal_slot(osb); | ||
| 540 | |||
| 541 | /* Start to steal inodes from the first slot after ours. */ | ||
| 542 | if (slot == OCFS2_INVALID_SLOT) | ||
| 543 | slot = osb->slot_num + 1; | ||
| 544 | |||
| 545 | for (i = 0; i < osb->max_slots; i++, slot++) { | ||
| 546 | if (slot == osb->max_slots) | ||
| 547 | slot = 0; | ||
| 548 | |||
| 549 | if (slot == osb->slot_num) | ||
| 550 | continue; | ||
| 551 | |||
| 552 | status = ocfs2_reserve_suballoc_bits(osb, ac, | ||
| 553 | INODE_ALLOC_SYSTEM_INODE, | ||
| 554 | slot, NOT_ALLOC_NEW_GROUP); | ||
| 555 | if (status >= 0) { | ||
| 556 | ocfs2_set_inode_steal_slot(osb, slot); | ||
| 557 | break; | ||
| 558 | } | ||
| 559 | |||
| 560 | ocfs2_free_ac_resource(ac); | ||
| 561 | } | ||
| 562 | |||
| 563 | return status; | ||
| 564 | } | ||
| 565 | |||
| 511 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | 566 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, |
| 512 | struct ocfs2_alloc_context **ac) | 567 | struct ocfs2_alloc_context **ac) |
| 513 | { | 568 | { |
| 514 | int status; | 569 | int status; |
| 570 | s16 slot = ocfs2_get_inode_steal_slot(osb); | ||
| 515 | 571 | ||
| 516 | *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); | 572 | *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); |
| 517 | if (!(*ac)) { | 573 | if (!(*ac)) { |
| @@ -525,9 +581,43 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | |||
| 525 | 581 | ||
| 526 | (*ac)->ac_group_search = ocfs2_block_group_search; | 582 | (*ac)->ac_group_search = ocfs2_block_group_search; |
| 527 | 583 | ||
| 584 | /* | ||
| 585 | * slot is set when we successfully steal inode from other nodes. | ||
| 586 | * It is reset in 3 places: | ||
| 587 | * 1. when we flush the truncate log | ||
| 588 | * 2. when we complete local alloc recovery. | ||
| 589 | * 3. when we successfully allocate from our own slot. | ||
| 590 | * After it is set, we will go on stealing inodes until we find the | ||
| 591 | * need to check our slots to see whether there is some space for us. | ||
| 592 | */ | ||
| 593 | if (slot != OCFS2_INVALID_SLOT && | ||
| 594 | atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL) | ||
| 595 | goto inode_steal; | ||
| 596 | |||
| 597 | atomic_set(&osb->s_num_inodes_stolen, 0); | ||
| 528 | status = ocfs2_reserve_suballoc_bits(osb, *ac, | 598 | status = ocfs2_reserve_suballoc_bits(osb, *ac, |
| 529 | INODE_ALLOC_SYSTEM_INODE, | 599 | INODE_ALLOC_SYSTEM_INODE, |
| 530 | osb->slot_num); | 600 | osb->slot_num, ALLOC_NEW_GROUP); |
| 601 | if (status >= 0) { | ||
| 602 | status = 0; | ||
| 603 | |||
| 604 | /* | ||
| 605 | * Some inodes must be freed by us, so try to allocate | ||
| 606 | * from our own next time. | ||
| 607 | */ | ||
| 608 | if (slot != OCFS2_INVALID_SLOT) | ||
| 609 | ocfs2_init_inode_steal_slot(osb); | ||
| 610 | goto bail; | ||
| 611 | } else if (status < 0 && status != -ENOSPC) { | ||
| 612 | mlog_errno(status); | ||
| 613 | goto bail; | ||
| 614 | } | ||
| 615 | |||
| 616 | ocfs2_free_ac_resource(*ac); | ||
| 617 | |||
| 618 | inode_steal: | ||
| 619 | status = ocfs2_steal_inode_from_other_nodes(osb, *ac); | ||
| 620 | atomic_inc(&osb->s_num_inodes_stolen); | ||
| 531 | if (status < 0) { | 621 | if (status < 0) { |
| 532 | if (status != -ENOSPC) | 622 | if (status != -ENOSPC) |
| 533 | mlog_errno(status); | 623 | mlog_errno(status); |
| @@ -557,7 +647,8 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, | |||
| 557 | 647 | ||
| 558 | status = ocfs2_reserve_suballoc_bits(osb, ac, | 648 | status = ocfs2_reserve_suballoc_bits(osb, ac, |
| 559 | GLOBAL_BITMAP_SYSTEM_INODE, | 649 | GLOBAL_BITMAP_SYSTEM_INODE, |
| 560 | OCFS2_INVALID_SLOT); | 650 | OCFS2_INVALID_SLOT, |
| 651 | ALLOC_NEW_GROUP); | ||
| 561 | if (status < 0 && status != -ENOSPC) { | 652 | if (status < 0 && status != -ENOSPC) { |
| 562 | mlog_errno(status); | 653 | mlog_errno(status); |
| 563 | goto bail; | 654 | goto bail; |
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 8799033bb459..544c600662bd 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
| @@ -36,6 +36,7 @@ typedef int (group_search_t)(struct inode *, | |||
| 36 | struct ocfs2_alloc_context { | 36 | struct ocfs2_alloc_context { |
| 37 | struct inode *ac_inode; /* which bitmap are we allocating from? */ | 37 | struct inode *ac_inode; /* which bitmap are we allocating from? */ |
| 38 | struct buffer_head *ac_bh; /* file entry bh */ | 38 | struct buffer_head *ac_bh; /* file entry bh */ |
| 39 | u32 ac_alloc_slot; /* which slot are we allocating from? */ | ||
| 39 | u32 ac_bits_wanted; | 40 | u32 ac_bits_wanted; |
| 40 | u32 ac_bits_given; | 41 | u32 ac_bits_given; |
| 41 | #define OCFS2_AC_USE_LOCAL 1 | 42 | #define OCFS2_AC_USE_LOCAL 1 |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index bec75aff3d9f..df63ba20ae90 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -40,8 +40,7 @@ | |||
| 40 | #include <linux/crc32.h> | 40 | #include <linux/crc32.h> |
| 41 | #include <linux/debugfs.h> | 41 | #include <linux/debugfs.h> |
| 42 | #include <linux/mount.h> | 42 | #include <linux/mount.h> |
| 43 | 43 | #include <linux/seq_file.h> | |
| 44 | #include <cluster/nodemanager.h> | ||
| 45 | 44 | ||
| 46 | #define MLOG_MASK_PREFIX ML_SUPER | 45 | #define MLOG_MASK_PREFIX ML_SUPER |
| 47 | #include <cluster/masklog.h> | 46 | #include <cluster/masklog.h> |
| @@ -88,6 +87,7 @@ struct mount_options | |||
| 88 | unsigned int atime_quantum; | 87 | unsigned int atime_quantum; |
| 89 | signed short slot; | 88 | signed short slot; |
| 90 | unsigned int localalloc_opt; | 89 | unsigned int localalloc_opt; |
| 90 | char cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; | ||
| 91 | }; | 91 | }; |
| 92 | 92 | ||
| 93 | static int ocfs2_parse_options(struct super_block *sb, char *options, | 93 | static int ocfs2_parse_options(struct super_block *sb, char *options, |
| @@ -109,7 +109,6 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait); | |||
| 109 | static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb); | 109 | static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb); |
| 110 | static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb); | 110 | static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb); |
| 111 | static void ocfs2_release_system_inodes(struct ocfs2_super *osb); | 111 | static void ocfs2_release_system_inodes(struct ocfs2_super *osb); |
| 112 | static int ocfs2_fill_local_node_info(struct ocfs2_super *osb); | ||
| 113 | static int ocfs2_check_volume(struct ocfs2_super *osb); | 112 | static int ocfs2_check_volume(struct ocfs2_super *osb); |
| 114 | static int ocfs2_verify_volume(struct ocfs2_dinode *di, | 113 | static int ocfs2_verify_volume(struct ocfs2_dinode *di, |
| 115 | struct buffer_head *bh, | 114 | struct buffer_head *bh, |
| @@ -154,6 +153,7 @@ enum { | |||
| 154 | Opt_commit, | 153 | Opt_commit, |
| 155 | Opt_localalloc, | 154 | Opt_localalloc, |
| 156 | Opt_localflocks, | 155 | Opt_localflocks, |
| 156 | Opt_stack, | ||
| 157 | Opt_err, | 157 | Opt_err, |
| 158 | }; | 158 | }; |
| 159 | 159 | ||
| @@ -172,6 +172,7 @@ static match_table_t tokens = { | |||
| 172 | {Opt_commit, "commit=%u"}, | 172 | {Opt_commit, "commit=%u"}, |
| 173 | {Opt_localalloc, "localalloc=%d"}, | 173 | {Opt_localalloc, "localalloc=%d"}, |
| 174 | {Opt_localflocks, "localflocks"}, | 174 | {Opt_localflocks, "localflocks"}, |
| 175 | {Opt_stack, "cluster_stack=%s"}, | ||
| 175 | {Opt_err, NULL} | 176 | {Opt_err, NULL} |
| 176 | }; | 177 | }; |
| 177 | 178 | ||
| @@ -551,8 +552,17 @@ static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) | |||
| 551 | } | 552 | } |
| 552 | } | 553 | } |
| 553 | 554 | ||
| 555 | if (ocfs2_userspace_stack(osb)) { | ||
| 556 | if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { | ||
| 557 | mlog(ML_ERROR, "Userspace stack expected, but " | ||
| 558 | "o2cb heartbeat arguments passed to mount\n"); | ||
| 559 | return -EINVAL; | ||
| 560 | } | ||
| 561 | } | ||
| 562 | |||
| 554 | if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { | 563 | if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { |
| 555 | if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb)) { | 564 | if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && |
| 565 | !ocfs2_userspace_stack(osb)) { | ||
| 556 | mlog(ML_ERROR, "Heartbeat has to be started to mount " | 566 | mlog(ML_ERROR, "Heartbeat has to be started to mount " |
| 557 | "a read-write clustered device.\n"); | 567 | "a read-write clustered device.\n"); |
| 558 | return -EINVAL; | 568 | return -EINVAL; |
| @@ -562,6 +572,35 @@ static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) | |||
| 562 | return 0; | 572 | return 0; |
| 563 | } | 573 | } |
| 564 | 574 | ||
| 575 | /* | ||
| 576 | * If we're using a userspace stack, mount should have passed | ||
| 577 | * a name that matches the disk. If not, mount should not | ||
| 578 | * have passed a stack. | ||
| 579 | */ | ||
| 580 | static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, | ||
| 581 | struct mount_options *mopt) | ||
| 582 | { | ||
| 583 | if (!ocfs2_userspace_stack(osb) && mopt->cluster_stack[0]) { | ||
| 584 | mlog(ML_ERROR, | ||
| 585 | "cluster stack passed to mount, but this filesystem " | ||
| 586 | "does not support it\n"); | ||
| 587 | return -EINVAL; | ||
| 588 | } | ||
| 589 | |||
| 590 | if (ocfs2_userspace_stack(osb) && | ||
| 591 | strncmp(osb->osb_cluster_stack, mopt->cluster_stack, | ||
| 592 | OCFS2_STACK_LABEL_LEN)) { | ||
| 593 | mlog(ML_ERROR, | ||
| 594 | "cluster stack passed to mount (\"%s\") does not " | ||
| 595 | "match the filesystem (\"%s\")\n", | ||
| 596 | mopt->cluster_stack, | ||
| 597 | osb->osb_cluster_stack); | ||
| 598 | return -EINVAL; | ||
| 599 | } | ||
| 600 | |||
| 601 | return 0; | ||
| 602 | } | ||
| 603 | |||
| 565 | static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | 604 | static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) |
| 566 | { | 605 | { |
| 567 | struct dentry *root; | 606 | struct dentry *root; |
| @@ -579,15 +618,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 579 | goto read_super_error; | 618 | goto read_super_error; |
| 580 | } | 619 | } |
| 581 | 620 | ||
| 582 | /* for now we only have one cluster/node, make sure we see it | ||
| 583 | * in the heartbeat universe */ | ||
| 584 | if (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL) { | ||
| 585 | if (!o2hb_check_local_node_heartbeating()) { | ||
| 586 | status = -EINVAL; | ||
| 587 | goto read_super_error; | ||
| 588 | } | ||
| 589 | } | ||
| 590 | |||
| 591 | /* probe for superblock */ | 621 | /* probe for superblock */ |
| 592 | status = ocfs2_sb_probe(sb, &bh, §or_size); | 622 | status = ocfs2_sb_probe(sb, &bh, §or_size); |
| 593 | if (status < 0) { | 623 | if (status < 0) { |
| @@ -609,6 +639,10 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 609 | osb->osb_commit_interval = parsed_options.commit_interval; | 639 | osb->osb_commit_interval = parsed_options.commit_interval; |
| 610 | osb->local_alloc_size = parsed_options.localalloc_opt; | 640 | osb->local_alloc_size = parsed_options.localalloc_opt; |
| 611 | 641 | ||
| 642 | status = ocfs2_verify_userspace_stack(osb, &parsed_options); | ||
| 643 | if (status) | ||
| 644 | goto read_super_error; | ||
| 645 | |||
| 612 | sb->s_magic = OCFS2_SUPER_MAGIC; | 646 | sb->s_magic = OCFS2_SUPER_MAGIC; |
| 613 | 647 | ||
| 614 | /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, | 648 | /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, |
| @@ -694,7 +728,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 694 | if (ocfs2_mount_local(osb)) | 728 | if (ocfs2_mount_local(osb)) |
| 695 | snprintf(nodestr, sizeof(nodestr), "local"); | 729 | snprintf(nodestr, sizeof(nodestr), "local"); |
| 696 | else | 730 | else |
| 697 | snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); | 731 | snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num); |
| 698 | 732 | ||
| 699 | printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) " | 733 | printk(KERN_INFO "ocfs2: Mounting device (%s) on (node %s, slot %d) " |
| 700 | "with %s data mode.\n", | 734 | "with %s data mode.\n", |
| @@ -763,6 +797,7 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
| 763 | mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; | 797 | mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; |
| 764 | mopt->slot = OCFS2_INVALID_SLOT; | 798 | mopt->slot = OCFS2_INVALID_SLOT; |
| 765 | mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; | 799 | mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; |
| 800 | mopt->cluster_stack[0] = '\0'; | ||
| 766 | 801 | ||
| 767 | if (!options) { | 802 | if (!options) { |
| 768 | status = 1; | 803 | status = 1; |
| @@ -864,6 +899,25 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
| 864 | if (!is_remount) | 899 | if (!is_remount) |
| 865 | mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; | 900 | mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; |
| 866 | break; | 901 | break; |
| 902 | case Opt_stack: | ||
| 903 | /* Check both that the option we were passed | ||
| 904 | * is of the right length and that it is a proper | ||
| 905 | * string of the right length. | ||
| 906 | */ | ||
| 907 | if (((args[0].to - args[0].from) != | ||
| 908 | OCFS2_STACK_LABEL_LEN) || | ||
| 909 | (strnlen(args[0].from, | ||
| 910 | OCFS2_STACK_LABEL_LEN) != | ||
| 911 | OCFS2_STACK_LABEL_LEN)) { | ||
| 912 | mlog(ML_ERROR, | ||
| 913 | "Invalid cluster_stack option\n"); | ||
| 914 | status = 0; | ||
| 915 | goto bail; | ||
| 916 | } | ||
| 917 | memcpy(mopt->cluster_stack, args[0].from, | ||
| 918 | OCFS2_STACK_LABEL_LEN); | ||
| 919 | mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; | ||
| 920 | break; | ||
| 867 | default: | 921 | default: |
| 868 | mlog(ML_ERROR, | 922 | mlog(ML_ERROR, |
| 869 | "Unrecognized mount option \"%s\" " | 923 | "Unrecognized mount option \"%s\" " |
| @@ -922,6 +976,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
| 922 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) | 976 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) |
| 923 | seq_printf(s, ",localflocks,"); | 977 | seq_printf(s, ",localflocks,"); |
| 924 | 978 | ||
| 979 | if (osb->osb_cluster_stack[0]) | ||
| 980 | seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, | ||
| 981 | osb->osb_cluster_stack); | ||
| 982 | |||
| 925 | return 0; | 983 | return 0; |
| 926 | } | 984 | } |
| 927 | 985 | ||
| @@ -957,6 +1015,8 @@ static int __init ocfs2_init(void) | |||
| 957 | mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); | 1015 | mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); |
| 958 | } | 1016 | } |
| 959 | 1017 | ||
| 1018 | ocfs2_set_locking_protocol(); | ||
| 1019 | |||
| 960 | leave: | 1020 | leave: |
| 961 | if (status < 0) { | 1021 | if (status < 0) { |
| 962 | ocfs2_free_mem_caches(); | 1022 | ocfs2_free_mem_caches(); |
| @@ -1132,31 +1192,6 @@ static int ocfs2_get_sector(struct super_block *sb, | |||
| 1132 | return 0; | 1192 | return 0; |
| 1133 | } | 1193 | } |
| 1134 | 1194 | ||
| 1135 | /* ocfs2 1.0 only allows one cluster and node identity per kernel image. */ | ||
| 1136 | static int ocfs2_fill_local_node_info(struct ocfs2_super *osb) | ||
| 1137 | { | ||
| 1138 | int status; | ||
| 1139 | |||
| 1140 | /* XXX hold a ref on the node while mounte? easy enough, if | ||
| 1141 | * desirable. */ | ||
| 1142 | if (ocfs2_mount_local(osb)) | ||
| 1143 | osb->node_num = 0; | ||
| 1144 | else | ||
| 1145 | osb->node_num = o2nm_this_node(); | ||
| 1146 | |||
| 1147 | if (osb->node_num == O2NM_MAX_NODES) { | ||
| 1148 | mlog(ML_ERROR, "could not find this host's node number\n"); | ||
| 1149 | status = -ENOENT; | ||
| 1150 | goto bail; | ||
| 1151 | } | ||
| 1152 | |||
| 1153 | mlog(0, "I am node %d\n", osb->node_num); | ||
| 1154 | |||
| 1155 | status = 0; | ||
| 1156 | bail: | ||
| 1157 | return status; | ||
| 1158 | } | ||
| 1159 | |||
| 1160 | static int ocfs2_mount_volume(struct super_block *sb) | 1195 | static int ocfs2_mount_volume(struct super_block *sb) |
| 1161 | { | 1196 | { |
| 1162 | int status = 0; | 1197 | int status = 0; |
| @@ -1168,12 +1203,6 @@ static int ocfs2_mount_volume(struct super_block *sb) | |||
| 1168 | if (ocfs2_is_hard_readonly(osb)) | 1203 | if (ocfs2_is_hard_readonly(osb)) |
| 1169 | goto leave; | 1204 | goto leave; |
| 1170 | 1205 | ||
| 1171 | status = ocfs2_fill_local_node_info(osb); | ||
| 1172 | if (status < 0) { | ||
| 1173 | mlog_errno(status); | ||
| 1174 | goto leave; | ||
| 1175 | } | ||
| 1176 | |||
| 1177 | status = ocfs2_dlm_init(osb); | 1206 | status = ocfs2_dlm_init(osb); |
| 1178 | if (status < 0) { | 1207 | if (status < 0) { |
| 1179 | mlog_errno(status); | 1208 | mlog_errno(status); |
| @@ -1224,18 +1253,9 @@ leave: | |||
| 1224 | return status; | 1253 | return status; |
| 1225 | } | 1254 | } |
| 1226 | 1255 | ||
| 1227 | /* we can't grab the goofy sem lock from inside wait_event, so we use | ||
| 1228 | * memory barriers to make sure that we'll see the null task before | ||
| 1229 | * being woken up */ | ||
| 1230 | static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) | ||
| 1231 | { | ||
| 1232 | mb(); | ||
| 1233 | return osb->recovery_thread_task != NULL; | ||
| 1234 | } | ||
| 1235 | |||
| 1236 | static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | 1256 | static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) |
| 1237 | { | 1257 | { |
| 1238 | int tmp; | 1258 | int tmp, hangup_needed = 0; |
| 1239 | struct ocfs2_super *osb = NULL; | 1259 | struct ocfs2_super *osb = NULL; |
| 1240 | char nodestr[8]; | 1260 | char nodestr[8]; |
| 1241 | 1261 | ||
| @@ -1249,25 +1269,16 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
| 1249 | 1269 | ||
| 1250 | ocfs2_truncate_log_shutdown(osb); | 1270 | ocfs2_truncate_log_shutdown(osb); |
| 1251 | 1271 | ||
| 1252 | /* disable any new recovery threads and wait for any currently | 1272 | /* This will disable recovery and flush any recovery work. */ |
| 1253 | * running ones to exit. Do this before setting the vol_state. */ | 1273 | ocfs2_recovery_exit(osb); |
| 1254 | mutex_lock(&osb->recovery_lock); | ||
| 1255 | osb->disable_recovery = 1; | ||
| 1256 | mutex_unlock(&osb->recovery_lock); | ||
| 1257 | wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); | ||
| 1258 | |||
| 1259 | /* At this point, we know that no more recovery threads can be | ||
| 1260 | * launched, so wait for any recovery completion work to | ||
| 1261 | * complete. */ | ||
| 1262 | flush_workqueue(ocfs2_wq); | ||
| 1263 | 1274 | ||
| 1264 | ocfs2_journal_shutdown(osb); | 1275 | ocfs2_journal_shutdown(osb); |
| 1265 | 1276 | ||
| 1266 | ocfs2_sync_blockdev(sb); | 1277 | ocfs2_sync_blockdev(sb); |
| 1267 | 1278 | ||
| 1268 | /* No dlm means we've failed during mount, so skip all the | 1279 | /* No cluster connection means we've failed during mount, so skip |
| 1269 | * steps which depended on that to complete. */ | 1280 | * all the steps which depended on that to complete. */ |
| 1270 | if (osb->dlm) { | 1281 | if (osb->cconn) { |
| 1271 | tmp = ocfs2_super_lock(osb, 1); | 1282 | tmp = ocfs2_super_lock(osb, 1); |
| 1272 | if (tmp < 0) { | 1283 | if (tmp < 0) { |
| 1273 | mlog_errno(tmp); | 1284 | mlog_errno(tmp); |
| @@ -1278,25 +1289,34 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
| 1278 | if (osb->slot_num != OCFS2_INVALID_SLOT) | 1289 | if (osb->slot_num != OCFS2_INVALID_SLOT) |
| 1279 | ocfs2_put_slot(osb); | 1290 | ocfs2_put_slot(osb); |
| 1280 | 1291 | ||
| 1281 | if (osb->dlm) | 1292 | if (osb->cconn) |
| 1282 | ocfs2_super_unlock(osb, 1); | 1293 | ocfs2_super_unlock(osb, 1); |
| 1283 | 1294 | ||
| 1284 | ocfs2_release_system_inodes(osb); | 1295 | ocfs2_release_system_inodes(osb); |
| 1285 | 1296 | ||
| 1286 | if (osb->dlm) | 1297 | /* |
| 1287 | ocfs2_dlm_shutdown(osb); | 1298 | * If we're dismounting due to mount error, mount.ocfs2 will clean |
| 1299 | * up heartbeat. If we're a local mount, there is no heartbeat. | ||
| 1300 | * If we failed before we got a uuid_str yet, we can't stop | ||
| 1301 | * heartbeat. Otherwise, do it. | ||
| 1302 | */ | ||
| 1303 | if (!mnt_err && !ocfs2_mount_local(osb) && osb->uuid_str) | ||
| 1304 | hangup_needed = 1; | ||
| 1305 | |||
| 1306 | if (osb->cconn) | ||
| 1307 | ocfs2_dlm_shutdown(osb, hangup_needed); | ||
| 1288 | 1308 | ||
| 1289 | debugfs_remove(osb->osb_debug_root); | 1309 | debugfs_remove(osb->osb_debug_root); |
| 1290 | 1310 | ||
| 1291 | if (!mnt_err) | 1311 | if (hangup_needed) |
| 1292 | ocfs2_stop_heartbeat(osb); | 1312 | ocfs2_cluster_hangup(osb->uuid_str, strlen(osb->uuid_str)); |
| 1293 | 1313 | ||
| 1294 | atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); | 1314 | atomic_set(&osb->vol_state, VOLUME_DISMOUNTED); |
| 1295 | 1315 | ||
| 1296 | if (ocfs2_mount_local(osb)) | 1316 | if (ocfs2_mount_local(osb)) |
| 1297 | snprintf(nodestr, sizeof(nodestr), "local"); | 1317 | snprintf(nodestr, sizeof(nodestr), "local"); |
| 1298 | else | 1318 | else |
| 1299 | snprintf(nodestr, sizeof(nodestr), "%d", osb->node_num); | 1319 | snprintf(nodestr, sizeof(nodestr), "%u", osb->node_num); |
| 1300 | 1320 | ||
| 1301 | printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n", | 1321 | printk(KERN_INFO "ocfs2: Unmounting device (%s) on (node %s)\n", |
| 1302 | osb->dev_str, nodestr); | 1322 | osb->dev_str, nodestr); |
| @@ -1355,7 +1375,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1355 | sb->s_fs_info = osb; | 1375 | sb->s_fs_info = osb; |
| 1356 | sb->s_op = &ocfs2_sops; | 1376 | sb->s_op = &ocfs2_sops; |
| 1357 | sb->s_export_op = &ocfs2_export_ops; | 1377 | sb->s_export_op = &ocfs2_export_ops; |
| 1358 | osb->osb_locking_proto = ocfs2_locking_protocol; | ||
| 1359 | sb->s_time_gran = 1; | 1378 | sb->s_time_gran = 1; |
| 1360 | sb->s_flags |= MS_NOATIME; | 1379 | sb->s_flags |= MS_NOATIME; |
| 1361 | /* this is needed to support O_LARGEFILE */ | 1380 | /* this is needed to support O_LARGEFILE */ |
| @@ -1368,7 +1387,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1368 | osb->s_sectsize_bits = blksize_bits(sector_size); | 1387 | osb->s_sectsize_bits = blksize_bits(sector_size); |
| 1369 | BUG_ON(!osb->s_sectsize_bits); | 1388 | BUG_ON(!osb->s_sectsize_bits); |
| 1370 | 1389 | ||
| 1371 | init_waitqueue_head(&osb->recovery_event); | ||
| 1372 | spin_lock_init(&osb->dc_task_lock); | 1390 | spin_lock_init(&osb->dc_task_lock); |
| 1373 | init_waitqueue_head(&osb->dc_event); | 1391 | init_waitqueue_head(&osb->dc_event); |
| 1374 | osb->dc_work_sequence = 0; | 1392 | osb->dc_work_sequence = 0; |
| @@ -1376,6 +1394,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1376 | INIT_LIST_HEAD(&osb->blocked_lock_list); | 1394 | INIT_LIST_HEAD(&osb->blocked_lock_list); |
| 1377 | osb->blocked_lock_count = 0; | 1395 | osb->blocked_lock_count = 0; |
| 1378 | spin_lock_init(&osb->osb_lock); | 1396 | spin_lock_init(&osb->osb_lock); |
| 1397 | ocfs2_init_inode_steal_slot(osb); | ||
| 1379 | 1398 | ||
| 1380 | atomic_set(&osb->alloc_stats.moves, 0); | 1399 | atomic_set(&osb->alloc_stats.moves, 0); |
| 1381 | atomic_set(&osb->alloc_stats.local_data, 0); | 1400 | atomic_set(&osb->alloc_stats.local_data, 0); |
| @@ -1388,24 +1407,23 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1388 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", | 1407 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", |
| 1389 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 1408 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); |
| 1390 | 1409 | ||
| 1391 | mutex_init(&osb->recovery_lock); | 1410 | status = ocfs2_recovery_init(osb); |
| 1392 | 1411 | if (status) { | |
| 1393 | osb->disable_recovery = 0; | 1412 | mlog(ML_ERROR, "Unable to initialize recovery state\n"); |
| 1394 | osb->recovery_thread_task = NULL; | 1413 | mlog_errno(status); |
| 1414 | goto bail; | ||
| 1415 | } | ||
| 1395 | 1416 | ||
| 1396 | init_waitqueue_head(&osb->checkpoint_event); | 1417 | init_waitqueue_head(&osb->checkpoint_event); |
| 1397 | atomic_set(&osb->needs_checkpoint, 0); | 1418 | atomic_set(&osb->needs_checkpoint, 0); |
| 1398 | 1419 | ||
| 1399 | osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; | 1420 | osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; |
| 1400 | 1421 | ||
| 1401 | osb->node_num = O2NM_INVALID_NODE_NUM; | ||
| 1402 | osb->slot_num = OCFS2_INVALID_SLOT; | 1422 | osb->slot_num = OCFS2_INVALID_SLOT; |
| 1403 | 1423 | ||
| 1404 | osb->local_alloc_state = OCFS2_LA_UNUSED; | 1424 | osb->local_alloc_state = OCFS2_LA_UNUSED; |
| 1405 | osb->local_alloc_bh = NULL; | 1425 | osb->local_alloc_bh = NULL; |
| 1406 | 1426 | ||
| 1407 | ocfs2_setup_hb_callbacks(osb); | ||
| 1408 | |||
| 1409 | init_waitqueue_head(&osb->osb_mount_event); | 1427 | init_waitqueue_head(&osb->osb_mount_event); |
| 1410 | 1428 | ||
| 1411 | osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); | 1429 | osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); |
| @@ -1455,6 +1473,25 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1455 | goto bail; | 1473 | goto bail; |
| 1456 | } | 1474 | } |
| 1457 | 1475 | ||
| 1476 | if (ocfs2_userspace_stack(osb)) { | ||
| 1477 | memcpy(osb->osb_cluster_stack, | ||
| 1478 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, | ||
| 1479 | OCFS2_STACK_LABEL_LEN); | ||
| 1480 | osb->osb_cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; | ||
| 1481 | if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) { | ||
| 1482 | mlog(ML_ERROR, | ||
| 1483 | "couldn't mount because of an invalid " | ||
| 1484 | "cluster stack label (%s) \n", | ||
| 1485 | osb->osb_cluster_stack); | ||
| 1486 | status = -EINVAL; | ||
| 1487 | goto bail; | ||
| 1488 | } | ||
| 1489 | } else { | ||
| 1490 | /* The empty string is identical with classic tools that | ||
| 1491 | * don't know about s_cluster_info. */ | ||
| 1492 | osb->osb_cluster_stack[0] = '\0'; | ||
| 1493 | } | ||
| 1494 | |||
| 1458 | get_random_bytes(&osb->s_next_generation, sizeof(u32)); | 1495 | get_random_bytes(&osb->s_next_generation, sizeof(u32)); |
| 1459 | 1496 | ||
| 1460 | /* FIXME | 1497 | /* FIXME |
| @@ -1724,8 +1761,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) | |||
| 1724 | 1761 | ||
| 1725 | /* This function assumes that the caller has the main osb resource */ | 1762 | /* This function assumes that the caller has the main osb resource */ |
| 1726 | 1763 | ||
| 1727 | if (osb->slot_info) | 1764 | ocfs2_free_slot_info(osb); |
| 1728 | ocfs2_free_slot_info(osb->slot_info); | ||
| 1729 | 1765 | ||
| 1730 | kfree(osb->osb_orphan_wipes); | 1766 | kfree(osb->osb_orphan_wipes); |
| 1731 | /* FIXME | 1767 | /* FIXME |
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 4caa5f774fb7..13cd7835d0df 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c | |||
| @@ -44,7 +44,9 @@ int seq_open_net(struct inode *ino, struct file *f, | |||
| 44 | put_net(net); | 44 | put_net(net); |
| 45 | return -ENOMEM; | 45 | return -ENOMEM; |
| 46 | } | 46 | } |
| 47 | #ifdef CONFIG_NET_NS | ||
| 47 | p->net = net; | 48 | p->net = net; |
| 49 | #endif | ||
| 48 | return 0; | 50 | return 0; |
| 49 | } | 51 | } |
| 50 | EXPORT_SYMBOL_GPL(seq_open_net); | 52 | EXPORT_SYMBOL_GPL(seq_open_net); |
| @@ -52,12 +54,10 @@ EXPORT_SYMBOL_GPL(seq_open_net); | |||
| 52 | int seq_release_net(struct inode *ino, struct file *f) | 54 | int seq_release_net(struct inode *ino, struct file *f) |
| 53 | { | 55 | { |
| 54 | struct seq_file *seq; | 56 | struct seq_file *seq; |
| 55 | struct seq_net_private *p; | ||
| 56 | 57 | ||
| 57 | seq = f->private_data; | 58 | seq = f->private_data; |
| 58 | p = seq->private; | ||
| 59 | 59 | ||
| 60 | put_net(p->net); | 60 | put_net(seq_file_net(seq)); |
| 61 | seq_release_private(ino, f); | 61 | seq_release_private(ino, f); |
| 62 | return 0; | 62 | return 0; |
| 63 | } | 63 | } |
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 5f66c4466151..817f5966edca 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c | |||
| @@ -87,7 +87,14 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char | |||
| 87 | 87 | ||
| 88 | void sysfs_remove_link(struct kobject * kobj, const char * name) | 88 | void sysfs_remove_link(struct kobject * kobj, const char * name) |
| 89 | { | 89 | { |
| 90 | sysfs_hash_and_remove(kobj->sd, name); | 90 | struct sysfs_dirent *parent_sd = NULL; |
| 91 | |||
| 92 | if (!kobj) | ||
| 93 | parent_sd = &sysfs_root; | ||
| 94 | else | ||
| 95 | parent_sd = kobj->sd; | ||
| 96 | |||
| 97 | sysfs_hash_and_remove(parent_sd, name); | ||
| 91 | } | 98 | } |
| 92 | 99 | ||
| 93 | static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, | 100 | static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, |
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 35115bca036e..524021ff5436 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig | |||
| @@ -35,18 +35,6 @@ config XFS_QUOTA | |||
| 35 | with or without the generic quota support enabled (CONFIG_QUOTA) - | 35 | with or without the generic quota support enabled (CONFIG_QUOTA) - |
| 36 | they are completely independent subsystems. | 36 | they are completely independent subsystems. |
| 37 | 37 | ||
| 38 | config XFS_SECURITY | ||
| 39 | bool "XFS Security Label support" | ||
| 40 | depends on XFS_FS | ||
| 41 | help | ||
| 42 | Security labels support alternative access control models | ||
| 43 | implemented by security modules like SELinux. This option | ||
| 44 | enables an extended attribute namespace for inode security | ||
| 45 | labels in the XFS filesystem. | ||
| 46 | |||
| 47 | If you are not using a security module that requires using | ||
| 48 | extended attributes for inode security labels, say N. | ||
| 49 | |||
| 50 | config XFS_POSIX_ACL | 38 | config XFS_POSIX_ACL |
| 51 | bool "XFS POSIX ACL support" | 39 | bool "XFS POSIX ACL support" |
| 52 | depends on XFS_FS | 40 | depends on XFS_FS |
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index e040f1ce1b6a..9b1bb17a0501 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c | |||
| @@ -37,7 +37,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) | |||
| 37 | #ifdef DEBUG | 37 | #ifdef DEBUG |
| 38 | if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) { | 38 | if (unlikely(!(flags & KM_LARGE) && (size > PAGE_SIZE))) { |
| 39 | printk(KERN_WARNING "Large %s attempt, size=%ld\n", | 39 | printk(KERN_WARNING "Large %s attempt, size=%ld\n", |
| 40 | __FUNCTION__, (long)size); | 40 | __func__, (long)size); |
| 41 | dump_stack(); | 41 | dump_stack(); |
| 42 | } | 42 | } |
| 43 | #endif | 43 | #endif |
| @@ -52,7 +52,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) | |||
| 52 | if (!(++retries % 100)) | 52 | if (!(++retries % 100)) |
| 53 | printk(KERN_ERR "XFS: possible memory allocation " | 53 | printk(KERN_ERR "XFS: possible memory allocation " |
| 54 | "deadlock in %s (mode:0x%x)\n", | 54 | "deadlock in %s (mode:0x%x)\n", |
| 55 | __FUNCTION__, lflags); | 55 | __func__, lflags); |
| 56 | congestion_wait(WRITE, HZ/50); | 56 | congestion_wait(WRITE, HZ/50); |
| 57 | } while (1); | 57 | } while (1); |
| 58 | } | 58 | } |
| @@ -129,7 +129,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) | |||
| 129 | if (!(++retries % 100)) | 129 | if (!(++retries % 100)) |
| 130 | printk(KERN_ERR "XFS: possible memory allocation " | 130 | printk(KERN_ERR "XFS: possible memory allocation " |
| 131 | "deadlock in %s (mode:0x%x)\n", | 131 | "deadlock in %s (mode:0x%x)\n", |
| 132 | __FUNCTION__, lflags); | 132 | __func__, lflags); |
| 133 | congestion_wait(WRITE, HZ/50); | 133 | congestion_wait(WRITE, HZ/50); |
| 134 | } while (1); | 134 | } while (1); |
| 135 | } | 135 | } |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index e0519529c26c..a55c3b26d840 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
| @@ -243,8 +243,12 @@ xfs_end_bio_unwritten( | |||
| 243 | size_t size = ioend->io_size; | 243 | size_t size = ioend->io_size; |
| 244 | 244 | ||
| 245 | if (likely(!ioend->io_error)) { | 245 | if (likely(!ioend->io_error)) { |
| 246 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) | 246 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
| 247 | xfs_iomap_write_unwritten(ip, offset, size); | 247 | int error; |
| 248 | error = xfs_iomap_write_unwritten(ip, offset, size); | ||
| 249 | if (error) | ||
| 250 | ioend->io_error = error; | ||
| 251 | } | ||
| 248 | xfs_setfilesize(ioend); | 252 | xfs_setfilesize(ioend); |
| 249 | } | 253 | } |
| 250 | xfs_destroy_ioend(ioend); | 254 | xfs_destroy_ioend(ioend); |
| @@ -1532,9 +1536,9 @@ xfs_vm_bmap( | |||
| 1532 | struct xfs_inode *ip = XFS_I(inode); | 1536 | struct xfs_inode *ip = XFS_I(inode); |
| 1533 | 1537 | ||
| 1534 | xfs_itrace_entry(XFS_I(inode)); | 1538 | xfs_itrace_entry(XFS_I(inode)); |
| 1535 | xfs_rwlock(ip, VRWLOCK_READ); | 1539 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
| 1536 | xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); | 1540 | xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); |
| 1537 | xfs_rwunlock(ip, VRWLOCK_READ); | 1541 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
| 1538 | return generic_block_bmap(mapping, block, xfs_get_blocks); | 1542 | return generic_block_bmap(mapping, block, xfs_get_blocks); |
| 1539 | } | 1543 | } |
| 1540 | 1544 | ||
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index e347bfd47c91..52f6846101d5 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
| @@ -400,7 +400,7 @@ _xfs_buf_lookup_pages( | |||
| 400 | printk(KERN_ERR | 400 | printk(KERN_ERR |
| 401 | "XFS: possible memory allocation " | 401 | "XFS: possible memory allocation " |
| 402 | "deadlock in %s (mode:0x%x)\n", | 402 | "deadlock in %s (mode:0x%x)\n", |
| 403 | __FUNCTION__, gfp_mask); | 403 | __func__, gfp_mask); |
| 404 | 404 | ||
| 405 | XFS_STATS_INC(xb_page_retries); | 405 | XFS_STATS_INC(xb_page_retries); |
| 406 | xfsbufd_wakeup(0, gfp_mask); | 406 | xfsbufd_wakeup(0, gfp_mask); |
| @@ -598,7 +598,7 @@ xfs_buf_get_flags( | |||
| 598 | error = _xfs_buf_map_pages(bp, flags); | 598 | error = _xfs_buf_map_pages(bp, flags); |
| 599 | if (unlikely(error)) { | 599 | if (unlikely(error)) { |
| 600 | printk(KERN_WARNING "%s: failed to map pages\n", | 600 | printk(KERN_WARNING "%s: failed to map pages\n", |
| 601 | __FUNCTION__); | 601 | __func__); |
| 602 | goto no_buffer; | 602 | goto no_buffer; |
| 603 | } | 603 | } |
| 604 | } | 604 | } |
| @@ -778,7 +778,7 @@ xfs_buf_get_noaddr( | |||
| 778 | error = _xfs_buf_map_pages(bp, XBF_MAPPED); | 778 | error = _xfs_buf_map_pages(bp, XBF_MAPPED); |
| 779 | if (unlikely(error)) { | 779 | if (unlikely(error)) { |
| 780 | printk(KERN_WARNING "%s: failed to map pages\n", | 780 | printk(KERN_WARNING "%s: failed to map pages\n", |
| 781 | __FUNCTION__); | 781 | __func__); |
| 782 | goto fail_free_mem; | 782 | goto fail_free_mem; |
| 783 | } | 783 | } |
| 784 | 784 | ||
| @@ -1060,7 +1060,7 @@ xfs_buf_iostart( | |||
| 1060 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC); | 1060 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC); |
| 1061 | bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC); | 1061 | bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC); |
| 1062 | xfs_buf_delwri_queue(bp, 1); | 1062 | xfs_buf_delwri_queue(bp, 1); |
| 1063 | return status; | 1063 | return 0; |
| 1064 | } | 1064 | } |
| 1065 | 1065 | ||
| 1066 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ | 1066 | bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index a3d207de48b8..841d7883528d 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
| @@ -387,11 +387,15 @@ static inline int XFS_bwrite(xfs_buf_t *bp) | |||
| 387 | return error; | 387 | return error; |
| 388 | } | 388 | } |
| 389 | 389 | ||
| 390 | static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp) | 390 | /* |
| 391 | * No error can be returned from xfs_buf_iostart for delwri | ||
| 392 | * buffers as they are queued and no I/O is issued. | ||
| 393 | */ | ||
| 394 | static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp) | ||
| 391 | { | 395 | { |
| 392 | bp->b_strat = xfs_bdstrat_cb; | 396 | bp->b_strat = xfs_bdstrat_cb; |
| 393 | bp->b_fspriv3 = mp; | 397 | bp->b_fspriv3 = mp; |
| 394 | return xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC); | 398 | (void)xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC); |
| 395 | } | 399 | } |
| 396 | 400 | ||
| 397 | #define XFS_bdstrat(bp) xfs_buf_iorequest(bp) | 401 | #define XFS_bdstrat(bp) xfs_buf_iorequest(bp) |
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h index e7f3da61c6c3..652721ce0ea5 100644 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ b/fs/xfs/linux-2.6/xfs_cred.h | |||
| @@ -30,7 +30,7 @@ typedef struct cred { | |||
| 30 | extern struct cred *sys_cred; | 30 | extern struct cred *sys_cred; |
| 31 | 31 | ||
| 32 | /* this is a hack.. (assumes sys_cred is the only cred_t in the system) */ | 32 | /* this is a hack.. (assumes sys_cred is the only cred_t in the system) */ |
| 33 | static __inline int capable_cred(cred_t *cr, int cid) | 33 | static inline int capable_cred(cred_t *cr, int cid) |
| 34 | { | 34 | { |
| 35 | return (cr == sys_cred) ? 1 : capable(cid); | 35 | return (cr == sys_cred) ? 1 : capable(cid); |
| 36 | } | 36 | } |
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index ca4f66c4de16..265f0168ab76 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include "xfs_trans.h" | 22 | #include "xfs_trans.h" |
| 23 | #include "xfs_sb.h" | 23 | #include "xfs_sb.h" |
| 24 | #include "xfs_ag.h" | 24 | #include "xfs_ag.h" |
| 25 | #include "xfs_dir2.h" | ||
| 25 | #include "xfs_dmapi.h" | 26 | #include "xfs_dmapi.h" |
| 26 | #include "xfs_mount.h" | 27 | #include "xfs_mount.h" |
| 27 | #include "xfs_export.h" | 28 | #include "xfs_export.h" |
| @@ -30,8 +31,6 @@ | |||
| 30 | #include "xfs_inode.h" | 31 | #include "xfs_inode.h" |
| 31 | #include "xfs_vfsops.h" | 32 | #include "xfs_vfsops.h" |
| 32 | 33 | ||
| 33 | static struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, }; | ||
| 34 | |||
| 35 | /* | 34 | /* |
| 36 | * Note that we only accept fileids which are long enough rather than allow | 35 | * Note that we only accept fileids which are long enough rather than allow |
| 37 | * the parent generation number to default to zero. XFS considers zero a | 36 | * the parent generation number to default to zero. XFS considers zero a |
| @@ -66,7 +65,7 @@ xfs_fs_encode_fh( | |||
| 66 | int len; | 65 | int len; |
| 67 | 66 | ||
| 68 | /* Directories don't need their parent encoded, they have ".." */ | 67 | /* Directories don't need their parent encoded, they have ".." */ |
| 69 | if (S_ISDIR(inode->i_mode)) | 68 | if (S_ISDIR(inode->i_mode) || !connectable) |
| 70 | fileid_type = FILEID_INO32_GEN; | 69 | fileid_type = FILEID_INO32_GEN; |
| 71 | else | 70 | else |
| 72 | fileid_type = FILEID_INO32_GEN_PARENT; | 71 | fileid_type = FILEID_INO32_GEN_PARENT; |
| @@ -213,17 +212,16 @@ xfs_fs_get_parent( | |||
| 213 | struct dentry *child) | 212 | struct dentry *child) |
| 214 | { | 213 | { |
| 215 | int error; | 214 | int error; |
| 216 | bhv_vnode_t *cvp; | 215 | struct xfs_inode *cip; |
| 217 | struct dentry *parent; | 216 | struct dentry *parent; |
| 218 | 217 | ||
| 219 | cvp = NULL; | 218 | error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip); |
| 220 | error = xfs_lookup(XFS_I(child->d_inode), &dotdot, &cvp); | ||
| 221 | if (unlikely(error)) | 219 | if (unlikely(error)) |
| 222 | return ERR_PTR(-error); | 220 | return ERR_PTR(-error); |
| 223 | 221 | ||
| 224 | parent = d_alloc_anon(vn_to_inode(cvp)); | 222 | parent = d_alloc_anon(cip->i_vnode); |
| 225 | if (unlikely(!parent)) { | 223 | if (unlikely(!parent)) { |
| 226 | VN_RELE(cvp); | 224 | iput(cip->i_vnode); |
| 227 | return ERR_PTR(-ENOMEM); | 225 | return ERR_PTR(-ENOMEM); |
| 228 | } | 226 | } |
| 229 | return parent; | 227 | return parent; |
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index edab1ffbb163..05905246434d 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
| @@ -469,16 +469,11 @@ xfs_file_open_exec( | |||
| 469 | struct inode *inode) | 469 | struct inode *inode) |
| 470 | { | 470 | { |
| 471 | struct xfs_mount *mp = XFS_M(inode->i_sb); | 471 | struct xfs_mount *mp = XFS_M(inode->i_sb); |
| 472 | struct xfs_inode *ip = XFS_I(inode); | ||
| 472 | 473 | ||
| 473 | if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI)) { | 474 | if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) && |
| 474 | if (DM_EVENT_ENABLED(XFS_I(inode), DM_EVENT_READ)) { | 475 | DM_EVENT_ENABLED(ip, DM_EVENT_READ)) |
| 475 | bhv_vnode_t *vp = vn_from_inode(inode); | 476 | return -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL); |
| 476 | |||
| 477 | return -XFS_SEND_DATA(mp, DM_EVENT_READ, | ||
| 478 | vp, 0, 0, 0, NULL); | ||
| 479 | } | ||
| 480 | } | ||
| 481 | |||
| 482 | return 0; | 477 | return 0; |
| 483 | } | 478 | } |
| 484 | #endif /* HAVE_FOP_OPEN_EXEC */ | 479 | #endif /* HAVE_FOP_OPEN_EXEC */ |
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index ac6d34cc355d..1eefe61f0e10 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c | |||
| @@ -17,18 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | #include "xfs.h" | 18 | #include "xfs.h" |
| 19 | #include "xfs_vnodeops.h" | 19 | #include "xfs_vnodeops.h" |
| 20 | |||
| 21 | /* | ||
| 22 | * The following six includes are needed so that we can include | ||
| 23 | * xfs_inode.h. What a mess.. | ||
| 24 | */ | ||
| 25 | #include "xfs_bmap_btree.h" | 20 | #include "xfs_bmap_btree.h" |
| 26 | #include "xfs_inum.h" | ||
| 27 | #include "xfs_dir2.h" | ||
| 28 | #include "xfs_dir2_sf.h" | ||
| 29 | #include "xfs_attr_sf.h" | ||
| 30 | #include "xfs_dinode.h" | ||
| 31 | |||
| 32 | #include "xfs_inode.h" | 21 | #include "xfs_inode.h" |
| 33 | 22 | ||
| 34 | int fs_noerr(void) { return 0; } | 23 | int fs_noerr(void) { return 0; } |
| @@ -42,11 +31,10 @@ xfs_tosspages( | |||
| 42 | xfs_off_t last, | 31 | xfs_off_t last, |
| 43 | int fiopt) | 32 | int fiopt) |
| 44 | { | 33 | { |
| 45 | bhv_vnode_t *vp = XFS_ITOV(ip); | 34 | struct address_space *mapping = ip->i_vnode->i_mapping; |
| 46 | struct inode *inode = vn_to_inode(vp); | ||
| 47 | 35 | ||
| 48 | if (VN_CACHED(vp)) | 36 | if (mapping->nrpages) |
| 49 | truncate_inode_pages(inode->i_mapping, first); | 37 | truncate_inode_pages(mapping, first); |
| 50 | } | 38 | } |
| 51 | 39 | ||
| 52 | int | 40 | int |
| @@ -56,15 +44,14 @@ xfs_flushinval_pages( | |||
| 56 | xfs_off_t last, | 44 | xfs_off_t last, |
| 57 | int fiopt) | 45 | int fiopt) |
| 58 | { | 46 | { |
| 59 | bhv_vnode_t *vp = XFS_ITOV(ip); | 47 | struct address_space *mapping = ip->i_vnode->i_mapping; |
| 60 | struct inode *inode = vn_to_inode(vp); | ||
| 61 | int ret = 0; | 48 | int ret = 0; |
| 62 | 49 | ||
| 63 | if (VN_CACHED(vp)) { | 50 | if (mapping->nrpages) { |
| 64 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | 51 | xfs_iflags_clear(ip, XFS_ITRUNCATED); |
| 65 | ret = filemap_write_and_wait(inode->i_mapping); | 52 | ret = filemap_write_and_wait(mapping); |
| 66 | if (!ret) | 53 | if (!ret) |
| 67 | truncate_inode_pages(inode->i_mapping, first); | 54 | truncate_inode_pages(mapping, first); |
| 68 | } | 55 | } |
| 69 | return ret; | 56 | return ret; |
| 70 | } | 57 | } |
| @@ -77,17 +64,16 @@ xfs_flush_pages( | |||
| 77 | uint64_t flags, | 64 | uint64_t flags, |
| 78 | int fiopt) | 65 | int fiopt) |
| 79 | { | 66 | { |
| 80 | bhv_vnode_t *vp = XFS_ITOV(ip); | 67 | struct address_space *mapping = ip->i_vnode->i_mapping; |
| 81 | struct inode *inode = vn_to_inode(vp); | ||
| 82 | int ret = 0; | 68 | int ret = 0; |
| 83 | int ret2; | 69 | int ret2; |
| 84 | 70 | ||
| 85 | if (VN_DIRTY(vp)) { | 71 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
| 86 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | 72 | xfs_iflags_clear(ip, XFS_ITRUNCATED); |
| 87 | ret = filemap_fdatawrite(inode->i_mapping); | 73 | ret = filemap_fdatawrite(mapping); |
| 88 | if (flags & XFS_B_ASYNC) | 74 | if (flags & XFS_B_ASYNC) |
| 89 | return ret; | 75 | return ret; |
| 90 | ret2 = filemap_fdatawait(inode->i_mapping); | 76 | ret2 = filemap_fdatawait(mapping); |
| 91 | if (!ret) | 77 | if (!ret) |
| 92 | ret = ret2; | 78 | ret = ret2; |
| 93 | } | 79 | } |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index f34bd010eb51..bf7759793856 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
| @@ -651,314 +651,6 @@ xfs_attrmulti_by_handle( | |||
| 651 | return -error; | 651 | return -error; |
| 652 | } | 652 | } |
| 653 | 653 | ||
| 654 | /* prototypes for a few of the stack-hungry cases that have | ||
| 655 | * their own functions. Functions are defined after their use | ||
| 656 | * so gcc doesn't get fancy and inline them with -03 */ | ||
| 657 | |||
| 658 | STATIC int | ||
| 659 | xfs_ioc_space( | ||
| 660 | struct xfs_inode *ip, | ||
| 661 | struct inode *inode, | ||
| 662 | struct file *filp, | ||
| 663 | int flags, | ||
| 664 | unsigned int cmd, | ||
| 665 | void __user *arg); | ||
| 666 | |||
| 667 | STATIC int | ||
| 668 | xfs_ioc_bulkstat( | ||
| 669 | xfs_mount_t *mp, | ||
| 670 | unsigned int cmd, | ||
| 671 | void __user *arg); | ||
| 672 | |||
| 673 | STATIC int | ||
| 674 | xfs_ioc_fsgeometry_v1( | ||
| 675 | xfs_mount_t *mp, | ||
| 676 | void __user *arg); | ||
| 677 | |||
| 678 | STATIC int | ||
| 679 | xfs_ioc_fsgeometry( | ||
| 680 | xfs_mount_t *mp, | ||
| 681 | void __user *arg); | ||
| 682 | |||
| 683 | STATIC int | ||
| 684 | xfs_ioc_xattr( | ||
| 685 | xfs_inode_t *ip, | ||
| 686 | struct file *filp, | ||
| 687 | unsigned int cmd, | ||
| 688 | void __user *arg); | ||
| 689 | |||
| 690 | STATIC int | ||
| 691 | xfs_ioc_fsgetxattr( | ||
| 692 | xfs_inode_t *ip, | ||
| 693 | int attr, | ||
| 694 | void __user *arg); | ||
| 695 | |||
| 696 | STATIC int | ||
| 697 | xfs_ioc_getbmap( | ||
| 698 | struct xfs_inode *ip, | ||
| 699 | int flags, | ||
| 700 | unsigned int cmd, | ||
| 701 | void __user *arg); | ||
| 702 | |||
| 703 | STATIC int | ||
| 704 | xfs_ioc_getbmapx( | ||
| 705 | struct xfs_inode *ip, | ||
| 706 | void __user *arg); | ||
| 707 | |||
| 708 | int | ||
| 709 | xfs_ioctl( | ||
| 710 | xfs_inode_t *ip, | ||
| 711 | struct file *filp, | ||
| 712 | int ioflags, | ||
| 713 | unsigned int cmd, | ||
| 714 | void __user *arg) | ||
| 715 | { | ||
| 716 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
| 717 | xfs_mount_t *mp = ip->i_mount; | ||
| 718 | int error; | ||
| 719 | |||
| 720 | xfs_itrace_entry(XFS_I(inode)); | ||
| 721 | switch (cmd) { | ||
| 722 | |||
| 723 | case XFS_IOC_ALLOCSP: | ||
| 724 | case XFS_IOC_FREESP: | ||
| 725 | case XFS_IOC_RESVSP: | ||
| 726 | case XFS_IOC_UNRESVSP: | ||
| 727 | case XFS_IOC_ALLOCSP64: | ||
| 728 | case XFS_IOC_FREESP64: | ||
| 729 | case XFS_IOC_RESVSP64: | ||
| 730 | case XFS_IOC_UNRESVSP64: | ||
| 731 | /* | ||
| 732 | * Only allow the sys admin to reserve space unless | ||
| 733 | * unwritten extents are enabled. | ||
| 734 | */ | ||
| 735 | if (!xfs_sb_version_hasextflgbit(&mp->m_sb) && | ||
| 736 | !capable(CAP_SYS_ADMIN)) | ||
| 737 | return -EPERM; | ||
| 738 | |||
| 739 | return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg); | ||
| 740 | |||
| 741 | case XFS_IOC_DIOINFO: { | ||
| 742 | struct dioattr da; | ||
| 743 | xfs_buftarg_t *target = | ||
| 744 | XFS_IS_REALTIME_INODE(ip) ? | ||
| 745 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
| 746 | |||
| 747 | da.d_mem = da.d_miniosz = 1 << target->bt_sshift; | ||
| 748 | da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); | ||
| 749 | |||
| 750 | if (copy_to_user(arg, &da, sizeof(da))) | ||
| 751 | return -XFS_ERROR(EFAULT); | ||
| 752 | return 0; | ||
| 753 | } | ||
| 754 | |||
| 755 | case XFS_IOC_FSBULKSTAT_SINGLE: | ||
| 756 | case XFS_IOC_FSBULKSTAT: | ||
| 757 | case XFS_IOC_FSINUMBERS: | ||
| 758 | return xfs_ioc_bulkstat(mp, cmd, arg); | ||
| 759 | |||
| 760 | case XFS_IOC_FSGEOMETRY_V1: | ||
| 761 | return xfs_ioc_fsgeometry_v1(mp, arg); | ||
| 762 | |||
| 763 | case XFS_IOC_FSGEOMETRY: | ||
| 764 | return xfs_ioc_fsgeometry(mp, arg); | ||
| 765 | |||
| 766 | case XFS_IOC_GETVERSION: | ||
| 767 | return put_user(inode->i_generation, (int __user *)arg); | ||
| 768 | |||
| 769 | case XFS_IOC_FSGETXATTR: | ||
| 770 | return xfs_ioc_fsgetxattr(ip, 0, arg); | ||
| 771 | case XFS_IOC_FSGETXATTRA: | ||
| 772 | return xfs_ioc_fsgetxattr(ip, 1, arg); | ||
| 773 | case XFS_IOC_GETXFLAGS: | ||
| 774 | case XFS_IOC_SETXFLAGS: | ||
| 775 | case XFS_IOC_FSSETXATTR: | ||
| 776 | return xfs_ioc_xattr(ip, filp, cmd, arg); | ||
| 777 | |||
| 778 | case XFS_IOC_FSSETDM: { | ||
| 779 | struct fsdmidata dmi; | ||
| 780 | |||
| 781 | if (copy_from_user(&dmi, arg, sizeof(dmi))) | ||
| 782 | return -XFS_ERROR(EFAULT); | ||
| 783 | |||
| 784 | error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, | ||
| 785 | dmi.fsd_dmstate); | ||
| 786 | return -error; | ||
| 787 | } | ||
| 788 | |||
| 789 | case XFS_IOC_GETBMAP: | ||
| 790 | case XFS_IOC_GETBMAPA: | ||
| 791 | return xfs_ioc_getbmap(ip, ioflags, cmd, arg); | ||
| 792 | |||
| 793 | case XFS_IOC_GETBMAPX: | ||
| 794 | return xfs_ioc_getbmapx(ip, arg); | ||
| 795 | |||
| 796 | case XFS_IOC_FD_TO_HANDLE: | ||
| 797 | case XFS_IOC_PATH_TO_HANDLE: | ||
| 798 | case XFS_IOC_PATH_TO_FSHANDLE: | ||
| 799 | return xfs_find_handle(cmd, arg); | ||
| 800 | |||
| 801 | case XFS_IOC_OPEN_BY_HANDLE: | ||
| 802 | return xfs_open_by_handle(mp, arg, filp, inode); | ||
| 803 | |||
| 804 | case XFS_IOC_FSSETDM_BY_HANDLE: | ||
| 805 | return xfs_fssetdm_by_handle(mp, arg, inode); | ||
| 806 | |||
| 807 | case XFS_IOC_READLINK_BY_HANDLE: | ||
| 808 | return xfs_readlink_by_handle(mp, arg, inode); | ||
| 809 | |||
| 810 | case XFS_IOC_ATTRLIST_BY_HANDLE: | ||
| 811 | return xfs_attrlist_by_handle(mp, arg, inode); | ||
| 812 | |||
| 813 | case XFS_IOC_ATTRMULTI_BY_HANDLE: | ||
| 814 | return xfs_attrmulti_by_handle(mp, arg, inode); | ||
| 815 | |||
| 816 | case XFS_IOC_SWAPEXT: { | ||
| 817 | error = xfs_swapext((struct xfs_swapext __user *)arg); | ||
| 818 | return -error; | ||
| 819 | } | ||
| 820 | |||
| 821 | case XFS_IOC_FSCOUNTS: { | ||
| 822 | xfs_fsop_counts_t out; | ||
| 823 | |||
| 824 | error = xfs_fs_counts(mp, &out); | ||
| 825 | if (error) | ||
| 826 | return -error; | ||
| 827 | |||
| 828 | if (copy_to_user(arg, &out, sizeof(out))) | ||
| 829 | return -XFS_ERROR(EFAULT); | ||
| 830 | return 0; | ||
| 831 | } | ||
| 832 | |||
| 833 | case XFS_IOC_SET_RESBLKS: { | ||
| 834 | xfs_fsop_resblks_t inout; | ||
| 835 | __uint64_t in; | ||
| 836 | |||
| 837 | if (!capable(CAP_SYS_ADMIN)) | ||
| 838 | return -EPERM; | ||
| 839 | |||
| 840 | if (copy_from_user(&inout, arg, sizeof(inout))) | ||
| 841 | return -XFS_ERROR(EFAULT); | ||
| 842 | |||
| 843 | /* input parameter is passed in resblks field of structure */ | ||
| 844 | in = inout.resblks; | ||
| 845 | error = xfs_reserve_blocks(mp, &in, &inout); | ||
| 846 | if (error) | ||
| 847 | return -error; | ||
| 848 | |||
| 849 | if (copy_to_user(arg, &inout, sizeof(inout))) | ||
| 850 | return -XFS_ERROR(EFAULT); | ||
| 851 | return 0; | ||
| 852 | } | ||
| 853 | |||
| 854 | case XFS_IOC_GET_RESBLKS: { | ||
| 855 | xfs_fsop_resblks_t out; | ||
| 856 | |||
| 857 | if (!capable(CAP_SYS_ADMIN)) | ||
| 858 | return -EPERM; | ||
| 859 | |||
| 860 | error = xfs_reserve_blocks(mp, NULL, &out); | ||
| 861 | if (error) | ||
| 862 | return -error; | ||
| 863 | |||
| 864 | if (copy_to_user(arg, &out, sizeof(out))) | ||
| 865 | return -XFS_ERROR(EFAULT); | ||
| 866 | |||
| 867 | return 0; | ||
| 868 | } | ||
| 869 | |||
| 870 | case XFS_IOC_FSGROWFSDATA: { | ||
| 871 | xfs_growfs_data_t in; | ||
| 872 | |||
| 873 | if (!capable(CAP_SYS_ADMIN)) | ||
| 874 | return -EPERM; | ||
| 875 | |||
| 876 | if (copy_from_user(&in, arg, sizeof(in))) | ||
| 877 | return -XFS_ERROR(EFAULT); | ||
| 878 | |||
| 879 | error = xfs_growfs_data(mp, &in); | ||
| 880 | return -error; | ||
| 881 | } | ||
| 882 | |||
| 883 | case XFS_IOC_FSGROWFSLOG: { | ||
| 884 | xfs_growfs_log_t in; | ||
| 885 | |||
| 886 | if (!capable(CAP_SYS_ADMIN)) | ||
| 887 | return -EPERM; | ||
| 888 | |||
| 889 | if (copy_from_user(&in, arg, sizeof(in))) | ||
| 890 | return -XFS_ERROR(EFAULT); | ||
| 891 | |||
| 892 | error = xfs_growfs_log(mp, &in); | ||
| 893 | return -error; | ||
| 894 | } | ||
| 895 | |||
| 896 | case XFS_IOC_FSGROWFSRT: { | ||
| 897 | xfs_growfs_rt_t in; | ||
| 898 | |||
| 899 | if (!capable(CAP_SYS_ADMIN)) | ||
| 900 | return -EPERM; | ||
| 901 | |||
| 902 | if (copy_from_user(&in, arg, sizeof(in))) | ||
| 903 | return -XFS_ERROR(EFAULT); | ||
| 904 | |||
| 905 | error = xfs_growfs_rt(mp, &in); | ||
| 906 | return -error; | ||
| 907 | } | ||
| 908 | |||
| 909 | case XFS_IOC_FREEZE: | ||
| 910 | if (!capable(CAP_SYS_ADMIN)) | ||
| 911 | return -EPERM; | ||
| 912 | |||
| 913 | if (inode->i_sb->s_frozen == SB_UNFROZEN) | ||
| 914 | freeze_bdev(inode->i_sb->s_bdev); | ||
| 915 | return 0; | ||
| 916 | |||
| 917 | case XFS_IOC_THAW: | ||
| 918 | if (!capable(CAP_SYS_ADMIN)) | ||
| 919 | return -EPERM; | ||
| 920 | if (inode->i_sb->s_frozen != SB_UNFROZEN) | ||
| 921 | thaw_bdev(inode->i_sb->s_bdev, inode->i_sb); | ||
| 922 | return 0; | ||
| 923 | |||
| 924 | case XFS_IOC_GOINGDOWN: { | ||
| 925 | __uint32_t in; | ||
| 926 | |||
| 927 | if (!capable(CAP_SYS_ADMIN)) | ||
| 928 | return -EPERM; | ||
| 929 | |||
| 930 | if (get_user(in, (__uint32_t __user *)arg)) | ||
| 931 | return -XFS_ERROR(EFAULT); | ||
| 932 | |||
| 933 | error = xfs_fs_goingdown(mp, in); | ||
| 934 | return -error; | ||
| 935 | } | ||
| 936 | |||
| 937 | case XFS_IOC_ERROR_INJECTION: { | ||
| 938 | xfs_error_injection_t in; | ||
| 939 | |||
| 940 | if (!capable(CAP_SYS_ADMIN)) | ||
| 941 | return -EPERM; | ||
| 942 | |||
| 943 | if (copy_from_user(&in, arg, sizeof(in))) | ||
| 944 | return -XFS_ERROR(EFAULT); | ||
| 945 | |||
| 946 | error = xfs_errortag_add(in.errtag, mp); | ||
| 947 | return -error; | ||
| 948 | } | ||
| 949 | |||
| 950 | case XFS_IOC_ERROR_CLEARALL: | ||
| 951 | if (!capable(CAP_SYS_ADMIN)) | ||
| 952 | return -EPERM; | ||
| 953 | |||
| 954 | error = xfs_errortag_clearall(mp, 1); | ||
| 955 | return -error; | ||
| 956 | |||
| 957 | default: | ||
| 958 | return -ENOTTY; | ||
| 959 | } | ||
| 960 | } | ||
| 961 | |||
| 962 | STATIC int | 654 | STATIC int |
| 963 | xfs_ioc_space( | 655 | xfs_ioc_space( |
| 964 | struct xfs_inode *ip, | 656 | struct xfs_inode *ip, |
| @@ -1179,85 +871,85 @@ xfs_ioc_fsgetxattr( | |||
| 1179 | } | 871 | } |
| 1180 | 872 | ||
| 1181 | STATIC int | 873 | STATIC int |
| 1182 | xfs_ioc_xattr( | 874 | xfs_ioc_fssetxattr( |
| 1183 | xfs_inode_t *ip, | 875 | xfs_inode_t *ip, |
| 1184 | struct file *filp, | 876 | struct file *filp, |
| 1185 | unsigned int cmd, | ||
| 1186 | void __user *arg) | 877 | void __user *arg) |
| 1187 | { | 878 | { |
| 1188 | struct fsxattr fa; | 879 | struct fsxattr fa; |
| 1189 | struct bhv_vattr *vattr; | 880 | struct bhv_vattr *vattr; |
| 1190 | int error = 0; | 881 | int error; |
| 1191 | int attr_flags; | 882 | int attr_flags; |
| 1192 | unsigned int flags; | 883 | |
| 884 | if (copy_from_user(&fa, arg, sizeof(fa))) | ||
| 885 | return -EFAULT; | ||
| 1193 | 886 | ||
| 1194 | vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); | 887 | vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); |
| 1195 | if (unlikely(!vattr)) | 888 | if (unlikely(!vattr)) |
| 1196 | return -ENOMEM; | 889 | return -ENOMEM; |
| 1197 | 890 | ||
| 1198 | switch (cmd) { | 891 | attr_flags = 0; |
| 1199 | case XFS_IOC_FSSETXATTR: { | 892 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) |
| 1200 | if (copy_from_user(&fa, arg, sizeof(fa))) { | 893 | attr_flags |= ATTR_NONBLOCK; |
| 1201 | error = -EFAULT; | ||
| 1202 | break; | ||
| 1203 | } | ||
| 1204 | 894 | ||
| 1205 | attr_flags = 0; | 895 | vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID; |
| 1206 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) | 896 | vattr->va_xflags = fa.fsx_xflags; |
| 1207 | attr_flags |= ATTR_NONBLOCK; | 897 | vattr->va_extsize = fa.fsx_extsize; |
| 898 | vattr->va_projid = fa.fsx_projid; | ||
| 1208 | 899 | ||
| 1209 | vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID; | 900 | error = -xfs_setattr(ip, vattr, attr_flags, NULL); |
| 1210 | vattr->va_xflags = fa.fsx_xflags; | 901 | if (!error) |
| 1211 | vattr->va_extsize = fa.fsx_extsize; | 902 | vn_revalidate(XFS_ITOV(ip)); /* update flags */ |
| 1212 | vattr->va_projid = fa.fsx_projid; | 903 | kfree(vattr); |
| 904 | return 0; | ||
| 905 | } | ||
| 1213 | 906 | ||
| 1214 | error = xfs_setattr(ip, vattr, attr_flags, NULL); | 907 | STATIC int |
| 1215 | if (likely(!error)) | 908 | xfs_ioc_getxflags( |
| 1216 | vn_revalidate(XFS_ITOV(ip)); /* update flags */ | 909 | xfs_inode_t *ip, |
| 1217 | error = -error; | 910 | void __user *arg) |
| 1218 | break; | 911 | { |
| 1219 | } | 912 | unsigned int flags; |
| 1220 | 913 | ||
| 1221 | case XFS_IOC_GETXFLAGS: { | 914 | flags = xfs_di2lxflags(ip->i_d.di_flags); |
| 1222 | flags = xfs_di2lxflags(ip->i_d.di_flags); | 915 | if (copy_to_user(arg, &flags, sizeof(flags))) |
| 1223 | if (copy_to_user(arg, &flags, sizeof(flags))) | 916 | return -EFAULT; |
| 1224 | error = -EFAULT; | 917 | return 0; |
| 1225 | break; | 918 | } |
| 1226 | } | ||
| 1227 | 919 | ||
| 1228 | case XFS_IOC_SETXFLAGS: { | 920 | STATIC int |
| 1229 | if (copy_from_user(&flags, arg, sizeof(flags))) { | 921 | xfs_ioc_setxflags( |
| 1230 | error = -EFAULT; | 922 | xfs_inode_t *ip, |
| 1231 | break; | 923 | struct file *filp, |
| 1232 | } | 924 | void __user *arg) |
| 925 | { | ||
| 926 | struct bhv_vattr *vattr; | ||
| 927 | unsigned int flags; | ||
| 928 | int attr_flags; | ||
| 929 | int error; | ||
| 1233 | 930 | ||
| 1234 | if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ | 931 | if (copy_from_user(&flags, arg, sizeof(flags))) |
| 1235 | FS_NOATIME_FL | FS_NODUMP_FL | \ | 932 | return -EFAULT; |
| 1236 | FS_SYNC_FL)) { | ||
| 1237 | error = -EOPNOTSUPP; | ||
| 1238 | break; | ||
| 1239 | } | ||
| 1240 | 933 | ||
| 1241 | attr_flags = 0; | 934 | if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ |
| 1242 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) | 935 | FS_NOATIME_FL | FS_NODUMP_FL | \ |
| 1243 | attr_flags |= ATTR_NONBLOCK; | 936 | FS_SYNC_FL)) |
| 937 | return -EOPNOTSUPP; | ||
| 1244 | 938 | ||
| 1245 | vattr->va_mask = XFS_AT_XFLAGS; | 939 | vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); |
| 1246 | vattr->va_xflags = xfs_merge_ioc_xflags(flags, | 940 | if (unlikely(!vattr)) |
| 1247 | xfs_ip2xflags(ip)); | 941 | return -ENOMEM; |
| 1248 | 942 | ||
| 1249 | error = xfs_setattr(ip, vattr, attr_flags, NULL); | 943 | attr_flags = 0; |
| 1250 | if (likely(!error)) | 944 | if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) |
| 1251 | vn_revalidate(XFS_ITOV(ip)); /* update flags */ | 945 | attr_flags |= ATTR_NONBLOCK; |
| 1252 | error = -error; | ||
| 1253 | break; | ||
| 1254 | } | ||
| 1255 | 946 | ||
| 1256 | default: | 947 | vattr->va_mask = XFS_AT_XFLAGS; |
| 1257 | error = -ENOTTY; | 948 | vattr->va_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); |
| 1258 | break; | ||
| 1259 | } | ||
| 1260 | 949 | ||
| 950 | error = -xfs_setattr(ip, vattr, attr_flags, NULL); | ||
| 951 | if (likely(!error)) | ||
| 952 | vn_revalidate(XFS_ITOV(ip)); /* update flags */ | ||
| 1261 | kfree(vattr); | 953 | kfree(vattr); |
| 1262 | return error; | 954 | return error; |
| 1263 | } | 955 | } |
| @@ -1332,3 +1024,259 @@ xfs_ioc_getbmapx( | |||
| 1332 | 1024 | ||
| 1333 | return 0; | 1025 | return 0; |
| 1334 | } | 1026 | } |
| 1027 | |||
| 1028 | int | ||
| 1029 | xfs_ioctl( | ||
| 1030 | xfs_inode_t *ip, | ||
| 1031 | struct file *filp, | ||
| 1032 | int ioflags, | ||
| 1033 | unsigned int cmd, | ||
| 1034 | void __user *arg) | ||
| 1035 | { | ||
| 1036 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
| 1037 | xfs_mount_t *mp = ip->i_mount; | ||
| 1038 | int error; | ||
| 1039 | |||
| 1040 | xfs_itrace_entry(XFS_I(inode)); | ||
| 1041 | switch (cmd) { | ||
| 1042 | |||
| 1043 | case XFS_IOC_ALLOCSP: | ||
| 1044 | case XFS_IOC_FREESP: | ||
| 1045 | case XFS_IOC_RESVSP: | ||
| 1046 | case XFS_IOC_UNRESVSP: | ||
| 1047 | case XFS_IOC_ALLOCSP64: | ||
| 1048 | case XFS_IOC_FREESP64: | ||
| 1049 | case XFS_IOC_RESVSP64: | ||
| 1050 | case XFS_IOC_UNRESVSP64: | ||
| 1051 | /* | ||
| 1052 | * Only allow the sys admin to reserve space unless | ||
| 1053 | * unwritten extents are enabled. | ||
| 1054 | */ | ||
| 1055 | if (!xfs_sb_version_hasextflgbit(&mp->m_sb) && | ||
| 1056 | !capable(CAP_SYS_ADMIN)) | ||
| 1057 | return -EPERM; | ||
| 1058 | |||
| 1059 | return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg); | ||
| 1060 | |||
| 1061 | case XFS_IOC_DIOINFO: { | ||
| 1062 | struct dioattr da; | ||
| 1063 | xfs_buftarg_t *target = | ||
| 1064 | XFS_IS_REALTIME_INODE(ip) ? | ||
| 1065 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
| 1066 | |||
| 1067 | da.d_mem = da.d_miniosz = 1 << target->bt_sshift; | ||
| 1068 | da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); | ||
| 1069 | |||
| 1070 | if (copy_to_user(arg, &da, sizeof(da))) | ||
| 1071 | return -XFS_ERROR(EFAULT); | ||
| 1072 | return 0; | ||
| 1073 | } | ||
| 1074 | |||
| 1075 | case XFS_IOC_FSBULKSTAT_SINGLE: | ||
| 1076 | case XFS_IOC_FSBULKSTAT: | ||
| 1077 | case XFS_IOC_FSINUMBERS: | ||
| 1078 | return xfs_ioc_bulkstat(mp, cmd, arg); | ||
| 1079 | |||
| 1080 | case XFS_IOC_FSGEOMETRY_V1: | ||
| 1081 | return xfs_ioc_fsgeometry_v1(mp, arg); | ||
| 1082 | |||
| 1083 | case XFS_IOC_FSGEOMETRY: | ||
| 1084 | return xfs_ioc_fsgeometry(mp, arg); | ||
| 1085 | |||
| 1086 | case XFS_IOC_GETVERSION: | ||
| 1087 | return put_user(inode->i_generation, (int __user *)arg); | ||
| 1088 | |||
| 1089 | case XFS_IOC_FSGETXATTR: | ||
| 1090 | return xfs_ioc_fsgetxattr(ip, 0, arg); | ||
| 1091 | case XFS_IOC_FSGETXATTRA: | ||
| 1092 | return xfs_ioc_fsgetxattr(ip, 1, arg); | ||
| 1093 | case XFS_IOC_FSSETXATTR: | ||
| 1094 | return xfs_ioc_fssetxattr(ip, filp, arg); | ||
| 1095 | case XFS_IOC_GETXFLAGS: | ||
| 1096 | return xfs_ioc_getxflags(ip, arg); | ||
| 1097 | case XFS_IOC_SETXFLAGS: | ||
| 1098 | return xfs_ioc_setxflags(ip, filp, arg); | ||
| 1099 | |||
| 1100 | case XFS_IOC_FSSETDM: { | ||
| 1101 | struct fsdmidata dmi; | ||
| 1102 | |||
| 1103 | if (copy_from_user(&dmi, arg, sizeof(dmi))) | ||
| 1104 | return -XFS_ERROR(EFAULT); | ||
| 1105 | |||
| 1106 | error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, | ||
| 1107 | dmi.fsd_dmstate); | ||
| 1108 | return -error; | ||
| 1109 | } | ||
| 1110 | |||
| 1111 | case XFS_IOC_GETBMAP: | ||
| 1112 | case XFS_IOC_GETBMAPA: | ||
| 1113 | return xfs_ioc_getbmap(ip, ioflags, cmd, arg); | ||
| 1114 | |||
| 1115 | case XFS_IOC_GETBMAPX: | ||
| 1116 | return xfs_ioc_getbmapx(ip, arg); | ||
| 1117 | |||
| 1118 | case XFS_IOC_FD_TO_HANDLE: | ||
| 1119 | case XFS_IOC_PATH_TO_HANDLE: | ||
| 1120 | case XFS_IOC_PATH_TO_FSHANDLE: | ||
| 1121 | return xfs_find_handle(cmd, arg); | ||
| 1122 | |||
| 1123 | case XFS_IOC_OPEN_BY_HANDLE: | ||
| 1124 | return xfs_open_by_handle(mp, arg, filp, inode); | ||
| 1125 | |||
| 1126 | case XFS_IOC_FSSETDM_BY_HANDLE: | ||
| 1127 | return xfs_fssetdm_by_handle(mp, arg, inode); | ||
| 1128 | |||
| 1129 | case XFS_IOC_READLINK_BY_HANDLE: | ||
| 1130 | return xfs_readlink_by_handle(mp, arg, inode); | ||
| 1131 | |||
| 1132 | case XFS_IOC_ATTRLIST_BY_HANDLE: | ||
| 1133 | return xfs_attrlist_by_handle(mp, arg, inode); | ||
| 1134 | |||
| 1135 | case XFS_IOC_ATTRMULTI_BY_HANDLE: | ||
| 1136 | return xfs_attrmulti_by_handle(mp, arg, inode); | ||
| 1137 | |||
| 1138 | case XFS_IOC_SWAPEXT: { | ||
| 1139 | error = xfs_swapext((struct xfs_swapext __user *)arg); | ||
| 1140 | return -error; | ||
| 1141 | } | ||
| 1142 | |||
| 1143 | case XFS_IOC_FSCOUNTS: { | ||
| 1144 | xfs_fsop_counts_t out; | ||
| 1145 | |||
| 1146 | error = xfs_fs_counts(mp, &out); | ||
| 1147 | if (error) | ||
| 1148 | return -error; | ||
| 1149 | |||
| 1150 | if (copy_to_user(arg, &out, sizeof(out))) | ||
| 1151 | return -XFS_ERROR(EFAULT); | ||
| 1152 | return 0; | ||
| 1153 | } | ||
| 1154 | |||
| 1155 | case XFS_IOC_SET_RESBLKS: { | ||
| 1156 | xfs_fsop_resblks_t inout; | ||
| 1157 | __uint64_t in; | ||
| 1158 | |||
| 1159 | if (!capable(CAP_SYS_ADMIN)) | ||
| 1160 | return -EPERM; | ||
| 1161 | |||
| 1162 | if (copy_from_user(&inout, arg, sizeof(inout))) | ||
| 1163 | return -XFS_ERROR(EFAULT); | ||
| 1164 | |||
| 1165 | /* input parameter is passed in resblks field of structure */ | ||
| 1166 | in = inout.resblks; | ||
| 1167 | error = xfs_reserve_blocks(mp, &in, &inout); | ||
| 1168 | if (error) | ||
| 1169 | return -error; | ||
| 1170 | |||
| 1171 | if (copy_to_user(arg, &inout, sizeof(inout))) | ||
| 1172 | return -XFS_ERROR(EFAULT); | ||
| 1173 | return 0; | ||
| 1174 | } | ||
| 1175 | |||
| 1176 | case XFS_IOC_GET_RESBLKS: { | ||
| 1177 | xfs_fsop_resblks_t out; | ||
| 1178 | |||
| 1179 | if (!capable(CAP_SYS_ADMIN)) | ||
| 1180 | return -EPERM; | ||
| 1181 | |||
| 1182 | error = xfs_reserve_blocks(mp, NULL, &out); | ||
| 1183 | if (error) | ||
| 1184 | return -error; | ||
| 1185 | |||
| 1186 | if (copy_to_user(arg, &out, sizeof(out))) | ||
| 1187 | return -XFS_ERROR(EFAULT); | ||
| 1188 | |||
| 1189 | return 0; | ||
| 1190 | } | ||
| 1191 | |||
| 1192 | case XFS_IOC_FSGROWFSDATA: { | ||
| 1193 | xfs_growfs_data_t in; | ||
| 1194 | |||
| 1195 | if (!capable(CAP_SYS_ADMIN)) | ||
| 1196 | return -EPERM; | ||
| 1197 | |||
| 1198 | if (copy_from_user(&in, arg, sizeof(in))) | ||
| 1199 | return -XFS_ERROR(EFAULT); | ||
| 1200 | |||
| 1201 | error = xfs_growfs_data(mp, &in); | ||
| 1202 | return -error; | ||
| 1203 | } | ||
| 1204 | |||
| 1205 | case XFS_IOC_FSGROWFSLOG: { | ||
| 1206 | xfs_growfs_log_t in; | ||
| 1207 | |||
| 1208 | if (!capable(CAP_SYS_ADMIN)) | ||
| 1209 | return -EPERM; | ||
| 1210 | |||
| 1211 | if (copy_from_user(&in, arg, sizeof(in))) | ||
| 1212 | return -XFS_ERROR(EFAULT); | ||
| 1213 | |||
| 1214 | error = xfs_growfs_log(mp, &in); | ||
| 1215 | return -error; | ||
| 1216 | } | ||
| 1217 | |||
| 1218 | case XFS_IOC_FSGROWFSRT: { | ||
| 1219 | xfs_growfs_rt_t in; | ||
| 1220 | |||
| 1221 | if (!capable(CAP_SYS_ADMIN)) | ||
| 1222 | return -EPERM; | ||
| 1223 | |||
| 1224 | if (copy_from_user(&in, arg, sizeof(in))) | ||
| 1225 | return -XFS_ERROR(EFAULT); | ||
| 1226 | |||
| 1227 | error = xfs_growfs_rt(mp, &in); | ||
| 1228 | return -error; | ||
| 1229 | } | ||
| 1230 | |||
| 1231 | case XFS_IOC_FREEZE: | ||
| 1232 | if (!capable(CAP_SYS_ADMIN)) | ||
| 1233 | return -EPERM; | ||
| 1234 | |||
| 1235 | if (inode->i_sb->s_frozen == SB_UNFROZEN) | ||
| 1236 | freeze_bdev(inode->i_sb->s_bdev); | ||
| 1237 | return 0; | ||
| 1238 | |||
| 1239 | case XFS_IOC_THAW: | ||
| 1240 | if (!capable(CAP_SYS_ADMIN)) | ||
| 1241 | return -EPERM; | ||
| 1242 | if (inode->i_sb->s_frozen != SB_UNFROZEN) | ||
| 1243 | thaw_bdev(inode->i_sb->s_bdev, inode->i_sb); | ||
| 1244 | return 0; | ||
| 1245 | |||
| 1246 | case XFS_IOC_GOINGDOWN: { | ||
| 1247 | __uint32_t in; | ||
| 1248 | |||
| 1249 | if (!capable(CAP_SYS_ADMIN)) | ||
| 1250 | return -EPERM; | ||
| 1251 | |||
| 1252 | if (get_user(in, (__uint32_t __user *)arg)) | ||
| 1253 | return -XFS_ERROR(EFAULT); | ||
| 1254 | |||
| 1255 | error = xfs_fs_goingdown(mp, in); | ||
| 1256 | return -error; | ||
| 1257 | } | ||
| 1258 | |||
| 1259 | case XFS_IOC_ERROR_INJECTION: { | ||
| 1260 | xfs_error_injection_t in; | ||
| 1261 | |||
| 1262 | if (!capable(CAP_SYS_ADMIN)) | ||
| 1263 | return -EPERM; | ||
| 1264 | |||
| 1265 | if (copy_from_user(&in, arg, sizeof(in))) | ||
| 1266 | return -XFS_ERROR(EFAULT); | ||
| 1267 | |||
| 1268 | error = xfs_errortag_add(in.errtag, mp); | ||
| 1269 | return -error; | ||
| 1270 | } | ||
| 1271 | |||
| 1272 | case XFS_IOC_ERROR_CLEARALL: | ||
| 1273 | if (!capable(CAP_SYS_ADMIN)) | ||
| 1274 | return -EPERM; | ||
| 1275 | |||
| 1276 | error = xfs_errortag_clearall(mp, 1); | ||
| 1277 | return -error; | ||
| 1278 | |||
| 1279 | default: | ||
| 1280 | return -ENOTTY; | ||
| 1281 | } | ||
| 1282 | } | ||
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index cc4abd3daa49..0c958cf77758 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
| @@ -62,12 +62,11 @@ void | |||
| 62 | xfs_synchronize_atime( | 62 | xfs_synchronize_atime( |
| 63 | xfs_inode_t *ip) | 63 | xfs_inode_t *ip) |
| 64 | { | 64 | { |
| 65 | bhv_vnode_t *vp; | 65 | struct inode *inode = ip->i_vnode; |
| 66 | 66 | ||
| 67 | vp = XFS_ITOV_NULL(ip); | 67 | if (inode) { |
| 68 | if (vp) { | 68 | ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; |
| 69 | ip->i_d.di_atime.t_sec = (__int32_t)vp->i_atime.tv_sec; | 69 | ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; |
| 70 | ip->i_d.di_atime.t_nsec = (__int32_t)vp->i_atime.tv_nsec; | ||
| 71 | } | 70 | } |
| 72 | } | 71 | } |
| 73 | 72 | ||
| @@ -80,11 +79,10 @@ void | |||
| 80 | xfs_mark_inode_dirty_sync( | 79 | xfs_mark_inode_dirty_sync( |
| 81 | xfs_inode_t *ip) | 80 | xfs_inode_t *ip) |
| 82 | { | 81 | { |
| 83 | bhv_vnode_t *vp; | 82 | struct inode *inode = ip->i_vnode; |
| 84 | 83 | ||
| 85 | vp = XFS_ITOV_NULL(ip); | 84 | if (inode) |
| 86 | if (vp) | 85 | mark_inode_dirty_sync(inode); |
| 87 | mark_inode_dirty_sync(vn_to_inode(vp)); | ||
| 88 | } | 86 | } |
| 89 | 87 | ||
| 90 | /* | 88 | /* |
| @@ -215,66 +213,62 @@ xfs_validate_fields( | |||
| 215 | */ | 213 | */ |
| 216 | STATIC int | 214 | STATIC int |
| 217 | xfs_init_security( | 215 | xfs_init_security( |
| 218 | bhv_vnode_t *vp, | 216 | struct inode *inode, |
| 219 | struct inode *dir) | 217 | struct inode *dir) |
| 220 | { | 218 | { |
| 221 | struct inode *ip = vn_to_inode(vp); | 219 | struct xfs_inode *ip = XFS_I(inode); |
| 222 | size_t length; | 220 | size_t length; |
| 223 | void *value; | 221 | void *value; |
| 224 | char *name; | 222 | char *name; |
| 225 | int error; | 223 | int error; |
| 226 | 224 | ||
| 227 | error = security_inode_init_security(ip, dir, &name, &value, &length); | 225 | error = security_inode_init_security(inode, dir, &name, |
| 226 | &value, &length); | ||
| 228 | if (error) { | 227 | if (error) { |
| 229 | if (error == -EOPNOTSUPP) | 228 | if (error == -EOPNOTSUPP) |
| 230 | return 0; | 229 | return 0; |
| 231 | return -error; | 230 | return -error; |
| 232 | } | 231 | } |
| 233 | 232 | ||
| 234 | error = xfs_attr_set(XFS_I(ip), name, value, | 233 | error = xfs_attr_set(ip, name, value, length, ATTR_SECURE); |
| 235 | length, ATTR_SECURE); | ||
| 236 | if (!error) | 234 | if (!error) |
| 237 | xfs_iflags_set(XFS_I(ip), XFS_IMODIFIED); | 235 | xfs_iflags_set(ip, XFS_IMODIFIED); |
| 238 | 236 | ||
| 239 | kfree(name); | 237 | kfree(name); |
| 240 | kfree(value); | 238 | kfree(value); |
| 241 | return error; | 239 | return error; |
| 242 | } | 240 | } |
| 243 | 241 | ||
| 244 | /* | 242 | static void |
| 245 | * Determine whether a process has a valid fs_struct (kernel daemons | 243 | xfs_dentry_to_name( |
| 246 | * like knfsd don't have an fs_struct). | 244 | struct xfs_name *namep, |
| 247 | * | 245 | struct dentry *dentry) |
| 248 | * XXX(hch): nfsd is broken, better fix it instead. | ||
| 249 | */ | ||
| 250 | STATIC_INLINE int | ||
| 251 | xfs_has_fs_struct(struct task_struct *task) | ||
| 252 | { | 246 | { |
| 253 | return (task->fs != init_task.fs); | 247 | namep->name = dentry->d_name.name; |
| 248 | namep->len = dentry->d_name.len; | ||
| 254 | } | 249 | } |
| 255 | 250 | ||
| 256 | STATIC void | 251 | STATIC void |
| 257 | xfs_cleanup_inode( | 252 | xfs_cleanup_inode( |
| 258 | struct inode *dir, | 253 | struct inode *dir, |
| 259 | bhv_vnode_t *vp, | 254 | struct inode *inode, |
| 260 | struct dentry *dentry, | 255 | struct dentry *dentry, |
| 261 | int mode) | 256 | int mode) |
| 262 | { | 257 | { |
| 263 | struct dentry teardown = {}; | 258 | struct xfs_name teardown; |
| 264 | 259 | ||
| 265 | /* Oh, the horror. | 260 | /* Oh, the horror. |
| 266 | * If we can't add the ACL or we fail in | 261 | * If we can't add the ACL or we fail in |
| 267 | * xfs_init_security we must back out. | 262 | * xfs_init_security we must back out. |
| 268 | * ENOSPC can hit here, among other things. | 263 | * ENOSPC can hit here, among other things. |
| 269 | */ | 264 | */ |
| 270 | teardown.d_inode = vn_to_inode(vp); | 265 | xfs_dentry_to_name(&teardown, dentry); |
| 271 | teardown.d_name = dentry->d_name; | ||
| 272 | 266 | ||
| 273 | if (S_ISDIR(mode)) | 267 | if (S_ISDIR(mode)) |
| 274 | xfs_rmdir(XFS_I(dir), &teardown); | 268 | xfs_rmdir(XFS_I(dir), &teardown, XFS_I(inode)); |
| 275 | else | 269 | else |
| 276 | xfs_remove(XFS_I(dir), &teardown); | 270 | xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); |
| 277 | VN_RELE(vp); | 271 | iput(inode); |
| 278 | } | 272 | } |
| 279 | 273 | ||
| 280 | STATIC int | 274 | STATIC int |
| @@ -284,9 +278,10 @@ xfs_vn_mknod( | |||
| 284 | int mode, | 278 | int mode, |
| 285 | dev_t rdev) | 279 | dev_t rdev) |
| 286 | { | 280 | { |
| 287 | struct inode *ip; | 281 | struct inode *inode; |
| 288 | bhv_vnode_t *vp = NULL, *dvp = vn_from_inode(dir); | 282 | struct xfs_inode *ip = NULL; |
| 289 | xfs_acl_t *default_acl = NULL; | 283 | xfs_acl_t *default_acl = NULL; |
| 284 | struct xfs_name name; | ||
| 290 | attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; | 285 | attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; |
| 291 | int error; | 286 | int error; |
| 292 | 287 | ||
| @@ -297,59 +292,67 @@ xfs_vn_mknod( | |||
| 297 | if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) | 292 | if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) |
| 298 | return -EINVAL; | 293 | return -EINVAL; |
| 299 | 294 | ||
| 300 | if (unlikely(test_default_acl && test_default_acl(dvp))) { | 295 | if (test_default_acl && test_default_acl(dir)) { |
| 301 | if (!_ACL_ALLOC(default_acl)) { | 296 | if (!_ACL_ALLOC(default_acl)) { |
| 302 | return -ENOMEM; | 297 | return -ENOMEM; |
| 303 | } | 298 | } |
| 304 | if (!_ACL_GET_DEFAULT(dvp, default_acl)) { | 299 | if (!_ACL_GET_DEFAULT(dir, default_acl)) { |
| 305 | _ACL_FREE(default_acl); | 300 | _ACL_FREE(default_acl); |
| 306 | default_acl = NULL; | 301 | default_acl = NULL; |
| 307 | } | 302 | } |
| 308 | } | 303 | } |
| 309 | 304 | ||
| 310 | if (IS_POSIXACL(dir) && !default_acl && xfs_has_fs_struct(current)) | 305 | xfs_dentry_to_name(&name, dentry); |
| 306 | |||
| 307 | if (IS_POSIXACL(dir) && !default_acl) | ||
| 311 | mode &= ~current->fs->umask; | 308 | mode &= ~current->fs->umask; |
| 312 | 309 | ||
| 313 | switch (mode & S_IFMT) { | 310 | switch (mode & S_IFMT) { |
| 314 | case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: | 311 | case S_IFCHR: |
| 312 | case S_IFBLK: | ||
| 313 | case S_IFIFO: | ||
| 314 | case S_IFSOCK: | ||
| 315 | rdev = sysv_encode_dev(rdev); | 315 | rdev = sysv_encode_dev(rdev); |
| 316 | case S_IFREG: | 316 | case S_IFREG: |
| 317 | error = xfs_create(XFS_I(dir), dentry, mode, rdev, &vp, NULL); | 317 | error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); |
| 318 | break; | 318 | break; |
| 319 | case S_IFDIR: | 319 | case S_IFDIR: |
| 320 | error = xfs_mkdir(XFS_I(dir), dentry, mode, &vp, NULL); | 320 | error = xfs_mkdir(XFS_I(dir), &name, mode, &ip, NULL); |
| 321 | break; | 321 | break; |
| 322 | default: | 322 | default: |
| 323 | error = EINVAL; | 323 | error = EINVAL; |
| 324 | break; | 324 | break; |
| 325 | } | 325 | } |
| 326 | 326 | ||
| 327 | if (unlikely(!error)) { | 327 | if (unlikely(error)) |
| 328 | error = xfs_init_security(vp, dir); | 328 | goto out_free_acl; |
| 329 | if (error) | ||
| 330 | xfs_cleanup_inode(dir, vp, dentry, mode); | ||
| 331 | } | ||
| 332 | 329 | ||
| 333 | if (unlikely(default_acl)) { | 330 | inode = ip->i_vnode; |
| 334 | if (!error) { | 331 | |
| 335 | error = _ACL_INHERIT(vp, mode, default_acl); | 332 | error = xfs_init_security(inode, dir); |
| 336 | if (!error) | 333 | if (unlikely(error)) |
| 337 | xfs_iflags_set(XFS_I(vp), XFS_IMODIFIED); | 334 | goto out_cleanup_inode; |
| 338 | else | 335 | |
| 339 | xfs_cleanup_inode(dir, vp, dentry, mode); | 336 | if (default_acl) { |
| 340 | } | 337 | error = _ACL_INHERIT(inode, mode, default_acl); |
| 338 | if (unlikely(error)) | ||
| 339 | goto out_cleanup_inode; | ||
| 340 | xfs_iflags_set(ip, XFS_IMODIFIED); | ||
| 341 | _ACL_FREE(default_acl); | 341 | _ACL_FREE(default_acl); |
| 342 | } | 342 | } |
| 343 | 343 | ||
| 344 | if (likely(!error)) { | ||
| 345 | ASSERT(vp); | ||
| 346 | ip = vn_to_inode(vp); | ||
| 347 | 344 | ||
| 348 | if (S_ISDIR(mode)) | 345 | if (S_ISDIR(mode)) |
| 349 | xfs_validate_fields(ip); | 346 | xfs_validate_fields(inode); |
| 350 | d_instantiate(dentry, ip); | 347 | d_instantiate(dentry, inode); |
| 351 | xfs_validate_fields(dir); | 348 | xfs_validate_fields(dir); |
| 352 | } | 349 | return -error; |
| 350 | |||
| 351 | out_cleanup_inode: | ||
| 352 | xfs_cleanup_inode(dir, inode, dentry, mode); | ||
| 353 | out_free_acl: | ||
| 354 | if (default_acl) | ||
| 355 | _ACL_FREE(default_acl); | ||
| 353 | return -error; | 356 | return -error; |
| 354 | } | 357 | } |
| 355 | 358 | ||
| @@ -378,13 +381,15 @@ xfs_vn_lookup( | |||
| 378 | struct dentry *dentry, | 381 | struct dentry *dentry, |
| 379 | struct nameidata *nd) | 382 | struct nameidata *nd) |
| 380 | { | 383 | { |
| 381 | bhv_vnode_t *cvp; | 384 | struct xfs_inode *cip; |
| 385 | struct xfs_name name; | ||
| 382 | int error; | 386 | int error; |
| 383 | 387 | ||
| 384 | if (dentry->d_name.len >= MAXNAMELEN) | 388 | if (dentry->d_name.len >= MAXNAMELEN) |
| 385 | return ERR_PTR(-ENAMETOOLONG); | 389 | return ERR_PTR(-ENAMETOOLONG); |
| 386 | 390 | ||
| 387 | error = xfs_lookup(XFS_I(dir), dentry, &cvp); | 391 | xfs_dentry_to_name(&name, dentry); |
| 392 | error = xfs_lookup(XFS_I(dir), &name, &cip); | ||
| 388 | if (unlikely(error)) { | 393 | if (unlikely(error)) { |
| 389 | if (unlikely(error != ENOENT)) | 394 | if (unlikely(error != ENOENT)) |
| 390 | return ERR_PTR(-error); | 395 | return ERR_PTR(-error); |
| @@ -392,7 +397,7 @@ xfs_vn_lookup( | |||
| 392 | return NULL; | 397 | return NULL; |
| 393 | } | 398 | } |
| 394 | 399 | ||
| 395 | return d_splice_alias(vn_to_inode(cvp), dentry); | 400 | return d_splice_alias(cip->i_vnode, dentry); |
| 396 | } | 401 | } |
| 397 | 402 | ||
| 398 | STATIC int | 403 | STATIC int |
| @@ -401,23 +406,24 @@ xfs_vn_link( | |||
| 401 | struct inode *dir, | 406 | struct inode *dir, |
| 402 | struct dentry *dentry) | 407 | struct dentry *dentry) |
| 403 | { | 408 | { |
| 404 | struct inode *ip; /* inode of guy being linked to */ | 409 | struct inode *inode; /* inode of guy being linked to */ |
| 405 | bhv_vnode_t *vp; /* vp of name being linked */ | 410 | struct xfs_name name; |
| 406 | int error; | 411 | int error; |
| 407 | 412 | ||
| 408 | ip = old_dentry->d_inode; /* inode being linked to */ | 413 | inode = old_dentry->d_inode; |
| 409 | vp = vn_from_inode(ip); | 414 | xfs_dentry_to_name(&name, dentry); |
| 410 | 415 | ||
| 411 | VN_HOLD(vp); | 416 | igrab(inode); |
| 412 | error = xfs_link(XFS_I(dir), vp, dentry); | 417 | error = xfs_link(XFS_I(dir), XFS_I(inode), &name); |
| 413 | if (unlikely(error)) { | 418 | if (unlikely(error)) { |
| 414 | VN_RELE(vp); | 419 | iput(inode); |
| 415 | } else { | 420 | return -error; |
| 416 | xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); | ||
| 417 | xfs_validate_fields(ip); | ||
| 418 | d_instantiate(dentry, ip); | ||
| 419 | } | 421 | } |
| 420 | return -error; | 422 | |
| 423 | xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); | ||
| 424 | xfs_validate_fields(inode); | ||
| 425 | d_instantiate(dentry, inode); | ||
| 426 | return 0; | ||
| 421 | } | 427 | } |
| 422 | 428 | ||
| 423 | STATIC int | 429 | STATIC int |
| @@ -426,11 +432,13 @@ xfs_vn_unlink( | |||
| 426 | struct dentry *dentry) | 432 | struct dentry *dentry) |
| 427 | { | 433 | { |
| 428 | struct inode *inode; | 434 | struct inode *inode; |
| 435 | struct xfs_name name; | ||
| 429 | int error; | 436 | int error; |
| 430 | 437 | ||
| 431 | inode = dentry->d_inode; | 438 | inode = dentry->d_inode; |
| 439 | xfs_dentry_to_name(&name, dentry); | ||
| 432 | 440 | ||
| 433 | error = xfs_remove(XFS_I(dir), dentry); | 441 | error = xfs_remove(XFS_I(dir), &name, XFS_I(inode)); |
| 434 | if (likely(!error)) { | 442 | if (likely(!error)) { |
| 435 | xfs_validate_fields(dir); /* size needs update */ | 443 | xfs_validate_fields(dir); /* size needs update */ |
| 436 | xfs_validate_fields(inode); | 444 | xfs_validate_fields(inode); |
| @@ -444,29 +452,34 @@ xfs_vn_symlink( | |||
| 444 | struct dentry *dentry, | 452 | struct dentry *dentry, |
| 445 | const char *symname) | 453 | const char *symname) |
| 446 | { | 454 | { |
| 447 | struct inode *ip; | 455 | struct inode *inode; |
| 448 | bhv_vnode_t *cvp; /* used to lookup symlink to put in dentry */ | 456 | struct xfs_inode *cip = NULL; |
| 457 | struct xfs_name name; | ||
| 449 | int error; | 458 | int error; |
| 450 | mode_t mode; | 459 | mode_t mode; |
| 451 | 460 | ||
| 452 | cvp = NULL; | ||
| 453 | |||
| 454 | mode = S_IFLNK | | 461 | mode = S_IFLNK | |
| 455 | (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO); | 462 | (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO); |
| 463 | xfs_dentry_to_name(&name, dentry); | ||
| 456 | 464 | ||
| 457 | error = xfs_symlink(XFS_I(dir), dentry, (char *)symname, mode, | 465 | error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL); |
| 458 | &cvp, NULL); | 466 | if (unlikely(error)) |
| 459 | if (likely(!error && cvp)) { | 467 | goto out; |
| 460 | error = xfs_init_security(cvp, dir); | 468 | |
| 461 | if (likely(!error)) { | 469 | inode = cip->i_vnode; |
| 462 | ip = vn_to_inode(cvp); | 470 | |
| 463 | d_instantiate(dentry, ip); | 471 | error = xfs_init_security(inode, dir); |
| 464 | xfs_validate_fields(dir); | 472 | if (unlikely(error)) |
| 465 | xfs_validate_fields(ip); | 473 | goto out_cleanup_inode; |
| 466 | } else { | 474 | |
| 467 | xfs_cleanup_inode(dir, cvp, dentry, 0); | 475 | d_instantiate(dentry, inode); |
| 468 | } | 476 | xfs_validate_fields(dir); |
| 469 | } | 477 | xfs_validate_fields(inode); |
| 478 | return 0; | ||
| 479 | |||
| 480 | out_cleanup_inode: | ||
| 481 | xfs_cleanup_inode(dir, inode, dentry, 0); | ||
| 482 | out: | ||
| 470 | return -error; | 483 | return -error; |
| 471 | } | 484 | } |
| 472 | 485 | ||
| @@ -476,9 +489,12 @@ xfs_vn_rmdir( | |||
| 476 | struct dentry *dentry) | 489 | struct dentry *dentry) |
| 477 | { | 490 | { |
| 478 | struct inode *inode = dentry->d_inode; | 491 | struct inode *inode = dentry->d_inode; |
| 492 | struct xfs_name name; | ||
| 479 | int error; | 493 | int error; |
| 480 | 494 | ||
| 481 | error = xfs_rmdir(XFS_I(dir), dentry); | 495 | xfs_dentry_to_name(&name, dentry); |
| 496 | |||
| 497 | error = xfs_rmdir(XFS_I(dir), &name, XFS_I(inode)); | ||
| 482 | if (likely(!error)) { | 498 | if (likely(!error)) { |
| 483 | xfs_validate_fields(inode); | 499 | xfs_validate_fields(inode); |
| 484 | xfs_validate_fields(dir); | 500 | xfs_validate_fields(dir); |
| @@ -494,12 +510,15 @@ xfs_vn_rename( | |||
| 494 | struct dentry *ndentry) | 510 | struct dentry *ndentry) |
| 495 | { | 511 | { |
| 496 | struct inode *new_inode = ndentry->d_inode; | 512 | struct inode *new_inode = ndentry->d_inode; |
| 497 | bhv_vnode_t *tvp; /* target directory */ | 513 | struct xfs_name oname; |
| 514 | struct xfs_name nname; | ||
| 498 | int error; | 515 | int error; |
| 499 | 516 | ||
| 500 | tvp = vn_from_inode(ndir); | 517 | xfs_dentry_to_name(&oname, odentry); |
| 518 | xfs_dentry_to_name(&nname, ndentry); | ||
| 501 | 519 | ||
| 502 | error = xfs_rename(XFS_I(odir), odentry, tvp, ndentry); | 520 | error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), |
| 521 | XFS_I(ndir), &nname); | ||
| 503 | if (likely(!error)) { | 522 | if (likely(!error)) { |
| 504 | if (new_inode) | 523 | if (new_inode) |
| 505 | xfs_validate_fields(new_inode); | 524 | xfs_validate_fields(new_inode); |
| @@ -700,11 +719,19 @@ xfs_vn_setattr( | |||
| 700 | return -error; | 719 | return -error; |
| 701 | } | 720 | } |
| 702 | 721 | ||
| 722 | /* | ||
| 723 | * block_truncate_page can return an error, but we can't propagate it | ||
| 724 | * at all here. Leave a complaint + stack trace in the syslog because | ||
| 725 | * this could be bad. If it is bad, we need to propagate the error further. | ||
| 726 | */ | ||
| 703 | STATIC void | 727 | STATIC void |
| 704 | xfs_vn_truncate( | 728 | xfs_vn_truncate( |
| 705 | struct inode *inode) | 729 | struct inode *inode) |
| 706 | { | 730 | { |
| 707 | block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_blocks); | 731 | int error; |
| 732 | error = block_truncate_page(inode->i_mapping, inode->i_size, | ||
| 733 | xfs_get_blocks); | ||
| 734 | WARN_ON(error); | ||
| 708 | } | 735 | } |
| 709 | 736 | ||
| 710 | STATIC int | 737 | STATIC int |
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 3ca39c4e5d2a..e5143323e71f 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
| @@ -99,7 +99,6 @@ | |||
| 99 | /* | 99 | /* |
| 100 | * Feature macros (disable/enable) | 100 | * Feature macros (disable/enable) |
| 101 | */ | 101 | */ |
| 102 | #undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ | ||
| 103 | #define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */ | 102 | #define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */ |
| 104 | #ifdef CONFIG_SMP | 103 | #ifdef CONFIG_SMP |
| 105 | #define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ | 104 | #define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ |
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 166353388490..21c0dbc74093 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c | |||
| @@ -176,7 +176,6 @@ xfs_read( | |||
| 176 | { | 176 | { |
| 177 | struct file *file = iocb->ki_filp; | 177 | struct file *file = iocb->ki_filp; |
| 178 | struct inode *inode = file->f_mapping->host; | 178 | struct inode *inode = file->f_mapping->host; |
| 179 | bhv_vnode_t *vp = XFS_ITOV(ip); | ||
| 180 | xfs_mount_t *mp = ip->i_mount; | 179 | xfs_mount_t *mp = ip->i_mount; |
| 181 | size_t size = 0; | 180 | size_t size = 0; |
| 182 | ssize_t ret = 0; | 181 | ssize_t ret = 0; |
| @@ -228,11 +227,11 @@ xfs_read( | |||
| 228 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 227 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
| 229 | 228 | ||
| 230 | if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { | 229 | if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { |
| 231 | bhv_vrwlock_t locktype = VRWLOCK_READ; | ||
| 232 | int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); | 230 | int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); |
| 231 | int iolock = XFS_IOLOCK_SHARED; | ||
| 233 | 232 | ||
| 234 | ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, *offset, size, | 233 | ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size, |
| 235 | dmflags, &locktype); | 234 | dmflags, &iolock); |
| 236 | if (ret) { | 235 | if (ret) { |
| 237 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 236 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
| 238 | if (unlikely(ioflags & IO_ISDIRECT)) | 237 | if (unlikely(ioflags & IO_ISDIRECT)) |
| @@ -242,7 +241,7 @@ xfs_read( | |||
| 242 | } | 241 | } |
| 243 | 242 | ||
| 244 | if (unlikely(ioflags & IO_ISDIRECT)) { | 243 | if (unlikely(ioflags & IO_ISDIRECT)) { |
| 245 | if (VN_CACHED(vp)) | 244 | if (inode->i_mapping->nrpages) |
| 246 | ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), | 245 | ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), |
| 247 | -1, FI_REMAPF_LOCKED); | 246 | -1, FI_REMAPF_LOCKED); |
| 248 | mutex_unlock(&inode->i_mutex); | 247 | mutex_unlock(&inode->i_mutex); |
| @@ -276,7 +275,6 @@ xfs_splice_read( | |||
| 276 | int flags, | 275 | int flags, |
| 277 | int ioflags) | 276 | int ioflags) |
| 278 | { | 277 | { |
| 279 | bhv_vnode_t *vp = XFS_ITOV(ip); | ||
| 280 | xfs_mount_t *mp = ip->i_mount; | 278 | xfs_mount_t *mp = ip->i_mount; |
| 281 | ssize_t ret; | 279 | ssize_t ret; |
| 282 | 280 | ||
| @@ -287,11 +285,11 @@ xfs_splice_read( | |||
| 287 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 285 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
| 288 | 286 | ||
| 289 | if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { | 287 | if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { |
| 290 | bhv_vrwlock_t locktype = VRWLOCK_READ; | 288 | int iolock = XFS_IOLOCK_SHARED; |
| 291 | int error; | 289 | int error; |
| 292 | 290 | ||
| 293 | error = XFS_SEND_DATA(mp, DM_EVENT_READ, vp, *ppos, count, | 291 | error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count, |
| 294 | FILP_DELAY_FLAG(infilp), &locktype); | 292 | FILP_DELAY_FLAG(infilp), &iolock); |
| 295 | if (error) { | 293 | if (error) { |
| 296 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 294 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
| 297 | return -error; | 295 | return -error; |
| @@ -317,7 +315,6 @@ xfs_splice_write( | |||
| 317 | int flags, | 315 | int flags, |
| 318 | int ioflags) | 316 | int ioflags) |
| 319 | { | 317 | { |
| 320 | bhv_vnode_t *vp = XFS_ITOV(ip); | ||
| 321 | xfs_mount_t *mp = ip->i_mount; | 318 | xfs_mount_t *mp = ip->i_mount; |
| 322 | ssize_t ret; | 319 | ssize_t ret; |
| 323 | struct inode *inode = outfilp->f_mapping->host; | 320 | struct inode *inode = outfilp->f_mapping->host; |
| @@ -330,11 +327,11 @@ xfs_splice_write( | |||
| 330 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | 327 | xfs_ilock(ip, XFS_IOLOCK_EXCL); |
| 331 | 328 | ||
| 332 | if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) { | 329 | if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) { |
| 333 | bhv_vrwlock_t locktype = VRWLOCK_WRITE; | 330 | int iolock = XFS_IOLOCK_EXCL; |
| 334 | int error; | 331 | int error; |
| 335 | 332 | ||
| 336 | error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, *ppos, count, | 333 | error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count, |
| 337 | FILP_DELAY_FLAG(outfilp), &locktype); | 334 | FILP_DELAY_FLAG(outfilp), &iolock); |
| 338 | if (error) { | 335 | if (error) { |
| 339 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 336 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
| 340 | return -error; | 337 | return -error; |
| @@ -573,14 +570,12 @@ xfs_write( | |||
| 573 | struct file *file = iocb->ki_filp; | 570 | struct file *file = iocb->ki_filp; |
| 574 | struct address_space *mapping = file->f_mapping; | 571 | struct address_space *mapping = file->f_mapping; |
| 575 | struct inode *inode = mapping->host; | 572 | struct inode *inode = mapping->host; |
| 576 | bhv_vnode_t *vp = XFS_ITOV(xip); | ||
| 577 | unsigned long segs = nsegs; | 573 | unsigned long segs = nsegs; |
| 578 | xfs_mount_t *mp; | 574 | xfs_mount_t *mp; |
| 579 | ssize_t ret = 0, error = 0; | 575 | ssize_t ret = 0, error = 0; |
| 580 | xfs_fsize_t isize, new_size; | 576 | xfs_fsize_t isize, new_size; |
| 581 | int iolock; | 577 | int iolock; |
| 582 | int eventsent = 0; | 578 | int eventsent = 0; |
| 583 | bhv_vrwlock_t locktype; | ||
| 584 | size_t ocount = 0, count; | 579 | size_t ocount = 0, count; |
| 585 | loff_t pos; | 580 | loff_t pos; |
| 586 | int need_i_mutex; | 581 | int need_i_mutex; |
| @@ -607,11 +602,9 @@ xfs_write( | |||
| 607 | relock: | 602 | relock: |
| 608 | if (ioflags & IO_ISDIRECT) { | 603 | if (ioflags & IO_ISDIRECT) { |
| 609 | iolock = XFS_IOLOCK_SHARED; | 604 | iolock = XFS_IOLOCK_SHARED; |
| 610 | locktype = VRWLOCK_WRITE_DIRECT; | ||
| 611 | need_i_mutex = 0; | 605 | need_i_mutex = 0; |
| 612 | } else { | 606 | } else { |
| 613 | iolock = XFS_IOLOCK_EXCL; | 607 | iolock = XFS_IOLOCK_EXCL; |
| 614 | locktype = VRWLOCK_WRITE; | ||
| 615 | need_i_mutex = 1; | 608 | need_i_mutex = 1; |
| 616 | mutex_lock(&inode->i_mutex); | 609 | mutex_lock(&inode->i_mutex); |
| 617 | } | 610 | } |
| @@ -634,9 +627,8 @@ start: | |||
| 634 | dmflags |= DM_FLAGS_IMUX; | 627 | dmflags |= DM_FLAGS_IMUX; |
| 635 | 628 | ||
| 636 | xfs_iunlock(xip, XFS_ILOCK_EXCL); | 629 | xfs_iunlock(xip, XFS_ILOCK_EXCL); |
| 637 | error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, | 630 | error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip, |
| 638 | pos, count, | 631 | pos, count, dmflags, &iolock); |
| 639 | dmflags, &locktype); | ||
| 640 | if (error) { | 632 | if (error) { |
| 641 | goto out_unlock_internal; | 633 | goto out_unlock_internal; |
| 642 | } | 634 | } |
| @@ -664,10 +656,9 @@ start: | |||
| 664 | return XFS_ERROR(-EINVAL); | 656 | return XFS_ERROR(-EINVAL); |
| 665 | } | 657 | } |
| 666 | 658 | ||
| 667 | if (!need_i_mutex && (VN_CACHED(vp) || pos > xip->i_size)) { | 659 | if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) { |
| 668 | xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); | 660 | xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); |
| 669 | iolock = XFS_IOLOCK_EXCL; | 661 | iolock = XFS_IOLOCK_EXCL; |
| 670 | locktype = VRWLOCK_WRITE; | ||
| 671 | need_i_mutex = 1; | 662 | need_i_mutex = 1; |
| 672 | mutex_lock(&inode->i_mutex); | 663 | mutex_lock(&inode->i_mutex); |
| 673 | xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); | 664 | xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); |
| @@ -727,7 +718,7 @@ retry: | |||
| 727 | current->backing_dev_info = mapping->backing_dev_info; | 718 | current->backing_dev_info = mapping->backing_dev_info; |
| 728 | 719 | ||
| 729 | if ((ioflags & IO_ISDIRECT)) { | 720 | if ((ioflags & IO_ISDIRECT)) { |
| 730 | if (VN_CACHED(vp)) { | 721 | if (mapping->nrpages) { |
| 731 | WARN_ON(need_i_mutex == 0); | 722 | WARN_ON(need_i_mutex == 0); |
| 732 | xfs_inval_cached_trace(xip, pos, -1, | 723 | xfs_inval_cached_trace(xip, pos, -1, |
| 733 | (pos & PAGE_CACHE_MASK), -1); | 724 | (pos & PAGE_CACHE_MASK), -1); |
| @@ -744,7 +735,6 @@ retry: | |||
| 744 | mutex_unlock(&inode->i_mutex); | 735 | mutex_unlock(&inode->i_mutex); |
| 745 | 736 | ||
| 746 | iolock = XFS_IOLOCK_SHARED; | 737 | iolock = XFS_IOLOCK_SHARED; |
| 747 | locktype = VRWLOCK_WRITE_DIRECT; | ||
| 748 | need_i_mutex = 0; | 738 | need_i_mutex = 0; |
| 749 | } | 739 | } |
| 750 | 740 | ||
| @@ -781,15 +771,15 @@ retry: | |||
| 781 | 771 | ||
| 782 | if (ret == -ENOSPC && | 772 | if (ret == -ENOSPC && |
| 783 | DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { | 773 | DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { |
| 784 | xfs_rwunlock(xip, locktype); | 774 | xfs_iunlock(xip, iolock); |
| 785 | if (need_i_mutex) | 775 | if (need_i_mutex) |
| 786 | mutex_unlock(&inode->i_mutex); | 776 | mutex_unlock(&inode->i_mutex); |
| 787 | error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, | 777 | error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip, |
| 788 | DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, | 778 | DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL, |
| 789 | 0, 0, 0); /* Delay flag intentionally unused */ | 779 | 0, 0, 0); /* Delay flag intentionally unused */ |
| 790 | if (need_i_mutex) | 780 | if (need_i_mutex) |
| 791 | mutex_lock(&inode->i_mutex); | 781 | mutex_lock(&inode->i_mutex); |
| 792 | xfs_rwlock(xip, locktype); | 782 | xfs_ilock(xip, iolock); |
| 793 | if (error) | 783 | if (error) |
| 794 | goto out_unlock_internal; | 784 | goto out_unlock_internal; |
| 795 | pos = xip->i_size; | 785 | pos = xip->i_size; |
| @@ -817,7 +807,8 @@ retry: | |||
| 817 | /* Handle various SYNC-type writes */ | 807 | /* Handle various SYNC-type writes */ |
| 818 | if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { | 808 | if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { |
| 819 | int error2; | 809 | int error2; |
| 820 | xfs_rwunlock(xip, locktype); | 810 | |
| 811 | xfs_iunlock(xip, iolock); | ||
| 821 | if (need_i_mutex) | 812 | if (need_i_mutex) |
| 822 | mutex_unlock(&inode->i_mutex); | 813 | mutex_unlock(&inode->i_mutex); |
| 823 | error2 = sync_page_range(inode, mapping, pos, ret); | 814 | error2 = sync_page_range(inode, mapping, pos, ret); |
| @@ -825,7 +816,7 @@ retry: | |||
| 825 | error = error2; | 816 | error = error2; |
| 826 | if (need_i_mutex) | 817 | if (need_i_mutex) |
| 827 | mutex_lock(&inode->i_mutex); | 818 | mutex_lock(&inode->i_mutex); |
| 828 | xfs_rwlock(xip, locktype); | 819 | xfs_ilock(xip, iolock); |
| 829 | error2 = xfs_write_sync_logforce(mp, xip); | 820 | error2 = xfs_write_sync_logforce(mp, xip); |
| 830 | if (!error) | 821 | if (!error) |
| 831 | error = error2; | 822 | error = error2; |
| @@ -846,7 +837,7 @@ retry: | |||
| 846 | xip->i_d.di_size = xip->i_size; | 837 | xip->i_d.di_size = xip->i_size; |
| 847 | xfs_iunlock(xip, XFS_ILOCK_EXCL); | 838 | xfs_iunlock(xip, XFS_ILOCK_EXCL); |
| 848 | } | 839 | } |
| 849 | xfs_rwunlock(xip, locktype); | 840 | xfs_iunlock(xip, iolock); |
| 850 | out_unlock_mutex: | 841 | out_unlock_mutex: |
| 851 | if (need_i_mutex) | 842 | if (need_i_mutex) |
| 852 | mutex_unlock(&inode->i_mutex); | 843 | mutex_unlock(&inode->i_mutex); |
| @@ -884,28 +875,23 @@ xfs_bdstrat_cb(struct xfs_buf *bp) | |||
| 884 | } | 875 | } |
| 885 | 876 | ||
| 886 | /* | 877 | /* |
| 887 | * Wrapper around bdstrat so that we can stop data | 878 | * Wrapper around bdstrat so that we can stop data from going to disk in case |
| 888 | * from going to disk in case we are shutting down the filesystem. | 879 | * we are shutting down the filesystem. Typically user data goes thru this |
| 889 | * Typically user data goes thru this path; one of the exceptions | 880 | * path; one of the exceptions is the superblock. |
| 890 | * is the superblock. | ||
| 891 | */ | 881 | */ |
| 892 | int | 882 | void |
| 893 | xfsbdstrat( | 883 | xfsbdstrat( |
| 894 | struct xfs_mount *mp, | 884 | struct xfs_mount *mp, |
| 895 | struct xfs_buf *bp) | 885 | struct xfs_buf *bp) |
| 896 | { | 886 | { |
| 897 | ASSERT(mp); | 887 | ASSERT(mp); |
| 898 | if (!XFS_FORCED_SHUTDOWN(mp)) { | 888 | if (!XFS_FORCED_SHUTDOWN(mp)) { |
| 899 | /* Grio redirection would go here | ||
| 900 | * if (XFS_BUF_IS_GRIO(bp)) { | ||
| 901 | */ | ||
| 902 | |||
| 903 | xfs_buf_iorequest(bp); | 889 | xfs_buf_iorequest(bp); |
| 904 | return 0; | 890 | return; |
| 905 | } | 891 | } |
| 906 | 892 | ||
| 907 | xfs_buftrace("XFSBDSTRAT IOERROR", bp); | 893 | xfs_buftrace("XFSBDSTRAT IOERROR", bp); |
| 908 | return (xfs_bioerror_relse(bp)); | 894 | xfs_bioerror_relse(bp); |
| 909 | } | 895 | } |
| 910 | 896 | ||
| 911 | /* | 897 | /* |
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h index e200253139cf..e1d498b4ba7a 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.h +++ b/fs/xfs/linux-2.6/xfs_lrw.h | |||
| @@ -68,7 +68,8 @@ extern void xfs_inval_cached_trace(struct xfs_inode *, | |||
| 68 | #define xfs_inval_cached_trace(ip, offset, len, first, last) | 68 | #define xfs_inval_cached_trace(ip, offset, len, first, last) |
| 69 | #endif | 69 | #endif |
| 70 | 70 | ||
| 71 | extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *); | 71 | /* errors from xfsbdstrat() must be extracted from the buffer */ |
| 72 | extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); | ||
| 72 | extern int xfs_bdstrat_cb(struct xfs_buf *); | 73 | extern int xfs_bdstrat_cb(struct xfs_buf *); |
| 73 | extern int xfs_dev_is_read_only(struct xfs_mount *, char *); | 74 | extern int xfs_dev_is_read_only(struct xfs_mount *, char *); |
| 74 | 75 | ||
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h index 8ba7a2fa6c1d..afd0b0d5fdb2 100644 --- a/fs/xfs/linux-2.6/xfs_stats.h +++ b/fs/xfs/linux-2.6/xfs_stats.h | |||
| @@ -144,8 +144,8 @@ extern void xfs_cleanup_procfs(void); | |||
| 144 | # define XFS_STATS_DEC(count) | 144 | # define XFS_STATS_DEC(count) |
| 145 | # define XFS_STATS_ADD(count, inc) | 145 | # define XFS_STATS_ADD(count, inc) |
| 146 | 146 | ||
| 147 | static __inline void xfs_init_procfs(void) { }; | 147 | static inline void xfs_init_procfs(void) { }; |
| 148 | static __inline void xfs_cleanup_procfs(void) { }; | 148 | static inline void xfs_cleanup_procfs(void) { }; |
| 149 | 149 | ||
| 150 | #endif /* !CONFIG_PROC_FS */ | 150 | #endif /* !CONFIG_PROC_FS */ |
| 151 | 151 | ||
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 8831d9518790..865eb708aa95 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
| @@ -896,7 +896,8 @@ xfs_fs_write_inode( | |||
| 896 | struct inode *inode, | 896 | struct inode *inode, |
| 897 | int sync) | 897 | int sync) |
| 898 | { | 898 | { |
| 899 | int error = 0, flags = FLUSH_INODE; | 899 | int error = 0; |
| 900 | int flags = 0; | ||
| 900 | 901 | ||
| 901 | xfs_itrace_entry(XFS_I(inode)); | 902 | xfs_itrace_entry(XFS_I(inode)); |
| 902 | if (sync) { | 903 | if (sync) { |
| @@ -934,7 +935,7 @@ xfs_fs_clear_inode( | |||
| 934 | xfs_inactive(ip); | 935 | xfs_inactive(ip); |
| 935 | xfs_iflags_clear(ip, XFS_IMODIFIED); | 936 | xfs_iflags_clear(ip, XFS_IMODIFIED); |
| 936 | if (xfs_reclaim(ip)) | 937 | if (xfs_reclaim(ip)) |
| 937 | panic("%s: cannot reclaim 0x%p\n", __FUNCTION__, inode); | 938 | panic("%s: cannot reclaim 0x%p\n", __func__, inode); |
| 938 | } | 939 | } |
| 939 | 940 | ||
| 940 | ASSERT(XFS_I(inode) == NULL); | 941 | ASSERT(XFS_I(inode) == NULL); |
| @@ -1027,8 +1028,7 @@ xfs_sync_worker( | |||
| 1027 | int error; | 1028 | int error; |
| 1028 | 1029 | ||
| 1029 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) | 1030 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) |
| 1030 | error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR | | 1031 | error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR); |
| 1031 | SYNC_REFCACHE | SYNC_SUPER); | ||
| 1032 | mp->m_sync_seq++; | 1032 | mp->m_sync_seq++; |
| 1033 | wake_up(&mp->m_wait_single_sync_task); | 1033 | wake_up(&mp->m_wait_single_sync_task); |
| 1034 | } | 1034 | } |
| @@ -1306,7 +1306,7 @@ xfs_fs_fill_super( | |||
| 1306 | void *data, | 1306 | void *data, |
| 1307 | int silent) | 1307 | int silent) |
| 1308 | { | 1308 | { |
| 1309 | struct inode *rootvp; | 1309 | struct inode *root; |
| 1310 | struct xfs_mount *mp = NULL; | 1310 | struct xfs_mount *mp = NULL; |
| 1311 | struct xfs_mount_args *args = xfs_args_allocate(sb, silent); | 1311 | struct xfs_mount_args *args = xfs_args_allocate(sb, silent); |
| 1312 | int error; | 1312 | int error; |
| @@ -1344,19 +1344,18 @@ xfs_fs_fill_super( | |||
| 1344 | sb->s_time_gran = 1; | 1344 | sb->s_time_gran = 1; |
| 1345 | set_posix_acl_flag(sb); | 1345 | set_posix_acl_flag(sb); |
| 1346 | 1346 | ||
| 1347 | rootvp = igrab(mp->m_rootip->i_vnode); | 1347 | root = igrab(mp->m_rootip->i_vnode); |
| 1348 | if (!rootvp) { | 1348 | if (!root) { |
| 1349 | error = ENOENT; | 1349 | error = ENOENT; |
| 1350 | goto fail_unmount; | 1350 | goto fail_unmount; |
| 1351 | } | 1351 | } |
| 1352 | 1352 | if (is_bad_inode(root)) { | |
| 1353 | sb->s_root = d_alloc_root(vn_to_inode(rootvp)); | 1353 | error = EINVAL; |
| 1354 | if (!sb->s_root) { | ||
| 1355 | error = ENOMEM; | ||
| 1356 | goto fail_vnrele; | 1354 | goto fail_vnrele; |
| 1357 | } | 1355 | } |
| 1358 | if (is_bad_inode(sb->s_root->d_inode)) { | 1356 | sb->s_root = d_alloc_root(root); |
| 1359 | error = EINVAL; | 1357 | if (!sb->s_root) { |
| 1358 | error = ENOMEM; | ||
| 1360 | goto fail_vnrele; | 1359 | goto fail_vnrele; |
| 1361 | } | 1360 | } |
| 1362 | 1361 | ||
| @@ -1378,7 +1377,7 @@ fail_vnrele: | |||
| 1378 | dput(sb->s_root); | 1377 | dput(sb->s_root); |
| 1379 | sb->s_root = NULL; | 1378 | sb->s_root = NULL; |
| 1380 | } else { | 1379 | } else { |
| 1381 | VN_RELE(rootvp); | 1380 | iput(root); |
| 1382 | } | 1381 | } |
| 1383 | 1382 | ||
| 1384 | fail_unmount: | 1383 | fail_unmount: |
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 3efcf45b14ab..3efb7c6d3303 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h | |||
| @@ -50,13 +50,7 @@ extern void xfs_qm_exit(void); | |||
| 50 | # define set_posix_acl_flag(sb) do { } while (0) | 50 | # define set_posix_acl_flag(sb) do { } while (0) |
| 51 | #endif | 51 | #endif |
| 52 | 52 | ||
| 53 | #ifdef CONFIG_XFS_SECURITY | 53 | #define XFS_SECURITY_STRING "security attributes, " |
| 54 | # define XFS_SECURITY_STRING "security attributes, " | ||
| 55 | # define ENOSECURITY 0 | ||
| 56 | #else | ||
| 57 | # define XFS_SECURITY_STRING | ||
| 58 | # define ENOSECURITY EOPNOTSUPP | ||
| 59 | #endif | ||
| 60 | 54 | ||
| 61 | #ifdef CONFIG_XFS_RT | 55 | #ifdef CONFIG_XFS_RT |
| 62 | # define XFS_REALTIME_STRING "realtime, " | 56 | # define XFS_REALTIME_STRING "realtime, " |
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h index 4da03a4e3520..7e60c7776b1c 100644 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ b/fs/xfs/linux-2.6/xfs_vfs.h | |||
| @@ -49,7 +49,6 @@ typedef struct bhv_vfs_sync_work { | |||
| 49 | #define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ | 49 | #define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ |
| 50 | #define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ | 50 | #define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ |
| 51 | #define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ | 51 | #define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ |
| 52 | #define SYNC_SUPER 0x0200 /* flush superblock to disk */ | ||
| 53 | 52 | ||
| 54 | /* | 53 | /* |
| 55 | * When remounting a filesystem read-only or freezing the filesystem, | 54 | * When remounting a filesystem read-only or freezing the filesystem, |
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index b5ea418693b1..8b4d63ce8694 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h | |||
| @@ -23,8 +23,6 @@ struct bhv_vattr; | |||
| 23 | struct xfs_iomap; | 23 | struct xfs_iomap; |
| 24 | struct attrlist_cursor_kern; | 24 | struct attrlist_cursor_kern; |
| 25 | 25 | ||
| 26 | typedef struct dentry bhv_vname_t; | ||
| 27 | typedef __u64 bhv_vnumber_t; | ||
| 28 | typedef struct inode bhv_vnode_t; | 26 | typedef struct inode bhv_vnode_t; |
| 29 | 27 | ||
| 30 | #define VN_ISLNK(vp) S_ISLNK((vp)->i_mode) | 28 | #define VN_ISLNK(vp) S_ISLNK((vp)->i_mode) |
| @@ -46,18 +44,6 @@ static inline struct inode *vn_to_inode(bhv_vnode_t *vnode) | |||
| 46 | } | 44 | } |
| 47 | 45 | ||
| 48 | /* | 46 | /* |
| 49 | * Values for the vop_rwlock/rwunlock flags parameter. | ||
| 50 | */ | ||
| 51 | typedef enum bhv_vrwlock { | ||
| 52 | VRWLOCK_NONE, | ||
| 53 | VRWLOCK_READ, | ||
| 54 | VRWLOCK_WRITE, | ||
| 55 | VRWLOCK_WRITE_DIRECT, | ||
| 56 | VRWLOCK_TRY_READ, | ||
| 57 | VRWLOCK_TRY_WRITE | ||
| 58 | } bhv_vrwlock_t; | ||
| 59 | |||
| 60 | /* | ||
| 61 | * Return values for xfs_inactive. A return value of | 47 | * Return values for xfs_inactive. A return value of |
| 62 | * VN_INACTIVE_NOCACHE implies that the file system behavior | 48 | * VN_INACTIVE_NOCACHE implies that the file system behavior |
| 63 | * has disassociated its state and bhv_desc_t from the vnode. | 49 | * has disassociated its state and bhv_desc_t from the vnode. |
| @@ -73,12 +59,9 @@ typedef enum bhv_vrwlock { | |||
| 73 | #define IO_INVIS 0x00020 /* don't update inode timestamps */ | 59 | #define IO_INVIS 0x00020 /* don't update inode timestamps */ |
| 74 | 60 | ||
| 75 | /* | 61 | /* |
| 76 | * Flags for vop_iflush call | 62 | * Flags for xfs_inode_flush |
| 77 | */ | 63 | */ |
| 78 | #define FLUSH_SYNC 1 /* wait for flush to complete */ | 64 | #define FLUSH_SYNC 1 /* wait for flush to complete */ |
| 79 | #define FLUSH_INODE 2 /* flush the inode itself */ | ||
| 80 | #define FLUSH_LOG 4 /* force the last log entry for | ||
| 81 | * this inode out to disk */ | ||
| 82 | 65 | ||
| 83 | /* | 66 | /* |
| 84 | * Flush/Invalidate options for vop_toss/flush/flushinval_pages. | 67 | * Flush/Invalidate options for vop_toss/flush/flushinval_pages. |
| @@ -226,13 +209,6 @@ static inline bhv_vnode_t *vn_grab(bhv_vnode_t *vp) | |||
| 226 | } | 209 | } |
| 227 | 210 | ||
| 228 | /* | 211 | /* |
| 229 | * Vname handling macros. | ||
| 230 | */ | ||
| 231 | #define VNAME(dentry) ((char *) (dentry)->d_name.name) | ||
| 232 | #define VNAMELEN(dentry) ((dentry)->d_name.len) | ||
| 233 | #define VNAME_TO_VNODE(dentry) (vn_from_inode((dentry)->d_inode)) | ||
| 234 | |||
| 235 | /* | ||
| 236 | * Dealing with bad inodes | 212 | * Dealing with bad inodes |
| 237 | */ | 213 | */ |
| 238 | static inline int VN_BAD(bhv_vnode_t *vp) | 214 | static inline int VN_BAD(bhv_vnode_t *vp) |
| @@ -303,9 +279,9 @@ extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *); | |||
| 303 | extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *); | 279 | extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *); |
| 304 | extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *); | 280 | extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *); |
| 305 | #define xfs_itrace_entry(ip) \ | 281 | #define xfs_itrace_entry(ip) \ |
| 306 | _xfs_itrace_entry(ip, __FUNCTION__, (inst_t *)__return_address) | 282 | _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address) |
| 307 | #define xfs_itrace_exit(ip) \ | 283 | #define xfs_itrace_exit(ip) \ |
| 308 | _xfs_itrace_exit(ip, __FUNCTION__, (inst_t *)__return_address) | 284 | _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address) |
| 309 | #define xfs_itrace_exit_tag(ip, tag) \ | 285 | #define xfs_itrace_exit_tag(ip, tag) \ |
| 310 | _xfs_itrace_exit(ip, tag, (inst_t *)__return_address) | 286 | _xfs_itrace_exit(ip, tag, (inst_t *)__return_address) |
| 311 | #define xfs_itrace_ref(ip) \ | 287 | #define xfs_itrace_ref(ip) \ |
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 665babcca6a6..631ebb31b295 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c | |||
| @@ -1291,7 +1291,7 @@ xfs_qm_dqflush( | |||
| 1291 | if (flags & XFS_QMOPT_DELWRI) { | 1291 | if (flags & XFS_QMOPT_DELWRI) { |
| 1292 | xfs_bdwrite(mp, bp); | 1292 | xfs_bdwrite(mp, bp); |
| 1293 | } else if (flags & XFS_QMOPT_ASYNC) { | 1293 | } else if (flags & XFS_QMOPT_ASYNC) { |
| 1294 | xfs_bawrite(mp, bp); | 1294 | error = xfs_bawrite(mp, bp); |
| 1295 | } else { | 1295 | } else { |
| 1296 | error = xfs_bwrite(mp, bp); | 1296 | error = xfs_bwrite(mp, bp); |
| 1297 | } | 1297 | } |
| @@ -1439,9 +1439,7 @@ xfs_qm_dqpurge( | |||
| 1439 | uint flags) | 1439 | uint flags) |
| 1440 | { | 1440 | { |
| 1441 | xfs_dqhash_t *thishash; | 1441 | xfs_dqhash_t *thishash; |
| 1442 | xfs_mount_t *mp; | 1442 | xfs_mount_t *mp = dqp->q_mount; |
| 1443 | |||
| 1444 | mp = dqp->q_mount; | ||
| 1445 | 1443 | ||
| 1446 | ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); | 1444 | ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); |
| 1447 | ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash)); | 1445 | ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash)); |
| @@ -1485,6 +1483,7 @@ xfs_qm_dqpurge( | |||
| 1485 | * we're unmounting, we do care, so we flush it and wait. | 1483 | * we're unmounting, we do care, so we flush it and wait. |
| 1486 | */ | 1484 | */ |
| 1487 | if (XFS_DQ_IS_DIRTY(dqp)) { | 1485 | if (XFS_DQ_IS_DIRTY(dqp)) { |
| 1486 | int error; | ||
| 1488 | xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY"); | 1487 | xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY"); |
| 1489 | /* dqflush unlocks dqflock */ | 1488 | /* dqflush unlocks dqflock */ |
| 1490 | /* | 1489 | /* |
| @@ -1495,7 +1494,10 @@ xfs_qm_dqpurge( | |||
| 1495 | * We don't care about getting disk errors here. We need | 1494 | * We don't care about getting disk errors here. We need |
| 1496 | * to purge this dquot anyway, so we go ahead regardless. | 1495 | * to purge this dquot anyway, so we go ahead regardless. |
| 1497 | */ | 1496 | */ |
| 1498 | (void) xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC); | 1497 | error = xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC); |
| 1498 | if (error) | ||
| 1499 | xfs_fs_cmn_err(CE_WARN, mp, | ||
| 1500 | "xfs_qm_dqpurge: dquot %p flush failed", dqp); | ||
| 1499 | xfs_dqflock(dqp); | 1501 | xfs_dqflock(dqp); |
| 1500 | } | 1502 | } |
| 1501 | ASSERT(dqp->q_pincount == 0); | 1503 | ASSERT(dqp->q_pincount == 0); |
| @@ -1580,12 +1582,18 @@ xfs_qm_dqflock_pushbuf_wait( | |||
| 1580 | XFS_INCORE_TRYLOCK); | 1582 | XFS_INCORE_TRYLOCK); |
| 1581 | if (bp != NULL) { | 1583 | if (bp != NULL) { |
| 1582 | if (XFS_BUF_ISDELAYWRITE(bp)) { | 1584 | if (XFS_BUF_ISDELAYWRITE(bp)) { |
| 1585 | int error; | ||
| 1583 | if (XFS_BUF_ISPINNED(bp)) { | 1586 | if (XFS_BUF_ISPINNED(bp)) { |
| 1584 | xfs_log_force(dqp->q_mount, | 1587 | xfs_log_force(dqp->q_mount, |
| 1585 | (xfs_lsn_t)0, | 1588 | (xfs_lsn_t)0, |
| 1586 | XFS_LOG_FORCE); | 1589 | XFS_LOG_FORCE); |
| 1587 | } | 1590 | } |
| 1588 | xfs_bawrite(dqp->q_mount, bp); | 1591 | error = xfs_bawrite(dqp->q_mount, bp); |
| 1592 | if (error) | ||
| 1593 | xfs_fs_cmn_err(CE_WARN, dqp->q_mount, | ||
| 1594 | "xfs_qm_dqflock_pushbuf_wait: " | ||
| 1595 | "pushbuf error %d on dqp %p, bp %p", | ||
| 1596 | error, dqp, bp); | ||
| 1589 | } else { | 1597 | } else { |
| 1590 | xfs_buf_relse(bp); | 1598 | xfs_buf_relse(bp); |
| 1591 | } | 1599 | } |
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 1800e8d1f646..36e05ca78412 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c | |||
| @@ -146,6 +146,7 @@ xfs_qm_dquot_logitem_push( | |||
| 146 | xfs_dq_logitem_t *logitem) | 146 | xfs_dq_logitem_t *logitem) |
| 147 | { | 147 | { |
| 148 | xfs_dquot_t *dqp; | 148 | xfs_dquot_t *dqp; |
| 149 | int error; | ||
| 149 | 150 | ||
| 150 | dqp = logitem->qli_dquot; | 151 | dqp = logitem->qli_dquot; |
| 151 | 152 | ||
| @@ -161,7 +162,11 @@ xfs_qm_dquot_logitem_push( | |||
| 161 | * lock without sleeping, then there must not have been | 162 | * lock without sleeping, then there must not have been |
| 162 | * anyone in the process of flushing the dquot. | 163 | * anyone in the process of flushing the dquot. |
| 163 | */ | 164 | */ |
| 164 | xfs_qm_dqflush(dqp, XFS_B_DELWRI); | 165 | error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); |
| 166 | if (error) | ||
| 167 | xfs_fs_cmn_err(CE_WARN, dqp->q_mount, | ||
| 168 | "xfs_qm_dquot_logitem_push: push error %d on dqp %p", | ||
| 169 | error, dqp); | ||
| 165 | xfs_dqunlock(dqp); | 170 | xfs_dqunlock(dqp); |
| 166 | } | 171 | } |
| 167 | 172 | ||
| @@ -262,11 +267,16 @@ xfs_qm_dquot_logitem_pushbuf( | |||
| 262 | XFS_LOG_FORCE); | 267 | XFS_LOG_FORCE); |
| 263 | } | 268 | } |
| 264 | if (dopush) { | 269 | if (dopush) { |
| 270 | int error; | ||
| 265 | #ifdef XFSRACEDEBUG | 271 | #ifdef XFSRACEDEBUG |
| 266 | delay_for_intr(); | 272 | delay_for_intr(); |
| 267 | delay(300); | 273 | delay(300); |
| 268 | #endif | 274 | #endif |
| 269 | xfs_bawrite(mp, bp); | 275 | error = xfs_bawrite(mp, bp); |
| 276 | if (error) | ||
| 277 | xfs_fs_cmn_err(CE_WARN, mp, | ||
| 278 | "xfs_qm_dquot_logitem_pushbuf: pushbuf error %d on qip %p, bp %p", | ||
| 279 | error, qip, bp); | ||
| 270 | } else { | 280 | } else { |
| 271 | xfs_buf_relse(bp); | 281 | xfs_buf_relse(bp); |
| 272 | } | 282 | } |
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 8e9c5ae6504d..40ea56409561 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
| @@ -304,8 +304,11 @@ xfs_qm_unmount_quotadestroy( | |||
| 304 | * necessary data structures like quotainfo. This is also responsible for | 304 | * necessary data structures like quotainfo. This is also responsible for |
| 305 | * running a quotacheck as necessary. We are guaranteed that the superblock | 305 | * running a quotacheck as necessary. We are guaranteed that the superblock |
| 306 | * is consistently read in at this point. | 306 | * is consistently read in at this point. |
| 307 | * | ||
| 308 | * If we fail here, the mount will continue with quota turned off. We don't | ||
| 309 | * need to inidicate success or failure at all. | ||
| 307 | */ | 310 | */ |
| 308 | int | 311 | void |
| 309 | xfs_qm_mount_quotas( | 312 | xfs_qm_mount_quotas( |
| 310 | xfs_mount_t *mp, | 313 | xfs_mount_t *mp, |
| 311 | int mfsi_flags) | 314 | int mfsi_flags) |
| @@ -313,7 +316,6 @@ xfs_qm_mount_quotas( | |||
| 313 | int error = 0; | 316 | int error = 0; |
| 314 | uint sbf; | 317 | uint sbf; |
| 315 | 318 | ||
| 316 | |||
| 317 | /* | 319 | /* |
| 318 | * If quotas on realtime volumes is not supported, we disable | 320 | * If quotas on realtime volumes is not supported, we disable |
| 319 | * quotas immediately. | 321 | * quotas immediately. |
| @@ -332,7 +334,8 @@ xfs_qm_mount_quotas( | |||
| 332 | * Allocate the quotainfo structure inside the mount struct, and | 334 | * Allocate the quotainfo structure inside the mount struct, and |
| 333 | * create quotainode(s), and change/rev superblock if necessary. | 335 | * create quotainode(s), and change/rev superblock if necessary. |
| 334 | */ | 336 | */ |
| 335 | if ((error = xfs_qm_init_quotainfo(mp))) { | 337 | error = xfs_qm_init_quotainfo(mp); |
| 338 | if (error) { | ||
| 336 | /* | 339 | /* |
| 337 | * We must turn off quotas. | 340 | * We must turn off quotas. |
| 338 | */ | 341 | */ |
| @@ -344,12 +347,11 @@ xfs_qm_mount_quotas( | |||
| 344 | * If any of the quotas are not consistent, do a quotacheck. | 347 | * If any of the quotas are not consistent, do a quotacheck. |
| 345 | */ | 348 | */ |
| 346 | if (XFS_QM_NEED_QUOTACHECK(mp) && | 349 | if (XFS_QM_NEED_QUOTACHECK(mp) && |
| 347 | !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { | 350 | !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { |
| 348 | if ((error = xfs_qm_quotacheck(mp))) { | 351 | error = xfs_qm_quotacheck(mp); |
| 349 | /* Quotacheck has failed and quotas have | 352 | if (error) { |
| 350 | * been disabled. | 353 | /* Quotacheck failed and disabled quotas. */ |
| 351 | */ | 354 | return; |
| 352 | return XFS_ERROR(error); | ||
| 353 | } | 355 | } |
| 354 | } | 356 | } |
| 355 | /* | 357 | /* |
| @@ -357,12 +359,10 @@ xfs_qm_mount_quotas( | |||
| 357 | * quotachecked status, since we won't be doing accounting for | 359 | * quotachecked status, since we won't be doing accounting for |
| 358 | * that type anymore. | 360 | * that type anymore. |
| 359 | */ | 361 | */ |
| 360 | if (!XFS_IS_UQUOTA_ON(mp)) { | 362 | if (!XFS_IS_UQUOTA_ON(mp)) |
| 361 | mp->m_qflags &= ~XFS_UQUOTA_CHKD; | 363 | mp->m_qflags &= ~XFS_UQUOTA_CHKD; |
| 362 | } | 364 | if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) |
| 363 | if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) { | ||
| 364 | mp->m_qflags &= ~XFS_OQUOTA_CHKD; | 365 | mp->m_qflags &= ~XFS_OQUOTA_CHKD; |
| 365 | } | ||
| 366 | 366 | ||
| 367 | write_changes: | 367 | write_changes: |
| 368 | /* | 368 | /* |
| @@ -392,7 +392,7 @@ xfs_qm_mount_quotas( | |||
| 392 | xfs_fs_cmn_err(CE_WARN, mp, | 392 | xfs_fs_cmn_err(CE_WARN, mp, |
| 393 | "Failed to initialize disk quotas."); | 393 | "Failed to initialize disk quotas."); |
| 394 | } | 394 | } |
| 395 | return XFS_ERROR(error); | 395 | return; |
| 396 | } | 396 | } |
| 397 | 397 | ||
| 398 | /* | 398 | /* |
| @@ -1438,7 +1438,7 @@ xfs_qm_qino_alloc( | |||
| 1438 | } | 1438 | } |
| 1439 | 1439 | ||
| 1440 | 1440 | ||
| 1441 | STATIC int | 1441 | STATIC void |
| 1442 | xfs_qm_reset_dqcounts( | 1442 | xfs_qm_reset_dqcounts( |
| 1443 | xfs_mount_t *mp, | 1443 | xfs_mount_t *mp, |
| 1444 | xfs_buf_t *bp, | 1444 | xfs_buf_t *bp, |
| @@ -1478,8 +1478,6 @@ xfs_qm_reset_dqcounts( | |||
| 1478 | ddq->d_rtbwarns = 0; | 1478 | ddq->d_rtbwarns = 0; |
| 1479 | ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); | 1479 | ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); |
| 1480 | } | 1480 | } |
| 1481 | |||
| 1482 | return 0; | ||
| 1483 | } | 1481 | } |
| 1484 | 1482 | ||
| 1485 | STATIC int | 1483 | STATIC int |
| @@ -1520,7 +1518,7 @@ xfs_qm_dqiter_bufs( | |||
| 1520 | if (error) | 1518 | if (error) |
| 1521 | break; | 1519 | break; |
| 1522 | 1520 | ||
| 1523 | (void) xfs_qm_reset_dqcounts(mp, bp, firstid, type); | 1521 | xfs_qm_reset_dqcounts(mp, bp, firstid, type); |
| 1524 | xfs_bdwrite(mp, bp); | 1522 | xfs_bdwrite(mp, bp); |
| 1525 | /* | 1523 | /* |
| 1526 | * goto the next block. | 1524 | * goto the next block. |
| @@ -1810,7 +1808,7 @@ xfs_qm_dqusage_adjust( | |||
| 1810 | * Now release the inode. This will send it to 'inactive', and | 1808 | * Now release the inode. This will send it to 'inactive', and |
| 1811 | * possibly even free blocks. | 1809 | * possibly even free blocks. |
| 1812 | */ | 1810 | */ |
| 1813 | VN_RELE(XFS_ITOV(ip)); | 1811 | IRELE(ip); |
| 1814 | 1812 | ||
| 1815 | /* | 1813 | /* |
| 1816 | * Goto next inode. | 1814 | * Goto next inode. |
| @@ -1880,6 +1878,14 @@ xfs_qm_quotacheck( | |||
| 1880 | } while (! done); | 1878 | } while (! done); |
| 1881 | 1879 | ||
| 1882 | /* | 1880 | /* |
| 1881 | * We've made all the changes that we need to make incore. | ||
| 1882 | * Flush them down to disk buffers if everything was updated | ||
| 1883 | * successfully. | ||
| 1884 | */ | ||
| 1885 | if (!error) | ||
| 1886 | error = xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI); | ||
| 1887 | |||
| 1888 | /* | ||
| 1883 | * We can get this error if we couldn't do a dquot allocation inside | 1889 | * We can get this error if we couldn't do a dquot allocation inside |
| 1884 | * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the | 1890 | * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the |
| 1885 | * dirty dquots that might be cached, we just want to get rid of them | 1891 | * dirty dquots that might be cached, we just want to get rid of them |
| @@ -1890,11 +1896,6 @@ xfs_qm_quotacheck( | |||
| 1890 | xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF); | 1896 | xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF); |
| 1891 | goto error_return; | 1897 | goto error_return; |
| 1892 | } | 1898 | } |
| 1893 | /* | ||
| 1894 | * We've made all the changes that we need to make incore. | ||
| 1895 | * Now flush_them down to disk buffers. | ||
| 1896 | */ | ||
| 1897 | xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI); | ||
| 1898 | 1899 | ||
| 1899 | /* | 1900 | /* |
| 1900 | * We didn't log anything, because if we crashed, we'll have to | 1901 | * We didn't log anything, because if we crashed, we'll have to |
| @@ -1926,7 +1927,10 @@ xfs_qm_quotacheck( | |||
| 1926 | ASSERT(mp->m_quotainfo != NULL); | 1927 | ASSERT(mp->m_quotainfo != NULL); |
| 1927 | ASSERT(xfs_Gqm != NULL); | 1928 | ASSERT(xfs_Gqm != NULL); |
| 1928 | xfs_qm_destroy_quotainfo(mp); | 1929 | xfs_qm_destroy_quotainfo(mp); |
| 1929 | (void)xfs_mount_reset_sbqflags(mp); | 1930 | if (xfs_mount_reset_sbqflags(mp)) { |
| 1931 | cmn_err(CE_WARN, "XFS quotacheck %s: " | ||
| 1932 | "Failed to reset quota flags.", mp->m_fsname); | ||
| 1933 | } | ||
| 1930 | } else { | 1934 | } else { |
| 1931 | cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); | 1935 | cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); |
| 1932 | } | 1936 | } |
| @@ -1968,7 +1972,7 @@ xfs_qm_init_quotainos( | |||
| 1968 | if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, | 1972 | if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, |
| 1969 | 0, 0, &gip, 0))) { | 1973 | 0, 0, &gip, 0))) { |
| 1970 | if (uip) | 1974 | if (uip) |
| 1971 | VN_RELE(XFS_ITOV(uip)); | 1975 | IRELE(uip); |
| 1972 | return XFS_ERROR(error); | 1976 | return XFS_ERROR(error); |
| 1973 | } | 1977 | } |
| 1974 | } | 1978 | } |
| @@ -1999,7 +2003,7 @@ xfs_qm_init_quotainos( | |||
| 1999 | sbflags | XFS_SB_GQUOTINO, flags); | 2003 | sbflags | XFS_SB_GQUOTINO, flags); |
| 2000 | if (error) { | 2004 | if (error) { |
| 2001 | if (uip) | 2005 | if (uip) |
| 2002 | VN_RELE(XFS_ITOV(uip)); | 2006 | IRELE(uip); |
| 2003 | 2007 | ||
| 2004 | return XFS_ERROR(error); | 2008 | return XFS_ERROR(error); |
| 2005 | } | 2009 | } |
| @@ -2093,12 +2097,17 @@ xfs_qm_shake_freelist( | |||
| 2093 | * dirty dquots. | 2097 | * dirty dquots. |
| 2094 | */ | 2098 | */ |
| 2095 | if (XFS_DQ_IS_DIRTY(dqp)) { | 2099 | if (XFS_DQ_IS_DIRTY(dqp)) { |
| 2100 | int error; | ||
| 2096 | xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY"); | 2101 | xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY"); |
| 2097 | /* | 2102 | /* |
| 2098 | * We flush it delayed write, so don't bother | 2103 | * We flush it delayed write, so don't bother |
| 2099 | * releasing the mplock. | 2104 | * releasing the mplock. |
| 2100 | */ | 2105 | */ |
| 2101 | (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); | 2106 | error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); |
| 2107 | if (error) { | ||
| 2108 | xfs_fs_cmn_err(CE_WARN, dqp->q_mount, | ||
| 2109 | "xfs_qm_dqflush_all: dquot %p flush failed", dqp); | ||
| 2110 | } | ||
| 2102 | xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ | 2111 | xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ |
| 2103 | dqp = dqp->dq_flnext; | 2112 | dqp = dqp->dq_flnext; |
| 2104 | continue; | 2113 | continue; |
| @@ -2265,12 +2274,17 @@ xfs_qm_dqreclaim_one(void) | |||
| 2265 | * dirty dquots. | 2274 | * dirty dquots. |
| 2266 | */ | 2275 | */ |
| 2267 | if (XFS_DQ_IS_DIRTY(dqp)) { | 2276 | if (XFS_DQ_IS_DIRTY(dqp)) { |
| 2277 | int error; | ||
| 2268 | xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY"); | 2278 | xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY"); |
| 2269 | /* | 2279 | /* |
| 2270 | * We flush it delayed write, so don't bother | 2280 | * We flush it delayed write, so don't bother |
| 2271 | * releasing the freelist lock. | 2281 | * releasing the freelist lock. |
| 2272 | */ | 2282 | */ |
| 2273 | (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); | 2283 | error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI); |
| 2284 | if (error) { | ||
| 2285 | xfs_fs_cmn_err(CE_WARN, dqp->q_mount, | ||
| 2286 | "xfs_qm_dqreclaim: dquot %p flush failed", dqp); | ||
| 2287 | } | ||
| 2274 | xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ | 2288 | xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ |
| 2275 | continue; | 2289 | continue; |
| 2276 | } | 2290 | } |
| @@ -2378,9 +2392,9 @@ xfs_qm_write_sb_changes( | |||
| 2378 | } | 2392 | } |
| 2379 | 2393 | ||
| 2380 | xfs_mod_sb(tp, flags); | 2394 | xfs_mod_sb(tp, flags); |
| 2381 | (void) xfs_trans_commit(tp, 0); | 2395 | error = xfs_trans_commit(tp, 0); |
| 2382 | 2396 | ||
| 2383 | return 0; | 2397 | return error; |
| 2384 | } | 2398 | } |
| 2385 | 2399 | ||
| 2386 | 2400 | ||
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index baf537c1c177..cd2300e374af 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h | |||
| @@ -165,7 +165,7 @@ typedef struct xfs_dquot_acct { | |||
| 165 | #define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) | 165 | #define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) |
| 166 | 166 | ||
| 167 | extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); | 167 | extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); |
| 168 | extern int xfs_qm_mount_quotas(xfs_mount_t *, int); | 168 | extern void xfs_qm_mount_quotas(xfs_mount_t *, int); |
| 169 | extern int xfs_qm_quotacheck(xfs_mount_t *); | 169 | extern int xfs_qm_quotacheck(xfs_mount_t *); |
| 170 | extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); | 170 | extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); |
| 171 | extern int xfs_qm_unmount_quotas(xfs_mount_t *); | 171 | extern int xfs_qm_unmount_quotas(xfs_mount_t *); |
diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/quota/xfs_qm_stats.h index a50ffabcf554..5b964fc0dc09 100644 --- a/fs/xfs/quota/xfs_qm_stats.h +++ b/fs/xfs/quota/xfs_qm_stats.h | |||
| @@ -45,8 +45,8 @@ extern void xfs_qm_cleanup_procfs(void); | |||
| 45 | 45 | ||
| 46 | # define XQM_STATS_INC(count) do { } while (0) | 46 | # define XQM_STATS_INC(count) do { } while (0) |
| 47 | 47 | ||
| 48 | static __inline void xfs_qm_init_procfs(void) { }; | 48 | static inline void xfs_qm_init_procfs(void) { }; |
| 49 | static __inline void xfs_qm_cleanup_procfs(void) { }; | 49 | static inline void xfs_qm_cleanup_procfs(void) { }; |
| 50 | 50 | ||
| 51 | #endif | 51 | #endif |
| 52 | 52 | ||
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index d2b8be7e75f9..8342823dbdc3 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c | |||
| @@ -279,9 +279,12 @@ xfs_qm_scall_quotaoff( | |||
| 279 | 279 | ||
| 280 | /* | 280 | /* |
| 281 | * Write the LI_QUOTAOFF log record, and do SB changes atomically, | 281 | * Write the LI_QUOTAOFF log record, and do SB changes atomically, |
| 282 | * and synchronously. | 282 | * and synchronously. If we fail to write, we should abort the |
| 283 | * operation as it cannot be recovered safely if we crash. | ||
| 283 | */ | 284 | */ |
| 284 | xfs_qm_log_quotaoff(mp, &qoffstart, flags); | 285 | error = xfs_qm_log_quotaoff(mp, &qoffstart, flags); |
| 286 | if (error) | ||
| 287 | goto out_error; | ||
| 285 | 288 | ||
| 286 | /* | 289 | /* |
| 287 | * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct | 290 | * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct |
| @@ -337,7 +340,12 @@ xfs_qm_scall_quotaoff( | |||
| 337 | * So, we have QUOTAOFF start and end logitems; the start | 340 | * So, we have QUOTAOFF start and end logitems; the start |
| 338 | * logitem won't get overwritten until the end logitem appears... | 341 | * logitem won't get overwritten until the end logitem appears... |
| 339 | */ | 342 | */ |
| 340 | xfs_qm_log_quotaoff_end(mp, qoffstart, flags); | 343 | error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags); |
| 344 | if (error) { | ||
| 345 | /* We're screwed now. Shutdown is the only option. */ | ||
| 346 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
| 347 | goto out_error; | ||
| 348 | } | ||
| 341 | 349 | ||
| 342 | /* | 350 | /* |
| 343 | * If quotas is completely disabled, close shop. | 351 | * If quotas is completely disabled, close shop. |
| @@ -361,6 +369,7 @@ xfs_qm_scall_quotaoff( | |||
| 361 | XFS_PURGE_INODE(XFS_QI_GQIP(mp)); | 369 | XFS_PURGE_INODE(XFS_QI_GQIP(mp)); |
| 362 | XFS_QI_GQIP(mp) = NULL; | 370 | XFS_QI_GQIP(mp) = NULL; |
| 363 | } | 371 | } |
| 372 | out_error: | ||
| 364 | mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); | 373 | mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); |
| 365 | 374 | ||
| 366 | return (error); | 375 | return (error); |
| @@ -371,12 +380,11 @@ xfs_qm_scall_trunc_qfiles( | |||
| 371 | xfs_mount_t *mp, | 380 | xfs_mount_t *mp, |
| 372 | uint flags) | 381 | uint flags) |
| 373 | { | 382 | { |
| 374 | int error; | 383 | int error = 0, error2 = 0; |
| 375 | xfs_inode_t *qip; | 384 | xfs_inode_t *qip; |
| 376 | 385 | ||
| 377 | if (!capable(CAP_SYS_ADMIN)) | 386 | if (!capable(CAP_SYS_ADMIN)) |
| 378 | return XFS_ERROR(EPERM); | 387 | return XFS_ERROR(EPERM); |
| 379 | error = 0; | ||
| 380 | if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { | 388 | if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { |
| 381 | qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); | 389 | qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); |
| 382 | return XFS_ERROR(EINVAL); | 390 | return XFS_ERROR(EINVAL); |
| @@ -384,22 +392,22 @@ xfs_qm_scall_trunc_qfiles( | |||
| 384 | 392 | ||
| 385 | if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { | 393 | if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { |
| 386 | error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0); | 394 | error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0); |
| 387 | if (! error) { | 395 | if (!error) { |
| 388 | (void) xfs_truncate_file(mp, qip); | 396 | error = xfs_truncate_file(mp, qip); |
| 389 | VN_RELE(XFS_ITOV(qip)); | 397 | IRELE(qip); |
| 390 | } | 398 | } |
| 391 | } | 399 | } |
| 392 | 400 | ||
| 393 | if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && | 401 | if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && |
| 394 | mp->m_sb.sb_gquotino != NULLFSINO) { | 402 | mp->m_sb.sb_gquotino != NULLFSINO) { |
| 395 | error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); | 403 | error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); |
| 396 | if (! error) { | 404 | if (!error2) { |
| 397 | (void) xfs_truncate_file(mp, qip); | 405 | error2 = xfs_truncate_file(mp, qip); |
| 398 | VN_RELE(XFS_ITOV(qip)); | 406 | IRELE(qip); |
| 399 | } | 407 | } |
| 400 | } | 408 | } |
| 401 | 409 | ||
| 402 | return (error); | 410 | return error ? error : error2; |
| 403 | } | 411 | } |
| 404 | 412 | ||
| 405 | 413 | ||
| @@ -552,13 +560,13 @@ xfs_qm_scall_getqstat( | |||
| 552 | out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; | 560 | out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; |
| 553 | out->qs_uquota.qfs_nextents = uip->i_d.di_nextents; | 561 | out->qs_uquota.qfs_nextents = uip->i_d.di_nextents; |
| 554 | if (tempuqip) | 562 | if (tempuqip) |
| 555 | VN_RELE(XFS_ITOV(uip)); | 563 | IRELE(uip); |
| 556 | } | 564 | } |
| 557 | if (gip) { | 565 | if (gip) { |
| 558 | out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks; | 566 | out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks; |
| 559 | out->qs_gquota.qfs_nextents = gip->i_d.di_nextents; | 567 | out->qs_gquota.qfs_nextents = gip->i_d.di_nextents; |
| 560 | if (tempgqip) | 568 | if (tempgqip) |
| 561 | VN_RELE(XFS_ITOV(gip)); | 569 | IRELE(gip); |
| 562 | } | 570 | } |
| 563 | if (mp->m_quotainfo) { | 571 | if (mp->m_quotainfo) { |
| 564 | out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp); | 572 | out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp); |
| @@ -726,12 +734,12 @@ xfs_qm_scall_setqlim( | |||
| 726 | xfs_trans_log_dquot(tp, dqp); | 734 | xfs_trans_log_dquot(tp, dqp); |
| 727 | 735 | ||
| 728 | xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT"); | 736 | xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT"); |
| 729 | xfs_trans_commit(tp, 0); | 737 | error = xfs_trans_commit(tp, 0); |
| 730 | xfs_qm_dqprint(dqp); | 738 | xfs_qm_dqprint(dqp); |
| 731 | xfs_qm_dqrele(dqp); | 739 | xfs_qm_dqrele(dqp); |
| 732 | mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); | 740 | mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); |
| 733 | 741 | ||
| 734 | return (0); | 742 | return error; |
| 735 | } | 743 | } |
| 736 | 744 | ||
| 737 | STATIC int | 745 | STATIC int |
| @@ -1095,7 +1103,7 @@ again: | |||
| 1095 | * inactive code in hell. | 1103 | * inactive code in hell. |
| 1096 | */ | 1104 | */ |
| 1097 | if (vnode_refd) | 1105 | if (vnode_refd) |
| 1098 | VN_RELE(vp); | 1106 | IRELE(ip); |
| 1099 | XFS_MOUNT_ILOCK(mp); | 1107 | XFS_MOUNT_ILOCK(mp); |
| 1100 | /* | 1108 | /* |
| 1101 | * If an inode was inserted or removed, we gotta | 1109 | * If an inode was inserted or removed, we gotta |
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c index 129067cfcb86..0b75d302508f 100644 --- a/fs/xfs/support/ktrace.c +++ b/fs/xfs/support/ktrace.c | |||
| @@ -24,7 +24,7 @@ static int ktrace_zentries; | |||
| 24 | void __init | 24 | void __init |
| 25 | ktrace_init(int zentries) | 25 | ktrace_init(int zentries) |
| 26 | { | 26 | { |
| 27 | ktrace_zentries = zentries; | 27 | ktrace_zentries = roundup_pow_of_two(zentries); |
| 28 | 28 | ||
| 29 | ktrace_hdr_zone = kmem_zone_init(sizeof(ktrace_t), | 29 | ktrace_hdr_zone = kmem_zone_init(sizeof(ktrace_t), |
| 30 | "ktrace_hdr"); | 30 | "ktrace_hdr"); |
| @@ -47,13 +47,16 @@ ktrace_uninit(void) | |||
| 47 | * ktrace_alloc() | 47 | * ktrace_alloc() |
| 48 | * | 48 | * |
| 49 | * Allocate a ktrace header and enough buffering for the given | 49 | * Allocate a ktrace header and enough buffering for the given |
| 50 | * number of entries. | 50 | * number of entries. Round the number of entries up to a |
| 51 | * power of 2 so we can do fast masking to get the index from | ||
| 52 | * the atomic index counter. | ||
| 51 | */ | 53 | */ |
| 52 | ktrace_t * | 54 | ktrace_t * |
| 53 | ktrace_alloc(int nentries, unsigned int __nocast sleep) | 55 | ktrace_alloc(int nentries, unsigned int __nocast sleep) |
| 54 | { | 56 | { |
| 55 | ktrace_t *ktp; | 57 | ktrace_t *ktp; |
| 56 | ktrace_entry_t *ktep; | 58 | ktrace_entry_t *ktep; |
| 59 | int entries; | ||
| 57 | 60 | ||
| 58 | ktp = (ktrace_t*)kmem_zone_alloc(ktrace_hdr_zone, sleep); | 61 | ktp = (ktrace_t*)kmem_zone_alloc(ktrace_hdr_zone, sleep); |
| 59 | 62 | ||
| @@ -70,11 +73,12 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep) | |||
| 70 | /* | 73 | /* |
| 71 | * Special treatment for buffers with the ktrace_zentries entries | 74 | * Special treatment for buffers with the ktrace_zentries entries |
| 72 | */ | 75 | */ |
| 73 | if (nentries == ktrace_zentries) { | 76 | entries = roundup_pow_of_two(nentries); |
| 77 | if (entries == ktrace_zentries) { | ||
| 74 | ktep = (ktrace_entry_t*)kmem_zone_zalloc(ktrace_ent_zone, | 78 | ktep = (ktrace_entry_t*)kmem_zone_zalloc(ktrace_ent_zone, |
| 75 | sleep); | 79 | sleep); |
| 76 | } else { | 80 | } else { |
| 77 | ktep = (ktrace_entry_t*)kmem_zalloc((nentries * sizeof(*ktep)), | 81 | ktep = (ktrace_entry_t*)kmem_zalloc((entries * sizeof(*ktep)), |
| 78 | sleep | KM_LARGE); | 82 | sleep | KM_LARGE); |
| 79 | } | 83 | } |
| 80 | 84 | ||
| @@ -91,8 +95,10 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep) | |||
| 91 | } | 95 | } |
| 92 | 96 | ||
| 93 | ktp->kt_entries = ktep; | 97 | ktp->kt_entries = ktep; |
| 94 | ktp->kt_nentries = nentries; | 98 | ktp->kt_nentries = entries; |
| 95 | ktp->kt_index = 0; | 99 | ASSERT(is_power_of_2(entries)); |
| 100 | ktp->kt_index_mask = entries - 1; | ||
| 101 | atomic_set(&ktp->kt_index, 0); | ||
| 96 | ktp->kt_rollover = 0; | 102 | ktp->kt_rollover = 0; |
| 97 | return ktp; | 103 | return ktp; |
| 98 | } | 104 | } |
| @@ -151,8 +157,6 @@ ktrace_enter( | |||
| 151 | void *val14, | 157 | void *val14, |
| 152 | void *val15) | 158 | void *val15) |
| 153 | { | 159 | { |
| 154 | static DEFINE_SPINLOCK(wrap_lock); | ||
| 155 | unsigned long flags; | ||
| 156 | int index; | 160 | int index; |
| 157 | ktrace_entry_t *ktep; | 161 | ktrace_entry_t *ktep; |
| 158 | 162 | ||
| @@ -161,12 +165,8 @@ ktrace_enter( | |||
| 161 | /* | 165 | /* |
| 162 | * Grab an entry by pushing the index up to the next one. | 166 | * Grab an entry by pushing the index up to the next one. |
| 163 | */ | 167 | */ |
| 164 | spin_lock_irqsave(&wrap_lock, flags); | 168 | index = atomic_add_return(1, &ktp->kt_index); |
| 165 | index = ktp->kt_index; | 169 | index = (index - 1) & ktp->kt_index_mask; |
| 166 | if (++ktp->kt_index == ktp->kt_nentries) | ||
| 167 | ktp->kt_index = 0; | ||
| 168 | spin_unlock_irqrestore(&wrap_lock, flags); | ||
| 169 | |||
| 170 | if (!ktp->kt_rollover && index == ktp->kt_nentries - 1) | 170 | if (!ktp->kt_rollover && index == ktp->kt_nentries - 1) |
| 171 | ktp->kt_rollover = 1; | 171 | ktp->kt_rollover = 1; |
| 172 | 172 | ||
| @@ -199,11 +199,12 @@ int | |||
| 199 | ktrace_nentries( | 199 | ktrace_nentries( |
| 200 | ktrace_t *ktp) | 200 | ktrace_t *ktp) |
| 201 | { | 201 | { |
| 202 | if (ktp == NULL) { | 202 | int index; |
| 203 | if (ktp == NULL) | ||
| 203 | return 0; | 204 | return 0; |
| 204 | } | ||
| 205 | 205 | ||
| 206 | return (ktp->kt_rollover ? ktp->kt_nentries : ktp->kt_index); | 206 | index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask; |
| 207 | return (ktp->kt_rollover ? ktp->kt_nentries : index); | ||
| 207 | } | 208 | } |
| 208 | 209 | ||
| 209 | /* | 210 | /* |
| @@ -228,7 +229,7 @@ ktrace_first(ktrace_t *ktp, ktrace_snap_t *ktsp) | |||
| 228 | int nentries; | 229 | int nentries; |
| 229 | 230 | ||
| 230 | if (ktp->kt_rollover) | 231 | if (ktp->kt_rollover) |
| 231 | index = ktp->kt_index; | 232 | index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask; |
| 232 | else | 233 | else |
| 233 | index = 0; | 234 | index = 0; |
| 234 | 235 | ||
diff --git a/fs/xfs/support/ktrace.h b/fs/xfs/support/ktrace.h index 56e72b40a859..741d6947ca60 100644 --- a/fs/xfs/support/ktrace.h +++ b/fs/xfs/support/ktrace.h | |||
| @@ -30,7 +30,8 @@ typedef struct ktrace_entry { | |||
| 30 | */ | 30 | */ |
| 31 | typedef struct ktrace { | 31 | typedef struct ktrace { |
| 32 | int kt_nentries; /* number of entries in trace buf */ | 32 | int kt_nentries; /* number of entries in trace buf */ |
| 33 | int kt_index; /* current index in entries */ | 33 | atomic_t kt_index; /* current index in entries */ |
| 34 | unsigned int kt_index_mask; | ||
| 34 | int kt_rollover; | 35 | int kt_rollover; |
| 35 | ktrace_entry_t *kt_entries; /* buffer of entries */ | 36 | ktrace_entry_t *kt_entries; /* buffer of entries */ |
| 36 | } ktrace_t; | 37 | } ktrace_t; |
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index 540e4c989825..765aaf65e2d3 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | #define STATIC | 22 | #define STATIC |
| 23 | #define DEBUG 1 | 23 | #define DEBUG 1 |
| 24 | #define XFS_BUF_LOCK_TRACKING 1 | 24 | #define XFS_BUF_LOCK_TRACKING 1 |
| 25 | /* #define QUOTADEBUG 1 */ | 25 | #define QUOTADEBUG 1 |
| 26 | #endif | 26 | #endif |
| 27 | 27 | ||
| 28 | #ifdef CONFIG_XFS_TRACE | 28 | #ifdef CONFIG_XFS_TRACE |
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 7272fe39a92d..8e130b9720ae 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c | |||
| @@ -307,12 +307,13 @@ xfs_acl_vset( | |||
| 307 | 307 | ||
| 308 | VN_HOLD(vp); | 308 | VN_HOLD(vp); |
| 309 | error = xfs_acl_allow_set(vp, kind); | 309 | error = xfs_acl_allow_set(vp, kind); |
| 310 | if (error) | ||
| 311 | goto out; | ||
| 312 | 310 | ||
| 313 | /* Incoming ACL exists, set file mode based on its value */ | 311 | /* Incoming ACL exists, set file mode based on its value */ |
| 314 | if (kind == _ACL_TYPE_ACCESS) | 312 | if (!error && kind == _ACL_TYPE_ACCESS) |
| 315 | xfs_acl_setmode(vp, xfs_acl, &basicperms); | 313 | error = xfs_acl_setmode(vp, xfs_acl, &basicperms); |
| 314 | |||
| 315 | if (error) | ||
| 316 | goto out; | ||
| 316 | 317 | ||
| 317 | /* | 318 | /* |
| 318 | * If we have more than std unix permissions, set up the actual attr. | 319 | * If we have more than std unix permissions, set up the actual attr. |
| @@ -323,7 +324,7 @@ xfs_acl_vset( | |||
| 323 | if (!basicperms) { | 324 | if (!basicperms) { |
| 324 | xfs_acl_set_attr(vp, xfs_acl, kind, &error); | 325 | xfs_acl_set_attr(vp, xfs_acl, kind, &error); |
| 325 | } else { | 326 | } else { |
| 326 | xfs_acl_vremove(vp, _ACL_TYPE_ACCESS); | 327 | error = -xfs_acl_vremove(vp, _ACL_TYPE_ACCESS); |
| 327 | } | 328 | } |
| 328 | 329 | ||
| 329 | out: | 330 | out: |
| @@ -707,7 +708,9 @@ xfs_acl_inherit( | |||
| 707 | 708 | ||
| 708 | memcpy(cacl, pdaclp, sizeof(xfs_acl_t)); | 709 | memcpy(cacl, pdaclp, sizeof(xfs_acl_t)); |
| 709 | xfs_acl_filter_mode(mode, cacl); | 710 | xfs_acl_filter_mode(mode, cacl); |
| 710 | xfs_acl_setmode(vp, cacl, &basicperms); | 711 | error = xfs_acl_setmode(vp, cacl, &basicperms); |
| 712 | if (error) | ||
| 713 | goto out_error; | ||
| 711 | 714 | ||
| 712 | /* | 715 | /* |
| 713 | * Set the Default and Access ACL on the file. The mode is already | 716 | * Set the Default and Access ACL on the file. The mode is already |
| @@ -720,6 +723,7 @@ xfs_acl_inherit( | |||
| 720 | xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); | 723 | xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error); |
| 721 | if (!error && !basicperms) | 724 | if (!error && !basicperms) |
| 722 | xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); | 725 | xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error); |
| 726 | out_error: | ||
| 723 | _ACL_FREE(cacl); | 727 | _ACL_FREE(cacl); |
| 724 | return error; | 728 | return error; |
| 725 | } | 729 | } |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index bdbfbbee4959..1956f83489f1 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
| @@ -45,7 +45,7 @@ | |||
| 45 | #define XFSA_FIXUP_BNO_OK 1 | 45 | #define XFSA_FIXUP_BNO_OK 1 |
| 46 | #define XFSA_FIXUP_CNT_OK 2 | 46 | #define XFSA_FIXUP_CNT_OK 2 |
| 47 | 47 | ||
| 48 | STATIC int | 48 | STATIC void |
| 49 | xfs_alloc_search_busy(xfs_trans_t *tp, | 49 | xfs_alloc_search_busy(xfs_trans_t *tp, |
| 50 | xfs_agnumber_t agno, | 50 | xfs_agnumber_t agno, |
| 51 | xfs_agblock_t bno, | 51 | xfs_agblock_t bno, |
| @@ -55,24 +55,24 @@ xfs_alloc_search_busy(xfs_trans_t *tp, | |||
| 55 | ktrace_t *xfs_alloc_trace_buf; | 55 | ktrace_t *xfs_alloc_trace_buf; |
| 56 | 56 | ||
| 57 | #define TRACE_ALLOC(s,a) \ | 57 | #define TRACE_ALLOC(s,a) \ |
| 58 | xfs_alloc_trace_alloc(__FUNCTION__, s, a, __LINE__) | 58 | xfs_alloc_trace_alloc(__func__, s, a, __LINE__) |
| 59 | #define TRACE_FREE(s,a,b,x,f) \ | 59 | #define TRACE_FREE(s,a,b,x,f) \ |
| 60 | xfs_alloc_trace_free(__FUNCTION__, s, mp, a, b, x, f, __LINE__) | 60 | xfs_alloc_trace_free(__func__, s, mp, a, b, x, f, __LINE__) |
| 61 | #define TRACE_MODAGF(s,a,f) \ | 61 | #define TRACE_MODAGF(s,a,f) \ |
| 62 | xfs_alloc_trace_modagf(__FUNCTION__, s, mp, a, f, __LINE__) | 62 | xfs_alloc_trace_modagf(__func__, s, mp, a, f, __LINE__) |
| 63 | #define TRACE_BUSY(__FUNCTION__,s,ag,agb,l,sl,tp) \ | 63 | #define TRACE_BUSY(__func__,s,ag,agb,l,sl,tp) \ |
| 64 | xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__) | 64 | xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__) |
| 65 | #define TRACE_UNBUSY(__FUNCTION__,s,ag,sl,tp) \ | 65 | #define TRACE_UNBUSY(__func__,s,ag,sl,tp) \ |
| 66 | xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__) | 66 | xfs_alloc_trace_busy(__func__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__) |
| 67 | #define TRACE_BUSYSEARCH(__FUNCTION__,s,ag,agb,l,sl,tp) \ | 67 | #define TRACE_BUSYSEARCH(__func__,s,ag,agb,l,tp) \ |
| 68 | xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__) | 68 | xfs_alloc_trace_busy(__func__, s, mp, ag, agb, l, 0, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__) |
| 69 | #else | 69 | #else |
| 70 | #define TRACE_ALLOC(s,a) | 70 | #define TRACE_ALLOC(s,a) |
| 71 | #define TRACE_FREE(s,a,b,x,f) | 71 | #define TRACE_FREE(s,a,b,x,f) |
| 72 | #define TRACE_MODAGF(s,a,f) | 72 | #define TRACE_MODAGF(s,a,f) |
| 73 | #define TRACE_BUSY(s,a,ag,agb,l,sl,tp) | 73 | #define TRACE_BUSY(s,a,ag,agb,l,sl,tp) |
| 74 | #define TRACE_UNBUSY(fname,s,ag,sl,tp) | 74 | #define TRACE_UNBUSY(fname,s,ag,sl,tp) |
| 75 | #define TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp) | 75 | #define TRACE_BUSYSEARCH(fname,s,ag,agb,l,tp) |
| 76 | #endif /* XFS_ALLOC_TRACE */ | 76 | #endif /* XFS_ALLOC_TRACE */ |
| 77 | 77 | ||
| 78 | /* | 78 | /* |
| @@ -93,7 +93,7 @@ STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, | |||
| 93 | * Compute aligned version of the found extent. | 93 | * Compute aligned version of the found extent. |
| 94 | * Takes alignment and min length into account. | 94 | * Takes alignment and min length into account. |
| 95 | */ | 95 | */ |
| 96 | STATIC int /* success (>= minlen) */ | 96 | STATIC void |
| 97 | xfs_alloc_compute_aligned( | 97 | xfs_alloc_compute_aligned( |
| 98 | xfs_agblock_t foundbno, /* starting block in found extent */ | 98 | xfs_agblock_t foundbno, /* starting block in found extent */ |
| 99 | xfs_extlen_t foundlen, /* length in found extent */ | 99 | xfs_extlen_t foundlen, /* length in found extent */ |
| @@ -116,7 +116,6 @@ xfs_alloc_compute_aligned( | |||
| 116 | } | 116 | } |
| 117 | *resbno = bno; | 117 | *resbno = bno; |
| 118 | *reslen = len; | 118 | *reslen = len; |
| 119 | return len >= minlen; | ||
| 120 | } | 119 | } |
| 121 | 120 | ||
| 122 | /* | 121 | /* |
| @@ -837,9 +836,9 @@ xfs_alloc_ag_vextent_near( | |||
| 837 | if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) | 836 | if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) |
| 838 | goto error0; | 837 | goto error0; |
| 839 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 838 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
| 840 | if (!xfs_alloc_compute_aligned(ltbno, ltlen, | 839 | xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, |
| 841 | args->alignment, args->minlen, | 840 | args->minlen, <bnoa, <lena); |
| 842 | <bnoa, <lena)) | 841 | if (ltlena < args->minlen) |
| 843 | continue; | 842 | continue; |
| 844 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | 843 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); |
| 845 | xfs_alloc_fix_len(args); | 844 | xfs_alloc_fix_len(args); |
| @@ -958,9 +957,9 @@ xfs_alloc_ag_vextent_near( | |||
| 958 | if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) | 957 | if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) |
| 959 | goto error0; | 958 | goto error0; |
| 960 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 959 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
| 961 | if (xfs_alloc_compute_aligned(ltbno, ltlen, | 960 | xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, |
| 962 | args->alignment, args->minlen, | 961 | args->minlen, <bnoa, <lena); |
| 963 | <bnoa, <lena)) | 962 | if (ltlena >= args->minlen) |
| 964 | break; | 963 | break; |
| 965 | if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i))) | 964 | if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i))) |
| 966 | goto error0; | 965 | goto error0; |
| @@ -974,9 +973,9 @@ xfs_alloc_ag_vextent_near( | |||
| 974 | if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) | 973 | if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) |
| 975 | goto error0; | 974 | goto error0; |
| 976 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 975 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
| 977 | if (xfs_alloc_compute_aligned(gtbno, gtlen, | 976 | xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment, |
| 978 | args->alignment, args->minlen, | 977 | args->minlen, >bnoa, >lena); |
| 979 | >bnoa, >lena)) | 978 | if (gtlena >= args->minlen) |
| 980 | break; | 979 | break; |
| 981 | if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) | 980 | if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) |
| 982 | goto error0; | 981 | goto error0; |
| @@ -2562,9 +2561,10 @@ xfs_alloc_clear_busy(xfs_trans_t *tp, | |||
| 2562 | 2561 | ||
| 2563 | 2562 | ||
| 2564 | /* | 2563 | /* |
| 2565 | * returns non-zero if any of (agno,bno):len is in a busy list | 2564 | * If we find the extent in the busy list, force the log out to get the |
| 2565 | * extent out of the busy list so the caller can use it straight away. | ||
| 2566 | */ | 2566 | */ |
| 2567 | STATIC int | 2567 | STATIC void |
| 2568 | xfs_alloc_search_busy(xfs_trans_t *tp, | 2568 | xfs_alloc_search_busy(xfs_trans_t *tp, |
| 2569 | xfs_agnumber_t agno, | 2569 | xfs_agnumber_t agno, |
| 2570 | xfs_agblock_t bno, | 2570 | xfs_agblock_t bno, |
| @@ -2572,7 +2572,6 @@ xfs_alloc_search_busy(xfs_trans_t *tp, | |||
| 2572 | { | 2572 | { |
| 2573 | xfs_mount_t *mp; | 2573 | xfs_mount_t *mp; |
| 2574 | xfs_perag_busy_t *bsy; | 2574 | xfs_perag_busy_t *bsy; |
| 2575 | int n; | ||
| 2576 | xfs_agblock_t uend, bend; | 2575 | xfs_agblock_t uend, bend; |
| 2577 | xfs_lsn_t lsn; | 2576 | xfs_lsn_t lsn; |
| 2578 | int cnt; | 2577 | int cnt; |
| @@ -2585,21 +2584,18 @@ xfs_alloc_search_busy(xfs_trans_t *tp, | |||
| 2585 | uend = bno + len - 1; | 2584 | uend = bno + len - 1; |
| 2586 | 2585 | ||
| 2587 | /* search pagb_list for this slot, skipping open slots */ | 2586 | /* search pagb_list for this slot, skipping open slots */ |
| 2588 | for (bsy = mp->m_perag[agno].pagb_list, n = 0; | 2587 | for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) { |
| 2589 | cnt; bsy++, n++) { | ||
| 2590 | 2588 | ||
| 2591 | /* | 2589 | /* |
| 2592 | * (start1,length1) within (start2, length2) | 2590 | * (start1,length1) within (start2, length2) |
| 2593 | */ | 2591 | */ |
| 2594 | if (bsy->busy_tp != NULL) { | 2592 | if (bsy->busy_tp != NULL) { |
| 2595 | bend = bsy->busy_start + bsy->busy_length - 1; | 2593 | bend = bsy->busy_start + bsy->busy_length - 1; |
| 2596 | if ((bno > bend) || | 2594 | if ((bno > bend) || (uend < bsy->busy_start)) { |
| 2597 | (uend < bsy->busy_start)) { | ||
| 2598 | cnt--; | 2595 | cnt--; |
| 2599 | } else { | 2596 | } else { |
| 2600 | TRACE_BUSYSEARCH("xfs_alloc_search_busy", | 2597 | TRACE_BUSYSEARCH("xfs_alloc_search_busy", |
| 2601 | "found1", agno, bno, len, n, | 2598 | "found1", agno, bno, len, tp); |
| 2602 | tp); | ||
| 2603 | break; | 2599 | break; |
| 2604 | } | 2600 | } |
| 2605 | } | 2601 | } |
| @@ -2610,15 +2606,12 @@ xfs_alloc_search_busy(xfs_trans_t *tp, | |||
| 2610 | * transaction that freed the block | 2606 | * transaction that freed the block |
| 2611 | */ | 2607 | */ |
| 2612 | if (cnt) { | 2608 | if (cnt) { |
| 2613 | TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, n, tp); | 2609 | TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, tp); |
| 2614 | lsn = bsy->busy_tp->t_commit_lsn; | 2610 | lsn = bsy->busy_tp->t_commit_lsn; |
| 2615 | spin_unlock(&mp->m_perag[agno].pagb_lock); | 2611 | spin_unlock(&mp->m_perag[agno].pagb_lock); |
| 2616 | xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); | 2612 | xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); |
| 2617 | } else { | 2613 | } else { |
| 2618 | TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, n, tp); | 2614 | TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, tp); |
| 2619 | n = -1; | ||
| 2620 | spin_unlock(&mp->m_perag[agno].pagb_lock); | 2615 | spin_unlock(&mp->m_perag[agno].pagb_lock); |
| 2621 | } | 2616 | } |
| 2622 | |||
| 2623 | return n; | ||
| 2624 | } | 2617 | } |
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index e58f321fdae9..36d781ee5fcc 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c | |||
| @@ -2647,14 +2647,6 @@ attr_trusted_capable( | |||
| 2647 | } | 2647 | } |
| 2648 | 2648 | ||
| 2649 | STATIC int | 2649 | STATIC int |
| 2650 | attr_secure_capable( | ||
| 2651 | bhv_vnode_t *vp, | ||
| 2652 | cred_t *cred) | ||
| 2653 | { | ||
| 2654 | return -ENOSECURITY; | ||
| 2655 | } | ||
| 2656 | |||
| 2657 | STATIC int | ||
| 2658 | attr_system_set( | 2650 | attr_system_set( |
| 2659 | bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) | 2651 | bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) |
| 2660 | { | 2652 | { |
| @@ -2724,7 +2716,7 @@ struct attrnames attr_secure = { | |||
| 2724 | .attr_get = attr_generic_get, | 2716 | .attr_get = attr_generic_get, |
| 2725 | .attr_set = attr_generic_set, | 2717 | .attr_set = attr_generic_set, |
| 2726 | .attr_remove = attr_generic_remove, | 2718 | .attr_remove = attr_generic_remove, |
| 2727 | .attr_capable = attr_secure_capable, | 2719 | .attr_capable = (attrcapable_t)fs_noerr, |
| 2728 | }; | 2720 | }; |
| 2729 | 2721 | ||
| 2730 | struct attrnames attr_user = { | 2722 | struct attrnames attr_user = { |
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 96ba6aa4ed8c..303d41e4217b 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c | |||
| @@ -166,7 +166,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) | |||
| 166 | 166 | ||
| 167 | if (!(mp->m_flags & XFS_MOUNT_ATTR2)) { | 167 | if (!(mp->m_flags & XFS_MOUNT_ATTR2)) { |
| 168 | if (bytes <= XFS_IFORK_ASIZE(dp)) | 168 | if (bytes <= XFS_IFORK_ASIZE(dp)) |
| 169 | return mp->m_attroffset >> 3; | 169 | return dp->i_d.di_forkoff; |
| 170 | return 0; | 170 | return 0; |
| 171 | } | 171 | } |
| 172 | 172 | ||
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 2def273855a2..eb198c01c35d 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
| @@ -323,13 +323,13 @@ xfs_bmap_trace_pre_update( | |||
| 323 | int whichfork); /* data or attr fork */ | 323 | int whichfork); /* data or attr fork */ |
| 324 | 324 | ||
| 325 | #define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) \ | 325 | #define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) \ |
| 326 | xfs_bmap_trace_delete(__FUNCTION__,d,ip,i,c,w) | 326 | xfs_bmap_trace_delete(__func__,d,ip,i,c,w) |
| 327 | #define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) \ | 327 | #define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) \ |
| 328 | xfs_bmap_trace_insert(__FUNCTION__,d,ip,i,c,r1,r2,w) | 328 | xfs_bmap_trace_insert(__func__,d,ip,i,c,r1,r2,w) |
| 329 | #define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w) \ | 329 | #define XFS_BMAP_TRACE_POST_UPDATE(d,ip,i,w) \ |
| 330 | xfs_bmap_trace_post_update(__FUNCTION__,d,ip,i,w) | 330 | xfs_bmap_trace_post_update(__func__,d,ip,i,w) |
| 331 | #define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w) \ | 331 | #define XFS_BMAP_TRACE_PRE_UPDATE(d,ip,i,w) \ |
| 332 | xfs_bmap_trace_pre_update(__FUNCTION__,d,ip,i,w) | 332 | xfs_bmap_trace_pre_update(__func__,d,ip,i,w) |
| 333 | #else | 333 | #else |
| 334 | #define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) | 334 | #define XFS_BMAP_TRACE_DELETE(d,ip,i,c,w) |
| 335 | #define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) | 335 | #define XFS_BMAP_TRACE_INSERT(d,ip,i,c,r1,r2,w) |
| @@ -2402,7 +2402,7 @@ xfs_bmap_extsize_align( | |||
| 2402 | 2402 | ||
| 2403 | #define XFS_ALLOC_GAP_UNITS 4 | 2403 | #define XFS_ALLOC_GAP_UNITS 4 |
| 2404 | 2404 | ||
| 2405 | STATIC int | 2405 | STATIC void |
| 2406 | xfs_bmap_adjacent( | 2406 | xfs_bmap_adjacent( |
| 2407 | xfs_bmalloca_t *ap) /* bmap alloc argument struct */ | 2407 | xfs_bmalloca_t *ap) /* bmap alloc argument struct */ |
| 2408 | { | 2408 | { |
| @@ -2548,7 +2548,6 @@ xfs_bmap_adjacent( | |||
| 2548 | ap->rval = gotbno; | 2548 | ap->rval = gotbno; |
| 2549 | } | 2549 | } |
| 2550 | #undef ISVALID | 2550 | #undef ISVALID |
| 2551 | return 0; | ||
| 2552 | } | 2551 | } |
| 2553 | 2552 | ||
| 2554 | STATIC int | 2553 | STATIC int |
| @@ -4154,16 +4153,21 @@ xfs_bmap_compute_maxlevels( | |||
| 4154 | * number of leaf entries, is controlled by the type of di_nextents | 4153 | * number of leaf entries, is controlled by the type of di_nextents |
| 4155 | * (a signed 32-bit number, xfs_extnum_t), or by di_anextents | 4154 | * (a signed 32-bit number, xfs_extnum_t), or by di_anextents |
| 4156 | * (a signed 16-bit number, xfs_aextnum_t). | 4155 | * (a signed 16-bit number, xfs_aextnum_t). |
| 4156 | * | ||
| 4157 | * Note that we can no longer assume that if we are in ATTR1 that | ||
| 4158 | * the fork offset of all the inodes will be (m_attroffset >> 3) | ||
| 4159 | * because we could have mounted with ATTR2 and then mounted back | ||
| 4160 | * with ATTR1, keeping the di_forkoff's fixed but probably at | ||
| 4161 | * various positions. Therefore, for both ATTR1 and ATTR2 | ||
| 4162 | * we have to assume the worst case scenario of a minimum size | ||
| 4163 | * available. | ||
| 4157 | */ | 4164 | */ |
| 4158 | if (whichfork == XFS_DATA_FORK) { | 4165 | if (whichfork == XFS_DATA_FORK) { |
| 4159 | maxleafents = MAXEXTNUM; | 4166 | maxleafents = MAXEXTNUM; |
| 4160 | sz = (mp->m_flags & XFS_MOUNT_ATTR2) ? | 4167 | sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS); |
| 4161 | XFS_BMDR_SPACE_CALC(MINDBTPTRS) : mp->m_attroffset; | ||
| 4162 | } else { | 4168 | } else { |
| 4163 | maxleafents = MAXAEXTNUM; | 4169 | maxleafents = MAXAEXTNUM; |
| 4164 | sz = (mp->m_flags & XFS_MOUNT_ATTR2) ? | 4170 | sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); |
| 4165 | XFS_BMDR_SPACE_CALC(MINABTPTRS) : | ||
| 4166 | mp->m_sb.sb_inodesize - mp->m_attroffset; | ||
| 4167 | } | 4171 | } |
| 4168 | maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0); | 4172 | maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0); |
| 4169 | minleafrecs = mp->m_bmap_dmnr[0]; | 4173 | minleafrecs = mp->m_bmap_dmnr[0]; |
| @@ -5772,7 +5776,6 @@ xfs_getbmap( | |||
| 5772 | int error; /* return value */ | 5776 | int error; /* return value */ |
| 5773 | __int64_t fixlen; /* length for -1 case */ | 5777 | __int64_t fixlen; /* length for -1 case */ |
| 5774 | int i; /* extent number */ | 5778 | int i; /* extent number */ |
| 5775 | bhv_vnode_t *vp; /* corresponding vnode */ | ||
| 5776 | int lock; /* lock state */ | 5779 | int lock; /* lock state */ |
| 5777 | xfs_bmbt_irec_t *map; /* buffer for user's data */ | 5780 | xfs_bmbt_irec_t *map; /* buffer for user's data */ |
| 5778 | xfs_mount_t *mp; /* file system mount point */ | 5781 | xfs_mount_t *mp; /* file system mount point */ |
| @@ -5789,7 +5792,6 @@ xfs_getbmap( | |||
| 5789 | int bmapi_flags; /* flags for xfs_bmapi */ | 5792 | int bmapi_flags; /* flags for xfs_bmapi */ |
| 5790 | __int32_t oflags; /* getbmapx bmv_oflags field */ | 5793 | __int32_t oflags; /* getbmapx bmv_oflags field */ |
| 5791 | 5794 | ||
| 5792 | vp = XFS_ITOV(ip); | ||
| 5793 | mp = ip->i_mount; | 5795 | mp = ip->i_mount; |
| 5794 | 5796 | ||
| 5795 | whichfork = interface & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; | 5797 | whichfork = interface & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; |
| @@ -5811,7 +5813,7 @@ xfs_getbmap( | |||
| 5811 | if ((interface & BMV_IF_NO_DMAPI_READ) == 0 && | 5813 | if ((interface & BMV_IF_NO_DMAPI_READ) == 0 && |
| 5812 | DM_EVENT_ENABLED(ip, DM_EVENT_READ) && | 5814 | DM_EVENT_ENABLED(ip, DM_EVENT_READ) && |
| 5813 | whichfork == XFS_DATA_FORK) { | 5815 | whichfork == XFS_DATA_FORK) { |
| 5814 | error = XFS_SEND_DATA(mp, DM_EVENT_READ, vp, 0, 0, 0, NULL); | 5816 | error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL); |
| 5815 | if (error) | 5817 | if (error) |
| 5816 | return XFS_ERROR(error); | 5818 | return XFS_ERROR(error); |
| 5817 | } | 5819 | } |
| @@ -5869,6 +5871,10 @@ xfs_getbmap( | |||
| 5869 | /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ | 5871 | /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ |
| 5870 | error = xfs_flush_pages(ip, (xfs_off_t)0, | 5872 | error = xfs_flush_pages(ip, (xfs_off_t)0, |
| 5871 | -1, 0, FI_REMAPF); | 5873 | -1, 0, FI_REMAPF); |
| 5874 | if (error) { | ||
| 5875 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | ||
| 5876 | return error; | ||
| 5877 | } | ||
| 5872 | } | 5878 | } |
| 5873 | 5879 | ||
| 5874 | ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0); | 5880 | ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0); |
| @@ -6162,10 +6168,10 @@ xfs_check_block( | |||
| 6162 | } | 6168 | } |
| 6163 | if (*thispa == *pp) { | 6169 | if (*thispa == *pp) { |
| 6164 | cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", | 6170 | cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", |
| 6165 | __FUNCTION__, j, i, | 6171 | __func__, j, i, |
| 6166 | (unsigned long long)be64_to_cpu(*thispa)); | 6172 | (unsigned long long)be64_to_cpu(*thispa)); |
| 6167 | panic("%s: ptrs are equal in node\n", | 6173 | panic("%s: ptrs are equal in node\n", |
| 6168 | __FUNCTION__); | 6174 | __func__); |
| 6169 | } | 6175 | } |
| 6170 | } | 6176 | } |
| 6171 | } | 6177 | } |
| @@ -6192,7 +6198,7 @@ xfs_bmap_check_leaf_extents( | |||
| 6192 | xfs_mount_t *mp; /* file system mount structure */ | 6198 | xfs_mount_t *mp; /* file system mount structure */ |
| 6193 | __be64 *pp; /* pointer to block address */ | 6199 | __be64 *pp; /* pointer to block address */ |
| 6194 | xfs_bmbt_rec_t *ep; /* pointer to current extent */ | 6200 | xfs_bmbt_rec_t *ep; /* pointer to current extent */ |
| 6195 | xfs_bmbt_rec_t *lastp; /* pointer to previous extent */ | 6201 | xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */ |
| 6196 | xfs_bmbt_rec_t *nextp; /* pointer to next extent */ | 6202 | xfs_bmbt_rec_t *nextp; /* pointer to next extent */ |
| 6197 | int bp_release = 0; | 6203 | int bp_release = 0; |
| 6198 | 6204 | ||
| @@ -6262,7 +6268,6 @@ xfs_bmap_check_leaf_extents( | |||
| 6262 | /* | 6268 | /* |
| 6263 | * Loop over all leaf nodes checking that all extents are in the right order. | 6269 | * Loop over all leaf nodes checking that all extents are in the right order. |
| 6264 | */ | 6270 | */ |
| 6265 | lastp = NULL; | ||
| 6266 | for (;;) { | 6271 | for (;;) { |
| 6267 | xfs_fsblock_t nextbno; | 6272 | xfs_fsblock_t nextbno; |
| 6268 | xfs_extnum_t num_recs; | 6273 | xfs_extnum_t num_recs; |
| @@ -6283,18 +6288,16 @@ xfs_bmap_check_leaf_extents( | |||
| 6283 | */ | 6288 | */ |
| 6284 | 6289 | ||
| 6285 | ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1); | 6290 | ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1); |
| 6291 | if (i) { | ||
| 6292 | xfs_btree_check_rec(XFS_BTNUM_BMAP, &last, ep); | ||
| 6293 | } | ||
| 6286 | for (j = 1; j < num_recs; j++) { | 6294 | for (j = 1; j < num_recs; j++) { |
| 6287 | nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1); | 6295 | nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1); |
| 6288 | if (lastp) { | 6296 | xfs_btree_check_rec(XFS_BTNUM_BMAP, ep, nextp); |
| 6289 | xfs_btree_check_rec(XFS_BTNUM_BMAP, | ||
| 6290 | (void *)lastp, (void *)ep); | ||
| 6291 | } | ||
| 6292 | xfs_btree_check_rec(XFS_BTNUM_BMAP, (void *)ep, | ||
| 6293 | (void *)(nextp)); | ||
| 6294 | lastp = ep; | ||
| 6295 | ep = nextp; | 6297 | ep = nextp; |
| 6296 | } | 6298 | } |
| 6297 | 6299 | ||
| 6300 | last = *ep; | ||
| 6298 | i += num_recs; | 6301 | i += num_recs; |
| 6299 | if (bp_release) { | 6302 | if (bp_release) { |
| 6300 | bp_release = 0; | 6303 | bp_release = 0; |
| @@ -6325,13 +6328,13 @@ xfs_bmap_check_leaf_extents( | |||
| 6325 | return; | 6328 | return; |
| 6326 | 6329 | ||
| 6327 | error0: | 6330 | error0: |
| 6328 | cmn_err(CE_WARN, "%s: at error0", __FUNCTION__); | 6331 | cmn_err(CE_WARN, "%s: at error0", __func__); |
| 6329 | if (bp_release) | 6332 | if (bp_release) |
| 6330 | xfs_trans_brelse(NULL, bp); | 6333 | xfs_trans_brelse(NULL, bp); |
| 6331 | error_norelse: | 6334 | error_norelse: |
| 6332 | cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents", | 6335 | cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents", |
| 6333 | __FUNCTION__, i); | 6336 | __func__, i); |
| 6334 | panic("%s: CORRUPTED BTREE OR SOMETHING", __FUNCTION__); | 6337 | panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); |
| 6335 | return; | 6338 | return; |
| 6336 | } | 6339 | } |
| 6337 | #endif | 6340 | #endif |
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 87224b7d7984..6ff70cda451c 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h | |||
| @@ -151,7 +151,7 @@ xfs_bmap_trace_exlist( | |||
| 151 | xfs_extnum_t cnt, /* count of entries in list */ | 151 | xfs_extnum_t cnt, /* count of entries in list */ |
| 152 | int whichfork); /* data or attr fork */ | 152 | int whichfork); /* data or attr fork */ |
| 153 | #define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ | 153 | #define XFS_BMAP_TRACE_EXLIST(ip,c,w) \ |
| 154 | xfs_bmap_trace_exlist(__FUNCTION__,ip,c,w) | 154 | xfs_bmap_trace_exlist(__func__,ip,c,w) |
| 155 | #else | 155 | #else |
| 156 | #define XFS_BMAP_TRACE_EXLIST(ip,c,w) | 156 | #define XFS_BMAP_TRACE_EXLIST(ip,c,w) |
| 157 | #endif | 157 | #endif |
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index bd18987326a3..4f0e849d973e 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c | |||
| @@ -275,21 +275,21 @@ xfs_bmbt_trace_cursor( | |||
| 275 | } | 275 | } |
| 276 | 276 | ||
| 277 | #define XFS_BMBT_TRACE_ARGBI(c,b,i) \ | 277 | #define XFS_BMBT_TRACE_ARGBI(c,b,i) \ |
| 278 | xfs_bmbt_trace_argbi(__FUNCTION__, c, b, i, __LINE__) | 278 | xfs_bmbt_trace_argbi(__func__, c, b, i, __LINE__) |
| 279 | #define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \ | 279 | #define XFS_BMBT_TRACE_ARGBII(c,b,i,j) \ |
| 280 | xfs_bmbt_trace_argbii(__FUNCTION__, c, b, i, j, __LINE__) | 280 | xfs_bmbt_trace_argbii(__func__, c, b, i, j, __LINE__) |
| 281 | #define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \ | 281 | #define XFS_BMBT_TRACE_ARGFFFI(c,o,b,i,j) \ |
| 282 | xfs_bmbt_trace_argfffi(__FUNCTION__, c, o, b, i, j, __LINE__) | 282 | xfs_bmbt_trace_argfffi(__func__, c, o, b, i, j, __LINE__) |
| 283 | #define XFS_BMBT_TRACE_ARGI(c,i) \ | 283 | #define XFS_BMBT_TRACE_ARGI(c,i) \ |
| 284 | xfs_bmbt_trace_argi(__FUNCTION__, c, i, __LINE__) | 284 | xfs_bmbt_trace_argi(__func__, c, i, __LINE__) |
| 285 | #define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \ | 285 | #define XFS_BMBT_TRACE_ARGIFK(c,i,f,s) \ |
| 286 | xfs_bmbt_trace_argifk(__FUNCTION__, c, i, f, s, __LINE__) | 286 | xfs_bmbt_trace_argifk(__func__, c, i, f, s, __LINE__) |
| 287 | #define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ | 287 | #define XFS_BMBT_TRACE_ARGIFR(c,i,f,r) \ |
| 288 | xfs_bmbt_trace_argifr(__FUNCTION__, c, i, f, r, __LINE__) | 288 | xfs_bmbt_trace_argifr(__func__, c, i, f, r, __LINE__) |
| 289 | #define XFS_BMBT_TRACE_ARGIK(c,i,k) \ | 289 | #define XFS_BMBT_TRACE_ARGIK(c,i,k) \ |
| 290 | xfs_bmbt_trace_argik(__FUNCTION__, c, i, k, __LINE__) | 290 | xfs_bmbt_trace_argik(__func__, c, i, k, __LINE__) |
| 291 | #define XFS_BMBT_TRACE_CURSOR(c,s) \ | 291 | #define XFS_BMBT_TRACE_CURSOR(c,s) \ |
| 292 | xfs_bmbt_trace_cursor(__FUNCTION__, c, s, __LINE__) | 292 | xfs_bmbt_trace_cursor(__func__, c, s, __LINE__) |
| 293 | #else | 293 | #else |
| 294 | #define XFS_BMBT_TRACE_ARGBI(c,b,i) | 294 | #define XFS_BMBT_TRACE_ARGBI(c,b,i) |
| 295 | #define XFS_BMBT_TRACE_ARGBII(c,b,i,j) | 295 | #define XFS_BMBT_TRACE_ARGBII(c,b,i,j) |
| @@ -2027,6 +2027,24 @@ xfs_bmbt_increment( | |||
| 2027 | 2027 | ||
| 2028 | /* | 2028 | /* |
| 2029 | * Insert the current record at the point referenced by cur. | 2029 | * Insert the current record at the point referenced by cur. |
| 2030 | * | ||
| 2031 | * A multi-level split of the tree on insert will invalidate the original | ||
| 2032 | * cursor. It appears, however, that some callers assume that the cursor is | ||
| 2033 | * always valid. Hence if we do a multi-level split we need to revalidate the | ||
| 2034 | * cursor. | ||
| 2035 | * | ||
| 2036 | * When a split occurs, we will see a new cursor returned. Use that as a | ||
| 2037 | * trigger to determine if we need to revalidate the original cursor. If we get | ||
| 2038 | * a split, then use the original irec to lookup up the path of the record we | ||
| 2039 | * just inserted. | ||
| 2040 | * | ||
| 2041 | * Note that the fact that the btree root is in the inode means that we can | ||
| 2042 | * have the level of the tree change without a "split" occurring at the root | ||
| 2043 | * level. What happens is that the root is migrated to an allocated block and | ||
| 2044 | * the inode root is pointed to it. This means a single split can change the | ||
| 2045 | * level of the tree (level 2 -> level 3) and invalidate the old cursor. Hence | ||
| 2046 | * the level change should be accounted as a split so as to correctly trigger a | ||
| 2047 | * revalidation of the old cursor. | ||
| 2030 | */ | 2048 | */ |
| 2031 | int /* error */ | 2049 | int /* error */ |
| 2032 | xfs_bmbt_insert( | 2050 | xfs_bmbt_insert( |
| @@ -2039,11 +2057,14 @@ xfs_bmbt_insert( | |||
| 2039 | xfs_fsblock_t nbno; | 2057 | xfs_fsblock_t nbno; |
| 2040 | xfs_btree_cur_t *ncur; | 2058 | xfs_btree_cur_t *ncur; |
| 2041 | xfs_bmbt_rec_t nrec; | 2059 | xfs_bmbt_rec_t nrec; |
| 2060 | xfs_bmbt_irec_t oirec; /* original irec */ | ||
| 2042 | xfs_btree_cur_t *pcur; | 2061 | xfs_btree_cur_t *pcur; |
| 2062 | int splits = 0; | ||
| 2043 | 2063 | ||
| 2044 | XFS_BMBT_TRACE_CURSOR(cur, ENTRY); | 2064 | XFS_BMBT_TRACE_CURSOR(cur, ENTRY); |
| 2045 | level = 0; | 2065 | level = 0; |
| 2046 | nbno = NULLFSBLOCK; | 2066 | nbno = NULLFSBLOCK; |
| 2067 | oirec = cur->bc_rec.b; | ||
| 2047 | xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b); | 2068 | xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b); |
| 2048 | ncur = NULL; | 2069 | ncur = NULL; |
| 2049 | pcur = cur; | 2070 | pcur = cur; |
| @@ -2052,11 +2073,13 @@ xfs_bmbt_insert( | |||
| 2052 | &i))) { | 2073 | &i))) { |
| 2053 | if (pcur != cur) | 2074 | if (pcur != cur) |
| 2054 | xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); | 2075 | xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); |
| 2055 | XFS_BMBT_TRACE_CURSOR(cur, ERROR); | 2076 | goto error0; |
| 2056 | return error; | ||
| 2057 | } | 2077 | } |
| 2058 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 2078 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
| 2059 | if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) { | 2079 | if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) { |
| 2080 | /* allocating a new root is effectively a split */ | ||
| 2081 | if (cur->bc_nlevels != pcur->bc_nlevels) | ||
| 2082 | splits++; | ||
| 2060 | cur->bc_nlevels = pcur->bc_nlevels; | 2083 | cur->bc_nlevels = pcur->bc_nlevels; |
| 2061 | cur->bc_private.b.allocated += | 2084 | cur->bc_private.b.allocated += |
| 2062 | pcur->bc_private.b.allocated; | 2085 | pcur->bc_private.b.allocated; |
| @@ -2070,10 +2093,21 @@ xfs_bmbt_insert( | |||
| 2070 | xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); | 2093 | xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); |
| 2071 | } | 2094 | } |
| 2072 | if (ncur) { | 2095 | if (ncur) { |
| 2096 | splits++; | ||
| 2073 | pcur = ncur; | 2097 | pcur = ncur; |
| 2074 | ncur = NULL; | 2098 | ncur = NULL; |
| 2075 | } | 2099 | } |
| 2076 | } while (nbno != NULLFSBLOCK); | 2100 | } while (nbno != NULLFSBLOCK); |
| 2101 | |||
| 2102 | if (splits > 1) { | ||
| 2103 | /* revalidate the old cursor as we had a multi-level split */ | ||
| 2104 | error = xfs_bmbt_lookup_eq(cur, oirec.br_startoff, | ||
| 2105 | oirec.br_startblock, oirec.br_blockcount, &i); | ||
| 2106 | if (error) | ||
| 2107 | goto error0; | ||
| 2108 | ASSERT(i == 1); | ||
| 2109 | } | ||
| 2110 | |||
| 2077 | XFS_BMBT_TRACE_CURSOR(cur, EXIT); | 2111 | XFS_BMBT_TRACE_CURSOR(cur, EXIT); |
| 2078 | *stat = i; | 2112 | *stat = i; |
| 2079 | return 0; | 2113 | return 0; |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 63debd147eb5..53a71c62025d 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
| @@ -645,7 +645,12 @@ xfs_buf_item_push( | |||
| 645 | bp = bip->bli_buf; | 645 | bp = bip->bli_buf; |
| 646 | 646 | ||
| 647 | if (XFS_BUF_ISDELAYWRITE(bp)) { | 647 | if (XFS_BUF_ISDELAYWRITE(bp)) { |
| 648 | xfs_bawrite(bip->bli_item.li_mountp, bp); | 648 | int error; |
| 649 | error = xfs_bawrite(bip->bli_item.li_mountp, bp); | ||
| 650 | if (error) | ||
| 651 | xfs_fs_cmn_err(CE_WARN, bip->bli_item.li_mountp, | ||
| 652 | "xfs_buf_item_push: pushbuf error %d on bip %p, bp %p", | ||
| 653 | error, bip, bp); | ||
| 649 | } else { | 654 | } else { |
| 650 | xfs_buf_relse(bp); | 655 | xfs_buf_relse(bp); |
| 651 | } | 656 | } |
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index e92e73f0e6af..7cb26529766b 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c | |||
| @@ -44,6 +44,7 @@ | |||
| 44 | #include "xfs_error.h" | 44 | #include "xfs_error.h" |
| 45 | #include "xfs_vnodeops.h" | 45 | #include "xfs_vnodeops.h" |
| 46 | 46 | ||
| 47 | struct xfs_name xfs_name_dotdot = {"..", 2}; | ||
| 47 | 48 | ||
| 48 | void | 49 | void |
| 49 | xfs_dir_mount( | 50 | xfs_dir_mount( |
| @@ -146,8 +147,7 @@ int | |||
| 146 | xfs_dir_createname( | 147 | xfs_dir_createname( |
| 147 | xfs_trans_t *tp, | 148 | xfs_trans_t *tp, |
| 148 | xfs_inode_t *dp, | 149 | xfs_inode_t *dp, |
| 149 | char *name, | 150 | struct xfs_name *name, |
| 150 | int namelen, | ||
| 151 | xfs_ino_t inum, /* new entry inode number */ | 151 | xfs_ino_t inum, /* new entry inode number */ |
| 152 | xfs_fsblock_t *first, /* bmap's firstblock */ | 152 | xfs_fsblock_t *first, /* bmap's firstblock */ |
| 153 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ | 153 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ |
| @@ -162,9 +162,9 @@ xfs_dir_createname( | |||
| 162 | return rval; | 162 | return rval; |
| 163 | XFS_STATS_INC(xs_dir_create); | 163 | XFS_STATS_INC(xs_dir_create); |
| 164 | 164 | ||
| 165 | args.name = name; | 165 | args.name = name->name; |
| 166 | args.namelen = namelen; | 166 | args.namelen = name->len; |
| 167 | args.hashval = xfs_da_hashname(name, namelen); | 167 | args.hashval = xfs_da_hashname(name->name, name->len); |
| 168 | args.inumber = inum; | 168 | args.inumber = inum; |
| 169 | args.dp = dp; | 169 | args.dp = dp; |
| 170 | args.firstblock = first; | 170 | args.firstblock = first; |
| @@ -197,8 +197,7 @@ int | |||
| 197 | xfs_dir_lookup( | 197 | xfs_dir_lookup( |
| 198 | xfs_trans_t *tp, | 198 | xfs_trans_t *tp, |
| 199 | xfs_inode_t *dp, | 199 | xfs_inode_t *dp, |
| 200 | char *name, | 200 | struct xfs_name *name, |
| 201 | int namelen, | ||
| 202 | xfs_ino_t *inum) /* out: inode number */ | 201 | xfs_ino_t *inum) /* out: inode number */ |
| 203 | { | 202 | { |
| 204 | xfs_da_args_t args; | 203 | xfs_da_args_t args; |
| @@ -207,18 +206,14 @@ xfs_dir_lookup( | |||
| 207 | 206 | ||
| 208 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); | 207 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); |
| 209 | XFS_STATS_INC(xs_dir_lookup); | 208 | XFS_STATS_INC(xs_dir_lookup); |
| 209 | memset(&args, 0, sizeof(xfs_da_args_t)); | ||
| 210 | 210 | ||
| 211 | args.name = name; | 211 | args.name = name->name; |
| 212 | args.namelen = namelen; | 212 | args.namelen = name->len; |
| 213 | args.hashval = xfs_da_hashname(name, namelen); | 213 | args.hashval = xfs_da_hashname(name->name, name->len); |
| 214 | args.inumber = 0; | ||
| 215 | args.dp = dp; | 214 | args.dp = dp; |
| 216 | args.firstblock = NULL; | ||
| 217 | args.flist = NULL; | ||
| 218 | args.total = 0; | ||
| 219 | args.whichfork = XFS_DATA_FORK; | 215 | args.whichfork = XFS_DATA_FORK; |
| 220 | args.trans = tp; | 216 | args.trans = tp; |
| 221 | args.justcheck = args.addname = 0; | ||
| 222 | args.oknoent = 1; | 217 | args.oknoent = 1; |
| 223 | 218 | ||
| 224 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) | 219 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) |
| @@ -247,8 +242,7 @@ int | |||
| 247 | xfs_dir_removename( | 242 | xfs_dir_removename( |
| 248 | xfs_trans_t *tp, | 243 | xfs_trans_t *tp, |
| 249 | xfs_inode_t *dp, | 244 | xfs_inode_t *dp, |
| 250 | char *name, | 245 | struct xfs_name *name, |
| 251 | int namelen, | ||
| 252 | xfs_ino_t ino, | 246 | xfs_ino_t ino, |
| 253 | xfs_fsblock_t *first, /* bmap's firstblock */ | 247 | xfs_fsblock_t *first, /* bmap's firstblock */ |
| 254 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ | 248 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ |
| @@ -261,9 +255,9 @@ xfs_dir_removename( | |||
| 261 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); | 255 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); |
| 262 | XFS_STATS_INC(xs_dir_remove); | 256 | XFS_STATS_INC(xs_dir_remove); |
| 263 | 257 | ||
| 264 | args.name = name; | 258 | args.name = name->name; |
| 265 | args.namelen = namelen; | 259 | args.namelen = name->len; |
| 266 | args.hashval = xfs_da_hashname(name, namelen); | 260 | args.hashval = xfs_da_hashname(name->name, name->len); |
| 267 | args.inumber = ino; | 261 | args.inumber = ino; |
| 268 | args.dp = dp; | 262 | args.dp = dp; |
| 269 | args.firstblock = first; | 263 | args.firstblock = first; |
| @@ -329,8 +323,7 @@ int | |||
| 329 | xfs_dir_replace( | 323 | xfs_dir_replace( |
| 330 | xfs_trans_t *tp, | 324 | xfs_trans_t *tp, |
| 331 | xfs_inode_t *dp, | 325 | xfs_inode_t *dp, |
| 332 | char *name, /* name of entry to replace */ | 326 | struct xfs_name *name, /* name of entry to replace */ |
| 333 | int namelen, | ||
| 334 | xfs_ino_t inum, /* new inode number */ | 327 | xfs_ino_t inum, /* new inode number */ |
| 335 | xfs_fsblock_t *first, /* bmap's firstblock */ | 328 | xfs_fsblock_t *first, /* bmap's firstblock */ |
| 336 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ | 329 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ |
| @@ -345,9 +338,9 @@ xfs_dir_replace( | |||
| 345 | if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) | 338 | if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) |
| 346 | return rval; | 339 | return rval; |
| 347 | 340 | ||
| 348 | args.name = name; | 341 | args.name = name->name; |
| 349 | args.namelen = namelen; | 342 | args.namelen = name->len; |
| 350 | args.hashval = xfs_da_hashname(name, namelen); | 343 | args.hashval = xfs_da_hashname(name->name, name->len); |
| 351 | args.inumber = inum; | 344 | args.inumber = inum; |
| 352 | args.dp = dp; | 345 | args.dp = dp; |
| 353 | args.firstblock = first; | 346 | args.firstblock = first; |
| @@ -374,28 +367,29 @@ xfs_dir_replace( | |||
| 374 | 367 | ||
| 375 | /* | 368 | /* |
| 376 | * See if this entry can be added to the directory without allocating space. | 369 | * See if this entry can be added to the directory without allocating space. |
| 370 | * First checks that the caller couldn't reserve enough space (resblks = 0). | ||
| 377 | */ | 371 | */ |
| 378 | int | 372 | int |
| 379 | xfs_dir_canenter( | 373 | xfs_dir_canenter( |
| 380 | xfs_trans_t *tp, | 374 | xfs_trans_t *tp, |
| 381 | xfs_inode_t *dp, | 375 | xfs_inode_t *dp, |
| 382 | char *name, /* name of entry to add */ | 376 | struct xfs_name *name, /* name of entry to add */ |
| 383 | int namelen) | 377 | uint resblks) |
| 384 | { | 378 | { |
| 385 | xfs_da_args_t args; | 379 | xfs_da_args_t args; |
| 386 | int rval; | 380 | int rval; |
| 387 | int v; /* type-checking value */ | 381 | int v; /* type-checking value */ |
| 388 | 382 | ||
| 383 | if (resblks) | ||
| 384 | return 0; | ||
| 385 | |||
| 389 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); | 386 | ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); |
| 387 | memset(&args, 0, sizeof(xfs_da_args_t)); | ||
| 390 | 388 | ||
| 391 | args.name = name; | 389 | args.name = name->name; |
| 392 | args.namelen = namelen; | 390 | args.namelen = name->len; |
| 393 | args.hashval = xfs_da_hashname(name, namelen); | 391 | args.hashval = xfs_da_hashname(name->name, name->len); |
| 394 | args.inumber = 0; | ||
| 395 | args.dp = dp; | 392 | args.dp = dp; |
| 396 | args.firstblock = NULL; | ||
| 397 | args.flist = NULL; | ||
| 398 | args.total = 0; | ||
| 399 | args.whichfork = XFS_DATA_FORK; | 393 | args.whichfork = XFS_DATA_FORK; |
| 400 | args.trans = tp; | 394 | args.trans = tp; |
| 401 | args.justcheck = args.addname = args.oknoent = 1; | 395 | args.justcheck = args.addname = args.oknoent = 1; |
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h index b265197e74cf..6392f939029f 100644 --- a/fs/xfs/xfs_dir2.h +++ b/fs/xfs/xfs_dir2.h | |||
| @@ -59,6 +59,8 @@ typedef __uint32_t xfs_dir2_db_t; | |||
| 59 | */ | 59 | */ |
| 60 | typedef xfs_off_t xfs_dir2_off_t; | 60 | typedef xfs_off_t xfs_dir2_off_t; |
| 61 | 61 | ||
| 62 | extern struct xfs_name xfs_name_dotdot; | ||
| 63 | |||
| 62 | /* | 64 | /* |
| 63 | * Generic directory interface routines | 65 | * Generic directory interface routines |
| 64 | */ | 66 | */ |
| @@ -68,21 +70,21 @@ extern int xfs_dir_isempty(struct xfs_inode *dp); | |||
| 68 | extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp, | 70 | extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp, |
| 69 | struct xfs_inode *pdp); | 71 | struct xfs_inode *pdp); |
| 70 | extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp, | 72 | extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp, |
| 71 | char *name, int namelen, xfs_ino_t inum, | 73 | struct xfs_name *name, xfs_ino_t inum, |
| 72 | xfs_fsblock_t *first, | 74 | xfs_fsblock_t *first, |
| 73 | struct xfs_bmap_free *flist, xfs_extlen_t tot); | 75 | struct xfs_bmap_free *flist, xfs_extlen_t tot); |
| 74 | extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, | 76 | extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, |
| 75 | char *name, int namelen, xfs_ino_t *inum); | 77 | struct xfs_name *name, xfs_ino_t *inum); |
| 76 | extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, | 78 | extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, |
| 77 | char *name, int namelen, xfs_ino_t ino, | 79 | struct xfs_name *name, xfs_ino_t ino, |
| 78 | xfs_fsblock_t *first, | 80 | xfs_fsblock_t *first, |
| 79 | struct xfs_bmap_free *flist, xfs_extlen_t tot); | 81 | struct xfs_bmap_free *flist, xfs_extlen_t tot); |
| 80 | extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, | 82 | extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, |
| 81 | char *name, int namelen, xfs_ino_t inum, | 83 | struct xfs_name *name, xfs_ino_t inum, |
| 82 | xfs_fsblock_t *first, | 84 | xfs_fsblock_t *first, |
| 83 | struct xfs_bmap_free *flist, xfs_extlen_t tot); | 85 | struct xfs_bmap_free *flist, xfs_extlen_t tot); |
| 84 | extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, | 86 | extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, |
| 85 | char *name, int namelen); | 87 | struct xfs_name *name, uint resblks); |
| 86 | extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); | 88 | extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino); |
| 87 | 89 | ||
| 88 | /* | 90 | /* |
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index eb03eab5ca52..3f3785b10804 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c | |||
| @@ -73,7 +73,7 @@ xfs_filestreams_trace( | |||
| 73 | #define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0) | 73 | #define TRACE4(mp,t,a0,a1,a2,a3) TRACE6(mp,t,a0,a1,a2,a3,0,0) |
| 74 | #define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0) | 74 | #define TRACE5(mp,t,a0,a1,a2,a3,a4) TRACE6(mp,t,a0,a1,a2,a3,a4,0) |
| 75 | #define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \ | 75 | #define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \ |
| 76 | xfs_filestreams_trace(mp, t, __FUNCTION__, __LINE__, \ | 76 | xfs_filestreams_trace(mp, t, __func__, __LINE__, \ |
| 77 | (__psunsigned_t)a0, (__psunsigned_t)a1, \ | 77 | (__psunsigned_t)a0, (__psunsigned_t)a1, \ |
| 78 | (__psunsigned_t)a2, (__psunsigned_t)a3, \ | 78 | (__psunsigned_t)a2, (__psunsigned_t)a3, \ |
| 79 | (__psunsigned_t)a4, (__psunsigned_t)a5) | 79 | (__psunsigned_t)a4, (__psunsigned_t)a5) |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 5a146cb22980..a64dfbd565a5 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
| @@ -107,6 +107,16 @@ xfs_ialloc_log_di( | |||
| 107 | /* | 107 | /* |
| 108 | * Allocation group level functions. | 108 | * Allocation group level functions. |
| 109 | */ | 109 | */ |
| 110 | static inline int | ||
| 111 | xfs_ialloc_cluster_alignment( | ||
| 112 | xfs_alloc_arg_t *args) | ||
| 113 | { | ||
| 114 | if (xfs_sb_version_hasalign(&args->mp->m_sb) && | ||
| 115 | args->mp->m_sb.sb_inoalignmt >= | ||
| 116 | XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp))) | ||
| 117 | return args->mp->m_sb.sb_inoalignmt; | ||
| 118 | return 1; | ||
| 119 | } | ||
| 110 | 120 | ||
| 111 | /* | 121 | /* |
| 112 | * Allocate new inodes in the allocation group specified by agbp. | 122 | * Allocate new inodes in the allocation group specified by agbp. |
| @@ -167,10 +177,24 @@ xfs_ialloc_ag_alloc( | |||
| 167 | args.mod = args.total = args.wasdel = args.isfl = | 177 | args.mod = args.total = args.wasdel = args.isfl = |
| 168 | args.userdata = args.minalignslop = 0; | 178 | args.userdata = args.minalignslop = 0; |
| 169 | args.prod = 1; | 179 | args.prod = 1; |
| 170 | args.alignment = 1; | 180 | |
| 171 | /* | 181 | /* |
| 172 | * Allow space for the inode btree to split. | 182 | * We need to take into account alignment here to ensure that |
| 183 | * we don't modify the free list if we fail to have an exact | ||
| 184 | * block. If we don't have an exact match, and every oher | ||
| 185 | * attempt allocation attempt fails, we'll end up cancelling | ||
| 186 | * a dirty transaction and shutting down. | ||
| 187 | * | ||
| 188 | * For an exact allocation, alignment must be 1, | ||
| 189 | * however we need to take cluster alignment into account when | ||
| 190 | * fixing up the freelist. Use the minalignslop field to | ||
| 191 | * indicate that extra blocks might be required for alignment, | ||
| 192 | * but not to use them in the actual exact allocation. | ||
| 173 | */ | 193 | */ |
| 194 | args.alignment = 1; | ||
| 195 | args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1; | ||
| 196 | |||
| 197 | /* Allow space for the inode btree to split. */ | ||
| 174 | args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; | 198 | args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; |
| 175 | if ((error = xfs_alloc_vextent(&args))) | 199 | if ((error = xfs_alloc_vextent(&args))) |
| 176 | return error; | 200 | return error; |
| @@ -191,13 +215,8 @@ xfs_ialloc_ag_alloc( | |||
| 191 | ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); | 215 | ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); |
| 192 | args.alignment = args.mp->m_dalign; | 216 | args.alignment = args.mp->m_dalign; |
| 193 | isaligned = 1; | 217 | isaligned = 1; |
| 194 | } else if (xfs_sb_version_hasalign(&args.mp->m_sb) && | 218 | } else |
| 195 | args.mp->m_sb.sb_inoalignmt >= | 219 | args.alignment = xfs_ialloc_cluster_alignment(&args); |
| 196 | XFS_B_TO_FSBT(args.mp, | ||
| 197 | XFS_INODE_CLUSTER_SIZE(args.mp))) | ||
| 198 | args.alignment = args.mp->m_sb.sb_inoalignmt; | ||
| 199 | else | ||
| 200 | args.alignment = 1; | ||
| 201 | /* | 220 | /* |
| 202 | * Need to figure out where to allocate the inode blocks. | 221 | * Need to figure out where to allocate the inode blocks. |
| 203 | * Ideally they should be spaced out through the a.g. | 222 | * Ideally they should be spaced out through the a.g. |
| @@ -230,12 +249,7 @@ xfs_ialloc_ag_alloc( | |||
| 230 | args.agbno = be32_to_cpu(agi->agi_root); | 249 | args.agbno = be32_to_cpu(agi->agi_root); |
| 231 | args.fsbno = XFS_AGB_TO_FSB(args.mp, | 250 | args.fsbno = XFS_AGB_TO_FSB(args.mp, |
| 232 | be32_to_cpu(agi->agi_seqno), args.agbno); | 251 | be32_to_cpu(agi->agi_seqno), args.agbno); |
| 233 | if (xfs_sb_version_hasalign(&args.mp->m_sb) && | 252 | args.alignment = xfs_ialloc_cluster_alignment(&args); |
| 234 | args.mp->m_sb.sb_inoalignmt >= | ||
| 235 | XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp))) | ||
| 236 | args.alignment = args.mp->m_sb.sb_inoalignmt; | ||
| 237 | else | ||
| 238 | args.alignment = 1; | ||
| 239 | if ((error = xfs_alloc_vextent(&args))) | 253 | if ((error = xfs_alloc_vextent(&args))) |
| 240 | return error; | 254 | return error; |
| 241 | } | 255 | } |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 8e09b71f4104..e657c5128460 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
| @@ -78,7 +78,6 @@ xfs_iget_core( | |||
| 78 | xfs_inode_t *ip; | 78 | xfs_inode_t *ip; |
| 79 | xfs_inode_t *iq; | 79 | xfs_inode_t *iq; |
| 80 | int error; | 80 | int error; |
| 81 | xfs_icluster_t *icl, *new_icl = NULL; | ||
| 82 | unsigned long first_index, mask; | 81 | unsigned long first_index, mask; |
| 83 | xfs_perag_t *pag; | 82 | xfs_perag_t *pag; |
| 84 | xfs_agino_t agino; | 83 | xfs_agino_t agino; |
| @@ -229,11 +228,9 @@ finish_inode: | |||
| 229 | } | 228 | } |
| 230 | 229 | ||
| 231 | /* | 230 | /* |
| 232 | * This is a bit messy - we preallocate everything we _might_ | 231 | * Preload the radix tree so we can insert safely under the |
| 233 | * need before we pick up the ici lock. That way we don't have to | 232 | * write spinlock. |
| 234 | * juggle locks and go all the way back to the start. | ||
| 235 | */ | 233 | */ |
| 236 | new_icl = kmem_zone_alloc(xfs_icluster_zone, KM_SLEEP); | ||
| 237 | if (radix_tree_preload(GFP_KERNEL)) { | 234 | if (radix_tree_preload(GFP_KERNEL)) { |
| 238 | xfs_idestroy(ip); | 235 | xfs_idestroy(ip); |
| 239 | delay(1); | 236 | delay(1); |
| @@ -242,17 +239,6 @@ finish_inode: | |||
| 242 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | 239 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); |
| 243 | first_index = agino & mask; | 240 | first_index = agino & mask; |
| 244 | write_lock(&pag->pag_ici_lock); | 241 | write_lock(&pag->pag_ici_lock); |
| 245 | |||
| 246 | /* | ||
| 247 | * Find the cluster if it exists | ||
| 248 | */ | ||
| 249 | icl = NULL; | ||
| 250 | if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq, | ||
| 251 | first_index, 1)) { | ||
| 252 | if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) == first_index) | ||
| 253 | icl = iq->i_cluster; | ||
| 254 | } | ||
| 255 | |||
| 256 | /* | 242 | /* |
| 257 | * insert the new inode | 243 | * insert the new inode |
| 258 | */ | 244 | */ |
| @@ -267,30 +253,13 @@ finish_inode: | |||
| 267 | } | 253 | } |
| 268 | 254 | ||
| 269 | /* | 255 | /* |
| 270 | * These values _must_ be set before releasing ihlock! | 256 | * These values _must_ be set before releasing the radix tree lock! |
| 271 | */ | 257 | */ |
| 272 | ip->i_udquot = ip->i_gdquot = NULL; | 258 | ip->i_udquot = ip->i_gdquot = NULL; |
| 273 | xfs_iflags_set(ip, XFS_INEW); | 259 | xfs_iflags_set(ip, XFS_INEW); |
| 274 | 260 | ||
| 275 | ASSERT(ip->i_cluster == NULL); | ||
| 276 | |||
| 277 | if (!icl) { | ||
| 278 | spin_lock_init(&new_icl->icl_lock); | ||
| 279 | INIT_HLIST_HEAD(&new_icl->icl_inodes); | ||
| 280 | icl = new_icl; | ||
| 281 | new_icl = NULL; | ||
| 282 | } else { | ||
| 283 | ASSERT(!hlist_empty(&icl->icl_inodes)); | ||
| 284 | } | ||
| 285 | spin_lock(&icl->icl_lock); | ||
| 286 | hlist_add_head(&ip->i_cnode, &icl->icl_inodes); | ||
| 287 | ip->i_cluster = icl; | ||
| 288 | spin_unlock(&icl->icl_lock); | ||
| 289 | |||
| 290 | write_unlock(&pag->pag_ici_lock); | 261 | write_unlock(&pag->pag_ici_lock); |
| 291 | radix_tree_preload_end(); | 262 | radix_tree_preload_end(); |
| 292 | if (new_icl) | ||
| 293 | kmem_zone_free(xfs_icluster_zone, new_icl); | ||
| 294 | 263 | ||
| 295 | /* | 264 | /* |
| 296 | * Link ip to its mount and thread it on the mount's inode list. | 265 | * Link ip to its mount and thread it on the mount's inode list. |
| @@ -529,18 +498,6 @@ xfs_iextract( | |||
| 529 | xfs_put_perag(mp, pag); | 498 | xfs_put_perag(mp, pag); |
| 530 | 499 | ||
| 531 | /* | 500 | /* |
| 532 | * Remove from cluster list | ||
| 533 | */ | ||
| 534 | mp = ip->i_mount; | ||
| 535 | spin_lock(&ip->i_cluster->icl_lock); | ||
| 536 | hlist_del(&ip->i_cnode); | ||
| 537 | spin_unlock(&ip->i_cluster->icl_lock); | ||
| 538 | |||
| 539 | /* was last inode in cluster? */ | ||
| 540 | if (hlist_empty(&ip->i_cluster->icl_inodes)) | ||
| 541 | kmem_zone_free(xfs_icluster_zone, ip->i_cluster); | ||
| 542 | |||
| 543 | /* | ||
| 544 | * Remove from mount's inode list. | 501 | * Remove from mount's inode list. |
| 545 | */ | 502 | */ |
| 546 | XFS_MOUNT_ILOCK(mp); | 503 | XFS_MOUNT_ILOCK(mp); |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index f43a6e01d68f..ca12acb90394 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
| @@ -55,7 +55,6 @@ | |||
| 55 | 55 | ||
| 56 | kmem_zone_t *xfs_ifork_zone; | 56 | kmem_zone_t *xfs_ifork_zone; |
| 57 | kmem_zone_t *xfs_inode_zone; | 57 | kmem_zone_t *xfs_inode_zone; |
| 58 | kmem_zone_t *xfs_icluster_zone; | ||
| 59 | 58 | ||
| 60 | /* | 59 | /* |
| 61 | * Used in xfs_itruncate(). This is the maximum number of extents | 60 | * Used in xfs_itruncate(). This is the maximum number of extents |
| @@ -126,6 +125,90 @@ xfs_inobp_check( | |||
| 126 | #endif | 125 | #endif |
| 127 | 126 | ||
| 128 | /* | 127 | /* |
| 128 | * Find the buffer associated with the given inode map | ||
| 129 | * We do basic validation checks on the buffer once it has been | ||
| 130 | * retrieved from disk. | ||
| 131 | */ | ||
| 132 | STATIC int | ||
| 133 | xfs_imap_to_bp( | ||
| 134 | xfs_mount_t *mp, | ||
| 135 | xfs_trans_t *tp, | ||
| 136 | xfs_imap_t *imap, | ||
| 137 | xfs_buf_t **bpp, | ||
| 138 | uint buf_flags, | ||
| 139 | uint imap_flags) | ||
| 140 | { | ||
| 141 | int error; | ||
| 142 | int i; | ||
| 143 | int ni; | ||
| 144 | xfs_buf_t *bp; | ||
| 145 | |||
| 146 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, | ||
| 147 | (int)imap->im_len, buf_flags, &bp); | ||
| 148 | if (error) { | ||
| 149 | if (error != EAGAIN) { | ||
| 150 | cmn_err(CE_WARN, | ||
| 151 | "xfs_imap_to_bp: xfs_trans_read_buf()returned " | ||
| 152 | "an error %d on %s. Returning error.", | ||
| 153 | error, mp->m_fsname); | ||
| 154 | } else { | ||
| 155 | ASSERT(buf_flags & XFS_BUF_TRYLOCK); | ||
| 156 | } | ||
| 157 | return error; | ||
| 158 | } | ||
| 159 | |||
| 160 | /* | ||
| 161 | * Validate the magic number and version of every inode in the buffer | ||
| 162 | * (if DEBUG kernel) or the first inode in the buffer, otherwise. | ||
| 163 | */ | ||
| 164 | #ifdef DEBUG | ||
| 165 | ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog; | ||
| 166 | #else /* usual case */ | ||
| 167 | ni = 1; | ||
| 168 | #endif | ||
| 169 | |||
| 170 | for (i = 0; i < ni; i++) { | ||
| 171 | int di_ok; | ||
| 172 | xfs_dinode_t *dip; | ||
| 173 | |||
| 174 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, | ||
| 175 | (i << mp->m_sb.sb_inodelog)); | ||
| 176 | di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && | ||
| 177 | XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); | ||
| 178 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, | ||
| 179 | XFS_ERRTAG_ITOBP_INOTOBP, | ||
| 180 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
| 181 | if (imap_flags & XFS_IMAP_BULKSTAT) { | ||
| 182 | xfs_trans_brelse(tp, bp); | ||
| 183 | return XFS_ERROR(EINVAL); | ||
| 184 | } | ||
| 185 | XFS_CORRUPTION_ERROR("xfs_imap_to_bp", | ||
| 186 | XFS_ERRLEVEL_HIGH, mp, dip); | ||
| 187 | #ifdef DEBUG | ||
| 188 | cmn_err(CE_PANIC, | ||
| 189 | "Device %s - bad inode magic/vsn " | ||
| 190 | "daddr %lld #%d (magic=%x)", | ||
| 191 | XFS_BUFTARG_NAME(mp->m_ddev_targp), | ||
| 192 | (unsigned long long)imap->im_blkno, i, | ||
| 193 | be16_to_cpu(dip->di_core.di_magic)); | ||
| 194 | #endif | ||
| 195 | xfs_trans_brelse(tp, bp); | ||
| 196 | return XFS_ERROR(EFSCORRUPTED); | ||
| 197 | } | ||
| 198 | } | ||
| 199 | |||
| 200 | xfs_inobp_check(mp, bp); | ||
| 201 | |||
| 202 | /* | ||
| 203 | * Mark the buffer as an inode buffer now that it looks good | ||
| 204 | */ | ||
| 205 | XFS_BUF_SET_VTYPE(bp, B_FS_INO); | ||
| 206 | |||
| 207 | *bpp = bp; | ||
| 208 | return 0; | ||
| 209 | } | ||
| 210 | |||
| 211 | /* | ||
| 129 | * This routine is called to map an inode number within a file | 212 | * This routine is called to map an inode number within a file |
| 130 | * system to the buffer containing the on-disk version of the | 213 | * system to the buffer containing the on-disk version of the |
| 131 | * inode. It returns a pointer to the buffer containing the | 214 | * inode. It returns a pointer to the buffer containing the |
| @@ -147,72 +230,19 @@ xfs_inotobp( | |||
| 147 | xfs_buf_t **bpp, | 230 | xfs_buf_t **bpp, |
| 148 | int *offset) | 231 | int *offset) |
| 149 | { | 232 | { |
| 150 | int di_ok; | ||
| 151 | xfs_imap_t imap; | 233 | xfs_imap_t imap; |
| 152 | xfs_buf_t *bp; | 234 | xfs_buf_t *bp; |
| 153 | int error; | 235 | int error; |
| 154 | xfs_dinode_t *dip; | ||
| 155 | 236 | ||
| 156 | /* | ||
| 157 | * Call the space management code to find the location of the | ||
| 158 | * inode on disk. | ||
| 159 | */ | ||
| 160 | imap.im_blkno = 0; | 237 | imap.im_blkno = 0; |
| 161 | error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); | 238 | error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); |
| 162 | if (error != 0) { | 239 | if (error) |
| 163 | cmn_err(CE_WARN, | ||
| 164 | "xfs_inotobp: xfs_imap() returned an " | ||
| 165 | "error %d on %s. Returning error.", error, mp->m_fsname); | ||
| 166 | return error; | 240 | return error; |
| 167 | } | ||
| 168 | 241 | ||
| 169 | /* | 242 | error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0); |
| 170 | * If the inode number maps to a block outside the bounds of the | 243 | if (error) |
| 171 | * file system then return NULL rather than calling read_buf | ||
| 172 | * and panicing when we get an error from the driver. | ||
| 173 | */ | ||
| 174 | if ((imap.im_blkno + imap.im_len) > | ||
| 175 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { | ||
| 176 | cmn_err(CE_WARN, | ||
| 177 | "xfs_inotobp: inode number (%llu + %d) maps to a block outside the bounds " | ||
| 178 | "of the file system %s. Returning EINVAL.", | ||
| 179 | (unsigned long long)imap.im_blkno, | ||
| 180 | imap.im_len, mp->m_fsname); | ||
| 181 | return XFS_ERROR(EINVAL); | ||
| 182 | } | ||
| 183 | |||
| 184 | /* | ||
| 185 | * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will | ||
| 186 | * default to just a read_buf() call. | ||
| 187 | */ | ||
| 188 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, | ||
| 189 | (int)imap.im_len, XFS_BUF_LOCK, &bp); | ||
| 190 | |||
| 191 | if (error) { | ||
| 192 | cmn_err(CE_WARN, | ||
| 193 | "xfs_inotobp: xfs_trans_read_buf() returned an " | ||
| 194 | "error %d on %s. Returning error.", error, mp->m_fsname); | ||
| 195 | return error; | 244 | return error; |
| 196 | } | ||
| 197 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0); | ||
| 198 | di_ok = | ||
| 199 | be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && | ||
| 200 | XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); | ||
| 201 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, | ||
| 202 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
| 203 | XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip); | ||
| 204 | xfs_trans_brelse(tp, bp); | ||
| 205 | cmn_err(CE_WARN, | ||
| 206 | "xfs_inotobp: XFS_TEST_ERROR() returned an " | ||
| 207 | "error on %s. Returning EFSCORRUPTED.", mp->m_fsname); | ||
| 208 | return XFS_ERROR(EFSCORRUPTED); | ||
| 209 | } | ||
| 210 | 245 | ||
| 211 | xfs_inobp_check(mp, bp); | ||
| 212 | |||
| 213 | /* | ||
| 214 | * Set *dipp to point to the on-disk inode in the buffer. | ||
| 215 | */ | ||
| 216 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); | 246 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); |
| 217 | *bpp = bp; | 247 | *bpp = bp; |
| 218 | *offset = imap.im_boffset; | 248 | *offset = imap.im_boffset; |
| @@ -248,46 +278,21 @@ xfs_itobp( | |||
| 248 | xfs_dinode_t **dipp, | 278 | xfs_dinode_t **dipp, |
| 249 | xfs_buf_t **bpp, | 279 | xfs_buf_t **bpp, |
| 250 | xfs_daddr_t bno, | 280 | xfs_daddr_t bno, |
| 251 | uint imap_flags) | 281 | uint imap_flags, |
| 282 | uint buf_flags) | ||
| 252 | { | 283 | { |
| 253 | xfs_imap_t imap; | 284 | xfs_imap_t imap; |
| 254 | xfs_buf_t *bp; | 285 | xfs_buf_t *bp; |
| 255 | int error; | 286 | int error; |
| 256 | int i; | ||
| 257 | int ni; | ||
| 258 | 287 | ||
| 259 | if (ip->i_blkno == (xfs_daddr_t)0) { | 288 | if (ip->i_blkno == (xfs_daddr_t)0) { |
| 260 | /* | ||
| 261 | * Call the space management code to find the location of the | ||
| 262 | * inode on disk. | ||
| 263 | */ | ||
| 264 | imap.im_blkno = bno; | 289 | imap.im_blkno = bno; |
| 265 | if ((error = xfs_imap(mp, tp, ip->i_ino, &imap, | 290 | error = xfs_imap(mp, tp, ip->i_ino, &imap, |
| 266 | XFS_IMAP_LOOKUP | imap_flags))) | 291 | XFS_IMAP_LOOKUP | imap_flags); |
| 292 | if (error) | ||
| 267 | return error; | 293 | return error; |
| 268 | 294 | ||
| 269 | /* | 295 | /* |
| 270 | * If the inode number maps to a block outside the bounds | ||
| 271 | * of the file system then return NULL rather than calling | ||
| 272 | * read_buf and panicing when we get an error from the | ||
| 273 | * driver. | ||
| 274 | */ | ||
| 275 | if ((imap.im_blkno + imap.im_len) > | ||
| 276 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { | ||
| 277 | #ifdef DEBUG | ||
| 278 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " | ||
| 279 | "(imap.im_blkno (0x%llx) " | ||
| 280 | "+ imap.im_len (0x%llx)) > " | ||
| 281 | " XFS_FSB_TO_BB(mp, " | ||
| 282 | "mp->m_sb.sb_dblocks) (0x%llx)", | ||
| 283 | (unsigned long long) imap.im_blkno, | ||
| 284 | (unsigned long long) imap.im_len, | ||
| 285 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); | ||
| 286 | #endif /* DEBUG */ | ||
| 287 | return XFS_ERROR(EINVAL); | ||
| 288 | } | ||
| 289 | |||
| 290 | /* | ||
| 291 | * Fill in the fields in the inode that will be used to | 296 | * Fill in the fields in the inode that will be used to |
| 292 | * map the inode to its buffer from now on. | 297 | * map the inode to its buffer from now on. |
| 293 | */ | 298 | */ |
| @@ -305,76 +310,17 @@ xfs_itobp( | |||
| 305 | } | 310 | } |
| 306 | ASSERT(bno == 0 || bno == imap.im_blkno); | 311 | ASSERT(bno == 0 || bno == imap.im_blkno); |
| 307 | 312 | ||
| 308 | /* | 313 | error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags); |
| 309 | * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will | 314 | if (error) |
| 310 | * default to just a read_buf() call. | ||
| 311 | */ | ||
| 312 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, | ||
| 313 | (int)imap.im_len, XFS_BUF_LOCK, &bp); | ||
| 314 | if (error) { | ||
| 315 | #ifdef DEBUG | ||
| 316 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " | ||
| 317 | "xfs_trans_read_buf() returned error %d, " | ||
| 318 | "imap.im_blkno 0x%llx, imap.im_len 0x%llx", | ||
| 319 | error, (unsigned long long) imap.im_blkno, | ||
| 320 | (unsigned long long) imap.im_len); | ||
| 321 | #endif /* DEBUG */ | ||
| 322 | return error; | 315 | return error; |
| 323 | } | ||
| 324 | |||
| 325 | /* | ||
| 326 | * Validate the magic number and version of every inode in the buffer | ||
| 327 | * (if DEBUG kernel) or the first inode in the buffer, otherwise. | ||
| 328 | * No validation is done here in userspace (xfs_repair). | ||
| 329 | */ | ||
| 330 | #if !defined(__KERNEL__) | ||
| 331 | ni = 0; | ||
| 332 | #elif defined(DEBUG) | ||
| 333 | ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog; | ||
| 334 | #else /* usual case */ | ||
| 335 | ni = 1; | ||
| 336 | #endif | ||
| 337 | |||
| 338 | for (i = 0; i < ni; i++) { | ||
| 339 | int di_ok; | ||
| 340 | xfs_dinode_t *dip; | ||
| 341 | 316 | ||
| 342 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, | 317 | if (!bp) { |
| 343 | (i << mp->m_sb.sb_inodelog)); | 318 | ASSERT(buf_flags & XFS_BUF_TRYLOCK); |
| 344 | di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && | 319 | ASSERT(tp == NULL); |
| 345 | XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); | 320 | *bpp = NULL; |
| 346 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, | 321 | return EAGAIN; |
| 347 | XFS_ERRTAG_ITOBP_INOTOBP, | ||
| 348 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
| 349 | if (imap_flags & XFS_IMAP_BULKSTAT) { | ||
| 350 | xfs_trans_brelse(tp, bp); | ||
| 351 | return XFS_ERROR(EINVAL); | ||
| 352 | } | ||
| 353 | #ifdef DEBUG | ||
| 354 | cmn_err(CE_ALERT, | ||
| 355 | "Device %s - bad inode magic/vsn " | ||
| 356 | "daddr %lld #%d (magic=%x)", | ||
| 357 | XFS_BUFTARG_NAME(mp->m_ddev_targp), | ||
| 358 | (unsigned long long)imap.im_blkno, i, | ||
| 359 | be16_to_cpu(dip->di_core.di_magic)); | ||
| 360 | #endif | ||
| 361 | XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH, | ||
| 362 | mp, dip); | ||
| 363 | xfs_trans_brelse(tp, bp); | ||
| 364 | return XFS_ERROR(EFSCORRUPTED); | ||
| 365 | } | ||
| 366 | } | 322 | } |
| 367 | 323 | ||
| 368 | xfs_inobp_check(mp, bp); | ||
| 369 | |||
| 370 | /* | ||
| 371 | * Mark the buffer as an inode buffer now that it looks good | ||
| 372 | */ | ||
| 373 | XFS_BUF_SET_VTYPE(bp, B_FS_INO); | ||
| 374 | |||
| 375 | /* | ||
| 376 | * Set *dipp to point to the on-disk inode in the buffer. | ||
| 377 | */ | ||
| 378 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); | 324 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); |
| 379 | *bpp = bp; | 325 | *bpp = bp; |
| 380 | return 0; | 326 | return 0; |
| @@ -878,7 +824,7 @@ xfs_iread( | |||
| 878 | * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will | 824 | * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will |
| 879 | * know that this is a new incore inode. | 825 | * know that this is a new incore inode. |
| 880 | */ | 826 | */ |
| 881 | error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags); | 827 | error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK); |
| 882 | if (error) { | 828 | if (error) { |
| 883 | kmem_zone_free(xfs_inode_zone, ip); | 829 | kmem_zone_free(xfs_inode_zone, ip); |
| 884 | return error; | 830 | return error; |
| @@ -1518,51 +1464,50 @@ xfs_itruncate_start( | |||
| 1518 | } | 1464 | } |
| 1519 | 1465 | ||
| 1520 | /* | 1466 | /* |
| 1521 | * Shrink the file to the given new_size. The new | 1467 | * Shrink the file to the given new_size. The new size must be smaller than |
| 1522 | * size must be smaller than the current size. | 1468 | * the current size. This will free up the underlying blocks in the removed |
| 1523 | * This will free up the underlying blocks | 1469 | * range after a call to xfs_itruncate_start() or xfs_atruncate_start(). |
| 1524 | * in the removed range after a call to xfs_itruncate_start() | ||
| 1525 | * or xfs_atruncate_start(). | ||
| 1526 | * | 1470 | * |
| 1527 | * The transaction passed to this routine must have made | 1471 | * The transaction passed to this routine must have made a permanent log |
| 1528 | * a permanent log reservation of at least XFS_ITRUNCATE_LOG_RES. | 1472 | * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the |
| 1529 | * This routine may commit the given transaction and | 1473 | * given transaction and start new ones, so make sure everything involved in |
| 1530 | * start new ones, so make sure everything involved in | 1474 | * the transaction is tidy before calling here. Some transaction will be |
| 1531 | * the transaction is tidy before calling here. | 1475 | * returned to the caller to be committed. The incoming transaction must |
| 1532 | * Some transaction will be returned to the caller to be | 1476 | * already include the inode, and both inode locks must be held exclusively. |
| 1533 | * committed. The incoming transaction must already include | 1477 | * The inode must also be "held" within the transaction. On return the inode |
| 1534 | * the inode, and both inode locks must be held exclusively. | 1478 | * will be "held" within the returned transaction. This routine does NOT |
| 1535 | * The inode must also be "held" within the transaction. On | 1479 | * require any disk space to be reserved for it within the transaction. |
| 1536 | * return the inode will be "held" within the returned transaction. | ||
| 1537 | * This routine does NOT require any disk space to be reserved | ||
| 1538 | * for it within the transaction. | ||
| 1539 | * | 1480 | * |
| 1540 | * The fork parameter must be either xfs_attr_fork or xfs_data_fork, | 1481 | * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it |
| 1541 | * and it indicates the fork which is to be truncated. For the | 1482 | * indicates the fork which is to be truncated. For the attribute fork we only |
| 1542 | * attribute fork we only support truncation to size 0. | 1483 | * support truncation to size 0. |
| 1543 | * | 1484 | * |
| 1544 | * We use the sync parameter to indicate whether or not the first | 1485 | * We use the sync parameter to indicate whether or not the first transaction |
| 1545 | * transaction we perform might have to be synchronous. For the attr fork, | 1486 | * we perform might have to be synchronous. For the attr fork, it needs to be |
| 1546 | * it needs to be so if the unlink of the inode is not yet known to be | 1487 | * so if the unlink of the inode is not yet known to be permanent in the log. |
| 1547 | * permanent in the log. This keeps us from freeing and reusing the | 1488 | * This keeps us from freeing and reusing the blocks of the attribute fork |
| 1548 | * blocks of the attribute fork before the unlink of the inode becomes | 1489 | * before the unlink of the inode becomes permanent. |
| 1549 | * permanent. | ||
| 1550 | * | 1490 | * |
| 1551 | * For the data fork, we normally have to run synchronously if we're | 1491 | * For the data fork, we normally have to run synchronously if we're being |
| 1552 | * being called out of the inactive path or we're being called | 1492 | * called out of the inactive path or we're being called out of the create path |
| 1553 | * out of the create path where we're truncating an existing file. | 1493 | * where we're truncating an existing file. Either way, the truncate needs to |
| 1554 | * Either way, the truncate needs to be sync so blocks don't reappear | 1494 | * be sync so blocks don't reappear in the file with altered data in case of a |
| 1555 | * in the file with altered data in case of a crash. wsync filesystems | 1495 | * crash. wsync filesystems can run the first case async because anything that |
| 1556 | * can run the first case async because anything that shrinks the inode | 1496 | * shrinks the inode has to run sync so by the time we're called here from |
| 1557 | * has to run sync so by the time we're called here from inactive, the | 1497 | * inactive, the inode size is permanently set to 0. |
| 1558 | * inode size is permanently set to 0. | ||
| 1559 | * | 1498 | * |
| 1560 | * Calls from the truncate path always need to be sync unless we're | 1499 | * Calls from the truncate path always need to be sync unless we're in a wsync |
| 1561 | * in a wsync filesystem and the file has already been unlinked. | 1500 | * filesystem and the file has already been unlinked. |
| 1562 | * | 1501 | * |
| 1563 | * The caller is responsible for correctly setting the sync parameter. | 1502 | * The caller is responsible for correctly setting the sync parameter. It gets |
| 1564 | * It gets too hard for us to guess here which path we're being called | 1503 | * too hard for us to guess here which path we're being called out of just |
| 1565 | * out of just based on inode state. | 1504 | * based on inode state. |
| 1505 | * | ||
| 1506 | * If we get an error, we must return with the inode locked and linked into the | ||
| 1507 | * current transaction. This keeps things simple for the higher level code, | ||
| 1508 | * because it always knows that the inode is locked and held in the transaction | ||
| 1509 | * that returns to it whether errors occur or not. We don't mark the inode | ||
| 1510 | * dirty on error so that transactions can be easily aborted if possible. | ||
| 1566 | */ | 1511 | */ |
| 1567 | int | 1512 | int |
| 1568 | xfs_itruncate_finish( | 1513 | xfs_itruncate_finish( |
| @@ -1741,65 +1686,51 @@ xfs_itruncate_finish( | |||
| 1741 | */ | 1686 | */ |
| 1742 | error = xfs_bmap_finish(tp, &free_list, &committed); | 1687 | error = xfs_bmap_finish(tp, &free_list, &committed); |
| 1743 | ntp = *tp; | 1688 | ntp = *tp; |
| 1689 | if (committed) { | ||
| 1690 | /* link the inode into the next xact in the chain */ | ||
| 1691 | xfs_trans_ijoin(ntp, ip, | ||
| 1692 | XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
| 1693 | xfs_trans_ihold(ntp, ip); | ||
| 1694 | } | ||
| 1695 | |||
| 1744 | if (error) { | 1696 | if (error) { |
| 1745 | /* | 1697 | /* |
| 1746 | * If the bmap finish call encounters an error, | 1698 | * If the bmap finish call encounters an error, return |
| 1747 | * return to the caller where the transaction | 1699 | * to the caller where the transaction can be properly |
| 1748 | * can be properly aborted. We just need to | 1700 | * aborted. We just need to make sure we're not |
| 1749 | * make sure we're not holding any resources | 1701 | * holding any resources that we were not when we came |
| 1750 | * that we were not when we came in. | 1702 | * in. |
| 1751 | * | 1703 | * |
| 1752 | * Aborting from this point might lose some | 1704 | * Aborting from this point might lose some blocks in |
| 1753 | * blocks in the file system, but oh well. | 1705 | * the file system, but oh well. |
| 1754 | */ | 1706 | */ |
| 1755 | xfs_bmap_cancel(&free_list); | 1707 | xfs_bmap_cancel(&free_list); |
| 1756 | if (committed) { | ||
| 1757 | /* | ||
| 1758 | * If the passed in transaction committed | ||
| 1759 | * in xfs_bmap_finish(), then we want to | ||
| 1760 | * add the inode to this one before returning. | ||
| 1761 | * This keeps things simple for the higher | ||
| 1762 | * level code, because it always knows that | ||
| 1763 | * the inode is locked and held in the | ||
| 1764 | * transaction that returns to it whether | ||
| 1765 | * errors occur or not. We don't mark the | ||
| 1766 | * inode dirty so that this transaction can | ||
| 1767 | * be easily aborted if possible. | ||
| 1768 | */ | ||
| 1769 | xfs_trans_ijoin(ntp, ip, | ||
| 1770 | XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
| 1771 | xfs_trans_ihold(ntp, ip); | ||
| 1772 | } | ||
| 1773 | return error; | 1708 | return error; |
| 1774 | } | 1709 | } |
| 1775 | 1710 | ||
| 1776 | if (committed) { | 1711 | if (committed) { |
| 1777 | /* | 1712 | /* |
| 1778 | * The first xact was committed, | 1713 | * Mark the inode dirty so it will be logged and |
| 1779 | * so add the inode to the new one. | 1714 | * moved forward in the log as part of every commit. |
| 1780 | * Mark it dirty so it will be logged | ||
| 1781 | * and moved forward in the log as | ||
| 1782 | * part of every commit. | ||
| 1783 | */ | 1715 | */ |
| 1784 | xfs_trans_ijoin(ntp, ip, | ||
| 1785 | XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
| 1786 | xfs_trans_ihold(ntp, ip); | ||
| 1787 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); | 1716 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); |
| 1788 | } | 1717 | } |
| 1718 | |||
| 1789 | ntp = xfs_trans_dup(ntp); | 1719 | ntp = xfs_trans_dup(ntp); |
| 1790 | (void) xfs_trans_commit(*tp, 0); | 1720 | error = xfs_trans_commit(*tp, 0); |
| 1791 | *tp = ntp; | 1721 | *tp = ntp; |
| 1792 | error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, | 1722 | |
| 1793 | XFS_TRANS_PERM_LOG_RES, | 1723 | /* link the inode into the next transaction in the chain */ |
| 1794 | XFS_ITRUNCATE_LOG_COUNT); | ||
| 1795 | /* | ||
| 1796 | * Add the inode being truncated to the next chained | ||
| 1797 | * transaction. | ||
| 1798 | */ | ||
| 1799 | xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 1724 | xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
| 1800 | xfs_trans_ihold(ntp, ip); | 1725 | xfs_trans_ihold(ntp, ip); |
| 1726 | |||
| 1727 | if (!error) | ||
| 1728 | error = xfs_trans_reserve(ntp, 0, | ||
| 1729 | XFS_ITRUNCATE_LOG_RES(mp), 0, | ||
| 1730 | XFS_TRANS_PERM_LOG_RES, | ||
| 1731 | XFS_ITRUNCATE_LOG_COUNT); | ||
| 1801 | if (error) | 1732 | if (error) |
| 1802 | return (error); | 1733 | return error; |
| 1803 | } | 1734 | } |
| 1804 | /* | 1735 | /* |
| 1805 | * Only update the size in the case of the data fork, but | 1736 | * Only update the size in the case of the data fork, but |
| @@ -1967,7 +1898,7 @@ xfs_iunlink( | |||
| 1967 | * Here we put the head pointer into our next pointer, | 1898 | * Here we put the head pointer into our next pointer, |
| 1968 | * and then we fall through to point the head at us. | 1899 | * and then we fall through to point the head at us. |
| 1969 | */ | 1900 | */ |
| 1970 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); | 1901 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); |
| 1971 | if (error) | 1902 | if (error) |
| 1972 | return error; | 1903 | return error; |
| 1973 | 1904 | ||
| @@ -2075,7 +2006,7 @@ xfs_iunlink_remove( | |||
| 2075 | * of dealing with the buffer when there is no need to | 2006 | * of dealing with the buffer when there is no need to |
| 2076 | * change it. | 2007 | * change it. |
| 2077 | */ | 2008 | */ |
| 2078 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); | 2009 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); |
| 2079 | if (error) { | 2010 | if (error) { |
| 2080 | cmn_err(CE_WARN, | 2011 | cmn_err(CE_WARN, |
| 2081 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", | 2012 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", |
| @@ -2137,7 +2068,7 @@ xfs_iunlink_remove( | |||
| 2137 | * Now last_ibp points to the buffer previous to us on | 2068 | * Now last_ibp points to the buffer previous to us on |
| 2138 | * the unlinked list. Pull us from the list. | 2069 | * the unlinked list. Pull us from the list. |
| 2139 | */ | 2070 | */ |
| 2140 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); | 2071 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); |
| 2141 | if (error) { | 2072 | if (error) { |
| 2142 | cmn_err(CE_WARN, | 2073 | cmn_err(CE_WARN, |
| 2143 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", | 2074 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", |
| @@ -2172,13 +2103,6 @@ xfs_iunlink_remove( | |||
| 2172 | return 0; | 2103 | return 0; |
| 2173 | } | 2104 | } |
| 2174 | 2105 | ||
| 2175 | STATIC_INLINE int xfs_inode_clean(xfs_inode_t *ip) | ||
| 2176 | { | ||
| 2177 | return (((ip->i_itemp == NULL) || | ||
| 2178 | !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && | ||
| 2179 | (ip->i_update_core == 0)); | ||
| 2180 | } | ||
| 2181 | |||
| 2182 | STATIC void | 2106 | STATIC void |
| 2183 | xfs_ifree_cluster( | 2107 | xfs_ifree_cluster( |
| 2184 | xfs_inode_t *free_ip, | 2108 | xfs_inode_t *free_ip, |
| @@ -2400,7 +2324,7 @@ xfs_ifree( | |||
| 2400 | 2324 | ||
| 2401 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 2325 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
| 2402 | 2326 | ||
| 2403 | error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0); | 2327 | error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); |
| 2404 | if (error) | 2328 | if (error) |
| 2405 | return error; | 2329 | return error; |
| 2406 | 2330 | ||
| @@ -2678,14 +2602,31 @@ xfs_imap( | |||
| 2678 | fsbno = imap->im_blkno ? | 2602 | fsbno = imap->im_blkno ? |
| 2679 | XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; | 2603 | XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; |
| 2680 | error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); | 2604 | error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); |
| 2681 | if (error != 0) { | 2605 | if (error) |
| 2682 | return error; | 2606 | return error; |
| 2683 | } | 2607 | |
| 2684 | imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); | 2608 | imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); |
| 2685 | imap->im_len = XFS_FSB_TO_BB(mp, len); | 2609 | imap->im_len = XFS_FSB_TO_BB(mp, len); |
| 2686 | imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); | 2610 | imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); |
| 2687 | imap->im_ioffset = (ushort)off; | 2611 | imap->im_ioffset = (ushort)off; |
| 2688 | imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); | 2612 | imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); |
| 2613 | |||
| 2614 | /* | ||
| 2615 | * If the inode number maps to a block outside the bounds | ||
| 2616 | * of the file system then return NULL rather than calling | ||
| 2617 | * read_buf and panicing when we get an error from the | ||
| 2618 | * driver. | ||
| 2619 | */ | ||
| 2620 | if ((imap->im_blkno + imap->im_len) > | ||
| 2621 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { | ||
| 2622 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | ||
| 2623 | "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " | ||
| 2624 | " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", | ||
| 2625 | (unsigned long long) imap->im_blkno, | ||
| 2626 | (unsigned long long) imap->im_len, | ||
| 2627 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); | ||
| 2628 | return EINVAL; | ||
| 2629 | } | ||
| 2689 | return 0; | 2630 | return 0; |
| 2690 | } | 2631 | } |
| 2691 | 2632 | ||
| @@ -2826,38 +2767,41 @@ xfs_iunpin( | |||
| 2826 | } | 2767 | } |
| 2827 | 2768 | ||
| 2828 | /* | 2769 | /* |
| 2829 | * This is called to wait for the given inode to be unpinned. | 2770 | * This is called to unpin an inode. It can be directed to wait or to return |
| 2830 | * It will sleep until this happens. The caller must have the | 2771 | * immediately without waiting for the inode to be unpinned. The caller must |
| 2831 | * inode locked in at least shared mode so that the buffer cannot | 2772 | * have the inode locked in at least shared mode so that the buffer cannot be |
| 2832 | * be subsequently pinned once someone is waiting for it to be | 2773 | * subsequently pinned once someone is waiting for it to be unpinned. |
| 2833 | * unpinned. | ||
| 2834 | */ | 2774 | */ |
| 2835 | STATIC void | 2775 | STATIC void |
| 2836 | xfs_iunpin_wait( | 2776 | __xfs_iunpin_wait( |
| 2837 | xfs_inode_t *ip) | 2777 | xfs_inode_t *ip, |
| 2778 | int wait) | ||
| 2838 | { | 2779 | { |
| 2839 | xfs_inode_log_item_t *iip; | 2780 | xfs_inode_log_item_t *iip = ip->i_itemp; |
| 2840 | xfs_lsn_t lsn; | ||
| 2841 | 2781 | ||
| 2842 | ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); | 2782 | ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); |
| 2843 | 2783 | if (atomic_read(&ip->i_pincount) == 0) | |
| 2844 | if (atomic_read(&ip->i_pincount) == 0) { | ||
| 2845 | return; | 2784 | return; |
| 2846 | } | ||
| 2847 | 2785 | ||
| 2848 | iip = ip->i_itemp; | 2786 | /* Give the log a push to start the unpinning I/O */ |
| 2849 | if (iip && iip->ili_last_lsn) { | 2787 | xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ? |
| 2850 | lsn = iip->ili_last_lsn; | 2788 | iip->ili_last_lsn : 0, XFS_LOG_FORCE); |
| 2851 | } else { | 2789 | if (wait) |
| 2852 | lsn = (xfs_lsn_t)0; | 2790 | wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); |
| 2853 | } | 2791 | } |
| 2854 | 2792 | ||
| 2855 | /* | 2793 | static inline void |
| 2856 | * Give the log a push so we don't wait here too long. | 2794 | xfs_iunpin_wait( |
| 2857 | */ | 2795 | xfs_inode_t *ip) |
| 2858 | xfs_log_force(ip->i_mount, lsn, XFS_LOG_FORCE); | 2796 | { |
| 2797 | __xfs_iunpin_wait(ip, 1); | ||
| 2798 | } | ||
| 2859 | 2799 | ||
| 2860 | wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); | 2800 | static inline void |
| 2801 | xfs_iunpin_nowait( | ||
| 2802 | xfs_inode_t *ip) | ||
| 2803 | { | ||
| 2804 | __xfs_iunpin_wait(ip, 0); | ||
| 2861 | } | 2805 | } |
| 2862 | 2806 | ||
| 2863 | 2807 | ||
| @@ -2932,7 +2876,7 @@ xfs_iextents_copy( | |||
| 2932 | * format indicates the current state of the fork. | 2876 | * format indicates the current state of the fork. |
| 2933 | */ | 2877 | */ |
| 2934 | /*ARGSUSED*/ | 2878 | /*ARGSUSED*/ |
| 2935 | STATIC int | 2879 | STATIC void |
| 2936 | xfs_iflush_fork( | 2880 | xfs_iflush_fork( |
| 2937 | xfs_inode_t *ip, | 2881 | xfs_inode_t *ip, |
| 2938 | xfs_dinode_t *dip, | 2882 | xfs_dinode_t *dip, |
| @@ -2953,16 +2897,16 @@ xfs_iflush_fork( | |||
| 2953 | static const short extflag[2] = | 2897 | static const short extflag[2] = |
| 2954 | { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; | 2898 | { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; |
| 2955 | 2899 | ||
| 2956 | if (iip == NULL) | 2900 | if (!iip) |
| 2957 | return 0; | 2901 | return; |
| 2958 | ifp = XFS_IFORK_PTR(ip, whichfork); | 2902 | ifp = XFS_IFORK_PTR(ip, whichfork); |
| 2959 | /* | 2903 | /* |
| 2960 | * This can happen if we gave up in iformat in an error path, | 2904 | * This can happen if we gave up in iformat in an error path, |
| 2961 | * for the attribute fork. | 2905 | * for the attribute fork. |
| 2962 | */ | 2906 | */ |
| 2963 | if (ifp == NULL) { | 2907 | if (!ifp) { |
| 2964 | ASSERT(whichfork == XFS_ATTR_FORK); | 2908 | ASSERT(whichfork == XFS_ATTR_FORK); |
| 2965 | return 0; | 2909 | return; |
| 2966 | } | 2910 | } |
| 2967 | cp = XFS_DFORK_PTR(dip, whichfork); | 2911 | cp = XFS_DFORK_PTR(dip, whichfork); |
| 2968 | mp = ip->i_mount; | 2912 | mp = ip->i_mount; |
| @@ -3023,8 +2967,145 @@ xfs_iflush_fork( | |||
| 3023 | ASSERT(0); | 2967 | ASSERT(0); |
| 3024 | break; | 2968 | break; |
| 3025 | } | 2969 | } |
| 2970 | } | ||
| 2971 | |||
| 2972 | STATIC int | ||
| 2973 | xfs_iflush_cluster( | ||
| 2974 | xfs_inode_t *ip, | ||
| 2975 | xfs_buf_t *bp) | ||
| 2976 | { | ||
| 2977 | xfs_mount_t *mp = ip->i_mount; | ||
| 2978 | xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); | ||
| 2979 | unsigned long first_index, mask; | ||
| 2980 | int ilist_size; | ||
| 2981 | xfs_inode_t **ilist; | ||
| 2982 | xfs_inode_t *iq; | ||
| 2983 | int nr_found; | ||
| 2984 | int clcount = 0; | ||
| 2985 | int bufwasdelwri; | ||
| 2986 | int i; | ||
| 2987 | |||
| 2988 | ASSERT(pag->pagi_inodeok); | ||
| 2989 | ASSERT(pag->pag_ici_init); | ||
| 2990 | |||
| 2991 | ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *); | ||
| 2992 | ilist = kmem_alloc(ilist_size, KM_MAYFAIL); | ||
| 2993 | if (!ilist) | ||
| 2994 | return 0; | ||
| 2995 | |||
| 2996 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | ||
| 2997 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; | ||
| 2998 | read_lock(&pag->pag_ici_lock); | ||
| 2999 | /* really need a gang lookup range call here */ | ||
| 3000 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, | ||
| 3001 | first_index, | ||
| 3002 | XFS_INODE_CLUSTER_SIZE(mp)); | ||
| 3003 | if (nr_found == 0) | ||
| 3004 | goto out_free; | ||
| 3005 | |||
| 3006 | for (i = 0; i < nr_found; i++) { | ||
| 3007 | iq = ilist[i]; | ||
| 3008 | if (iq == ip) | ||
| 3009 | continue; | ||
| 3010 | /* if the inode lies outside this cluster, we're done. */ | ||
| 3011 | if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) | ||
| 3012 | break; | ||
| 3013 | /* | ||
| 3014 | * Do an un-protected check to see if the inode is dirty and | ||
| 3015 | * is a candidate for flushing. These checks will be repeated | ||
| 3016 | * later after the appropriate locks are acquired. | ||
| 3017 | */ | ||
| 3018 | if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) | ||
| 3019 | continue; | ||
| 3020 | |||
| 3021 | /* | ||
| 3022 | * Try to get locks. If any are unavailable or it is pinned, | ||
| 3023 | * then this inode cannot be flushed and is skipped. | ||
| 3024 | */ | ||
| 3025 | |||
| 3026 | if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) | ||
| 3027 | continue; | ||
| 3028 | if (!xfs_iflock_nowait(iq)) { | ||
| 3029 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
| 3030 | continue; | ||
| 3031 | } | ||
| 3032 | if (xfs_ipincount(iq)) { | ||
| 3033 | xfs_ifunlock(iq); | ||
| 3034 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
| 3035 | continue; | ||
| 3036 | } | ||
| 3037 | |||
| 3038 | /* | ||
| 3039 | * arriving here means that this inode can be flushed. First | ||
| 3040 | * re-check that it's dirty before flushing. | ||
| 3041 | */ | ||
| 3042 | if (!xfs_inode_clean(iq)) { | ||
| 3043 | int error; | ||
| 3044 | error = xfs_iflush_int(iq, bp); | ||
| 3045 | if (error) { | ||
| 3046 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
| 3047 | goto cluster_corrupt_out; | ||
| 3048 | } | ||
| 3049 | clcount++; | ||
| 3050 | } else { | ||
| 3051 | xfs_ifunlock(iq); | ||
| 3052 | } | ||
| 3053 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
| 3054 | } | ||
| 3055 | |||
| 3056 | if (clcount) { | ||
| 3057 | XFS_STATS_INC(xs_icluster_flushcnt); | ||
| 3058 | XFS_STATS_ADD(xs_icluster_flushinode, clcount); | ||
| 3059 | } | ||
| 3026 | 3060 | ||
| 3061 | out_free: | ||
| 3062 | read_unlock(&pag->pag_ici_lock); | ||
| 3063 | kmem_free(ilist, ilist_size); | ||
| 3027 | return 0; | 3064 | return 0; |
| 3065 | |||
| 3066 | |||
| 3067 | cluster_corrupt_out: | ||
| 3068 | /* | ||
| 3069 | * Corruption detected in the clustering loop. Invalidate the | ||
| 3070 | * inode buffer and shut down the filesystem. | ||
| 3071 | */ | ||
| 3072 | read_unlock(&pag->pag_ici_lock); | ||
| 3073 | /* | ||
| 3074 | * Clean up the buffer. If it was B_DELWRI, just release it -- | ||
| 3075 | * brelse can handle it with no problems. If not, shut down the | ||
| 3076 | * filesystem before releasing the buffer. | ||
| 3077 | */ | ||
| 3078 | bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp); | ||
| 3079 | if (bufwasdelwri) | ||
| 3080 | xfs_buf_relse(bp); | ||
| 3081 | |||
| 3082 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
| 3083 | |||
| 3084 | if (!bufwasdelwri) { | ||
| 3085 | /* | ||
| 3086 | * Just like incore_relse: if we have b_iodone functions, | ||
| 3087 | * mark the buffer as an error and call them. Otherwise | ||
| 3088 | * mark it as stale and brelse. | ||
| 3089 | */ | ||
| 3090 | if (XFS_BUF_IODONE_FUNC(bp)) { | ||
| 3091 | XFS_BUF_CLR_BDSTRAT_FUNC(bp); | ||
| 3092 | XFS_BUF_UNDONE(bp); | ||
| 3093 | XFS_BUF_STALE(bp); | ||
| 3094 | XFS_BUF_SHUT(bp); | ||
| 3095 | XFS_BUF_ERROR(bp,EIO); | ||
| 3096 | xfs_biodone(bp); | ||
| 3097 | } else { | ||
| 3098 | XFS_BUF_STALE(bp); | ||
| 3099 | xfs_buf_relse(bp); | ||
| 3100 | } | ||
| 3101 | } | ||
| 3102 | |||
| 3103 | /* | ||
| 3104 | * Unlocks the flush lock | ||
| 3105 | */ | ||
| 3106 | xfs_iflush_abort(iq); | ||
| 3107 | kmem_free(ilist, ilist_size); | ||
| 3108 | return XFS_ERROR(EFSCORRUPTED); | ||
| 3028 | } | 3109 | } |
| 3029 | 3110 | ||
| 3030 | /* | 3111 | /* |
| @@ -3046,11 +3127,7 @@ xfs_iflush( | |||
| 3046 | xfs_dinode_t *dip; | 3127 | xfs_dinode_t *dip; |
| 3047 | xfs_mount_t *mp; | 3128 | xfs_mount_t *mp; |
| 3048 | int error; | 3129 | int error; |
| 3049 | /* REFERENCED */ | 3130 | int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK); |
| 3050 | xfs_inode_t *iq; | ||
| 3051 | int clcount; /* count of inodes clustered */ | ||
| 3052 | int bufwasdelwri; | ||
| 3053 | struct hlist_node *entry; | ||
| 3054 | enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; | 3131 | enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; |
| 3055 | 3132 | ||
| 3056 | XFS_STATS_INC(xs_iflush_count); | 3133 | XFS_STATS_INC(xs_iflush_count); |
| @@ -3067,8 +3144,7 @@ xfs_iflush( | |||
| 3067 | * If the inode isn't dirty, then just release the inode | 3144 | * If the inode isn't dirty, then just release the inode |
| 3068 | * flush lock and do nothing. | 3145 | * flush lock and do nothing. |
| 3069 | */ | 3146 | */ |
| 3070 | if ((ip->i_update_core == 0) && | 3147 | if (xfs_inode_clean(ip)) { |
| 3071 | ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { | ||
| 3072 | ASSERT((iip != NULL) ? | 3148 | ASSERT((iip != NULL) ? |
| 3073 | !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1); | 3149 | !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1); |
| 3074 | xfs_ifunlock(ip); | 3150 | xfs_ifunlock(ip); |
| @@ -3076,11 +3152,21 @@ xfs_iflush( | |||
| 3076 | } | 3152 | } |
| 3077 | 3153 | ||
| 3078 | /* | 3154 | /* |
| 3079 | * We can't flush the inode until it is unpinned, so | 3155 | * We can't flush the inode until it is unpinned, so wait for it if we |
| 3080 | * wait for it. We know noone new can pin it, because | 3156 | * are allowed to block. We know noone new can pin it, because we are |
| 3081 | * we are holding the inode lock shared and you need | 3157 | * holding the inode lock shared and you need to hold it exclusively to |
| 3082 | * to hold it exclusively to pin the inode. | 3158 | * pin the inode. |
| 3159 | * | ||
| 3160 | * If we are not allowed to block, force the log out asynchronously so | ||
| 3161 | * that when we come back the inode will be unpinned. If other inodes | ||
| 3162 | * in the same cluster are dirty, they will probably write the inode | ||
| 3163 | * out for us if they occur after the log force completes. | ||
| 3083 | */ | 3164 | */ |
| 3165 | if (noblock && xfs_ipincount(ip)) { | ||
| 3166 | xfs_iunpin_nowait(ip); | ||
| 3167 | xfs_ifunlock(ip); | ||
| 3168 | return EAGAIN; | ||
| 3169 | } | ||
| 3084 | xfs_iunpin_wait(ip); | 3170 | xfs_iunpin_wait(ip); |
| 3085 | 3171 | ||
| 3086 | /* | 3172 | /* |
| @@ -3097,15 +3183,6 @@ xfs_iflush( | |||
| 3097 | } | 3183 | } |
| 3098 | 3184 | ||
| 3099 | /* | 3185 | /* |
| 3100 | * Get the buffer containing the on-disk inode. | ||
| 3101 | */ | ||
| 3102 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0); | ||
| 3103 | if (error) { | ||
| 3104 | xfs_ifunlock(ip); | ||
| 3105 | return error; | ||
| 3106 | } | ||
| 3107 | |||
| 3108 | /* | ||
| 3109 | * Decide how buffer will be flushed out. This is done before | 3186 | * Decide how buffer will be flushed out. This is done before |
| 3110 | * the call to xfs_iflush_int because this field is zeroed by it. | 3187 | * the call to xfs_iflush_int because this field is zeroed by it. |
| 3111 | */ | 3188 | */ |
| @@ -3121,6 +3198,7 @@ xfs_iflush( | |||
| 3121 | case XFS_IFLUSH_DELWRI_ELSE_SYNC: | 3198 | case XFS_IFLUSH_DELWRI_ELSE_SYNC: |
| 3122 | flags = 0; | 3199 | flags = 0; |
| 3123 | break; | 3200 | break; |
| 3201 | case XFS_IFLUSH_ASYNC_NOBLOCK: | ||
| 3124 | case XFS_IFLUSH_ASYNC: | 3202 | case XFS_IFLUSH_ASYNC: |
| 3125 | case XFS_IFLUSH_DELWRI_ELSE_ASYNC: | 3203 | case XFS_IFLUSH_DELWRI_ELSE_ASYNC: |
| 3126 | flags = INT_ASYNC; | 3204 | flags = INT_ASYNC; |
| @@ -3140,6 +3218,7 @@ xfs_iflush( | |||
| 3140 | case XFS_IFLUSH_DELWRI: | 3218 | case XFS_IFLUSH_DELWRI: |
| 3141 | flags = INT_DELWRI; | 3219 | flags = INT_DELWRI; |
| 3142 | break; | 3220 | break; |
| 3221 | case XFS_IFLUSH_ASYNC_NOBLOCK: | ||
| 3143 | case XFS_IFLUSH_ASYNC: | 3222 | case XFS_IFLUSH_ASYNC: |
| 3144 | flags = INT_ASYNC; | 3223 | flags = INT_ASYNC; |
| 3145 | break; | 3224 | break; |
| @@ -3154,94 +3233,41 @@ xfs_iflush( | |||
| 3154 | } | 3233 | } |
| 3155 | 3234 | ||
| 3156 | /* | 3235 | /* |
| 3157 | * First flush out the inode that xfs_iflush was called with. | 3236 | * Get the buffer containing the on-disk inode. |
| 3158 | */ | 3237 | */ |
| 3159 | error = xfs_iflush_int(ip, bp); | 3238 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0, |
| 3160 | if (error) { | 3239 | noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); |
| 3161 | goto corrupt_out; | 3240 | if (error || !bp) { |
| 3241 | xfs_ifunlock(ip); | ||
| 3242 | return error; | ||
| 3162 | } | 3243 | } |
| 3163 | 3244 | ||
| 3164 | /* | 3245 | /* |
| 3165 | * inode clustering: | 3246 | * First flush out the inode that xfs_iflush was called with. |
| 3166 | * see if other inodes can be gathered into this write | ||
| 3167 | */ | 3247 | */ |
| 3168 | spin_lock(&ip->i_cluster->icl_lock); | 3248 | error = xfs_iflush_int(ip, bp); |
| 3169 | ip->i_cluster->icl_buf = bp; | 3249 | if (error) |
| 3170 | 3250 | goto corrupt_out; | |
| 3171 | clcount = 0; | ||
| 3172 | hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) { | ||
| 3173 | if (iq == ip) | ||
| 3174 | continue; | ||
| 3175 | |||
| 3176 | /* | ||
| 3177 | * Do an un-protected check to see if the inode is dirty and | ||
| 3178 | * is a candidate for flushing. These checks will be repeated | ||
| 3179 | * later after the appropriate locks are acquired. | ||
| 3180 | */ | ||
| 3181 | iip = iq->i_itemp; | ||
| 3182 | if ((iq->i_update_core == 0) && | ||
| 3183 | ((iip == NULL) || | ||
| 3184 | !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) && | ||
| 3185 | xfs_ipincount(iq) == 0) { | ||
| 3186 | continue; | ||
| 3187 | } | ||
| 3188 | |||
| 3189 | /* | ||
| 3190 | * Try to get locks. If any are unavailable, | ||
| 3191 | * then this inode cannot be flushed and is skipped. | ||
| 3192 | */ | ||
| 3193 | |||
| 3194 | /* get inode locks (just i_lock) */ | ||
| 3195 | if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) { | ||
| 3196 | /* get inode flush lock */ | ||
| 3197 | if (xfs_iflock_nowait(iq)) { | ||
| 3198 | /* check if pinned */ | ||
| 3199 | if (xfs_ipincount(iq) == 0) { | ||
| 3200 | /* arriving here means that | ||
| 3201 | * this inode can be flushed. | ||
| 3202 | * first re-check that it's | ||
| 3203 | * dirty | ||
| 3204 | */ | ||
| 3205 | iip = iq->i_itemp; | ||
| 3206 | if ((iq->i_update_core != 0)|| | ||
| 3207 | ((iip != NULL) && | ||
| 3208 | (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { | ||
| 3209 | clcount++; | ||
| 3210 | error = xfs_iflush_int(iq, bp); | ||
| 3211 | if (error) { | ||
| 3212 | xfs_iunlock(iq, | ||
| 3213 | XFS_ILOCK_SHARED); | ||
| 3214 | goto cluster_corrupt_out; | ||
| 3215 | } | ||
| 3216 | } else { | ||
| 3217 | xfs_ifunlock(iq); | ||
| 3218 | } | ||
| 3219 | } else { | ||
| 3220 | xfs_ifunlock(iq); | ||
| 3221 | } | ||
| 3222 | } | ||
| 3223 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
| 3224 | } | ||
| 3225 | } | ||
| 3226 | spin_unlock(&ip->i_cluster->icl_lock); | ||
| 3227 | |||
| 3228 | if (clcount) { | ||
| 3229 | XFS_STATS_INC(xs_icluster_flushcnt); | ||
| 3230 | XFS_STATS_ADD(xs_icluster_flushinode, clcount); | ||
| 3231 | } | ||
| 3232 | 3251 | ||
| 3233 | /* | 3252 | /* |
| 3234 | * If the buffer is pinned then push on the log so we won't | 3253 | * If the buffer is pinned then push on the log now so we won't |
| 3235 | * get stuck waiting in the write for too long. | 3254 | * get stuck waiting in the write for too long. |
| 3236 | */ | 3255 | */ |
| 3237 | if (XFS_BUF_ISPINNED(bp)){ | 3256 | if (XFS_BUF_ISPINNED(bp)) |
| 3238 | xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); | 3257 | xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); |
| 3239 | } | 3258 | |
| 3259 | /* | ||
| 3260 | * inode clustering: | ||
| 3261 | * see if other inodes can be gathered into this write | ||
| 3262 | */ | ||
| 3263 | error = xfs_iflush_cluster(ip, bp); | ||
| 3264 | if (error) | ||
| 3265 | goto cluster_corrupt_out; | ||
| 3240 | 3266 | ||
| 3241 | if (flags & INT_DELWRI) { | 3267 | if (flags & INT_DELWRI) { |
| 3242 | xfs_bdwrite(mp, bp); | 3268 | xfs_bdwrite(mp, bp); |
| 3243 | } else if (flags & INT_ASYNC) { | 3269 | } else if (flags & INT_ASYNC) { |
| 3244 | xfs_bawrite(mp, bp); | 3270 | error = xfs_bawrite(mp, bp); |
| 3245 | } else { | 3271 | } else { |
| 3246 | error = xfs_bwrite(mp, bp); | 3272 | error = xfs_bwrite(mp, bp); |
| 3247 | } | 3273 | } |
| @@ -3250,52 +3276,11 @@ xfs_iflush( | |||
| 3250 | corrupt_out: | 3276 | corrupt_out: |
| 3251 | xfs_buf_relse(bp); | 3277 | xfs_buf_relse(bp); |
| 3252 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 3278 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
| 3253 | xfs_iflush_abort(ip); | ||
| 3254 | /* | ||
| 3255 | * Unlocks the flush lock | ||
| 3256 | */ | ||
| 3257 | return XFS_ERROR(EFSCORRUPTED); | ||
| 3258 | |||
| 3259 | cluster_corrupt_out: | 3279 | cluster_corrupt_out: |
| 3260 | /* Corruption detected in the clustering loop. Invalidate the | ||
| 3261 | * inode buffer and shut down the filesystem. | ||
| 3262 | */ | ||
| 3263 | spin_unlock(&ip->i_cluster->icl_lock); | ||
| 3264 | |||
| 3265 | /* | ||
| 3266 | * Clean up the buffer. If it was B_DELWRI, just release it -- | ||
| 3267 | * brelse can handle it with no problems. If not, shut down the | ||
| 3268 | * filesystem before releasing the buffer. | ||
| 3269 | */ | ||
| 3270 | if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) { | ||
| 3271 | xfs_buf_relse(bp); | ||
| 3272 | } | ||
| 3273 | |||
| 3274 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
| 3275 | |||
| 3276 | if(!bufwasdelwri) { | ||
| 3277 | /* | ||
| 3278 | * Just like incore_relse: if we have b_iodone functions, | ||
| 3279 | * mark the buffer as an error and call them. Otherwise | ||
| 3280 | * mark it as stale and brelse. | ||
| 3281 | */ | ||
| 3282 | if (XFS_BUF_IODONE_FUNC(bp)) { | ||
| 3283 | XFS_BUF_CLR_BDSTRAT_FUNC(bp); | ||
| 3284 | XFS_BUF_UNDONE(bp); | ||
| 3285 | XFS_BUF_STALE(bp); | ||
| 3286 | XFS_BUF_SHUT(bp); | ||
| 3287 | XFS_BUF_ERROR(bp,EIO); | ||
| 3288 | xfs_biodone(bp); | ||
| 3289 | } else { | ||
| 3290 | XFS_BUF_STALE(bp); | ||
| 3291 | xfs_buf_relse(bp); | ||
| 3292 | } | ||
| 3293 | } | ||
| 3294 | |||
| 3295 | xfs_iflush_abort(iq); | ||
| 3296 | /* | 3280 | /* |
| 3297 | * Unlocks the flush lock | 3281 | * Unlocks the flush lock |
| 3298 | */ | 3282 | */ |
| 3283 | xfs_iflush_abort(ip); | ||
| 3299 | return XFS_ERROR(EFSCORRUPTED); | 3284 | return XFS_ERROR(EFSCORRUPTED); |
| 3300 | } | 3285 | } |
| 3301 | 3286 | ||
| @@ -3325,8 +3310,7 @@ xfs_iflush_int( | |||
| 3325 | * If the inode isn't dirty, then just release the inode | 3310 | * If the inode isn't dirty, then just release the inode |
| 3326 | * flush lock and do nothing. | 3311 | * flush lock and do nothing. |
| 3327 | */ | 3312 | */ |
| 3328 | if ((ip->i_update_core == 0) && | 3313 | if (xfs_inode_clean(ip)) { |
| 3329 | ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { | ||
| 3330 | xfs_ifunlock(ip); | 3314 | xfs_ifunlock(ip); |
| 3331 | return 0; | 3315 | return 0; |
| 3332 | } | 3316 | } |
| @@ -3459,16 +3443,9 @@ xfs_iflush_int( | |||
| 3459 | } | 3443 | } |
| 3460 | } | 3444 | } |
| 3461 | 3445 | ||
| 3462 | if (xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp) == EFSCORRUPTED) { | 3446 | xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); |
| 3463 | goto corrupt_out; | 3447 | if (XFS_IFORK_Q(ip)) |
| 3464 | } | 3448 | xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); |
| 3465 | |||
| 3466 | if (XFS_IFORK_Q(ip)) { | ||
| 3467 | /* | ||
| 3468 | * The only error from xfs_iflush_fork is on the data fork. | ||
| 3469 | */ | ||
| 3470 | (void) xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); | ||
| 3471 | } | ||
| 3472 | xfs_inobp_check(mp, bp); | 3449 | xfs_inobp_check(mp, bp); |
| 3473 | 3450 | ||
| 3474 | /* | 3451 | /* |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index bfcd72cbaeea..93c37697a72c 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
| @@ -133,19 +133,6 @@ typedef struct dm_attrs_s { | |||
| 133 | } dm_attrs_t; | 133 | } dm_attrs_t; |
| 134 | 134 | ||
| 135 | /* | 135 | /* |
| 136 | * This is the xfs inode cluster structure. This structure is used by | ||
| 137 | * xfs_iflush to find inodes that share a cluster and can be flushed to disk at | ||
| 138 | * the same time. | ||
| 139 | */ | ||
| 140 | typedef struct xfs_icluster { | ||
| 141 | struct hlist_head icl_inodes; /* list of inodes on cluster */ | ||
| 142 | xfs_daddr_t icl_blkno; /* starting block number of | ||
| 143 | * the cluster */ | ||
| 144 | struct xfs_buf *icl_buf; /* the inode buffer */ | ||
| 145 | spinlock_t icl_lock; /* inode list lock */ | ||
| 146 | } xfs_icluster_t; | ||
| 147 | |||
| 148 | /* | ||
| 149 | * This is the xfs in-core inode structure. | 136 | * This is the xfs in-core inode structure. |
| 150 | * Most of the on-disk inode is embedded in the i_d field. | 137 | * Most of the on-disk inode is embedded in the i_d field. |
| 151 | * | 138 | * |
| @@ -240,10 +227,6 @@ typedef struct xfs_inode { | |||
| 240 | atomic_t i_pincount; /* inode pin count */ | 227 | atomic_t i_pincount; /* inode pin count */ |
| 241 | wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ | 228 | wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ |
| 242 | spinlock_t i_flags_lock; /* inode i_flags lock */ | 229 | spinlock_t i_flags_lock; /* inode i_flags lock */ |
| 243 | #ifdef HAVE_REFCACHE | ||
| 244 | struct xfs_inode **i_refcache; /* ptr to entry in ref cache */ | ||
| 245 | struct xfs_inode *i_release; /* inode to unref */ | ||
| 246 | #endif | ||
| 247 | /* Miscellaneous state. */ | 230 | /* Miscellaneous state. */ |
| 248 | unsigned short i_flags; /* see defined flags below */ | 231 | unsigned short i_flags; /* see defined flags below */ |
| 249 | unsigned char i_update_core; /* timestamps/size is dirty */ | 232 | unsigned char i_update_core; /* timestamps/size is dirty */ |
| @@ -252,8 +235,6 @@ typedef struct xfs_inode { | |||
| 252 | unsigned int i_delayed_blks; /* count of delay alloc blks */ | 235 | unsigned int i_delayed_blks; /* count of delay alloc blks */ |
| 253 | 236 | ||
| 254 | xfs_icdinode_t i_d; /* most of ondisk inode */ | 237 | xfs_icdinode_t i_d; /* most of ondisk inode */ |
| 255 | xfs_icluster_t *i_cluster; /* cluster list header */ | ||
| 256 | struct hlist_node i_cnode; /* cluster link node */ | ||
| 257 | 238 | ||
| 258 | xfs_fsize_t i_size; /* in-memory size */ | 239 | xfs_fsize_t i_size; /* in-memory size */ |
| 259 | xfs_fsize_t i_new_size; /* size when write completes */ | 240 | xfs_fsize_t i_new_size; /* size when write completes */ |
| @@ -461,6 +442,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) | |||
| 461 | #define XFS_IFLUSH_SYNC 3 | 442 | #define XFS_IFLUSH_SYNC 3 |
| 462 | #define XFS_IFLUSH_ASYNC 4 | 443 | #define XFS_IFLUSH_ASYNC 4 |
| 463 | #define XFS_IFLUSH_DELWRI 5 | 444 | #define XFS_IFLUSH_DELWRI 5 |
| 445 | #define XFS_IFLUSH_ASYNC_NOBLOCK 6 | ||
| 464 | 446 | ||
| 465 | /* | 447 | /* |
| 466 | * Flags for xfs_itruncate_start(). | 448 | * Flags for xfs_itruncate_start(). |
| @@ -515,7 +497,7 @@ int xfs_finish_reclaim_all(struct xfs_mount *, int); | |||
| 515 | */ | 497 | */ |
| 516 | int xfs_itobp(struct xfs_mount *, struct xfs_trans *, | 498 | int xfs_itobp(struct xfs_mount *, struct xfs_trans *, |
| 517 | xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **, | 499 | xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **, |
| 518 | xfs_daddr_t, uint); | 500 | xfs_daddr_t, uint, uint); |
| 519 | int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, | 501 | int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, |
| 520 | xfs_inode_t **, xfs_daddr_t, uint); | 502 | xfs_inode_t **, xfs_daddr_t, uint); |
| 521 | int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); | 503 | int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); |
| @@ -597,7 +579,6 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); | |||
| 597 | #define xfs_inobp_check(mp, bp) | 579 | #define xfs_inobp_check(mp, bp) |
| 598 | #endif /* DEBUG */ | 580 | #endif /* DEBUG */ |
| 599 | 581 | ||
| 600 | extern struct kmem_zone *xfs_icluster_zone; | ||
| 601 | extern struct kmem_zone *xfs_ifork_zone; | 582 | extern struct kmem_zone *xfs_ifork_zone; |
| 602 | extern struct kmem_zone *xfs_inode_zone; | 583 | extern struct kmem_zone *xfs_inode_zone; |
| 603 | extern struct kmem_zone *xfs_ili_zone; | 584 | extern struct kmem_zone *xfs_ili_zone; |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 2c775b4ae9e6..93b5db453ea2 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
| @@ -40,6 +40,7 @@ | |||
| 40 | #include "xfs_btree.h" | 40 | #include "xfs_btree.h" |
| 41 | #include "xfs_ialloc.h" | 41 | #include "xfs_ialloc.h" |
| 42 | #include "xfs_rw.h" | 42 | #include "xfs_rw.h" |
| 43 | #include "xfs_error.h" | ||
| 43 | 44 | ||
| 44 | 45 | ||
| 45 | kmem_zone_t *xfs_ili_zone; /* inode log item zone */ | 46 | kmem_zone_t *xfs_ili_zone; /* inode log item zone */ |
| @@ -813,7 +814,12 @@ xfs_inode_item_pushbuf( | |||
| 813 | XFS_LOG_FORCE); | 814 | XFS_LOG_FORCE); |
| 814 | } | 815 | } |
| 815 | if (dopush) { | 816 | if (dopush) { |
| 816 | xfs_bawrite(mp, bp); | 817 | int error; |
| 818 | error = xfs_bawrite(mp, bp); | ||
| 819 | if (error) | ||
| 820 | xfs_fs_cmn_err(CE_WARN, mp, | ||
| 821 | "xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p", | ||
| 822 | error, iip, bp); | ||
| 817 | } else { | 823 | } else { |
| 818 | xfs_buf_relse(bp); | 824 | xfs_buf_relse(bp); |
| 819 | } | 825 | } |
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index bfe92ea17952..40513077ab36 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h | |||
| @@ -168,6 +168,14 @@ static inline int xfs_ilog_fext(int w) | |||
| 168 | return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT); | 168 | return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT); |
| 169 | } | 169 | } |
| 170 | 170 | ||
| 171 | static inline int xfs_inode_clean(xfs_inode_t *ip) | ||
| 172 | { | ||
| 173 | return (!ip->i_itemp || | ||
| 174 | !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && | ||
| 175 | !ip->i_update_core; | ||
| 176 | } | ||
| 177 | |||
| 178 | |||
| 171 | #ifdef __KERNEL__ | 179 | #ifdef __KERNEL__ |
| 172 | 180 | ||
| 173 | extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); | 181 | extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index fde37f87d52f..fb3cf1191419 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
| @@ -802,8 +802,11 @@ xfs_iomap_write_allocate( | |||
| 802 | */ | 802 | */ |
| 803 | nimaps = 1; | 803 | nimaps = 1; |
| 804 | end_fsb = XFS_B_TO_FSB(mp, ip->i_size); | 804 | end_fsb = XFS_B_TO_FSB(mp, ip->i_size); |
| 805 | xfs_bmap_last_offset(NULL, ip, &last_block, | 805 | error = xfs_bmap_last_offset(NULL, ip, &last_block, |
| 806 | XFS_DATA_FORK); | 806 | XFS_DATA_FORK); |
| 807 | if (error) | ||
| 808 | goto trans_cancel; | ||
| 809 | |||
| 807 | last_block = XFS_FILEOFF_MAX(last_block, end_fsb); | 810 | last_block = XFS_FILEOFF_MAX(last_block, end_fsb); |
| 808 | if ((map_start_fsb + count_fsb) > last_block) { | 811 | if ((map_start_fsb + count_fsb) > last_block) { |
| 809 | count_fsb = last_block - map_start_fsb; | 812 | count_fsb = last_block - map_start_fsb; |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f615e04364f4..eb85bdedad0c 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
| @@ -129,7 +129,7 @@ xfs_bulkstat_one_iget( | |||
| 129 | return error; | 129 | return error; |
| 130 | } | 130 | } |
| 131 | 131 | ||
| 132 | STATIC int | 132 | STATIC void |
| 133 | xfs_bulkstat_one_dinode( | 133 | xfs_bulkstat_one_dinode( |
| 134 | xfs_mount_t *mp, /* mount point for filesystem */ | 134 | xfs_mount_t *mp, /* mount point for filesystem */ |
| 135 | xfs_ino_t ino, /* inode number to get data for */ | 135 | xfs_ino_t ino, /* inode number to get data for */ |
| @@ -198,8 +198,6 @@ xfs_bulkstat_one_dinode( | |||
| 198 | buf->bs_blocks = be64_to_cpu(dic->di_nblocks); | 198 | buf->bs_blocks = be64_to_cpu(dic->di_nblocks); |
| 199 | break; | 199 | break; |
| 200 | } | 200 | } |
| 201 | |||
| 202 | return 0; | ||
| 203 | } | 201 | } |
| 204 | 202 | ||
| 205 | STATIC int | 203 | STATIC int |
| @@ -614,7 +612,8 @@ xfs_bulkstat( | |||
| 614 | xfs_buf_relse(bp); | 612 | xfs_buf_relse(bp); |
| 615 | error = xfs_itobp(mp, NULL, ip, | 613 | error = xfs_itobp(mp, NULL, ip, |
| 616 | &dip, &bp, bno, | 614 | &dip, &bp, bno, |
| 617 | XFS_IMAP_BULKSTAT); | 615 | XFS_IMAP_BULKSTAT, |
| 616 | XFS_BUF_LOCK); | ||
| 618 | if (!error) | 617 | if (!error) |
| 619 | clustidx = ip->i_boffset / mp->m_sb.sb_inodesize; | 618 | clustidx = ip->i_boffset / mp->m_sb.sb_inodesize; |
| 620 | kmem_zone_free(xfs_inode_zone, ip); | 619 | kmem_zone_free(xfs_inode_zone, ip); |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 31f2b04f2c97..afaee301b0ee 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
| @@ -41,6 +41,7 @@ | |||
| 41 | #include "xfs_inode.h" | 41 | #include "xfs_inode.h" |
| 42 | #include "xfs_rw.h" | 42 | #include "xfs_rw.h" |
| 43 | 43 | ||
| 44 | kmem_zone_t *xfs_log_ticket_zone; | ||
| 44 | 45 | ||
| 45 | #define xlog_write_adv_cnt(ptr, len, off, bytes) \ | 46 | #define xlog_write_adv_cnt(ptr, len, off, bytes) \ |
| 46 | { (ptr) += (bytes); \ | 47 | { (ptr) += (bytes); \ |
| @@ -73,8 +74,6 @@ STATIC int xlog_state_get_iclog_space(xlog_t *log, | |||
| 73 | xlog_ticket_t *ticket, | 74 | xlog_ticket_t *ticket, |
| 74 | int *continued_write, | 75 | int *continued_write, |
| 75 | int *logoffsetp); | 76 | int *logoffsetp); |
| 76 | STATIC void xlog_state_put_ticket(xlog_t *log, | ||
| 77 | xlog_ticket_t *tic); | ||
| 78 | STATIC int xlog_state_release_iclog(xlog_t *log, | 77 | STATIC int xlog_state_release_iclog(xlog_t *log, |
| 79 | xlog_in_core_t *iclog); | 78 | xlog_in_core_t *iclog); |
| 80 | STATIC void xlog_state_switch_iclogs(xlog_t *log, | 79 | STATIC void xlog_state_switch_iclogs(xlog_t *log, |
| @@ -101,7 +100,6 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, | |||
| 101 | 100 | ||
| 102 | 101 | ||
| 103 | /* local ticket functions */ | 102 | /* local ticket functions */ |
| 104 | STATIC void xlog_state_ticket_alloc(xlog_t *log); | ||
| 105 | STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log, | 103 | STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log, |
| 106 | int unit_bytes, | 104 | int unit_bytes, |
| 107 | int count, | 105 | int count, |
| @@ -330,7 +328,7 @@ xfs_log_done(xfs_mount_t *mp, | |||
| 330 | */ | 328 | */ |
| 331 | xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); | 329 | xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); |
| 332 | xlog_ungrant_log_space(log, ticket); | 330 | xlog_ungrant_log_space(log, ticket); |
| 333 | xlog_state_put_ticket(log, ticket); | 331 | xlog_ticket_put(log, ticket); |
| 334 | } else { | 332 | } else { |
| 335 | xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); | 333 | xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); |
| 336 | xlog_regrant_reserve_log_space(log, ticket); | 334 | xlog_regrant_reserve_log_space(log, ticket); |
| @@ -384,7 +382,27 @@ _xfs_log_force( | |||
| 384 | return xlog_state_sync_all(log, flags, log_flushed); | 382 | return xlog_state_sync_all(log, flags, log_flushed); |
| 385 | else | 383 | else |
| 386 | return xlog_state_sync(log, lsn, flags, log_flushed); | 384 | return xlog_state_sync(log, lsn, flags, log_flushed); |
| 387 | } /* xfs_log_force */ | 385 | } /* _xfs_log_force */ |
| 386 | |||
| 387 | /* | ||
| 388 | * Wrapper for _xfs_log_force(), to be used when caller doesn't care | ||
| 389 | * about errors or whether the log was flushed or not. This is the normal | ||
| 390 | * interface to use when trying to unpin items or move the log forward. | ||
| 391 | */ | ||
| 392 | void | ||
| 393 | xfs_log_force( | ||
| 394 | xfs_mount_t *mp, | ||
| 395 | xfs_lsn_t lsn, | ||
| 396 | uint flags) | ||
| 397 | { | ||
| 398 | int error; | ||
| 399 | error = _xfs_log_force(mp, lsn, flags, NULL); | ||
| 400 | if (error) { | ||
| 401 | xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " | ||
| 402 | "error %d returned.", error); | ||
| 403 | } | ||
| 404 | } | ||
| 405 | |||
| 388 | 406 | ||
| 389 | /* | 407 | /* |
| 390 | * Attaches a new iclog I/O completion callback routine during | 408 | * Attaches a new iclog I/O completion callback routine during |
| @@ -397,12 +415,10 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ | |||
| 397 | void *iclog_hndl, /* iclog to hang callback off */ | 415 | void *iclog_hndl, /* iclog to hang callback off */ |
| 398 | xfs_log_callback_t *cb) | 416 | xfs_log_callback_t *cb) |
| 399 | { | 417 | { |
| 400 | xlog_t *log = mp->m_log; | ||
| 401 | xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; | 418 | xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; |
| 402 | int abortflg; | 419 | int abortflg; |
| 403 | 420 | ||
| 404 | cb->cb_next = NULL; | 421 | spin_lock(&iclog->ic_callback_lock); |
| 405 | spin_lock(&log->l_icloglock); | ||
| 406 | abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); | 422 | abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); |
| 407 | if (!abortflg) { | 423 | if (!abortflg) { |
| 408 | ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || | 424 | ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || |
| @@ -411,7 +427,7 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */ | |||
| 411 | *(iclog->ic_callback_tail) = cb; | 427 | *(iclog->ic_callback_tail) = cb; |
| 412 | iclog->ic_callback_tail = &(cb->cb_next); | 428 | iclog->ic_callback_tail = &(cb->cb_next); |
| 413 | } | 429 | } |
| 414 | spin_unlock(&log->l_icloglock); | 430 | spin_unlock(&iclog->ic_callback_lock); |
| 415 | return abortflg; | 431 | return abortflg; |
| 416 | } /* xfs_log_notify */ | 432 | } /* xfs_log_notify */ |
| 417 | 433 | ||
| @@ -471,6 +487,8 @@ xfs_log_reserve(xfs_mount_t *mp, | |||
| 471 | /* may sleep if need to allocate more tickets */ | 487 | /* may sleep if need to allocate more tickets */ |
| 472 | internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, | 488 | internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, |
| 473 | client, flags); | 489 | client, flags); |
| 490 | if (!internal_ticket) | ||
| 491 | return XFS_ERROR(ENOMEM); | ||
| 474 | internal_ticket->t_trans_type = t_type; | 492 | internal_ticket->t_trans_type = t_type; |
| 475 | *ticket = internal_ticket; | 493 | *ticket = internal_ticket; |
| 476 | xlog_trace_loggrant(log, internal_ticket, | 494 | xlog_trace_loggrant(log, internal_ticket, |
| @@ -636,7 +654,8 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
| 636 | if (mp->m_flags & XFS_MOUNT_RDONLY) | 654 | if (mp->m_flags & XFS_MOUNT_RDONLY) |
| 637 | return 0; | 655 | return 0; |
| 638 | 656 | ||
| 639 | xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC); | 657 | error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL); |
| 658 | ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log))); | ||
| 640 | 659 | ||
| 641 | #ifdef DEBUG | 660 | #ifdef DEBUG |
| 642 | first_iclog = iclog = log->l_iclog; | 661 | first_iclog = iclog = log->l_iclog; |
| @@ -675,10 +694,10 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
| 675 | 694 | ||
| 676 | spin_lock(&log->l_icloglock); | 695 | spin_lock(&log->l_icloglock); |
| 677 | iclog = log->l_iclog; | 696 | iclog = log->l_iclog; |
| 678 | iclog->ic_refcnt++; | 697 | atomic_inc(&iclog->ic_refcnt); |
| 679 | spin_unlock(&log->l_icloglock); | 698 | spin_unlock(&log->l_icloglock); |
| 680 | xlog_state_want_sync(log, iclog); | 699 | xlog_state_want_sync(log, iclog); |
| 681 | (void) xlog_state_release_iclog(log, iclog); | 700 | error = xlog_state_release_iclog(log, iclog); |
| 682 | 701 | ||
| 683 | spin_lock(&log->l_icloglock); | 702 | spin_lock(&log->l_icloglock); |
| 684 | if (!(iclog->ic_state == XLOG_STATE_ACTIVE || | 703 | if (!(iclog->ic_state == XLOG_STATE_ACTIVE || |
| @@ -695,7 +714,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
| 695 | if (tic) { | 714 | if (tic) { |
| 696 | xlog_trace_loggrant(log, tic, "unmount rec"); | 715 | xlog_trace_loggrant(log, tic, "unmount rec"); |
| 697 | xlog_ungrant_log_space(log, tic); | 716 | xlog_ungrant_log_space(log, tic); |
| 698 | xlog_state_put_ticket(log, tic); | 717 | xlog_ticket_put(log, tic); |
| 699 | } | 718 | } |
| 700 | } else { | 719 | } else { |
| 701 | /* | 720 | /* |
| @@ -713,11 +732,11 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
| 713 | */ | 732 | */ |
| 714 | spin_lock(&log->l_icloglock); | 733 | spin_lock(&log->l_icloglock); |
| 715 | iclog = log->l_iclog; | 734 | iclog = log->l_iclog; |
| 716 | iclog->ic_refcnt++; | 735 | atomic_inc(&iclog->ic_refcnt); |
| 717 | spin_unlock(&log->l_icloglock); | 736 | spin_unlock(&log->l_icloglock); |
| 718 | 737 | ||
| 719 | xlog_state_want_sync(log, iclog); | 738 | xlog_state_want_sync(log, iclog); |
| 720 | (void) xlog_state_release_iclog(log, iclog); | 739 | error = xlog_state_release_iclog(log, iclog); |
| 721 | 740 | ||
| 722 | spin_lock(&log->l_icloglock); | 741 | spin_lock(&log->l_icloglock); |
| 723 | 742 | ||
| @@ -732,7 +751,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
| 732 | } | 751 | } |
| 733 | } | 752 | } |
| 734 | 753 | ||
| 735 | return 0; | 754 | return error; |
| 736 | } /* xfs_log_unmount_write */ | 755 | } /* xfs_log_unmount_write */ |
| 737 | 756 | ||
| 738 | /* | 757 | /* |
| @@ -1210,7 +1229,6 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
| 1210 | spin_lock_init(&log->l_icloglock); | 1229 | spin_lock_init(&log->l_icloglock); |
| 1211 | spin_lock_init(&log->l_grant_lock); | 1230 | spin_lock_init(&log->l_grant_lock); |
| 1212 | initnsema(&log->l_flushsema, 0, "ic-flush"); | 1231 | initnsema(&log->l_flushsema, 0, "ic-flush"); |
| 1213 | xlog_state_ticket_alloc(log); /* wait until after icloglock inited */ | ||
| 1214 | 1232 | ||
| 1215 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ | 1233 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ |
| 1216 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); | 1234 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); |
| @@ -1240,9 +1258,9 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
| 1240 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); | 1258 | XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1); |
| 1241 | iclog->ic_bp = bp; | 1259 | iclog->ic_bp = bp; |
| 1242 | iclog->hic_data = bp->b_addr; | 1260 | iclog->hic_data = bp->b_addr; |
| 1243 | 1261 | #ifdef DEBUG | |
| 1244 | log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); | 1262 | log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header); |
| 1245 | 1263 | #endif | |
| 1246 | head = &iclog->ic_header; | 1264 | head = &iclog->ic_header; |
| 1247 | memset(head, 0, sizeof(xlog_rec_header_t)); | 1265 | memset(head, 0, sizeof(xlog_rec_header_t)); |
| 1248 | head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); | 1266 | head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); |
| @@ -1253,10 +1271,11 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
| 1253 | head->h_fmt = cpu_to_be32(XLOG_FMT); | 1271 | head->h_fmt = cpu_to_be32(XLOG_FMT); |
| 1254 | memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); | 1272 | memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); |
| 1255 | 1273 | ||
| 1256 | |||
| 1257 | iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; | 1274 | iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; |
| 1258 | iclog->ic_state = XLOG_STATE_ACTIVE; | 1275 | iclog->ic_state = XLOG_STATE_ACTIVE; |
| 1259 | iclog->ic_log = log; | 1276 | iclog->ic_log = log; |
| 1277 | atomic_set(&iclog->ic_refcnt, 0); | ||
| 1278 | spin_lock_init(&iclog->ic_callback_lock); | ||
| 1260 | iclog->ic_callback_tail = &(iclog->ic_callback); | 1279 | iclog->ic_callback_tail = &(iclog->ic_callback); |
| 1261 | iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize; | 1280 | iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize; |
| 1262 | 1281 | ||
| @@ -1405,7 +1424,7 @@ xlog_sync(xlog_t *log, | |||
| 1405 | int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); | 1424 | int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); |
| 1406 | 1425 | ||
| 1407 | XFS_STATS_INC(xs_log_writes); | 1426 | XFS_STATS_INC(xs_log_writes); |
| 1408 | ASSERT(iclog->ic_refcnt == 0); | 1427 | ASSERT(atomic_read(&iclog->ic_refcnt) == 0); |
| 1409 | 1428 | ||
| 1410 | /* Add for LR header */ | 1429 | /* Add for LR header */ |
| 1411 | count_init = log->l_iclog_hsize + iclog->ic_offset; | 1430 | count_init = log->l_iclog_hsize + iclog->ic_offset; |
| @@ -1538,7 +1557,6 @@ STATIC void | |||
| 1538 | xlog_dealloc_log(xlog_t *log) | 1557 | xlog_dealloc_log(xlog_t *log) |
| 1539 | { | 1558 | { |
| 1540 | xlog_in_core_t *iclog, *next_iclog; | 1559 | xlog_in_core_t *iclog, *next_iclog; |
| 1541 | xlog_ticket_t *tic, *next_tic; | ||
| 1542 | int i; | 1560 | int i; |
| 1543 | 1561 | ||
| 1544 | iclog = log->l_iclog; | 1562 | iclog = log->l_iclog; |
| @@ -1559,22 +1577,6 @@ xlog_dealloc_log(xlog_t *log) | |||
| 1559 | spinlock_destroy(&log->l_icloglock); | 1577 | spinlock_destroy(&log->l_icloglock); |
| 1560 | spinlock_destroy(&log->l_grant_lock); | 1578 | spinlock_destroy(&log->l_grant_lock); |
| 1561 | 1579 | ||
| 1562 | /* XXXsup take a look at this again. */ | ||
| 1563 | if ((log->l_ticket_cnt != log->l_ticket_tcnt) && | ||
| 1564 | !XLOG_FORCED_SHUTDOWN(log)) { | ||
| 1565 | xfs_fs_cmn_err(CE_WARN, log->l_mp, | ||
| 1566 | "xlog_dealloc_log: (cnt: %d, total: %d)", | ||
| 1567 | log->l_ticket_cnt, log->l_ticket_tcnt); | ||
| 1568 | /* ASSERT(log->l_ticket_cnt == log->l_ticket_tcnt); */ | ||
| 1569 | |||
| 1570 | } else { | ||
| 1571 | tic = log->l_unmount_free; | ||
| 1572 | while (tic) { | ||
| 1573 | next_tic = tic->t_next; | ||
| 1574 | kmem_free(tic, PAGE_SIZE); | ||
| 1575 | tic = next_tic; | ||
| 1576 | } | ||
| 1577 | } | ||
| 1578 | xfs_buf_free(log->l_xbuf); | 1580 | xfs_buf_free(log->l_xbuf); |
| 1579 | #ifdef XFS_LOG_TRACE | 1581 | #ifdef XFS_LOG_TRACE |
| 1580 | if (log->l_trace != NULL) { | 1582 | if (log->l_trace != NULL) { |
| @@ -1987,7 +1989,7 @@ xlog_state_clean_log(xlog_t *log) | |||
| 1987 | if (iclog->ic_state == XLOG_STATE_DIRTY) { | 1989 | if (iclog->ic_state == XLOG_STATE_DIRTY) { |
| 1988 | iclog->ic_state = XLOG_STATE_ACTIVE; | 1990 | iclog->ic_state = XLOG_STATE_ACTIVE; |
| 1989 | iclog->ic_offset = 0; | 1991 | iclog->ic_offset = 0; |
| 1990 | iclog->ic_callback = NULL; /* don't need to free */ | 1992 | ASSERT(iclog->ic_callback == NULL); |
| 1991 | /* | 1993 | /* |
| 1992 | * If the number of ops in this iclog indicate it just | 1994 | * If the number of ops in this iclog indicate it just |
| 1993 | * contains the dummy transaction, we can | 1995 | * contains the dummy transaction, we can |
| @@ -2190,37 +2192,40 @@ xlog_state_do_callback( | |||
| 2190 | be64_to_cpu(iclog->ic_header.h_lsn); | 2192 | be64_to_cpu(iclog->ic_header.h_lsn); |
| 2191 | spin_unlock(&log->l_grant_lock); | 2193 | spin_unlock(&log->l_grant_lock); |
| 2192 | 2194 | ||
| 2193 | /* | ||
| 2194 | * Keep processing entries in the callback list | ||
| 2195 | * until we come around and it is empty. We | ||
| 2196 | * need to atomically see that the list is | ||
| 2197 | * empty and change the state to DIRTY so that | ||
| 2198 | * we don't miss any more callbacks being added. | ||
| 2199 | */ | ||
| 2200 | spin_lock(&log->l_icloglock); | ||
| 2201 | } else { | 2195 | } else { |
| 2196 | spin_unlock(&log->l_icloglock); | ||
| 2202 | ioerrors++; | 2197 | ioerrors++; |
| 2203 | } | 2198 | } |
| 2204 | cb = iclog->ic_callback; | ||
| 2205 | 2199 | ||
| 2200 | /* | ||
| 2201 | * Keep processing entries in the callback list until | ||
| 2202 | * we come around and it is empty. We need to | ||
| 2203 | * atomically see that the list is empty and change the | ||
| 2204 | * state to DIRTY so that we don't miss any more | ||
| 2205 | * callbacks being added. | ||
| 2206 | */ | ||
| 2207 | spin_lock(&iclog->ic_callback_lock); | ||
| 2208 | cb = iclog->ic_callback; | ||
| 2206 | while (cb) { | 2209 | while (cb) { |
| 2207 | iclog->ic_callback_tail = &(iclog->ic_callback); | 2210 | iclog->ic_callback_tail = &(iclog->ic_callback); |
| 2208 | iclog->ic_callback = NULL; | 2211 | iclog->ic_callback = NULL; |
| 2209 | spin_unlock(&log->l_icloglock); | 2212 | spin_unlock(&iclog->ic_callback_lock); |
| 2210 | 2213 | ||
| 2211 | /* perform callbacks in the order given */ | 2214 | /* perform callbacks in the order given */ |
| 2212 | for (; cb; cb = cb_next) { | 2215 | for (; cb; cb = cb_next) { |
| 2213 | cb_next = cb->cb_next; | 2216 | cb_next = cb->cb_next; |
| 2214 | cb->cb_func(cb->cb_arg, aborted); | 2217 | cb->cb_func(cb->cb_arg, aborted); |
| 2215 | } | 2218 | } |
| 2216 | spin_lock(&log->l_icloglock); | 2219 | spin_lock(&iclog->ic_callback_lock); |
| 2217 | cb = iclog->ic_callback; | 2220 | cb = iclog->ic_callback; |
| 2218 | } | 2221 | } |
| 2219 | 2222 | ||
| 2220 | loopdidcallbacks++; | 2223 | loopdidcallbacks++; |
| 2221 | funcdidcallbacks++; | 2224 | funcdidcallbacks++; |
| 2222 | 2225 | ||
| 2226 | spin_lock(&log->l_icloglock); | ||
| 2223 | ASSERT(iclog->ic_callback == NULL); | 2227 | ASSERT(iclog->ic_callback == NULL); |
| 2228 | spin_unlock(&iclog->ic_callback_lock); | ||
| 2224 | if (!(iclog->ic_state & XLOG_STATE_IOERROR)) | 2229 | if (!(iclog->ic_state & XLOG_STATE_IOERROR)) |
| 2225 | iclog->ic_state = XLOG_STATE_DIRTY; | 2230 | iclog->ic_state = XLOG_STATE_DIRTY; |
| 2226 | 2231 | ||
| @@ -2241,7 +2246,7 @@ xlog_state_do_callback( | |||
| 2241 | repeats = 0; | 2246 | repeats = 0; |
| 2242 | xfs_fs_cmn_err(CE_WARN, log->l_mp, | 2247 | xfs_fs_cmn_err(CE_WARN, log->l_mp, |
| 2243 | "%s: possible infinite loop (%d iterations)", | 2248 | "%s: possible infinite loop (%d iterations)", |
| 2244 | __FUNCTION__, flushcnt); | 2249 | __func__, flushcnt); |
| 2245 | } | 2250 | } |
| 2246 | } while (!ioerrors && loopdidcallbacks); | 2251 | } while (!ioerrors && loopdidcallbacks); |
| 2247 | 2252 | ||
| @@ -2309,7 +2314,7 @@ xlog_state_done_syncing( | |||
| 2309 | 2314 | ||
| 2310 | ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || | 2315 | ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || |
| 2311 | iclog->ic_state == XLOG_STATE_IOERROR); | 2316 | iclog->ic_state == XLOG_STATE_IOERROR); |
| 2312 | ASSERT(iclog->ic_refcnt == 0); | 2317 | ASSERT(atomic_read(&iclog->ic_refcnt) == 0); |
| 2313 | ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2); | 2318 | ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2); |
| 2314 | 2319 | ||
| 2315 | 2320 | ||
| @@ -2391,7 +2396,7 @@ restart: | |||
| 2391 | ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); | 2396 | ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); |
| 2392 | head = &iclog->ic_header; | 2397 | head = &iclog->ic_header; |
| 2393 | 2398 | ||
| 2394 | iclog->ic_refcnt++; /* prevents sync */ | 2399 | atomic_inc(&iclog->ic_refcnt); /* prevents sync */ |
| 2395 | log_offset = iclog->ic_offset; | 2400 | log_offset = iclog->ic_offset; |
| 2396 | 2401 | ||
| 2397 | /* On the 1st write to an iclog, figure out lsn. This works | 2402 | /* On the 1st write to an iclog, figure out lsn. This works |
| @@ -2423,12 +2428,12 @@ restart: | |||
| 2423 | xlog_state_switch_iclogs(log, iclog, iclog->ic_size); | 2428 | xlog_state_switch_iclogs(log, iclog, iclog->ic_size); |
| 2424 | 2429 | ||
| 2425 | /* If I'm the only one writing to this iclog, sync it to disk */ | 2430 | /* If I'm the only one writing to this iclog, sync it to disk */ |
| 2426 | if (iclog->ic_refcnt == 1) { | 2431 | if (atomic_read(&iclog->ic_refcnt) == 1) { |
| 2427 | spin_unlock(&log->l_icloglock); | 2432 | spin_unlock(&log->l_icloglock); |
| 2428 | if ((error = xlog_state_release_iclog(log, iclog))) | 2433 | if ((error = xlog_state_release_iclog(log, iclog))) |
| 2429 | return error; | 2434 | return error; |
| 2430 | } else { | 2435 | } else { |
| 2431 | iclog->ic_refcnt--; | 2436 | atomic_dec(&iclog->ic_refcnt); |
| 2432 | spin_unlock(&log->l_icloglock); | 2437 | spin_unlock(&log->l_icloglock); |
| 2433 | } | 2438 | } |
| 2434 | goto restart; | 2439 | goto restart; |
| @@ -2792,18 +2797,6 @@ xlog_ungrant_log_space(xlog_t *log, | |||
| 2792 | 2797 | ||
| 2793 | 2798 | ||
| 2794 | /* | 2799 | /* |
| 2795 | * Atomically put back used ticket. | ||
| 2796 | */ | ||
| 2797 | STATIC void | ||
| 2798 | xlog_state_put_ticket(xlog_t *log, | ||
| 2799 | xlog_ticket_t *tic) | ||
| 2800 | { | ||
| 2801 | spin_lock(&log->l_icloglock); | ||
| 2802 | xlog_ticket_put(log, tic); | ||
| 2803 | spin_unlock(&log->l_icloglock); | ||
| 2804 | } /* xlog_state_put_ticket */ | ||
| 2805 | |||
| 2806 | /* | ||
| 2807 | * Flush iclog to disk if this is the last reference to the given iclog and | 2800 | * Flush iclog to disk if this is the last reference to the given iclog and |
| 2808 | * the WANT_SYNC bit is set. | 2801 | * the WANT_SYNC bit is set. |
| 2809 | * | 2802 | * |
| @@ -2813,33 +2806,35 @@ xlog_state_put_ticket(xlog_t *log, | |||
| 2813 | * | 2806 | * |
| 2814 | */ | 2807 | */ |
| 2815 | STATIC int | 2808 | STATIC int |
| 2816 | xlog_state_release_iclog(xlog_t *log, | 2809 | xlog_state_release_iclog( |
| 2817 | xlog_in_core_t *iclog) | 2810 | xlog_t *log, |
| 2811 | xlog_in_core_t *iclog) | ||
| 2818 | { | 2812 | { |
| 2819 | int sync = 0; /* do we sync? */ | 2813 | int sync = 0; /* do we sync? */ |
| 2820 | 2814 | ||
| 2821 | xlog_assign_tail_lsn(log->l_mp); | 2815 | if (iclog->ic_state & XLOG_STATE_IOERROR) |
| 2816 | return XFS_ERROR(EIO); | ||
| 2822 | 2817 | ||
| 2823 | spin_lock(&log->l_icloglock); | 2818 | ASSERT(atomic_read(&iclog->ic_refcnt) > 0); |
| 2819 | if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) | ||
| 2820 | return 0; | ||
| 2824 | 2821 | ||
| 2825 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | 2822 | if (iclog->ic_state & XLOG_STATE_IOERROR) { |
| 2826 | spin_unlock(&log->l_icloglock); | 2823 | spin_unlock(&log->l_icloglock); |
| 2827 | return XFS_ERROR(EIO); | 2824 | return XFS_ERROR(EIO); |
| 2828 | } | 2825 | } |
| 2829 | |||
| 2830 | ASSERT(iclog->ic_refcnt > 0); | ||
| 2831 | ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE || | 2826 | ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE || |
| 2832 | iclog->ic_state == XLOG_STATE_WANT_SYNC); | 2827 | iclog->ic_state == XLOG_STATE_WANT_SYNC); |
| 2833 | 2828 | ||
| 2834 | if (--iclog->ic_refcnt == 0 && | 2829 | if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { |
| 2835 | iclog->ic_state == XLOG_STATE_WANT_SYNC) { | 2830 | /* update tail before writing to iclog */ |
| 2831 | xlog_assign_tail_lsn(log->l_mp); | ||
| 2836 | sync++; | 2832 | sync++; |
| 2837 | iclog->ic_state = XLOG_STATE_SYNCING; | 2833 | iclog->ic_state = XLOG_STATE_SYNCING; |
| 2838 | iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); | 2834 | iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); |
| 2839 | xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); | 2835 | xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); |
| 2840 | /* cycle incremented when incrementing curr_block */ | 2836 | /* cycle incremented when incrementing curr_block */ |
| 2841 | } | 2837 | } |
| 2842 | |||
| 2843 | spin_unlock(&log->l_icloglock); | 2838 | spin_unlock(&log->l_icloglock); |
| 2844 | 2839 | ||
| 2845 | /* | 2840 | /* |
| @@ -2849,11 +2844,9 @@ xlog_state_release_iclog(xlog_t *log, | |||
| 2849 | * this iclog has consistent data, so we ignore IOERROR | 2844 | * this iclog has consistent data, so we ignore IOERROR |
| 2850 | * flags after this point. | 2845 | * flags after this point. |
| 2851 | */ | 2846 | */ |
| 2852 | if (sync) { | 2847 | if (sync) |
| 2853 | return xlog_sync(log, iclog); | 2848 | return xlog_sync(log, iclog); |
| 2854 | } | ||
| 2855 | return 0; | 2849 | return 0; |
| 2856 | |||
| 2857 | } /* xlog_state_release_iclog */ | 2850 | } /* xlog_state_release_iclog */ |
| 2858 | 2851 | ||
| 2859 | 2852 | ||
| @@ -2953,7 +2946,8 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) | |||
| 2953 | * previous iclog and go to sleep. | 2946 | * previous iclog and go to sleep. |
| 2954 | */ | 2947 | */ |
| 2955 | if (iclog->ic_state == XLOG_STATE_DIRTY || | 2948 | if (iclog->ic_state == XLOG_STATE_DIRTY || |
| 2956 | (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) { | 2949 | (atomic_read(&iclog->ic_refcnt) == 0 |
| 2950 | && iclog->ic_offset == 0)) { | ||
| 2957 | iclog = iclog->ic_prev; | 2951 | iclog = iclog->ic_prev; |
| 2958 | if (iclog->ic_state == XLOG_STATE_ACTIVE || | 2952 | if (iclog->ic_state == XLOG_STATE_ACTIVE || |
| 2959 | iclog->ic_state == XLOG_STATE_DIRTY) | 2953 | iclog->ic_state == XLOG_STATE_DIRTY) |
| @@ -2961,14 +2955,14 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) | |||
| 2961 | else | 2955 | else |
| 2962 | goto maybe_sleep; | 2956 | goto maybe_sleep; |
| 2963 | } else { | 2957 | } else { |
| 2964 | if (iclog->ic_refcnt == 0) { | 2958 | if (atomic_read(&iclog->ic_refcnt) == 0) { |
| 2965 | /* We are the only one with access to this | 2959 | /* We are the only one with access to this |
| 2966 | * iclog. Flush it out now. There should | 2960 | * iclog. Flush it out now. There should |
| 2967 | * be a roundoff of zero to show that someone | 2961 | * be a roundoff of zero to show that someone |
| 2968 | * has already taken care of the roundoff from | 2962 | * has already taken care of the roundoff from |
| 2969 | * the previous sync. | 2963 | * the previous sync. |
| 2970 | */ | 2964 | */ |
| 2971 | iclog->ic_refcnt++; | 2965 | atomic_inc(&iclog->ic_refcnt); |
| 2972 | lsn = be64_to_cpu(iclog->ic_header.h_lsn); | 2966 | lsn = be64_to_cpu(iclog->ic_header.h_lsn); |
| 2973 | xlog_state_switch_iclogs(log, iclog, 0); | 2967 | xlog_state_switch_iclogs(log, iclog, 0); |
| 2974 | spin_unlock(&log->l_icloglock); | 2968 | spin_unlock(&log->l_icloglock); |
| @@ -3100,7 +3094,7 @@ try_again: | |||
| 3100 | already_slept = 1; | 3094 | already_slept = 1; |
| 3101 | goto try_again; | 3095 | goto try_again; |
| 3102 | } else { | 3096 | } else { |
| 3103 | iclog->ic_refcnt++; | 3097 | atomic_inc(&iclog->ic_refcnt); |
| 3104 | xlog_state_switch_iclogs(log, iclog, 0); | 3098 | xlog_state_switch_iclogs(log, iclog, 0); |
| 3105 | spin_unlock(&log->l_icloglock); | 3099 | spin_unlock(&log->l_icloglock); |
| 3106 | if (xlog_state_release_iclog(log, iclog)) | 3100 | if (xlog_state_release_iclog(log, iclog)) |
| @@ -3172,92 +3166,19 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) | |||
| 3172 | */ | 3166 | */ |
| 3173 | 3167 | ||
| 3174 | /* | 3168 | /* |
| 3175 | * Algorithm doesn't take into account page size. ;-( | 3169 | * Free a used ticket. |
| 3176 | */ | ||
| 3177 | STATIC void | ||
| 3178 | xlog_state_ticket_alloc(xlog_t *log) | ||
| 3179 | { | ||
| 3180 | xlog_ticket_t *t_list; | ||
| 3181 | xlog_ticket_t *next; | ||
| 3182 | xfs_caddr_t buf; | ||
| 3183 | uint i = (PAGE_SIZE / sizeof(xlog_ticket_t)) - 2; | ||
| 3184 | |||
| 3185 | /* | ||
| 3186 | * The kmem_zalloc may sleep, so we shouldn't be holding the | ||
| 3187 | * global lock. XXXmiken: may want to use zone allocator. | ||
| 3188 | */ | ||
| 3189 | buf = (xfs_caddr_t) kmem_zalloc(PAGE_SIZE, KM_SLEEP); | ||
| 3190 | |||
| 3191 | spin_lock(&log->l_icloglock); | ||
| 3192 | |||
| 3193 | /* Attach 1st ticket to Q, so we can keep track of allocated memory */ | ||
| 3194 | t_list = (xlog_ticket_t *)buf; | ||
| 3195 | t_list->t_next = log->l_unmount_free; | ||
| 3196 | log->l_unmount_free = t_list++; | ||
| 3197 | log->l_ticket_cnt++; | ||
| 3198 | log->l_ticket_tcnt++; | ||
| 3199 | |||
| 3200 | /* Next ticket becomes first ticket attached to ticket free list */ | ||
| 3201 | if (log->l_freelist != NULL) { | ||
| 3202 | ASSERT(log->l_tail != NULL); | ||
| 3203 | log->l_tail->t_next = t_list; | ||
| 3204 | } else { | ||
| 3205 | log->l_freelist = t_list; | ||
| 3206 | } | ||
| 3207 | log->l_ticket_cnt++; | ||
| 3208 | log->l_ticket_tcnt++; | ||
| 3209 | |||
| 3210 | /* Cycle through rest of alloc'ed memory, building up free Q */ | ||
| 3211 | for ( ; i > 0; i--) { | ||
| 3212 | next = t_list + 1; | ||
| 3213 | t_list->t_next = next; | ||
| 3214 | t_list = next; | ||
| 3215 | log->l_ticket_cnt++; | ||
| 3216 | log->l_ticket_tcnt++; | ||
| 3217 | } | ||
| 3218 | t_list->t_next = NULL; | ||
| 3219 | log->l_tail = t_list; | ||
| 3220 | spin_unlock(&log->l_icloglock); | ||
| 3221 | } /* xlog_state_ticket_alloc */ | ||
| 3222 | |||
| 3223 | |||
| 3224 | /* | ||
| 3225 | * Put ticket into free list | ||
| 3226 | * | ||
| 3227 | * Assumption: log lock is held around this call. | ||
| 3228 | */ | 3170 | */ |
| 3229 | STATIC void | 3171 | STATIC void |
| 3230 | xlog_ticket_put(xlog_t *log, | 3172 | xlog_ticket_put(xlog_t *log, |
| 3231 | xlog_ticket_t *ticket) | 3173 | xlog_ticket_t *ticket) |
| 3232 | { | 3174 | { |
| 3233 | sv_destroy(&ticket->t_sema); | 3175 | sv_destroy(&ticket->t_sema); |
| 3234 | 3176 | kmem_zone_free(xfs_log_ticket_zone, ticket); | |
| 3235 | /* | ||
| 3236 | * Don't think caching will make that much difference. It's | ||
| 3237 | * more important to make debug easier. | ||
| 3238 | */ | ||
| 3239 | #if 0 | ||
| 3240 | /* real code will want to use LIFO for caching */ | ||
| 3241 | ticket->t_next = log->l_freelist; | ||
| 3242 | log->l_freelist = ticket; | ||
| 3243 | /* no need to clear fields */ | ||
| 3244 | #else | ||
| 3245 | /* When we debug, it is easier if tickets are cycled */ | ||
| 3246 | ticket->t_next = NULL; | ||
| 3247 | if (log->l_tail) { | ||
| 3248 | log->l_tail->t_next = ticket; | ||
| 3249 | } else { | ||
| 3250 | ASSERT(log->l_freelist == NULL); | ||
| 3251 | log->l_freelist = ticket; | ||
| 3252 | } | ||
| 3253 | log->l_tail = ticket; | ||
| 3254 | #endif /* DEBUG */ | ||
| 3255 | log->l_ticket_cnt++; | ||
| 3256 | } /* xlog_ticket_put */ | 3177 | } /* xlog_ticket_put */ |
| 3257 | 3178 | ||
| 3258 | 3179 | ||
| 3259 | /* | 3180 | /* |
| 3260 | * Grab ticket off freelist or allocation some more | 3181 | * Allocate and initialise a new log ticket. |
| 3261 | */ | 3182 | */ |
| 3262 | STATIC xlog_ticket_t * | 3183 | STATIC xlog_ticket_t * |
| 3263 | xlog_ticket_get(xlog_t *log, | 3184 | xlog_ticket_get(xlog_t *log, |
| @@ -3269,21 +3190,9 @@ xlog_ticket_get(xlog_t *log, | |||
| 3269 | xlog_ticket_t *tic; | 3190 | xlog_ticket_t *tic; |
| 3270 | uint num_headers; | 3191 | uint num_headers; |
| 3271 | 3192 | ||
| 3272 | alloc: | 3193 | tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL); |
| 3273 | if (log->l_freelist == NULL) | 3194 | if (!tic) |
| 3274 | xlog_state_ticket_alloc(log); /* potentially sleep */ | 3195 | return NULL; |
| 3275 | |||
| 3276 | spin_lock(&log->l_icloglock); | ||
| 3277 | if (log->l_freelist == NULL) { | ||
| 3278 | spin_unlock(&log->l_icloglock); | ||
| 3279 | goto alloc; | ||
| 3280 | } | ||
| 3281 | tic = log->l_freelist; | ||
| 3282 | log->l_freelist = tic->t_next; | ||
| 3283 | if (log->l_freelist == NULL) | ||
| 3284 | log->l_tail = NULL; | ||
| 3285 | log->l_ticket_cnt--; | ||
| 3286 | spin_unlock(&log->l_icloglock); | ||
| 3287 | 3196 | ||
| 3288 | /* | 3197 | /* |
| 3289 | * Permanent reservations have up to 'cnt'-1 active log operations | 3198 | * Permanent reservations have up to 'cnt'-1 active log operations |
| @@ -3611,8 +3520,8 @@ xfs_log_force_umount( | |||
| 3611 | * before we mark the filesystem SHUTDOWN and wake | 3520 | * before we mark the filesystem SHUTDOWN and wake |
| 3612 | * everybody up to tell the bad news. | 3521 | * everybody up to tell the bad news. |
| 3613 | */ | 3522 | */ |
| 3614 | spin_lock(&log->l_grant_lock); | ||
| 3615 | spin_lock(&log->l_icloglock); | 3523 | spin_lock(&log->l_icloglock); |
| 3524 | spin_lock(&log->l_grant_lock); | ||
| 3616 | mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; | 3525 | mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; |
| 3617 | XFS_BUF_DONE(mp->m_sb_bp); | 3526 | XFS_BUF_DONE(mp->m_sb_bp); |
| 3618 | /* | 3527 | /* |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 4cdac048df5e..d1d678ecb63e 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
| @@ -142,8 +142,9 @@ int _xfs_log_force(struct xfs_mount *mp, | |||
| 142 | xfs_lsn_t lsn, | 142 | xfs_lsn_t lsn, |
| 143 | uint flags, | 143 | uint flags, |
| 144 | int *log_forced); | 144 | int *log_forced); |
| 145 | #define xfs_log_force(mp, lsn, flags) \ | 145 | void xfs_log_force(struct xfs_mount *mp, |
| 146 | _xfs_log_force(mp, lsn, flags, NULL); | 146 | xfs_lsn_t lsn, |
| 147 | uint flags); | ||
| 147 | int xfs_log_mount(struct xfs_mount *mp, | 148 | int xfs_log_mount(struct xfs_mount *mp, |
| 148 | struct xfs_buftarg *log_target, | 149 | struct xfs_buftarg *log_target, |
| 149 | xfs_daddr_t start_block, | 150 | xfs_daddr_t start_block, |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index c6244cc733c0..8952a392b5f3 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
| @@ -242,7 +242,7 @@ typedef struct xlog_res { | |||
| 242 | 242 | ||
| 243 | typedef struct xlog_ticket { | 243 | typedef struct xlog_ticket { |
| 244 | sv_t t_sema; /* sleep on this semaphore : 20 */ | 244 | sv_t t_sema; /* sleep on this semaphore : 20 */ |
| 245 | struct xlog_ticket *t_next; /* :4|8 */ | 245 | struct xlog_ticket *t_next; /* :4|8 */ |
| 246 | struct xlog_ticket *t_prev; /* :4|8 */ | 246 | struct xlog_ticket *t_prev; /* :4|8 */ |
| 247 | xlog_tid_t t_tid; /* transaction identifier : 4 */ | 247 | xlog_tid_t t_tid; /* transaction identifier : 4 */ |
| 248 | int t_curr_res; /* current reservation in bytes : 4 */ | 248 | int t_curr_res; /* current reservation in bytes : 4 */ |
| @@ -324,6 +324,19 @@ typedef struct xlog_rec_ext_header { | |||
| 324 | * - ic_offset is the current number of bytes written to in this iclog. | 324 | * - ic_offset is the current number of bytes written to in this iclog. |
| 325 | * - ic_refcnt is bumped when someone is writing to the log. | 325 | * - ic_refcnt is bumped when someone is writing to the log. |
| 326 | * - ic_state is the state of the iclog. | 326 | * - ic_state is the state of the iclog. |
| 327 | * | ||
| 328 | * Because of cacheline contention on large machines, we need to separate | ||
| 329 | * various resources onto different cachelines. To start with, make the | ||
| 330 | * structure cacheline aligned. The following fields can be contended on | ||
| 331 | * by independent processes: | ||
| 332 | * | ||
| 333 | * - ic_callback_* | ||
| 334 | * - ic_refcnt | ||
| 335 | * - fields protected by the global l_icloglock | ||
| 336 | * | ||
| 337 | * so we need to ensure that these fields are located in separate cachelines. | ||
| 338 | * We'll put all the read-only and l_icloglock fields in the first cacheline, | ||
| 339 | * and move everything else out to subsequent cachelines. | ||
| 327 | */ | 340 | */ |
| 328 | typedef struct xlog_iclog_fields { | 341 | typedef struct xlog_iclog_fields { |
| 329 | sv_t ic_forcesema; | 342 | sv_t ic_forcesema; |
| @@ -332,17 +345,22 @@ typedef struct xlog_iclog_fields { | |||
| 332 | struct xlog_in_core *ic_prev; | 345 | struct xlog_in_core *ic_prev; |
| 333 | struct xfs_buf *ic_bp; | 346 | struct xfs_buf *ic_bp; |
| 334 | struct log *ic_log; | 347 | struct log *ic_log; |
| 335 | xfs_log_callback_t *ic_callback; | ||
| 336 | xfs_log_callback_t **ic_callback_tail; | ||
| 337 | #ifdef XFS_LOG_TRACE | ||
| 338 | struct ktrace *ic_trace; | ||
| 339 | #endif | ||
| 340 | int ic_size; | 348 | int ic_size; |
| 341 | int ic_offset; | 349 | int ic_offset; |
| 342 | int ic_refcnt; | ||
| 343 | int ic_bwritecnt; | 350 | int ic_bwritecnt; |
| 344 | ushort_t ic_state; | 351 | ushort_t ic_state; |
| 345 | char *ic_datap; /* pointer to iclog data */ | 352 | char *ic_datap; /* pointer to iclog data */ |
| 353 | #ifdef XFS_LOG_TRACE | ||
| 354 | struct ktrace *ic_trace; | ||
| 355 | #endif | ||
| 356 | |||
| 357 | /* Callback structures need their own cacheline */ | ||
| 358 | spinlock_t ic_callback_lock ____cacheline_aligned_in_smp; | ||
| 359 | xfs_log_callback_t *ic_callback; | ||
| 360 | xfs_log_callback_t **ic_callback_tail; | ||
| 361 | |||
| 362 | /* reference counts need their own cacheline */ | ||
| 363 | atomic_t ic_refcnt ____cacheline_aligned_in_smp; | ||
| 346 | } xlog_iclog_fields_t; | 364 | } xlog_iclog_fields_t; |
| 347 | 365 | ||
| 348 | typedef union xlog_in_core2 { | 366 | typedef union xlog_in_core2 { |
| @@ -366,6 +384,7 @@ typedef struct xlog_in_core { | |||
| 366 | #define ic_bp hic_fields.ic_bp | 384 | #define ic_bp hic_fields.ic_bp |
| 367 | #define ic_log hic_fields.ic_log | 385 | #define ic_log hic_fields.ic_log |
| 368 | #define ic_callback hic_fields.ic_callback | 386 | #define ic_callback hic_fields.ic_callback |
| 387 | #define ic_callback_lock hic_fields.ic_callback_lock | ||
| 369 | #define ic_callback_tail hic_fields.ic_callback_tail | 388 | #define ic_callback_tail hic_fields.ic_callback_tail |
| 370 | #define ic_trace hic_fields.ic_trace | 389 | #define ic_trace hic_fields.ic_trace |
| 371 | #define ic_size hic_fields.ic_size | 390 | #define ic_size hic_fields.ic_size |
| @@ -383,43 +402,46 @@ typedef struct xlog_in_core { | |||
| 383 | * that round off problems won't occur when releasing partial reservations. | 402 | * that round off problems won't occur when releasing partial reservations. |
| 384 | */ | 403 | */ |
| 385 | typedef struct log { | 404 | typedef struct log { |
| 405 | /* The following fields don't need locking */ | ||
| 406 | struct xfs_mount *l_mp; /* mount point */ | ||
| 407 | struct xfs_buf *l_xbuf; /* extra buffer for log | ||
| 408 | * wrapping */ | ||
| 409 | struct xfs_buftarg *l_targ; /* buftarg of log */ | ||
| 410 | uint l_flags; | ||
| 411 | uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ | ||
| 412 | struct xfs_buf_cancel **l_buf_cancel_table; | ||
| 413 | int l_iclog_hsize; /* size of iclog header */ | ||
| 414 | int l_iclog_heads; /* # of iclog header sectors */ | ||
| 415 | uint l_sectbb_log; /* log2 of sector size in BBs */ | ||
| 416 | uint l_sectbb_mask; /* sector size (in BBs) | ||
| 417 | * alignment mask */ | ||
| 418 | int l_iclog_size; /* size of log in bytes */ | ||
| 419 | int l_iclog_size_log; /* log power size of log */ | ||
| 420 | int l_iclog_bufs; /* number of iclog buffers */ | ||
| 421 | xfs_daddr_t l_logBBstart; /* start block of log */ | ||
| 422 | int l_logsize; /* size of log in bytes */ | ||
| 423 | int l_logBBsize; /* size of log in BB chunks */ | ||
| 424 | |||
| 386 | /* The following block of fields are changed while holding icloglock */ | 425 | /* The following block of fields are changed while holding icloglock */ |
| 387 | sema_t l_flushsema; /* iclog flushing semaphore */ | 426 | sema_t l_flushsema ____cacheline_aligned_in_smp; |
| 427 | /* iclog flushing semaphore */ | ||
| 388 | int l_flushcnt; /* # of procs waiting on this | 428 | int l_flushcnt; /* # of procs waiting on this |
| 389 | * sema */ | 429 | * sema */ |
| 390 | int l_ticket_cnt; /* free ticket count */ | ||
| 391 | int l_ticket_tcnt; /* total ticket count */ | ||
| 392 | int l_covered_state;/* state of "covering disk | 430 | int l_covered_state;/* state of "covering disk |
| 393 | * log entries" */ | 431 | * log entries" */ |
| 394 | xlog_ticket_t *l_freelist; /* free list of tickets */ | ||
| 395 | xlog_ticket_t *l_unmount_free;/* kmem_free these addresses */ | ||
| 396 | xlog_ticket_t *l_tail; /* free list of tickets */ | ||
| 397 | xlog_in_core_t *l_iclog; /* head log queue */ | 432 | xlog_in_core_t *l_iclog; /* head log queue */ |
| 398 | spinlock_t l_icloglock; /* grab to change iclog state */ | 433 | spinlock_t l_icloglock; /* grab to change iclog state */ |
| 399 | xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed | 434 | xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed |
| 400 | * buffers */ | 435 | * buffers */ |
| 401 | xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */ | 436 | xfs_lsn_t l_last_sync_lsn;/* lsn of last LR on disk */ |
| 402 | struct xfs_mount *l_mp; /* mount point */ | ||
| 403 | struct xfs_buf *l_xbuf; /* extra buffer for log | ||
| 404 | * wrapping */ | ||
| 405 | struct xfs_buftarg *l_targ; /* buftarg of log */ | ||
| 406 | xfs_daddr_t l_logBBstart; /* start block of log */ | ||
| 407 | int l_logsize; /* size of log in bytes */ | ||
| 408 | int l_logBBsize; /* size of log in BB chunks */ | ||
| 409 | int l_curr_cycle; /* Cycle number of log writes */ | 437 | int l_curr_cycle; /* Cycle number of log writes */ |
| 410 | int l_prev_cycle; /* Cycle number before last | 438 | int l_prev_cycle; /* Cycle number before last |
| 411 | * block increment */ | 439 | * block increment */ |
| 412 | int l_curr_block; /* current logical log block */ | 440 | int l_curr_block; /* current logical log block */ |
| 413 | int l_prev_block; /* previous logical log block */ | 441 | int l_prev_block; /* previous logical log block */ |
| 414 | int l_iclog_size; /* size of log in bytes */ | ||
| 415 | int l_iclog_size_log; /* log power size of log */ | ||
| 416 | int l_iclog_bufs; /* number of iclog buffers */ | ||
| 417 | |||
| 418 | /* The following field are used for debugging; need to hold icloglock */ | ||
| 419 | char *l_iclog_bak[XLOG_MAX_ICLOGS]; | ||
| 420 | 442 | ||
| 421 | /* The following block of fields are changed while holding grant_lock */ | 443 | /* The following block of fields are changed while holding grant_lock */ |
| 422 | spinlock_t l_grant_lock; | 444 | spinlock_t l_grant_lock ____cacheline_aligned_in_smp; |
| 423 | xlog_ticket_t *l_reserve_headq; | 445 | xlog_ticket_t *l_reserve_headq; |
| 424 | xlog_ticket_t *l_write_headq; | 446 | xlog_ticket_t *l_write_headq; |
| 425 | int l_grant_reserve_cycle; | 447 | int l_grant_reserve_cycle; |
| @@ -427,19 +449,16 @@ typedef struct log { | |||
| 427 | int l_grant_write_cycle; | 449 | int l_grant_write_cycle; |
| 428 | int l_grant_write_bytes; | 450 | int l_grant_write_bytes; |
| 429 | 451 | ||
| 430 | /* The following fields don't need locking */ | ||
| 431 | #ifdef XFS_LOG_TRACE | 452 | #ifdef XFS_LOG_TRACE |
| 432 | struct ktrace *l_trace; | 453 | struct ktrace *l_trace; |
| 433 | struct ktrace *l_grant_trace; | 454 | struct ktrace *l_grant_trace; |
| 434 | #endif | 455 | #endif |
| 435 | uint l_flags; | 456 | |
| 436 | uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ | 457 | /* The following field are used for debugging; need to hold icloglock */ |
| 437 | struct xfs_buf_cancel **l_buf_cancel_table; | 458 | #ifdef DEBUG |
| 438 | int l_iclog_hsize; /* size of iclog header */ | 459 | char *l_iclog_bak[XLOG_MAX_ICLOGS]; |
| 439 | int l_iclog_heads; /* # of iclog header sectors */ | 460 | #endif |
| 440 | uint l_sectbb_log; /* log2 of sector size in BBs */ | 461 | |
| 441 | uint l_sectbb_mask; /* sector size (in BBs) | ||
| 442 | * alignment mask */ | ||
| 443 | } xlog_t; | 462 | } xlog_t; |
| 444 | 463 | ||
| 445 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) | 464 | #define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR) |
| @@ -459,6 +478,8 @@ extern struct xfs_buf *xlog_get_bp(xlog_t *, int); | |||
| 459 | extern void xlog_put_bp(struct xfs_buf *); | 478 | extern void xlog_put_bp(struct xfs_buf *); |
| 460 | extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); | 479 | extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); |
| 461 | 480 | ||
| 481 | extern kmem_zone_t *xfs_log_ticket_zone; | ||
| 482 | |||
| 462 | /* iclog tracing */ | 483 | /* iclog tracing */ |
| 463 | #define XLOG_TRACE_GRAB_FLUSH 1 | 484 | #define XLOG_TRACE_GRAB_FLUSH 1 |
| 464 | #define XLOG_TRACE_REL_FLUSH 2 | 485 | #define XLOG_TRACE_REL_FLUSH 2 |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b2b70eba282c..e65ab4af0955 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
| @@ -46,6 +46,7 @@ | |||
| 46 | #include "xfs_trans_priv.h" | 46 | #include "xfs_trans_priv.h" |
| 47 | #include "xfs_quota.h" | 47 | #include "xfs_quota.h" |
| 48 | #include "xfs_rw.h" | 48 | #include "xfs_rw.h" |
| 49 | #include "xfs_utils.h" | ||
| 49 | 50 | ||
| 50 | STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); | 51 | STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *); |
| 51 | STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); | 52 | STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t); |
| @@ -120,7 +121,8 @@ xlog_bread( | |||
| 120 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); | 121 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); |
| 121 | 122 | ||
| 122 | xfsbdstrat(log->l_mp, bp); | 123 | xfsbdstrat(log->l_mp, bp); |
| 123 | if ((error = xfs_iowait(bp))) | 124 | error = xfs_iowait(bp); |
| 125 | if (error) | ||
| 124 | xfs_ioerror_alert("xlog_bread", log->l_mp, | 126 | xfs_ioerror_alert("xlog_bread", log->l_mp, |
| 125 | bp, XFS_BUF_ADDR(bp)); | 127 | bp, XFS_BUF_ADDR(bp)); |
| 126 | return error; | 128 | return error; |
| @@ -191,7 +193,7 @@ xlog_header_check_dump( | |||
| 191 | { | 193 | { |
| 192 | int b; | 194 | int b; |
| 193 | 195 | ||
| 194 | cmn_err(CE_DEBUG, "%s: SB : uuid = ", __FUNCTION__); | 196 | cmn_err(CE_DEBUG, "%s: SB : uuid = ", __func__); |
| 195 | for (b = 0; b < 16; b++) | 197 | for (b = 0; b < 16; b++) |
| 196 | cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]); | 198 | cmn_err(CE_DEBUG, "%02x", ((uchar_t *)&mp->m_sb.sb_uuid)[b]); |
| 197 | cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); | 199 | cmn_err(CE_DEBUG, ", fmt = %d\n", XLOG_FMT); |
| @@ -1160,10 +1162,14 @@ xlog_write_log_records( | |||
| 1160 | if (j == 0 && (start_block + endcount > ealign)) { | 1162 | if (j == 0 && (start_block + endcount > ealign)) { |
| 1161 | offset = XFS_BUF_PTR(bp); | 1163 | offset = XFS_BUF_PTR(bp); |
| 1162 | balign = BBTOB(ealign - start_block); | 1164 | balign = BBTOB(ealign - start_block); |
| 1163 | XFS_BUF_SET_PTR(bp, offset + balign, BBTOB(sectbb)); | 1165 | error = XFS_BUF_SET_PTR(bp, offset + balign, |
| 1164 | if ((error = xlog_bread(log, ealign, sectbb, bp))) | 1166 | BBTOB(sectbb)); |
| 1167 | if (!error) | ||
| 1168 | error = xlog_bread(log, ealign, sectbb, bp); | ||
| 1169 | if (!error) | ||
| 1170 | error = XFS_BUF_SET_PTR(bp, offset, bufblks); | ||
| 1171 | if (error) | ||
| 1165 | break; | 1172 | break; |
| 1166 | XFS_BUF_SET_PTR(bp, offset, bufblks); | ||
| 1167 | } | 1173 | } |
| 1168 | 1174 | ||
| 1169 | offset = xlog_align(log, start_block, endcount, bp); | 1175 | offset = xlog_align(log, start_block, endcount, bp); |
| @@ -2280,7 +2286,9 @@ xlog_recover_do_inode_trans( | |||
| 2280 | * invalidate the buffer when we write it out below. | 2286 | * invalidate the buffer when we write it out below. |
| 2281 | */ | 2287 | */ |
| 2282 | imap.im_blkno = 0; | 2288 | imap.im_blkno = 0; |
| 2283 | xfs_imap(log->l_mp, NULL, ino, &imap, 0); | 2289 | error = xfs_imap(log->l_mp, NULL, ino, &imap, 0); |
| 2290 | if (error) | ||
| 2291 | goto error; | ||
| 2284 | } | 2292 | } |
| 2285 | 2293 | ||
| 2286 | /* | 2294 | /* |
| @@ -2964,7 +2972,7 @@ xlog_recover_process_data( | |||
| 2964 | * Process an extent free intent item that was recovered from | 2972 | * Process an extent free intent item that was recovered from |
| 2965 | * the log. We need to free the extents that it describes. | 2973 | * the log. We need to free the extents that it describes. |
| 2966 | */ | 2974 | */ |
| 2967 | STATIC void | 2975 | STATIC int |
| 2968 | xlog_recover_process_efi( | 2976 | xlog_recover_process_efi( |
| 2969 | xfs_mount_t *mp, | 2977 | xfs_mount_t *mp, |
| 2970 | xfs_efi_log_item_t *efip) | 2978 | xfs_efi_log_item_t *efip) |
| @@ -2972,6 +2980,7 @@ xlog_recover_process_efi( | |||
| 2972 | xfs_efd_log_item_t *efdp; | 2980 | xfs_efd_log_item_t *efdp; |
| 2973 | xfs_trans_t *tp; | 2981 | xfs_trans_t *tp; |
| 2974 | int i; | 2982 | int i; |
| 2983 | int error = 0; | ||
| 2975 | xfs_extent_t *extp; | 2984 | xfs_extent_t *extp; |
| 2976 | xfs_fsblock_t startblock_fsb; | 2985 | xfs_fsblock_t startblock_fsb; |
| 2977 | 2986 | ||
| @@ -2995,23 +3004,32 @@ xlog_recover_process_efi( | |||
| 2995 | * free the memory associated with it. | 3004 | * free the memory associated with it. |
| 2996 | */ | 3005 | */ |
| 2997 | xfs_efi_release(efip, efip->efi_format.efi_nextents); | 3006 | xfs_efi_release(efip, efip->efi_format.efi_nextents); |
| 2998 | return; | 3007 | return XFS_ERROR(EIO); |
| 2999 | } | 3008 | } |
| 3000 | } | 3009 | } |
| 3001 | 3010 | ||
| 3002 | tp = xfs_trans_alloc(mp, 0); | 3011 | tp = xfs_trans_alloc(mp, 0); |
| 3003 | xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); | 3012 | error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); |
| 3013 | if (error) | ||
| 3014 | goto abort_error; | ||
| 3004 | efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); | 3015 | efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); |
| 3005 | 3016 | ||
| 3006 | for (i = 0; i < efip->efi_format.efi_nextents; i++) { | 3017 | for (i = 0; i < efip->efi_format.efi_nextents; i++) { |
| 3007 | extp = &(efip->efi_format.efi_extents[i]); | 3018 | extp = &(efip->efi_format.efi_extents[i]); |
| 3008 | xfs_free_extent(tp, extp->ext_start, extp->ext_len); | 3019 | error = xfs_free_extent(tp, extp->ext_start, extp->ext_len); |
| 3020 | if (error) | ||
| 3021 | goto abort_error; | ||
| 3009 | xfs_trans_log_efd_extent(tp, efdp, extp->ext_start, | 3022 | xfs_trans_log_efd_extent(tp, efdp, extp->ext_start, |
| 3010 | extp->ext_len); | 3023 | extp->ext_len); |
| 3011 | } | 3024 | } |
| 3012 | 3025 | ||
| 3013 | efip->efi_flags |= XFS_EFI_RECOVERED; | 3026 | efip->efi_flags |= XFS_EFI_RECOVERED; |
| 3014 | xfs_trans_commit(tp, 0); | 3027 | error = xfs_trans_commit(tp, 0); |
| 3028 | return error; | ||
| 3029 | |||
| 3030 | abort_error: | ||
| 3031 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); | ||
| 3032 | return error; | ||
| 3015 | } | 3033 | } |
| 3016 | 3034 | ||
| 3017 | /* | 3035 | /* |
| @@ -3059,7 +3077,7 @@ xlog_recover_check_ail( | |||
| 3059 | * everything already in the AIL, we stop processing as soon as | 3077 | * everything already in the AIL, we stop processing as soon as |
| 3060 | * we see something other than an EFI in the AIL. | 3078 | * we see something other than an EFI in the AIL. |
| 3061 | */ | 3079 | */ |
| 3062 | STATIC void | 3080 | STATIC int |
| 3063 | xlog_recover_process_efis( | 3081 | xlog_recover_process_efis( |
| 3064 | xlog_t *log) | 3082 | xlog_t *log) |
| 3065 | { | 3083 | { |
| @@ -3067,6 +3085,7 @@ xlog_recover_process_efis( | |||
| 3067 | xfs_efi_log_item_t *efip; | 3085 | xfs_efi_log_item_t *efip; |
| 3068 | int gen; | 3086 | int gen; |
| 3069 | xfs_mount_t *mp; | 3087 | xfs_mount_t *mp; |
| 3088 | int error = 0; | ||
| 3070 | 3089 | ||
| 3071 | mp = log->l_mp; | 3090 | mp = log->l_mp; |
| 3072 | spin_lock(&mp->m_ail_lock); | 3091 | spin_lock(&mp->m_ail_lock); |
| @@ -3091,11 +3110,14 @@ xlog_recover_process_efis( | |||
| 3091 | } | 3110 | } |
| 3092 | 3111 | ||
| 3093 | spin_unlock(&mp->m_ail_lock); | 3112 | spin_unlock(&mp->m_ail_lock); |
| 3094 | xlog_recover_process_efi(mp, efip); | 3113 | error = xlog_recover_process_efi(mp, efip); |
| 3114 | if (error) | ||
| 3115 | return error; | ||
| 3095 | spin_lock(&mp->m_ail_lock); | 3116 | spin_lock(&mp->m_ail_lock); |
| 3096 | lip = xfs_trans_next_ail(mp, lip, &gen, NULL); | 3117 | lip = xfs_trans_next_ail(mp, lip, &gen, NULL); |
| 3097 | } | 3118 | } |
| 3098 | spin_unlock(&mp->m_ail_lock); | 3119 | spin_unlock(&mp->m_ail_lock); |
| 3120 | return error; | ||
| 3099 | } | 3121 | } |
| 3100 | 3122 | ||
| 3101 | /* | 3123 | /* |
| @@ -3115,21 +3137,18 @@ xlog_recover_clear_agi_bucket( | |||
| 3115 | int error; | 3137 | int error; |
| 3116 | 3138 | ||
| 3117 | tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); | 3139 | tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); |
| 3118 | xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0); | 3140 | error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), 0, 0, 0); |
| 3119 | 3141 | if (!error) | |
| 3120 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, | 3142 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, |
| 3121 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), | 3143 | XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), |
| 3122 | XFS_FSS_TO_BB(mp, 1), 0, &agibp); | 3144 | XFS_FSS_TO_BB(mp, 1), 0, &agibp); |
| 3123 | if (error) { | 3145 | if (error) |
| 3124 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); | 3146 | goto out_abort; |
| 3125 | return; | ||
| 3126 | } | ||
| 3127 | 3147 | ||
| 3148 | error = EINVAL; | ||
| 3128 | agi = XFS_BUF_TO_AGI(agibp); | 3149 | agi = XFS_BUF_TO_AGI(agibp); |
| 3129 | if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC) { | 3150 | if (be32_to_cpu(agi->agi_magicnum) != XFS_AGI_MAGIC) |
| 3130 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); | 3151 | goto out_abort; |
| 3131 | return; | ||
| 3132 | } | ||
| 3133 | 3152 | ||
| 3134 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); | 3153 | agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); |
| 3135 | offset = offsetof(xfs_agi_t, agi_unlinked) + | 3154 | offset = offsetof(xfs_agi_t, agi_unlinked) + |
| @@ -3137,7 +3156,17 @@ xlog_recover_clear_agi_bucket( | |||
| 3137 | xfs_trans_log_buf(tp, agibp, offset, | 3156 | xfs_trans_log_buf(tp, agibp, offset, |
| 3138 | (offset + sizeof(xfs_agino_t) - 1)); | 3157 | (offset + sizeof(xfs_agino_t) - 1)); |
| 3139 | 3158 | ||
| 3140 | (void) xfs_trans_commit(tp, 0); | 3159 | error = xfs_trans_commit(tp, 0); |
| 3160 | if (error) | ||
| 3161 | goto out_error; | ||
| 3162 | return; | ||
| 3163 | |||
| 3164 | out_abort: | ||
| 3165 | xfs_trans_cancel(tp, XFS_TRANS_ABORT); | ||
| 3166 | out_error: | ||
| 3167 | xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: " | ||
| 3168 | "failed to clear agi %d. Continuing.", agno); | ||
| 3169 | return; | ||
| 3141 | } | 3170 | } |
| 3142 | 3171 | ||
| 3143 | /* | 3172 | /* |
| @@ -3214,7 +3243,8 @@ xlog_recover_process_iunlinks( | |||
| 3214 | * next inode in the bucket. | 3243 | * next inode in the bucket. |
| 3215 | */ | 3244 | */ |
| 3216 | error = xfs_itobp(mp, NULL, ip, &dip, | 3245 | error = xfs_itobp(mp, NULL, ip, &dip, |
| 3217 | &ibp, 0, 0); | 3246 | &ibp, 0, 0, |
| 3247 | XFS_BUF_LOCK); | ||
| 3218 | ASSERT(error || (dip != NULL)); | 3248 | ASSERT(error || (dip != NULL)); |
| 3219 | } | 3249 | } |
| 3220 | 3250 | ||
| @@ -3247,7 +3277,7 @@ xlog_recover_process_iunlinks( | |||
| 3247 | if (ip->i_d.di_mode == 0) | 3277 | if (ip->i_d.di_mode == 0) |
| 3248 | xfs_iput_new(ip, 0); | 3278 | xfs_iput_new(ip, 0); |
| 3249 | else | 3279 | else |
| 3250 | VN_RELE(XFS_ITOV(ip)); | 3280 | IRELE(ip); |
| 3251 | } else { | 3281 | } else { |
| 3252 | /* | 3282 | /* |
| 3253 | * We can't read in the inode | 3283 | * We can't read in the inode |
| @@ -3445,7 +3475,7 @@ xlog_valid_rec_header( | |||
| 3445 | (!rhead->h_version || | 3475 | (!rhead->h_version || |
| 3446 | (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { | 3476 | (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { |
| 3447 | xlog_warn("XFS: %s: unrecognised log version (%d).", | 3477 | xlog_warn("XFS: %s: unrecognised log version (%d).", |
| 3448 | __FUNCTION__, be32_to_cpu(rhead->h_version)); | 3478 | __func__, be32_to_cpu(rhead->h_version)); |
| 3449 | return XFS_ERROR(EIO); | 3479 | return XFS_ERROR(EIO); |
| 3450 | } | 3480 | } |
| 3451 | 3481 | ||
| @@ -3604,15 +3634,19 @@ xlog_do_recovery_pass( | |||
| 3604 | * _first_, then the log start (LR header end) | 3634 | * _first_, then the log start (LR header end) |
| 3605 | * - order is important. | 3635 | * - order is important. |
| 3606 | */ | 3636 | */ |
| 3637 | wrapped_hblks = hblks - split_hblks; | ||
| 3607 | bufaddr = XFS_BUF_PTR(hbp); | 3638 | bufaddr = XFS_BUF_PTR(hbp); |
| 3608 | XFS_BUF_SET_PTR(hbp, | 3639 | error = XFS_BUF_SET_PTR(hbp, |
| 3609 | bufaddr + BBTOB(split_hblks), | 3640 | bufaddr + BBTOB(split_hblks), |
| 3610 | BBTOB(hblks - split_hblks)); | 3641 | BBTOB(hblks - split_hblks)); |
| 3611 | wrapped_hblks = hblks - split_hblks; | 3642 | if (!error) |
| 3612 | error = xlog_bread(log, 0, wrapped_hblks, hbp); | 3643 | error = xlog_bread(log, 0, |
| 3644 | wrapped_hblks, hbp); | ||
| 3645 | if (!error) | ||
| 3646 | error = XFS_BUF_SET_PTR(hbp, bufaddr, | ||
| 3647 | BBTOB(hblks)); | ||
| 3613 | if (error) | 3648 | if (error) |
| 3614 | goto bread_err2; | 3649 | goto bread_err2; |
| 3615 | XFS_BUF_SET_PTR(hbp, bufaddr, BBTOB(hblks)); | ||
| 3616 | if (!offset) | 3650 | if (!offset) |
| 3617 | offset = xlog_align(log, 0, | 3651 | offset = xlog_align(log, 0, |
| 3618 | wrapped_hblks, hbp); | 3652 | wrapped_hblks, hbp); |
| @@ -3664,13 +3698,18 @@ xlog_do_recovery_pass( | |||
| 3664 | * - order is important. | 3698 | * - order is important. |
| 3665 | */ | 3699 | */ |
| 3666 | bufaddr = XFS_BUF_PTR(dbp); | 3700 | bufaddr = XFS_BUF_PTR(dbp); |
| 3667 | XFS_BUF_SET_PTR(dbp, | 3701 | error = XFS_BUF_SET_PTR(dbp, |
| 3668 | bufaddr + BBTOB(split_bblks), | 3702 | bufaddr + BBTOB(split_bblks), |
| 3669 | BBTOB(bblks - split_bblks)); | 3703 | BBTOB(bblks - split_bblks)); |
| 3670 | if ((error = xlog_bread(log, wrapped_hblks, | 3704 | if (!error) |
| 3671 | bblks - split_bblks, dbp))) | 3705 | error = xlog_bread(log, wrapped_hblks, |
| 3706 | bblks - split_bblks, | ||
| 3707 | dbp); | ||
| 3708 | if (!error) | ||
| 3709 | error = XFS_BUF_SET_PTR(dbp, bufaddr, | ||
| 3710 | h_size); | ||
| 3711 | if (error) | ||
| 3672 | goto bread_err2; | 3712 | goto bread_err2; |
| 3673 | XFS_BUF_SET_PTR(dbp, bufaddr, h_size); | ||
| 3674 | if (!offset) | 3713 | if (!offset) |
| 3675 | offset = xlog_align(log, wrapped_hblks, | 3714 | offset = xlog_align(log, wrapped_hblks, |
| 3676 | bblks - split_bblks, dbp); | 3715 | bblks - split_bblks, dbp); |
| @@ -3826,7 +3865,8 @@ xlog_do_recover( | |||
| 3826 | XFS_BUF_READ(bp); | 3865 | XFS_BUF_READ(bp); |
| 3827 | XFS_BUF_UNASYNC(bp); | 3866 | XFS_BUF_UNASYNC(bp); |
| 3828 | xfsbdstrat(log->l_mp, bp); | 3867 | xfsbdstrat(log->l_mp, bp); |
| 3829 | if ((error = xfs_iowait(bp))) { | 3868 | error = xfs_iowait(bp); |
| 3869 | if (error) { | ||
| 3830 | xfs_ioerror_alert("xlog_do_recover", | 3870 | xfs_ioerror_alert("xlog_do_recover", |
| 3831 | log->l_mp, bp, XFS_BUF_ADDR(bp)); | 3871 | log->l_mp, bp, XFS_BUF_ADDR(bp)); |
| 3832 | ASSERT(0); | 3872 | ASSERT(0); |
| @@ -3917,7 +3957,14 @@ xlog_recover_finish( | |||
| 3917 | * rather than accepting new requests. | 3957 | * rather than accepting new requests. |
| 3918 | */ | 3958 | */ |
| 3919 | if (log->l_flags & XLOG_RECOVERY_NEEDED) { | 3959 | if (log->l_flags & XLOG_RECOVERY_NEEDED) { |
| 3920 | xlog_recover_process_efis(log); | 3960 | int error; |
| 3961 | error = xlog_recover_process_efis(log); | ||
| 3962 | if (error) { | ||
| 3963 | cmn_err(CE_ALERT, | ||
| 3964 | "Failed to recover EFIs on filesystem: %s", | ||
| 3965 | log->l_mp->m_fsname); | ||
| 3966 | return error; | ||
| 3967 | } | ||
| 3921 | /* | 3968 | /* |
| 3922 | * Sync the log to get all the EFIs out of the AIL. | 3969 | * Sync the log to get all the EFIs out of the AIL. |
| 3923 | * This isn't absolutely necessary, but it helps in | 3970 | * This isn't absolutely necessary, but it helps in |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 8ed164eb9544..2fec452afbcc 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
| @@ -43,8 +43,9 @@ | |||
| 43 | #include "xfs_rw.h" | 43 | #include "xfs_rw.h" |
| 44 | #include "xfs_quota.h" | 44 | #include "xfs_quota.h" |
| 45 | #include "xfs_fsops.h" | 45 | #include "xfs_fsops.h" |
| 46 | #include "xfs_utils.h" | ||
| 46 | 47 | ||
| 47 | STATIC void xfs_mount_log_sb(xfs_mount_t *, __int64_t); | 48 | STATIC int xfs_mount_log_sb(xfs_mount_t *, __int64_t); |
| 48 | STATIC int xfs_uuid_mount(xfs_mount_t *); | 49 | STATIC int xfs_uuid_mount(xfs_mount_t *); |
| 49 | STATIC void xfs_uuid_unmount(xfs_mount_t *mp); | 50 | STATIC void xfs_uuid_unmount(xfs_mount_t *mp); |
| 50 | STATIC void xfs_unmountfs_wait(xfs_mount_t *); | 51 | STATIC void xfs_unmountfs_wait(xfs_mount_t *); |
| @@ -57,7 +58,7 @@ STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, | |||
| 57 | STATIC void xfs_icsb_sync_counters(xfs_mount_t *); | 58 | STATIC void xfs_icsb_sync_counters(xfs_mount_t *); |
| 58 | STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, | 59 | STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, |
| 59 | int64_t, int); | 60 | int64_t, int); |
| 60 | STATIC int xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); | 61 | STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); |
| 61 | 62 | ||
| 62 | #else | 63 | #else |
| 63 | 64 | ||
| @@ -956,7 +957,6 @@ xfs_mountfs( | |||
| 956 | { | 957 | { |
| 957 | xfs_sb_t *sbp = &(mp->m_sb); | 958 | xfs_sb_t *sbp = &(mp->m_sb); |
| 958 | xfs_inode_t *rip; | 959 | xfs_inode_t *rip; |
| 959 | bhv_vnode_t *rvp = NULL; | ||
| 960 | __uint64_t resblks; | 960 | __uint64_t resblks; |
| 961 | __int64_t update_flags = 0LL; | 961 | __int64_t update_flags = 0LL; |
| 962 | uint quotamount, quotaflags; | 962 | uint quotamount, quotaflags; |
| @@ -964,11 +964,6 @@ xfs_mountfs( | |||
| 964 | int uuid_mounted = 0; | 964 | int uuid_mounted = 0; |
| 965 | int error = 0; | 965 | int error = 0; |
| 966 | 966 | ||
| 967 | if (mp->m_sb_bp == NULL) { | ||
| 968 | error = xfs_readsb(mp, mfsi_flags); | ||
| 969 | if (error) | ||
| 970 | return error; | ||
| 971 | } | ||
| 972 | xfs_mount_common(mp, sbp); | 967 | xfs_mount_common(mp, sbp); |
| 973 | 968 | ||
| 974 | /* | 969 | /* |
| @@ -1163,7 +1158,6 @@ xfs_mountfs( | |||
| 1163 | } | 1158 | } |
| 1164 | 1159 | ||
| 1165 | ASSERT(rip != NULL); | 1160 | ASSERT(rip != NULL); |
| 1166 | rvp = XFS_ITOV(rip); | ||
| 1167 | 1161 | ||
| 1168 | if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { | 1162 | if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { |
| 1169 | cmn_err(CE_WARN, "XFS: corrupted root inode"); | 1163 | cmn_err(CE_WARN, "XFS: corrupted root inode"); |
| @@ -1195,8 +1189,13 @@ xfs_mountfs( | |||
| 1195 | /* | 1189 | /* |
| 1196 | * If fs is not mounted readonly, then update the superblock changes. | 1190 | * If fs is not mounted readonly, then update the superblock changes. |
| 1197 | */ | 1191 | */ |
| 1198 | if (update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) | 1192 | if (update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { |
| 1199 | xfs_mount_log_sb(mp, update_flags); | 1193 | error = xfs_mount_log_sb(mp, update_flags); |
| 1194 | if (error) { | ||
| 1195 | cmn_err(CE_WARN, "XFS: failed to write sb changes"); | ||
| 1196 | goto error4; | ||
| 1197 | } | ||
| 1198 | } | ||
| 1200 | 1199 | ||
| 1201 | /* | 1200 | /* |
| 1202 | * Initialise the XFS quota management subsystem for this mount | 1201 | * Initialise the XFS quota management subsystem for this mount |
| @@ -1233,12 +1232,15 @@ xfs_mountfs( | |||
| 1233 | * | 1232 | * |
| 1234 | * We default to 5% or 1024 fsbs of space reserved, whichever is smaller. | 1233 | * We default to 5% or 1024 fsbs of space reserved, whichever is smaller. |
| 1235 | * This may drive us straight to ENOSPC on mount, but that implies | 1234 | * This may drive us straight to ENOSPC on mount, but that implies |
| 1236 | * we were already there on the last unmount. | 1235 | * we were already there on the last unmount. Warn if this occurs. |
| 1237 | */ | 1236 | */ |
| 1238 | resblks = mp->m_sb.sb_dblocks; | 1237 | resblks = mp->m_sb.sb_dblocks; |
| 1239 | do_div(resblks, 20); | 1238 | do_div(resblks, 20); |
| 1240 | resblks = min_t(__uint64_t, resblks, 1024); | 1239 | resblks = min_t(__uint64_t, resblks, 1024); |
| 1241 | xfs_reserve_blocks(mp, &resblks, NULL); | 1240 | error = xfs_reserve_blocks(mp, &resblks, NULL); |
| 1241 | if (error) | ||
| 1242 | cmn_err(CE_WARN, "XFS: Unable to allocate reserve blocks. " | ||
| 1243 | "Continuing without a reserve pool."); | ||
| 1242 | 1244 | ||
| 1243 | return 0; | 1245 | return 0; |
| 1244 | 1246 | ||
| @@ -1246,7 +1248,7 @@ xfs_mountfs( | |||
| 1246 | /* | 1248 | /* |
| 1247 | * Free up the root inode. | 1249 | * Free up the root inode. |
| 1248 | */ | 1250 | */ |
| 1249 | VN_RELE(rvp); | 1251 | IRELE(rip); |
| 1250 | error3: | 1252 | error3: |
| 1251 | xfs_log_unmount_dealloc(mp); | 1253 | xfs_log_unmount_dealloc(mp); |
| 1252 | error2: | 1254 | error2: |
| @@ -1274,6 +1276,7 @@ int | |||
| 1274 | xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) | 1276 | xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) |
| 1275 | { | 1277 | { |
| 1276 | __uint64_t resblks; | 1278 | __uint64_t resblks; |
| 1279 | int error = 0; | ||
| 1277 | 1280 | ||
| 1278 | /* | 1281 | /* |
| 1279 | * We can potentially deadlock here if we have an inode cluster | 1282 | * We can potentially deadlock here if we have an inode cluster |
| @@ -1317,9 +1320,15 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) | |||
| 1317 | * value does not matter.... | 1320 | * value does not matter.... |
| 1318 | */ | 1321 | */ |
| 1319 | resblks = 0; | 1322 | resblks = 0; |
| 1320 | xfs_reserve_blocks(mp, &resblks, NULL); | 1323 | error = xfs_reserve_blocks(mp, &resblks, NULL); |
| 1324 | if (error) | ||
| 1325 | cmn_err(CE_WARN, "XFS: Unable to free reserved block pool. " | ||
| 1326 | "Freespace may not be correct on next mount."); | ||
| 1321 | 1327 | ||
| 1322 | xfs_log_sbcount(mp, 1); | 1328 | error = xfs_log_sbcount(mp, 1); |
| 1329 | if (error) | ||
| 1330 | cmn_err(CE_WARN, "XFS: Unable to update superblock counters. " | ||
| 1331 | "Freespace may not be correct on next mount."); | ||
| 1323 | xfs_unmountfs_writesb(mp); | 1332 | xfs_unmountfs_writesb(mp); |
| 1324 | xfs_unmountfs_wait(mp); /* wait for async bufs */ | 1333 | xfs_unmountfs_wait(mp); /* wait for async bufs */ |
| 1325 | xfs_log_unmount(mp); /* Done! No more fs ops. */ | 1334 | xfs_log_unmount(mp); /* Done! No more fs ops. */ |
| @@ -1411,9 +1420,8 @@ xfs_log_sbcount( | |||
| 1411 | xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); | 1420 | xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); |
| 1412 | if (sync) | 1421 | if (sync) |
| 1413 | xfs_trans_set_sync(tp); | 1422 | xfs_trans_set_sync(tp); |
| 1414 | xfs_trans_commit(tp, 0); | 1423 | error = xfs_trans_commit(tp, 0); |
| 1415 | 1424 | return error; | |
| 1416 | return 0; | ||
| 1417 | } | 1425 | } |
| 1418 | 1426 | ||
| 1419 | STATIC void | 1427 | STATIC void |
| @@ -1462,7 +1470,6 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) | |||
| 1462 | XFS_BUF_UNASYNC(sbp); | 1470 | XFS_BUF_UNASYNC(sbp); |
| 1463 | ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); | 1471 | ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); |
| 1464 | xfsbdstrat(mp, sbp); | 1472 | xfsbdstrat(mp, sbp); |
| 1465 | /* Nevermind errors we might get here. */ | ||
| 1466 | error = xfs_iowait(sbp); | 1473 | error = xfs_iowait(sbp); |
| 1467 | if (error) | 1474 | if (error) |
| 1468 | xfs_ioerror_alert("xfs_unmountfs_writesb", | 1475 | xfs_ioerror_alert("xfs_unmountfs_writesb", |
| @@ -1911,24 +1918,27 @@ xfs_uuid_unmount( | |||
| 1911 | * be altered by the mount options, as well as any potential sb_features2 | 1918 | * be altered by the mount options, as well as any potential sb_features2 |
| 1912 | * fixup. Only the first superblock is updated. | 1919 | * fixup. Only the first superblock is updated. |
| 1913 | */ | 1920 | */ |
| 1914 | STATIC void | 1921 | STATIC int |
| 1915 | xfs_mount_log_sb( | 1922 | xfs_mount_log_sb( |
| 1916 | xfs_mount_t *mp, | 1923 | xfs_mount_t *mp, |
| 1917 | __int64_t fields) | 1924 | __int64_t fields) |
| 1918 | { | 1925 | { |
| 1919 | xfs_trans_t *tp; | 1926 | xfs_trans_t *tp; |
| 1927 | int error; | ||
| 1920 | 1928 | ||
| 1921 | ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID | | 1929 | ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID | |
| 1922 | XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2)); | 1930 | XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2)); |
| 1923 | 1931 | ||
| 1924 | tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); | 1932 | tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); |
| 1925 | if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, | 1933 | error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, |
| 1926 | XFS_DEFAULT_LOG_COUNT)) { | 1934 | XFS_DEFAULT_LOG_COUNT); |
| 1935 | if (error) { | ||
| 1927 | xfs_trans_cancel(tp, 0); | 1936 | xfs_trans_cancel(tp, 0); |
| 1928 | return; | 1937 | return error; |
| 1929 | } | 1938 | } |
| 1930 | xfs_mod_sb(tp, fields); | 1939 | xfs_mod_sb(tp, fields); |
| 1931 | xfs_trans_commit(tp, 0); | 1940 | error = xfs_trans_commit(tp, 0); |
| 1941 | return error; | ||
| 1932 | } | 1942 | } |
| 1933 | 1943 | ||
| 1934 | 1944 | ||
| @@ -2189,7 +2199,7 @@ xfs_icsb_counter_disabled( | |||
| 2189 | return test_bit(field, &mp->m_icsb_counters); | 2199 | return test_bit(field, &mp->m_icsb_counters); |
| 2190 | } | 2200 | } |
| 2191 | 2201 | ||
| 2192 | STATIC int | 2202 | STATIC void |
| 2193 | xfs_icsb_disable_counter( | 2203 | xfs_icsb_disable_counter( |
| 2194 | xfs_mount_t *mp, | 2204 | xfs_mount_t *mp, |
| 2195 | xfs_sb_field_t field) | 2205 | xfs_sb_field_t field) |
| @@ -2207,7 +2217,7 @@ xfs_icsb_disable_counter( | |||
| 2207 | * the m_icsb_mutex. | 2217 | * the m_icsb_mutex. |
| 2208 | */ | 2218 | */ |
| 2209 | if (xfs_icsb_counter_disabled(mp, field)) | 2219 | if (xfs_icsb_counter_disabled(mp, field)) |
| 2210 | return 0; | 2220 | return; |
| 2211 | 2221 | ||
| 2212 | xfs_icsb_lock_all_counters(mp); | 2222 | xfs_icsb_lock_all_counters(mp); |
| 2213 | if (!test_and_set_bit(field, &mp->m_icsb_counters)) { | 2223 | if (!test_and_set_bit(field, &mp->m_icsb_counters)) { |
| @@ -2230,8 +2240,6 @@ xfs_icsb_disable_counter( | |||
| 2230 | } | 2240 | } |
| 2231 | 2241 | ||
| 2232 | xfs_icsb_unlock_all_counters(mp); | 2242 | xfs_icsb_unlock_all_counters(mp); |
| 2233 | |||
| 2234 | return 0; | ||
| 2235 | } | 2243 | } |
| 2236 | 2244 | ||
| 2237 | STATIC void | 2245 | STATIC void |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1d8a4728d847..1ed575110ff0 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
| @@ -66,17 +66,17 @@ struct xfs_mru_cache; | |||
| 66 | * Prototypes and functions for the Data Migration subsystem. | 66 | * Prototypes and functions for the Data Migration subsystem. |
| 67 | */ | 67 | */ |
| 68 | 68 | ||
| 69 | typedef int (*xfs_send_data_t)(int, bhv_vnode_t *, | 69 | typedef int (*xfs_send_data_t)(int, struct xfs_inode *, |
| 70 | xfs_off_t, size_t, int, bhv_vrwlock_t *); | 70 | xfs_off_t, size_t, int, int *); |
| 71 | typedef int (*xfs_send_mmap_t)(struct vm_area_struct *, uint); | 71 | typedef int (*xfs_send_mmap_t)(struct vm_area_struct *, uint); |
| 72 | typedef int (*xfs_send_destroy_t)(bhv_vnode_t *, dm_right_t); | 72 | typedef int (*xfs_send_destroy_t)(struct xfs_inode *, dm_right_t); |
| 73 | typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *, | 73 | typedef int (*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *, |
| 74 | bhv_vnode_t *, | 74 | struct xfs_inode *, dm_right_t, |
| 75 | dm_right_t, bhv_vnode_t *, dm_right_t, | 75 | struct xfs_inode *, dm_right_t, |
| 76 | char *, char *, mode_t, int, int); | 76 | const char *, const char *, mode_t, int, int); |
| 77 | typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t, | 77 | typedef int (*xfs_send_mount_t)(struct xfs_mount *, dm_right_t, |
| 78 | char *, char *); | 78 | char *, char *); |
| 79 | typedef void (*xfs_send_unmount_t)(struct xfs_mount *, bhv_vnode_t *, | 79 | typedef void (*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *, |
| 80 | dm_right_t, mode_t, int, int); | 80 | dm_right_t, mode_t, int, int); |
| 81 | 81 | ||
| 82 | typedef struct xfs_dmops { | 82 | typedef struct xfs_dmops { |
| @@ -88,20 +88,20 @@ typedef struct xfs_dmops { | |||
| 88 | xfs_send_unmount_t xfs_send_unmount; | 88 | xfs_send_unmount_t xfs_send_unmount; |
| 89 | } xfs_dmops_t; | 89 | } xfs_dmops_t; |
| 90 | 90 | ||
| 91 | #define XFS_SEND_DATA(mp, ev,vp,off,len,fl,lock) \ | 91 | #define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \ |
| 92 | (*(mp)->m_dm_ops->xfs_send_data)(ev,vp,off,len,fl,lock) | 92 | (*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock) |
| 93 | #define XFS_SEND_MMAP(mp, vma,fl) \ | 93 | #define XFS_SEND_MMAP(mp, vma,fl) \ |
| 94 | (*(mp)->m_dm_ops->xfs_send_mmap)(vma,fl) | 94 | (*(mp)->m_dm_ops->xfs_send_mmap)(vma,fl) |
| 95 | #define XFS_SEND_DESTROY(mp, vp,right) \ | 95 | #define XFS_SEND_DESTROY(mp, ip,right) \ |
| 96 | (*(mp)->m_dm_ops->xfs_send_destroy)(vp,right) | 96 | (*(mp)->m_dm_ops->xfs_send_destroy)(ip,right) |
| 97 | #define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ | 97 | #define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ |
| 98 | (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl) | 98 | (*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl) |
| 99 | #define XFS_SEND_PREUNMOUNT(mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ | 99 | #define XFS_SEND_PREUNMOUNT(mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) \ |
| 100 | (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT,mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) | 100 | (*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT,mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) |
| 101 | #define XFS_SEND_MOUNT(mp,right,path,name) \ | 101 | #define XFS_SEND_MOUNT(mp,right,path,name) \ |
| 102 | (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name) | 102 | (*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name) |
| 103 | #define XFS_SEND_UNMOUNT(mp, vp,right,mode,rval,fl) \ | 103 | #define XFS_SEND_UNMOUNT(mp, ip,right,mode,rval,fl) \ |
| 104 | (*(mp)->m_dm_ops->xfs_send_unmount)(mp,vp,right,mode,rval,fl) | 104 | (*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl) |
| 105 | 105 | ||
| 106 | 106 | ||
| 107 | /* | 107 | /* |
| @@ -220,7 +220,7 @@ extern void xfs_icsb_sync_counters_flags(struct xfs_mount *, int); | |||
| 220 | #endif | 220 | #endif |
| 221 | 221 | ||
| 222 | typedef struct xfs_ail { | 222 | typedef struct xfs_ail { |
| 223 | xfs_ail_entry_t xa_ail; | 223 | struct list_head xa_ail; |
| 224 | uint xa_gen; | 224 | uint xa_gen; |
| 225 | struct task_struct *xa_task; | 225 | struct task_struct *xa_task; |
| 226 | xfs_lsn_t xa_target; | 226 | xfs_lsn_t xa_target; |
| @@ -401,7 +401,7 @@ typedef struct xfs_mount { | |||
| 401 | 401 | ||
| 402 | /* | 402 | /* |
| 403 | * Allow large block sizes to be reported to userspace programs if the | 403 | * Allow large block sizes to be reported to userspace programs if the |
| 404 | * "largeio" mount option is used. | 404 | * "largeio" mount option is used. |
| 405 | * | 405 | * |
| 406 | * If compatibility mode is specified, simply return the basic unit of caching | 406 | * If compatibility mode is specified, simply return the basic unit of caching |
| 407 | * so that we don't get inefficient read/modify/write I/O from user apps. | 407 | * so that we don't get inefficient read/modify/write I/O from user apps. |
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 7eb157a59f9e..ee371890d85d 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c | |||
| @@ -36,7 +36,6 @@ | |||
| 36 | #include "xfs_bmap.h" | 36 | #include "xfs_bmap.h" |
| 37 | #include "xfs_error.h" | 37 | #include "xfs_error.h" |
| 38 | #include "xfs_quota.h" | 38 | #include "xfs_quota.h" |
| 39 | #include "xfs_refcache.h" | ||
| 40 | #include "xfs_utils.h" | 39 | #include "xfs_utils.h" |
| 41 | #include "xfs_trans_space.h" | 40 | #include "xfs_trans_space.h" |
| 42 | #include "xfs_vnodeops.h" | 41 | #include "xfs_vnodeops.h" |
| @@ -84,25 +83,23 @@ int xfs_rename_skip, xfs_rename_nskip; | |||
| 84 | */ | 83 | */ |
| 85 | STATIC int | 84 | STATIC int |
| 86 | xfs_lock_for_rename( | 85 | xfs_lock_for_rename( |
| 87 | xfs_inode_t *dp1, /* old (source) directory inode */ | 86 | xfs_inode_t *dp1, /* in: old (source) directory inode */ |
| 88 | xfs_inode_t *dp2, /* new (target) directory inode */ | 87 | xfs_inode_t *dp2, /* in: new (target) directory inode */ |
| 89 | bhv_vname_t *vname1,/* old entry name */ | 88 | xfs_inode_t *ip1, /* in: inode of old entry */ |
| 90 | bhv_vname_t *vname2,/* new entry name */ | 89 | struct xfs_name *name2, /* in: new entry name */ |
| 91 | xfs_inode_t **ipp1, /* inode of old entry */ | 90 | xfs_inode_t **ipp2, /* out: inode of new entry, if it |
| 92 | xfs_inode_t **ipp2, /* inode of new entry, if it | ||
| 93 | already exists, NULL otherwise. */ | 91 | already exists, NULL otherwise. */ |
| 94 | xfs_inode_t **i_tab,/* array of inode returned, sorted */ | 92 | xfs_inode_t **i_tab,/* out: array of inode returned, sorted */ |
| 95 | int *num_inodes) /* number of inodes in array */ | 93 | int *num_inodes) /* out: number of inodes in array */ |
| 96 | { | 94 | { |
| 97 | xfs_inode_t *ip1, *ip2, *temp; | 95 | xfs_inode_t *ip2 = NULL; |
| 96 | xfs_inode_t *temp; | ||
| 98 | xfs_ino_t inum1, inum2; | 97 | xfs_ino_t inum1, inum2; |
| 99 | int error; | 98 | int error; |
| 100 | int i, j; | 99 | int i, j; |
| 101 | uint lock_mode; | 100 | uint lock_mode; |
| 102 | int diff_dirs = (dp1 != dp2); | 101 | int diff_dirs = (dp1 != dp2); |
| 103 | 102 | ||
| 104 | ip2 = NULL; | ||
| 105 | |||
| 106 | /* | 103 | /* |
| 107 | * First, find out the current inums of the entries so that we | 104 | * First, find out the current inums of the entries so that we |
| 108 | * can determine the initial locking order. We'll have to | 105 | * can determine the initial locking order. We'll have to |
| @@ -110,27 +107,20 @@ xfs_lock_for_rename( | |||
| 110 | * to see if we still have the right inodes, directories, etc. | 107 | * to see if we still have the right inodes, directories, etc. |
| 111 | */ | 108 | */ |
| 112 | lock_mode = xfs_ilock_map_shared(dp1); | 109 | lock_mode = xfs_ilock_map_shared(dp1); |
| 113 | error = xfs_get_dir_entry(vname1, &ip1); | 110 | IHOLD(ip1); |
| 114 | if (error) { | 111 | xfs_itrace_ref(ip1); |
| 115 | xfs_iunlock_map_shared(dp1, lock_mode); | ||
| 116 | return error; | ||
| 117 | } | ||
| 118 | 112 | ||
| 119 | inum1 = ip1->i_ino; | 113 | inum1 = ip1->i_ino; |
| 120 | 114 | ||
| 121 | ASSERT(ip1); | ||
| 122 | xfs_itrace_ref(ip1); | ||
| 123 | |||
| 124 | /* | 115 | /* |
| 125 | * Unlock dp1 and lock dp2 if they are different. | 116 | * Unlock dp1 and lock dp2 if they are different. |
| 126 | */ | 117 | */ |
| 127 | |||
| 128 | if (diff_dirs) { | 118 | if (diff_dirs) { |
| 129 | xfs_iunlock_map_shared(dp1, lock_mode); | 119 | xfs_iunlock_map_shared(dp1, lock_mode); |
| 130 | lock_mode = xfs_ilock_map_shared(dp2); | 120 | lock_mode = xfs_ilock_map_shared(dp2); |
| 131 | } | 121 | } |
| 132 | 122 | ||
| 133 | error = xfs_dir_lookup_int(dp2, lock_mode, vname2, &inum2, &ip2); | 123 | error = xfs_dir_lookup_int(dp2, lock_mode, name2, &inum2, &ip2); |
| 134 | if (error == ENOENT) { /* target does not need to exist. */ | 124 | if (error == ENOENT) { /* target does not need to exist. */ |
| 135 | inum2 = 0; | 125 | inum2 = 0; |
| 136 | } else if (error) { | 126 | } else if (error) { |
| @@ -162,6 +152,7 @@ xfs_lock_for_rename( | |||
| 162 | *num_inodes = 4; | 152 | *num_inodes = 4; |
| 163 | i_tab[3] = ip2; | 153 | i_tab[3] = ip2; |
| 164 | } | 154 | } |
| 155 | *ipp2 = i_tab[3]; | ||
| 165 | 156 | ||
| 166 | /* | 157 | /* |
| 167 | * Sort the elements via bubble sort. (Remember, there are at | 158 | * Sort the elements via bubble sort. (Remember, there are at |
| @@ -199,21 +190,6 @@ xfs_lock_for_rename( | |||
| 199 | xfs_lock_inodes(i_tab, *num_inodes, 0, XFS_ILOCK_SHARED); | 190 | xfs_lock_inodes(i_tab, *num_inodes, 0, XFS_ILOCK_SHARED); |
| 200 | } | 191 | } |
| 201 | 192 | ||
| 202 | /* | ||
| 203 | * Set the return value. Null out any unused entries in i_tab. | ||
| 204 | */ | ||
| 205 | *ipp1 = *ipp2 = NULL; | ||
| 206 | for (i=0; i < *num_inodes; i++) { | ||
| 207 | if (i_tab[i]->i_ino == inum1) { | ||
| 208 | *ipp1 = i_tab[i]; | ||
| 209 | } | ||
| 210 | if (i_tab[i]->i_ino == inum2) { | ||
| 211 | *ipp2 = i_tab[i]; | ||
| 212 | } | ||
| 213 | } | ||
| 214 | for (;i < 4; i++) { | ||
| 215 | i_tab[i] = NULL; | ||
| 216 | } | ||
| 217 | return 0; | 193 | return 0; |
| 218 | } | 194 | } |
| 219 | 195 | ||
| @@ -223,13 +199,13 @@ xfs_lock_for_rename( | |||
| 223 | int | 199 | int |
| 224 | xfs_rename( | 200 | xfs_rename( |
| 225 | xfs_inode_t *src_dp, | 201 | xfs_inode_t *src_dp, |
| 226 | bhv_vname_t *src_vname, | 202 | struct xfs_name *src_name, |
| 227 | bhv_vnode_t *target_dir_vp, | 203 | xfs_inode_t *src_ip, |
| 228 | bhv_vname_t *target_vname) | 204 | xfs_inode_t *target_dp, |
| 205 | struct xfs_name *target_name) | ||
| 229 | { | 206 | { |
| 230 | bhv_vnode_t *src_dir_vp = XFS_ITOV(src_dp); | ||
| 231 | xfs_trans_t *tp; | 207 | xfs_trans_t *tp; |
| 232 | xfs_inode_t *target_dp, *src_ip, *target_ip; | 208 | xfs_inode_t *target_ip; |
| 233 | xfs_mount_t *mp = src_dp->i_mount; | 209 | xfs_mount_t *mp = src_dp->i_mount; |
| 234 | int new_parent; /* moving to a new dir */ | 210 | int new_parent; /* moving to a new dir */ |
| 235 | int src_is_directory; /* src_name is a directory */ | 211 | int src_is_directory; /* src_name is a directory */ |
| @@ -243,29 +219,16 @@ xfs_rename( | |||
| 243 | int spaceres; | 219 | int spaceres; |
| 244 | int target_link_zero = 0; | 220 | int target_link_zero = 0; |
| 245 | int num_inodes; | 221 | int num_inodes; |
| 246 | char *src_name = VNAME(src_vname); | ||
| 247 | char *target_name = VNAME(target_vname); | ||
| 248 | int src_namelen = VNAMELEN(src_vname); | ||
| 249 | int target_namelen = VNAMELEN(target_vname); | ||
| 250 | 222 | ||
| 251 | xfs_itrace_entry(src_dp); | 223 | xfs_itrace_entry(src_dp); |
| 252 | xfs_itrace_entry(xfs_vtoi(target_dir_vp)); | 224 | xfs_itrace_entry(target_dp); |
| 253 | |||
| 254 | /* | ||
| 255 | * Find the XFS behavior descriptor for the target directory | ||
| 256 | * vnode since it was not handed to us. | ||
| 257 | */ | ||
| 258 | target_dp = xfs_vtoi(target_dir_vp); | ||
| 259 | if (target_dp == NULL) { | ||
| 260 | return XFS_ERROR(EXDEV); | ||
| 261 | } | ||
| 262 | 225 | ||
| 263 | if (DM_EVENT_ENABLED(src_dp, DM_EVENT_RENAME) || | 226 | if (DM_EVENT_ENABLED(src_dp, DM_EVENT_RENAME) || |
| 264 | DM_EVENT_ENABLED(target_dp, DM_EVENT_RENAME)) { | 227 | DM_EVENT_ENABLED(target_dp, DM_EVENT_RENAME)) { |
| 265 | error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME, | 228 | error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME, |
| 266 | src_dir_vp, DM_RIGHT_NULL, | 229 | src_dp, DM_RIGHT_NULL, |
| 267 | target_dir_vp, DM_RIGHT_NULL, | 230 | target_dp, DM_RIGHT_NULL, |
| 268 | src_name, target_name, | 231 | src_name->name, target_name->name, |
| 269 | 0, 0, 0); | 232 | 0, 0, 0); |
| 270 | if (error) { | 233 | if (error) { |
| 271 | return error; | 234 | return error; |
| @@ -282,10 +245,8 @@ xfs_rename( | |||
| 282 | * does not exist in the source directory. | 245 | * does not exist in the source directory. |
| 283 | */ | 246 | */ |
| 284 | tp = NULL; | 247 | tp = NULL; |
| 285 | error = xfs_lock_for_rename(src_dp, target_dp, src_vname, | 248 | error = xfs_lock_for_rename(src_dp, target_dp, src_ip, target_name, |
| 286 | target_vname, &src_ip, &target_ip, inodes, | 249 | &target_ip, inodes, &num_inodes); |
| 287 | &num_inodes); | ||
| 288 | |||
| 289 | if (error) { | 250 | if (error) { |
| 290 | /* | 251 | /* |
| 291 | * We have nothing locked, no inode references, and | 252 | * We have nothing locked, no inode references, and |
| @@ -331,7 +292,7 @@ xfs_rename( | |||
| 331 | XFS_BMAP_INIT(&free_list, &first_block); | 292 | XFS_BMAP_INIT(&free_list, &first_block); |
| 332 | tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); | 293 | tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); |
| 333 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | 294 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; |
| 334 | spaceres = XFS_RENAME_SPACE_RES(mp, target_namelen); | 295 | spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); |
| 335 | error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0, | 296 | error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0, |
| 336 | XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); | 297 | XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); |
| 337 | if (error == ENOSPC) { | 298 | if (error == ENOSPC) { |
| @@ -365,10 +326,10 @@ xfs_rename( | |||
| 365 | * them when they unlock the inodes. Also, we need to be careful | 326 | * them when they unlock the inodes. Also, we need to be careful |
| 366 | * not to add an inode to the transaction more than once. | 327 | * not to add an inode to the transaction more than once. |
| 367 | */ | 328 | */ |
| 368 | VN_HOLD(src_dir_vp); | 329 | IHOLD(src_dp); |
| 369 | xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); | 330 | xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); |
| 370 | if (new_parent) { | 331 | if (new_parent) { |
| 371 | VN_HOLD(target_dir_vp); | 332 | IHOLD(target_dp); |
| 372 | xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); | 333 | xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); |
| 373 | } | 334 | } |
| 374 | if ((src_ip != src_dp) && (src_ip != target_dp)) { | 335 | if ((src_ip != src_dp) && (src_ip != target_dp)) { |
| @@ -389,9 +350,8 @@ xfs_rename( | |||
| 389 | * If there's no space reservation, check the entry will | 350 | * If there's no space reservation, check the entry will |
| 390 | * fit before actually inserting it. | 351 | * fit before actually inserting it. |
| 391 | */ | 352 | */ |
| 392 | if (spaceres == 0 && | 353 | error = xfs_dir_canenter(tp, target_dp, target_name, spaceres); |
| 393 | (error = xfs_dir_canenter(tp, target_dp, target_name, | 354 | if (error) |
| 394 | target_namelen))) | ||
| 395 | goto error_return; | 355 | goto error_return; |
| 396 | /* | 356 | /* |
| 397 | * If target does not exist and the rename crosses | 357 | * If target does not exist and the rename crosses |
| @@ -399,8 +359,8 @@ xfs_rename( | |||
| 399 | * to account for the ".." reference from the new entry. | 359 | * to account for the ".." reference from the new entry. |
| 400 | */ | 360 | */ |
| 401 | error = xfs_dir_createname(tp, target_dp, target_name, | 361 | error = xfs_dir_createname(tp, target_dp, target_name, |
| 402 | target_namelen, src_ip->i_ino, | 362 | src_ip->i_ino, &first_block, |
| 403 | &first_block, &free_list, spaceres); | 363 | &free_list, spaceres); |
| 404 | if (error == ENOSPC) | 364 | if (error == ENOSPC) |
| 405 | goto error_return; | 365 | goto error_return; |
| 406 | if (error) | 366 | if (error) |
| @@ -439,7 +399,7 @@ xfs_rename( | |||
| 439 | * name at the destination directory, remove it first. | 399 | * name at the destination directory, remove it first. |
| 440 | */ | 400 | */ |
| 441 | error = xfs_dir_replace(tp, target_dp, target_name, | 401 | error = xfs_dir_replace(tp, target_dp, target_name, |
| 442 | target_namelen, src_ip->i_ino, | 402 | src_ip->i_ino, |
| 443 | &first_block, &free_list, spaceres); | 403 | &first_block, &free_list, spaceres); |
| 444 | if (error) | 404 | if (error) |
| 445 | goto abort_return; | 405 | goto abort_return; |
| @@ -476,7 +436,8 @@ xfs_rename( | |||
| 476 | * Rewrite the ".." entry to point to the new | 436 | * Rewrite the ".." entry to point to the new |
| 477 | * directory. | 437 | * directory. |
| 478 | */ | 438 | */ |
| 479 | error = xfs_dir_replace(tp, src_ip, "..", 2, target_dp->i_ino, | 439 | error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, |
| 440 | target_dp->i_ino, | ||
| 480 | &first_block, &free_list, spaceres); | 441 | &first_block, &free_list, spaceres); |
| 481 | ASSERT(error != EEXIST); | 442 | ASSERT(error != EEXIST); |
| 482 | if (error) | 443 | if (error) |
| @@ -512,8 +473,8 @@ xfs_rename( | |||
| 512 | goto abort_return; | 473 | goto abort_return; |
| 513 | } | 474 | } |
| 514 | 475 | ||
| 515 | error = xfs_dir_removename(tp, src_dp, src_name, src_namelen, | 476 | error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, |
| 516 | src_ip->i_ino, &first_block, &free_list, spaceres); | 477 | &first_block, &free_list, spaceres); |
| 517 | if (error) | 478 | if (error) |
| 518 | goto abort_return; | 479 | goto abort_return; |
| 519 | xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 480 | xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
| @@ -580,10 +541,8 @@ xfs_rename( | |||
| 580 | * the vnode references. | 541 | * the vnode references. |
| 581 | */ | 542 | */ |
| 582 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 543 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
| 583 | if (target_ip != NULL) { | 544 | if (target_ip != NULL) |
| 584 | xfs_refcache_purge_ip(target_ip); | ||
| 585 | IRELE(target_ip); | 545 | IRELE(target_ip); |
| 586 | } | ||
| 587 | /* | 546 | /* |
| 588 | * Let interposed file systems know about removed links. | 547 | * Let interposed file systems know about removed links. |
| 589 | */ | 548 | */ |
| @@ -598,9 +557,9 @@ std_return: | |||
| 598 | if (DM_EVENT_ENABLED(src_dp, DM_EVENT_POSTRENAME) || | 557 | if (DM_EVENT_ENABLED(src_dp, DM_EVENT_POSTRENAME) || |
| 599 | DM_EVENT_ENABLED(target_dp, DM_EVENT_POSTRENAME)) { | 558 | DM_EVENT_ENABLED(target_dp, DM_EVENT_POSTRENAME)) { |
| 600 | (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME, | 559 | (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME, |
| 601 | src_dir_vp, DM_RIGHT_NULL, | 560 | src_dp, DM_RIGHT_NULL, |
| 602 | target_dir_vp, DM_RIGHT_NULL, | 561 | target_dp, DM_RIGHT_NULL, |
| 603 | src_name, target_name, | 562 | src_name->name, target_name->name, |
| 604 | 0, error, 0); | 563 | 0, error, 0); |
| 605 | } | 564 | } |
| 606 | return error; | 565 | return error; |
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 47082c01872d..a0dc6e5bc5b9 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
| @@ -44,6 +44,7 @@ | |||
| 44 | #include "xfs_rw.h" | 44 | #include "xfs_rw.h" |
| 45 | #include "xfs_inode_item.h" | 45 | #include "xfs_inode_item.h" |
| 46 | #include "xfs_trans_space.h" | 46 | #include "xfs_trans_space.h" |
| 47 | #include "xfs_utils.h" | ||
| 47 | 48 | ||
| 48 | 49 | ||
| 49 | /* | 50 | /* |
| @@ -123,14 +124,14 @@ xfs_growfs_rt_alloc( | |||
| 123 | XFS_GROWRTALLOC_LOG_RES(mp), 0, | 124 | XFS_GROWRTALLOC_LOG_RES(mp), 0, |
| 124 | XFS_TRANS_PERM_LOG_RES, | 125 | XFS_TRANS_PERM_LOG_RES, |
| 125 | XFS_DEFAULT_PERM_LOG_COUNT))) | 126 | XFS_DEFAULT_PERM_LOG_COUNT))) |
| 126 | goto error_exit; | 127 | goto error_cancel; |
| 127 | cancelflags = XFS_TRANS_RELEASE_LOG_RES; | 128 | cancelflags = XFS_TRANS_RELEASE_LOG_RES; |
| 128 | /* | 129 | /* |
| 129 | * Lock the inode. | 130 | * Lock the inode. |
| 130 | */ | 131 | */ |
| 131 | if ((error = xfs_trans_iget(mp, tp, ino, 0, | 132 | if ((error = xfs_trans_iget(mp, tp, ino, 0, |
| 132 | XFS_ILOCK_EXCL, &ip))) | 133 | XFS_ILOCK_EXCL, &ip))) |
| 133 | goto error_exit; | 134 | goto error_cancel; |
| 134 | XFS_BMAP_INIT(&flist, &firstblock); | 135 | XFS_BMAP_INIT(&flist, &firstblock); |
| 135 | /* | 136 | /* |
| 136 | * Allocate blocks to the bitmap file. | 137 | * Allocate blocks to the bitmap file. |
| @@ -143,14 +144,16 @@ xfs_growfs_rt_alloc( | |||
| 143 | if (!error && nmap < 1) | 144 | if (!error && nmap < 1) |
| 144 | error = XFS_ERROR(ENOSPC); | 145 | error = XFS_ERROR(ENOSPC); |
| 145 | if (error) | 146 | if (error) |
| 146 | goto error_exit; | 147 | goto error_cancel; |
| 147 | /* | 148 | /* |
| 148 | * Free any blocks freed up in the transaction, then commit. | 149 | * Free any blocks freed up in the transaction, then commit. |
| 149 | */ | 150 | */ |
| 150 | error = xfs_bmap_finish(&tp, &flist, &committed); | 151 | error = xfs_bmap_finish(&tp, &flist, &committed); |
| 151 | if (error) | 152 | if (error) |
| 152 | goto error_exit; | 153 | goto error_cancel; |
| 153 | xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 154 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
| 155 | if (error) | ||
| 156 | goto error; | ||
| 154 | /* | 157 | /* |
| 155 | * Now we need to clear the allocated blocks. | 158 | * Now we need to clear the allocated blocks. |
| 156 | * Do this one block per transaction, to keep it simple. | 159 | * Do this one block per transaction, to keep it simple. |
| @@ -165,13 +168,13 @@ xfs_growfs_rt_alloc( | |||
| 165 | */ | 168 | */ |
| 166 | if ((error = xfs_trans_reserve(tp, 0, | 169 | if ((error = xfs_trans_reserve(tp, 0, |
| 167 | XFS_GROWRTZERO_LOG_RES(mp), 0, 0, 0))) | 170 | XFS_GROWRTZERO_LOG_RES(mp), 0, 0, 0))) |
| 168 | goto error_exit; | 171 | goto error_cancel; |
| 169 | /* | 172 | /* |
| 170 | * Lock the bitmap inode. | 173 | * Lock the bitmap inode. |
| 171 | */ | 174 | */ |
| 172 | if ((error = xfs_trans_iget(mp, tp, ino, 0, | 175 | if ((error = xfs_trans_iget(mp, tp, ino, 0, |
| 173 | XFS_ILOCK_EXCL, &ip))) | 176 | XFS_ILOCK_EXCL, &ip))) |
| 174 | goto error_exit; | 177 | goto error_cancel; |
| 175 | /* | 178 | /* |
| 176 | * Get a buffer for the block. | 179 | * Get a buffer for the block. |
| 177 | */ | 180 | */ |
| @@ -180,14 +183,16 @@ xfs_growfs_rt_alloc( | |||
| 180 | mp->m_bsize, 0); | 183 | mp->m_bsize, 0); |
| 181 | if (bp == NULL) { | 184 | if (bp == NULL) { |
| 182 | error = XFS_ERROR(EIO); | 185 | error = XFS_ERROR(EIO); |
| 183 | goto error_exit; | 186 | goto error_cancel; |
| 184 | } | 187 | } |
| 185 | memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize); | 188 | memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize); |
| 186 | xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); | 189 | xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); |
| 187 | /* | 190 | /* |
| 188 | * Commit the transaction. | 191 | * Commit the transaction. |
| 189 | */ | 192 | */ |
| 190 | xfs_trans_commit(tp, 0); | 193 | error = xfs_trans_commit(tp, 0); |
| 194 | if (error) | ||
| 195 | goto error; | ||
| 191 | } | 196 | } |
| 192 | /* | 197 | /* |
| 193 | * Go on to the next extent, if any. | 198 | * Go on to the next extent, if any. |
| @@ -195,8 +200,9 @@ xfs_growfs_rt_alloc( | |||
| 195 | oblocks = map.br_startoff + map.br_blockcount; | 200 | oblocks = map.br_startoff + map.br_blockcount; |
| 196 | } | 201 | } |
| 197 | return 0; | 202 | return 0; |
| 198 | error_exit: | 203 | error_cancel: |
| 199 | xfs_trans_cancel(tp, cancelflags); | 204 | xfs_trans_cancel(tp, cancelflags); |
| 205 | error: | ||
| 200 | return error; | 206 | return error; |
| 201 | } | 207 | } |
| 202 | 208 | ||
| @@ -1875,6 +1881,7 @@ xfs_growfs_rt( | |||
| 1875 | xfs_trans_t *tp; /* transaction pointer */ | 1881 | xfs_trans_t *tp; /* transaction pointer */ |
| 1876 | 1882 | ||
| 1877 | sbp = &mp->m_sb; | 1883 | sbp = &mp->m_sb; |
| 1884 | cancelflags = 0; | ||
| 1878 | /* | 1885 | /* |
| 1879 | * Initial error checking. | 1886 | * Initial error checking. |
| 1880 | */ | 1887 | */ |
| @@ -2041,13 +2048,15 @@ xfs_growfs_rt( | |||
| 2041 | */ | 2048 | */ |
| 2042 | mp->m_rsumlevels = nrsumlevels; | 2049 | mp->m_rsumlevels = nrsumlevels; |
| 2043 | mp->m_rsumsize = nrsumsize; | 2050 | mp->m_rsumsize = nrsumsize; |
| 2044 | /* | 2051 | |
| 2045 | * Commit the transaction. | 2052 | error = xfs_trans_commit(tp, 0); |
| 2046 | */ | 2053 | if (error) { |
| 2047 | xfs_trans_commit(tp, 0); | 2054 | tp = NULL; |
| 2055 | break; | ||
| 2056 | } | ||
| 2048 | } | 2057 | } |
| 2049 | 2058 | ||
| 2050 | if (error) | 2059 | if (error && tp) |
| 2051 | xfs_trans_cancel(tp, cancelflags); | 2060 | xfs_trans_cancel(tp, cancelflags); |
| 2052 | 2061 | ||
| 2053 | /* | 2062 | /* |
| @@ -2278,7 +2287,7 @@ xfs_rtmount_inodes( | |||
| 2278 | ASSERT(sbp->sb_rsumino != NULLFSINO); | 2287 | ASSERT(sbp->sb_rsumino != NULLFSINO); |
| 2279 | error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0); | 2288 | error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0); |
| 2280 | if (error) { | 2289 | if (error) { |
| 2281 | VN_RELE(XFS_ITOV(mp->m_rbmip)); | 2290 | IRELE(mp->m_rbmip); |
| 2282 | return error; | 2291 | return error; |
| 2283 | } | 2292 | } |
| 2284 | ASSERT(mp->m_rsumip != NULL); | 2293 | ASSERT(mp->m_rsumip != NULL); |
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index cd3ece6cc918..b0f31c09a76d 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c | |||
| @@ -126,11 +126,11 @@ xfs_write_sync_logforce( | |||
| 126 | * when we return. | 126 | * when we return. |
| 127 | */ | 127 | */ |
| 128 | if (iip && iip->ili_last_lsn) { | 128 | if (iip && iip->ili_last_lsn) { |
| 129 | xfs_log_force(mp, iip->ili_last_lsn, | 129 | error = _xfs_log_force(mp, iip->ili_last_lsn, |
| 130 | XFS_LOG_FORCE | XFS_LOG_SYNC); | 130 | XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); |
| 131 | } else if (xfs_ipincount(ip) > 0) { | 131 | } else if (xfs_ipincount(ip) > 0) { |
| 132 | xfs_log_force(mp, (xfs_lsn_t)0, | 132 | error = _xfs_log_force(mp, (xfs_lsn_t)0, |
| 133 | XFS_LOG_FORCE | XFS_LOG_SYNC); | 133 | XFS_LOG_FORCE | XFS_LOG_SYNC, NULL); |
| 134 | } | 134 | } |
| 135 | 135 | ||
| 136 | } else { | 136 | } else { |
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 7f40628d85c7..0804207c7391 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
| @@ -113,13 +113,8 @@ struct xfs_mount; | |||
| 113 | struct xfs_trans; | 113 | struct xfs_trans; |
| 114 | struct xfs_dquot_acct; | 114 | struct xfs_dquot_acct; |
| 115 | 115 | ||
| 116 | typedef struct xfs_ail_entry { | ||
| 117 | struct xfs_log_item *ail_forw; /* AIL forw pointer */ | ||
| 118 | struct xfs_log_item *ail_back; /* AIL back pointer */ | ||
| 119 | } xfs_ail_entry_t; | ||
| 120 | |||
| 121 | typedef struct xfs_log_item { | 116 | typedef struct xfs_log_item { |
| 122 | xfs_ail_entry_t li_ail; /* AIL pointers */ | 117 | struct list_head li_ail; /* AIL pointers */ |
| 123 | xfs_lsn_t li_lsn; /* last on-disk lsn */ | 118 | xfs_lsn_t li_lsn; /* last on-disk lsn */ |
| 124 | struct xfs_log_item_desc *li_desc; /* ptr to current desc*/ | 119 | struct xfs_log_item_desc *li_desc; /* ptr to current desc*/ |
| 125 | struct xfs_mount *li_mountp; /* ptr to fs mount */ | 120 | struct xfs_mount *li_mountp; /* ptr to fs mount */ |
| @@ -341,7 +336,6 @@ typedef struct xfs_trans { | |||
| 341 | unsigned int t_rtx_res; /* # of rt extents resvd */ | 336 | unsigned int t_rtx_res; /* # of rt extents resvd */ |
| 342 | unsigned int t_rtx_res_used; /* # of resvd rt extents used */ | 337 | unsigned int t_rtx_res_used; /* # of resvd rt extents used */ |
| 343 | xfs_log_ticket_t t_ticket; /* log mgr ticket */ | 338 | xfs_log_ticket_t t_ticket; /* log mgr ticket */ |
| 344 | sema_t t_sema; /* sema for commit completion */ | ||
| 345 | xfs_lsn_t t_lsn; /* log seq num of start of | 339 | xfs_lsn_t t_lsn; /* log seq num of start of |
| 346 | * transaction. */ | 340 | * transaction. */ |
| 347 | xfs_lsn_t t_commit_lsn; /* log seq num of end of | 341 | xfs_lsn_t t_commit_lsn; /* log seq num of end of |
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 76d470d8a1e6..1f77c00af566 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c | |||
| @@ -28,13 +28,13 @@ | |||
| 28 | #include "xfs_trans_priv.h" | 28 | #include "xfs_trans_priv.h" |
| 29 | #include "xfs_error.h" | 29 | #include "xfs_error.h" |
| 30 | 30 | ||
| 31 | STATIC void xfs_ail_insert(xfs_ail_entry_t *, xfs_log_item_t *); | 31 | STATIC void xfs_ail_insert(xfs_ail_t *, xfs_log_item_t *); |
| 32 | STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_entry_t *, xfs_log_item_t *); | 32 | STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_t *, xfs_log_item_t *); |
| 33 | STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_entry_t *); | 33 | STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_t *); |
| 34 | STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_entry_t *, xfs_log_item_t *); | 34 | STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_t *, xfs_log_item_t *); |
| 35 | 35 | ||
| 36 | #ifdef DEBUG | 36 | #ifdef DEBUG |
| 37 | STATIC void xfs_ail_check(xfs_ail_entry_t *, xfs_log_item_t *); | 37 | STATIC void xfs_ail_check(xfs_ail_t *, xfs_log_item_t *); |
| 38 | #else | 38 | #else |
| 39 | #define xfs_ail_check(a,l) | 39 | #define xfs_ail_check(a,l) |
| 40 | #endif /* DEBUG */ | 40 | #endif /* DEBUG */ |
| @@ -57,7 +57,7 @@ xfs_trans_tail_ail( | |||
| 57 | xfs_log_item_t *lip; | 57 | xfs_log_item_t *lip; |
| 58 | 58 | ||
| 59 | spin_lock(&mp->m_ail_lock); | 59 | spin_lock(&mp->m_ail_lock); |
| 60 | lip = xfs_ail_min(&(mp->m_ail.xa_ail)); | 60 | lip = xfs_ail_min(&mp->m_ail); |
| 61 | if (lip == NULL) { | 61 | if (lip == NULL) { |
| 62 | lsn = (xfs_lsn_t)0; | 62 | lsn = (xfs_lsn_t)0; |
| 63 | } else { | 63 | } else { |
| @@ -91,7 +91,7 @@ xfs_trans_push_ail( | |||
| 91 | { | 91 | { |
| 92 | xfs_log_item_t *lip; | 92 | xfs_log_item_t *lip; |
| 93 | 93 | ||
| 94 | lip = xfs_ail_min(&mp->m_ail.xa_ail); | 94 | lip = xfs_ail_min(&mp->m_ail); |
| 95 | if (lip && !XFS_FORCED_SHUTDOWN(mp)) { | 95 | if (lip && !XFS_FORCED_SHUTDOWN(mp)) { |
| 96 | if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0) | 96 | if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0) |
| 97 | xfsaild_wakeup(mp, threshold_lsn); | 97 | xfsaild_wakeup(mp, threshold_lsn); |
| @@ -111,15 +111,17 @@ xfs_trans_first_push_ail( | |||
| 111 | { | 111 | { |
| 112 | xfs_log_item_t *lip; | 112 | xfs_log_item_t *lip; |
| 113 | 113 | ||
| 114 | lip = xfs_ail_min(&(mp->m_ail.xa_ail)); | 114 | lip = xfs_ail_min(&mp->m_ail); |
| 115 | *gen = (int)mp->m_ail.xa_gen; | 115 | *gen = (int)mp->m_ail.xa_gen; |
| 116 | if (lsn == 0) | 116 | if (lsn == 0) |
| 117 | return lip; | 117 | return lip; |
| 118 | 118 | ||
| 119 | while (lip && (XFS_LSN_CMP(lip->li_lsn, lsn) < 0)) | 119 | list_for_each_entry(lip, &mp->m_ail.xa_ail, li_ail) { |
| 120 | lip = lip->li_ail.ail_forw; | 120 | if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) |
| 121 | return lip; | ||
| 122 | } | ||
| 121 | 123 | ||
| 122 | return lip; | 124 | return NULL; |
| 123 | } | 125 | } |
| 124 | 126 | ||
| 125 | /* | 127 | /* |
| @@ -329,7 +331,7 @@ xfs_trans_unlocked_item( | |||
| 329 | * the call to xfs_log_move_tail() doesn't do anything if there's | 331 | * the call to xfs_log_move_tail() doesn't do anything if there's |
| 330 | * not enough free space to wake people up so we're safe calling it. | 332 | * not enough free space to wake people up so we're safe calling it. |
| 331 | */ | 333 | */ |
| 332 | min_lip = xfs_ail_min(&mp->m_ail.xa_ail); | 334 | min_lip = xfs_ail_min(&mp->m_ail); |
| 333 | 335 | ||
| 334 | if (min_lip == lip) | 336 | if (min_lip == lip) |
| 335 | xfs_log_move_tail(mp, 1); | 337 | xfs_log_move_tail(mp, 1); |
| @@ -357,15 +359,13 @@ xfs_trans_update_ail( | |||
| 357 | xfs_log_item_t *lip, | 359 | xfs_log_item_t *lip, |
| 358 | xfs_lsn_t lsn) __releases(mp->m_ail_lock) | 360 | xfs_lsn_t lsn) __releases(mp->m_ail_lock) |
| 359 | { | 361 | { |
| 360 | xfs_ail_entry_t *ailp; | ||
| 361 | xfs_log_item_t *dlip=NULL; | 362 | xfs_log_item_t *dlip=NULL; |
| 362 | xfs_log_item_t *mlip; /* ptr to minimum lip */ | 363 | xfs_log_item_t *mlip; /* ptr to minimum lip */ |
| 363 | 364 | ||
| 364 | ailp = &(mp->m_ail.xa_ail); | 365 | mlip = xfs_ail_min(&mp->m_ail); |
| 365 | mlip = xfs_ail_min(ailp); | ||
| 366 | 366 | ||
| 367 | if (lip->li_flags & XFS_LI_IN_AIL) { | 367 | if (lip->li_flags & XFS_LI_IN_AIL) { |
| 368 | dlip = xfs_ail_delete(ailp, lip); | 368 | dlip = xfs_ail_delete(&mp->m_ail, lip); |
| 369 | ASSERT(dlip == lip); | 369 | ASSERT(dlip == lip); |
| 370 | } else { | 370 | } else { |
| 371 | lip->li_flags |= XFS_LI_IN_AIL; | 371 | lip->li_flags |= XFS_LI_IN_AIL; |
| @@ -373,11 +373,11 @@ xfs_trans_update_ail( | |||
| 373 | 373 | ||
| 374 | lip->li_lsn = lsn; | 374 | lip->li_lsn = lsn; |
| 375 | 375 | ||
| 376 | xfs_ail_insert(ailp, lip); | 376 | xfs_ail_insert(&mp->m_ail, lip); |
| 377 | mp->m_ail.xa_gen++; | 377 | mp->m_ail.xa_gen++; |
| 378 | 378 | ||
| 379 | if (mlip == dlip) { | 379 | if (mlip == dlip) { |
| 380 | mlip = xfs_ail_min(&(mp->m_ail.xa_ail)); | 380 | mlip = xfs_ail_min(&mp->m_ail); |
| 381 | spin_unlock(&mp->m_ail_lock); | 381 | spin_unlock(&mp->m_ail_lock); |
| 382 | xfs_log_move_tail(mp, mlip->li_lsn); | 382 | xfs_log_move_tail(mp, mlip->li_lsn); |
| 383 | } else { | 383 | } else { |
| @@ -407,14 +407,12 @@ xfs_trans_delete_ail( | |||
| 407 | xfs_mount_t *mp, | 407 | xfs_mount_t *mp, |
| 408 | xfs_log_item_t *lip) __releases(mp->m_ail_lock) | 408 | xfs_log_item_t *lip) __releases(mp->m_ail_lock) |
| 409 | { | 409 | { |
| 410 | xfs_ail_entry_t *ailp; | ||
| 411 | xfs_log_item_t *dlip; | 410 | xfs_log_item_t *dlip; |
| 412 | xfs_log_item_t *mlip; | 411 | xfs_log_item_t *mlip; |
| 413 | 412 | ||
| 414 | if (lip->li_flags & XFS_LI_IN_AIL) { | 413 | if (lip->li_flags & XFS_LI_IN_AIL) { |
| 415 | ailp = &(mp->m_ail.xa_ail); | 414 | mlip = xfs_ail_min(&mp->m_ail); |
| 416 | mlip = xfs_ail_min(ailp); | 415 | dlip = xfs_ail_delete(&mp->m_ail, lip); |
| 417 | dlip = xfs_ail_delete(ailp, lip); | ||
| 418 | ASSERT(dlip == lip); | 416 | ASSERT(dlip == lip); |
| 419 | 417 | ||
| 420 | 418 | ||
| @@ -423,7 +421,7 @@ xfs_trans_delete_ail( | |||
| 423 | mp->m_ail.xa_gen++; | 421 | mp->m_ail.xa_gen++; |
| 424 | 422 | ||
| 425 | if (mlip == dlip) { | 423 | if (mlip == dlip) { |
| 426 | mlip = xfs_ail_min(&(mp->m_ail.xa_ail)); | 424 | mlip = xfs_ail_min(&mp->m_ail); |
| 427 | spin_unlock(&mp->m_ail_lock); | 425 | spin_unlock(&mp->m_ail_lock); |
| 428 | xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); | 426 | xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0)); |
| 429 | } else { | 427 | } else { |
| @@ -440,7 +438,7 @@ xfs_trans_delete_ail( | |||
| 440 | else { | 438 | else { |
| 441 | xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, | 439 | xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, |
| 442 | "%s: attempting to delete a log item that is not in the AIL", | 440 | "%s: attempting to delete a log item that is not in the AIL", |
| 443 | __FUNCTION__); | 441 | __func__); |
| 444 | spin_unlock(&mp->m_ail_lock); | 442 | spin_unlock(&mp->m_ail_lock); |
| 445 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 443 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
| 446 | } | 444 | } |
| @@ -461,7 +459,7 @@ xfs_trans_first_ail( | |||
| 461 | { | 459 | { |
| 462 | xfs_log_item_t *lip; | 460 | xfs_log_item_t *lip; |
| 463 | 461 | ||
| 464 | lip = xfs_ail_min(&(mp->m_ail.xa_ail)); | 462 | lip = xfs_ail_min(&mp->m_ail); |
| 465 | *gen = (int)mp->m_ail.xa_gen; | 463 | *gen = (int)mp->m_ail.xa_gen; |
| 466 | 464 | ||
| 467 | return lip; | 465 | return lip; |
| @@ -485,9 +483,9 @@ xfs_trans_next_ail( | |||
| 485 | 483 | ||
| 486 | ASSERT(mp && lip && gen); | 484 | ASSERT(mp && lip && gen); |
| 487 | if (mp->m_ail.xa_gen == *gen) { | 485 | if (mp->m_ail.xa_gen == *gen) { |
| 488 | nlip = xfs_ail_next(&(mp->m_ail.xa_ail), lip); | 486 | nlip = xfs_ail_next(&mp->m_ail, lip); |
| 489 | } else { | 487 | } else { |
| 490 | nlip = xfs_ail_min(&(mp->m_ail).xa_ail); | 488 | nlip = xfs_ail_min(&mp->m_ail); |
| 491 | *gen = (int)mp->m_ail.xa_gen; | 489 | *gen = (int)mp->m_ail.xa_gen; |
| 492 | if (restarts != NULL) { | 490 | if (restarts != NULL) { |
| 493 | XFS_STATS_INC(xs_push_ail_restarts); | 491 | XFS_STATS_INC(xs_push_ail_restarts); |
| @@ -517,8 +515,7 @@ int | |||
| 517 | xfs_trans_ail_init( | 515 | xfs_trans_ail_init( |
| 518 | xfs_mount_t *mp) | 516 | xfs_mount_t *mp) |
| 519 | { | 517 | { |
| 520 | mp->m_ail.xa_ail.ail_forw = (xfs_log_item_t*)&mp->m_ail.xa_ail; | 518 | INIT_LIST_HEAD(&mp->m_ail.xa_ail); |
| 521 | mp->m_ail.xa_ail.ail_back = (xfs_log_item_t*)&mp->m_ail.xa_ail; | ||
| 522 | return xfsaild_start(mp); | 519 | return xfsaild_start(mp); |
| 523 | } | 520 | } |
| 524 | 521 | ||
| @@ -537,7 +534,7 @@ xfs_trans_ail_destroy( | |||
| 537 | */ | 534 | */ |
| 538 | STATIC void | 535 | STATIC void |
| 539 | xfs_ail_insert( | 536 | xfs_ail_insert( |
| 540 | xfs_ail_entry_t *base, | 537 | xfs_ail_t *ailp, |
| 541 | xfs_log_item_t *lip) | 538 | xfs_log_item_t *lip) |
| 542 | /* ARGSUSED */ | 539 | /* ARGSUSED */ |
| 543 | { | 540 | { |
| @@ -546,27 +543,22 @@ xfs_ail_insert( | |||
| 546 | /* | 543 | /* |
| 547 | * If the list is empty, just insert the item. | 544 | * If the list is empty, just insert the item. |
| 548 | */ | 545 | */ |
| 549 | if (base->ail_back == (xfs_log_item_t*)base) { | 546 | if (list_empty(&ailp->xa_ail)) { |
| 550 | base->ail_forw = lip; | 547 | list_add(&lip->li_ail, &ailp->xa_ail); |
| 551 | base->ail_back = lip; | ||
| 552 | lip->li_ail.ail_forw = (xfs_log_item_t*)base; | ||
| 553 | lip->li_ail.ail_back = (xfs_log_item_t*)base; | ||
| 554 | return; | 548 | return; |
| 555 | } | 549 | } |
| 556 | 550 | ||
| 557 | next_lip = base->ail_back; | 551 | list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { |
| 558 | while ((next_lip != (xfs_log_item_t*)base) && | 552 | if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0) |
| 559 | (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) > 0)) { | 553 | break; |
| 560 | next_lip = next_lip->li_ail.ail_back; | ||
| 561 | } | 554 | } |
| 562 | ASSERT((next_lip == (xfs_log_item_t*)base) || | 555 | |
| 556 | ASSERT((&next_lip->li_ail == &ailp->xa_ail) || | ||
| 563 | (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)); | 557 | (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)); |
| 564 | lip->li_ail.ail_forw = next_lip->li_ail.ail_forw; | ||
| 565 | lip->li_ail.ail_back = next_lip; | ||
| 566 | next_lip->li_ail.ail_forw = lip; | ||
| 567 | lip->li_ail.ail_forw->li_ail.ail_back = lip; | ||
| 568 | 558 | ||
| 569 | xfs_ail_check(base, lip); | 559 | list_add(&lip->li_ail, &next_lip->li_ail); |
| 560 | |||
| 561 | xfs_ail_check(ailp, lip); | ||
| 570 | return; | 562 | return; |
| 571 | } | 563 | } |
| 572 | 564 | ||
| @@ -576,15 +568,13 @@ xfs_ail_insert( | |||
| 576 | /*ARGSUSED*/ | 568 | /*ARGSUSED*/ |
| 577 | STATIC xfs_log_item_t * | 569 | STATIC xfs_log_item_t * |
| 578 | xfs_ail_delete( | 570 | xfs_ail_delete( |
| 579 | xfs_ail_entry_t *base, | 571 | xfs_ail_t *ailp, |
| 580 | xfs_log_item_t *lip) | 572 | xfs_log_item_t *lip) |
| 581 | /* ARGSUSED */ | 573 | /* ARGSUSED */ |
| 582 | { | 574 | { |
| 583 | xfs_ail_check(base, lip); | 575 | xfs_ail_check(ailp, lip); |
| 584 | lip->li_ail.ail_forw->li_ail.ail_back = lip->li_ail.ail_back; | 576 | |
| 585 | lip->li_ail.ail_back->li_ail.ail_forw = lip->li_ail.ail_forw; | 577 | list_del(&lip->li_ail); |
| 586 | lip->li_ail.ail_forw = NULL; | ||
| 587 | lip->li_ail.ail_back = NULL; | ||
| 588 | 578 | ||
| 589 | return lip; | 579 | return lip; |
| 590 | } | 580 | } |
| @@ -595,14 +585,13 @@ xfs_ail_delete( | |||
| 595 | */ | 585 | */ |
| 596 | STATIC xfs_log_item_t * | 586 | STATIC xfs_log_item_t * |
| 597 | xfs_ail_min( | 587 | xfs_ail_min( |
| 598 | xfs_ail_entry_t *base) | 588 | xfs_ail_t *ailp) |
| 599 | /* ARGSUSED */ | 589 | /* ARGSUSED */ |
| 600 | { | 590 | { |
| 601 | register xfs_log_item_t *forw = base->ail_forw; | 591 | if (list_empty(&ailp->xa_ail)) |
| 602 | if (forw == (xfs_log_item_t*)base) { | ||
| 603 | return NULL; | 592 | return NULL; |
| 604 | } | 593 | |
| 605 | return forw; | 594 | return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); |
| 606 | } | 595 | } |
| 607 | 596 | ||
| 608 | /* | 597 | /* |
| @@ -612,15 +601,14 @@ xfs_ail_min( | |||
| 612 | */ | 601 | */ |
| 613 | STATIC xfs_log_item_t * | 602 | STATIC xfs_log_item_t * |
| 614 | xfs_ail_next( | 603 | xfs_ail_next( |
| 615 | xfs_ail_entry_t *base, | 604 | xfs_ail_t *ailp, |
| 616 | xfs_log_item_t *lip) | 605 | xfs_log_item_t *lip) |
| 617 | /* ARGSUSED */ | 606 | /* ARGSUSED */ |
| 618 | { | 607 | { |
| 619 | if (lip->li_ail.ail_forw == (xfs_log_item_t*)base) { | 608 | if (lip->li_ail.next == &ailp->xa_ail) |
| 620 | return NULL; | 609 | return NULL; |
| 621 | } | ||
| 622 | return lip->li_ail.ail_forw; | ||
| 623 | 610 | ||
| 611 | return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); | ||
| 624 | } | 612 | } |
| 625 | 613 | ||
| 626 | #ifdef DEBUG | 614 | #ifdef DEBUG |
| @@ -629,57 +617,40 @@ xfs_ail_next( | |||
| 629 | */ | 617 | */ |
| 630 | STATIC void | 618 | STATIC void |
| 631 | xfs_ail_check( | 619 | xfs_ail_check( |
| 632 | xfs_ail_entry_t *base, | 620 | xfs_ail_t *ailp, |
| 633 | xfs_log_item_t *lip) | 621 | xfs_log_item_t *lip) |
| 634 | { | 622 | { |
| 635 | xfs_log_item_t *prev_lip; | 623 | xfs_log_item_t *prev_lip; |
| 636 | 624 | ||
| 637 | prev_lip = base->ail_forw; | 625 | if (list_empty(&ailp->xa_ail)) |
| 638 | if (prev_lip == (xfs_log_item_t*)base) { | ||
| 639 | /* | ||
| 640 | * Make sure the pointers are correct when the list | ||
| 641 | * is empty. | ||
| 642 | */ | ||
| 643 | ASSERT(base->ail_back == (xfs_log_item_t*)base); | ||
| 644 | return; | 626 | return; |
| 645 | } | ||
| 646 | 627 | ||
| 647 | /* | 628 | /* |
| 648 | * Check the next and previous entries are valid. | 629 | * Check the next and previous entries are valid. |
| 649 | */ | 630 | */ |
| 650 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); | 631 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); |
| 651 | prev_lip = lip->li_ail.ail_back; | 632 | prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail); |
| 652 | if (prev_lip != (xfs_log_item_t*)base) { | 633 | if (&prev_lip->li_ail != &ailp->xa_ail) |
| 653 | ASSERT(prev_lip->li_ail.ail_forw == lip); | ||
| 654 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); | 634 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); |
| 655 | } | 635 | |
| 656 | prev_lip = lip->li_ail.ail_forw; | 636 | prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail); |
| 657 | if (prev_lip != (xfs_log_item_t*)base) { | 637 | if (&prev_lip->li_ail != &ailp->xa_ail) |
| 658 | ASSERT(prev_lip->li_ail.ail_back == lip); | ||
| 659 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); | 638 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0); |
| 660 | } | ||
| 661 | 639 | ||
| 662 | 640 | ||
| 663 | #ifdef XFS_TRANS_DEBUG | 641 | #ifdef XFS_TRANS_DEBUG |
| 664 | /* | 642 | /* |
| 665 | * Walk the list checking forward and backward pointers, | 643 | * Walk the list checking lsn ordering, and that every entry has the |
| 666 | * lsn ordering, and that every entry has the XFS_LI_IN_AIL | 644 | * XFS_LI_IN_AIL flag set. This is really expensive, so only do it |
| 667 | * flag set. This is really expensive, so only do it when | 645 | * when specifically debugging the transaction subsystem. |
| 668 | * specifically debugging the transaction subsystem. | ||
| 669 | */ | 646 | */ |
| 670 | prev_lip = (xfs_log_item_t*)base; | 647 | prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail); |
| 671 | while (lip != (xfs_log_item_t*)base) { | 648 | list_for_each_entry(lip, &ailp->xa_ail, li_ail) { |
| 672 | if (prev_lip != (xfs_log_item_t*)base) { | 649 | if (&prev_lip->li_ail != &ailp->xa_ail) |
| 673 | ASSERT(prev_lip->li_ail.ail_forw == lip); | ||
| 674 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); | 650 | ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0); |
| 675 | } | ||
| 676 | ASSERT(lip->li_ail.ail_back == prev_lip); | ||
| 677 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); | 651 | ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0); |
| 678 | prev_lip = lip; | 652 | prev_lip = lip; |
| 679 | lip = lip->li_ail.ail_forw; | ||
| 680 | } | 653 | } |
| 681 | ASSERT(lip == (xfs_log_item_t*)base); | ||
| 682 | ASSERT(base->ail_back == prev_lip); | ||
| 683 | #endif /* XFS_TRANS_DEBUG */ | 654 | #endif /* XFS_TRANS_DEBUG */ |
| 684 | } | 655 | } |
| 685 | #endif /* DEBUG */ | 656 | #endif /* DEBUG */ |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 60b6b898022b..cb0c5839154b 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
| @@ -304,7 +304,8 @@ xfs_trans_read_buf( | |||
| 304 | if (tp == NULL) { | 304 | if (tp == NULL) { |
| 305 | bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); | 305 | bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); |
| 306 | if (!bp) | 306 | if (!bp) |
| 307 | return XFS_ERROR(ENOMEM); | 307 | return (flags & XFS_BUF_TRYLOCK) ? |
| 308 | EAGAIN : XFS_ERROR(ENOMEM); | ||
| 308 | 309 | ||
| 309 | if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) { | 310 | if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) { |
| 310 | xfs_ioerror_alert("xfs_trans_read_buf", mp, | 311 | xfs_ioerror_alert("xfs_trans_read_buf", mp, |
| @@ -353,17 +354,15 @@ xfs_trans_read_buf( | |||
| 353 | ASSERT(!XFS_BUF_ISASYNC(bp)); | 354 | ASSERT(!XFS_BUF_ISASYNC(bp)); |
| 354 | XFS_BUF_READ(bp); | 355 | XFS_BUF_READ(bp); |
| 355 | xfsbdstrat(tp->t_mountp, bp); | 356 | xfsbdstrat(tp->t_mountp, bp); |
| 356 | xfs_iowait(bp); | 357 | error = xfs_iowait(bp); |
| 357 | if (XFS_BUF_GETERROR(bp) != 0) { | 358 | if (error) { |
| 358 | xfs_ioerror_alert("xfs_trans_read_buf", mp, | 359 | xfs_ioerror_alert("xfs_trans_read_buf", mp, |
| 359 | bp, blkno); | 360 | bp, blkno); |
| 360 | error = XFS_BUF_GETERROR(bp); | ||
| 361 | xfs_buf_relse(bp); | 361 | xfs_buf_relse(bp); |
| 362 | /* | 362 | /* |
| 363 | * We can gracefully recover from most | 363 | * We can gracefully recover from most read |
| 364 | * read errors. Ones we can't are those | 364 | * errors. Ones we can't are those that happen |
| 365 | * that happen after the transaction's | 365 | * after the transaction's already dirty. |
| 366 | * already dirty. | ||
| 367 | */ | 366 | */ |
| 368 | if (tp->t_flags & XFS_TRANS_DIRTY) | 367 | if (tp->t_flags & XFS_TRANS_DIRTY) |
| 369 | xfs_force_shutdown(tp->t_mountp, | 368 | xfs_force_shutdown(tp->t_mountp, |
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 5c89be475464..0f5191644ab2 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h | |||
| @@ -160,4 +160,9 @@ typedef enum { | |||
| 160 | XFS_BTNUM_MAX | 160 | XFS_BTNUM_MAX |
| 161 | } xfs_btnum_t; | 161 | } xfs_btnum_t; |
| 162 | 162 | ||
| 163 | struct xfs_name { | ||
| 164 | const char *name; | ||
| 165 | int len; | ||
| 166 | }; | ||
| 167 | |||
| 163 | #endif /* __XFS_TYPES_H__ */ | 168 | #endif /* __XFS_TYPES_H__ */ |
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 18a85e746680..2b8dc7e40772 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c | |||
| @@ -40,34 +40,12 @@ | |||
| 40 | #include "xfs_itable.h" | 40 | #include "xfs_itable.h" |
| 41 | #include "xfs_utils.h" | 41 | #include "xfs_utils.h" |
| 42 | 42 | ||
| 43 | /* | ||
| 44 | * xfs_get_dir_entry is used to get a reference to an inode given | ||
| 45 | * its parent directory inode and the name of the file. It does | ||
| 46 | * not lock the child inode, and it unlocks the directory before | ||
| 47 | * returning. The directory's generation number is returned for | ||
| 48 | * use by a later call to xfs_lock_dir_and_entry. | ||
| 49 | */ | ||
| 50 | int | ||
| 51 | xfs_get_dir_entry( | ||
| 52 | bhv_vname_t *dentry, | ||
| 53 | xfs_inode_t **ipp) | ||
| 54 | { | ||
| 55 | bhv_vnode_t *vp; | ||
| 56 | |||
| 57 | vp = VNAME_TO_VNODE(dentry); | ||
| 58 | |||
| 59 | *ipp = xfs_vtoi(vp); | ||
| 60 | if (!*ipp) | ||
| 61 | return XFS_ERROR(ENOENT); | ||
| 62 | VN_HOLD(vp); | ||
| 63 | return 0; | ||
| 64 | } | ||
| 65 | 43 | ||
| 66 | int | 44 | int |
| 67 | xfs_dir_lookup_int( | 45 | xfs_dir_lookup_int( |
| 68 | xfs_inode_t *dp, | 46 | xfs_inode_t *dp, |
| 69 | uint lock_mode, | 47 | uint lock_mode, |
| 70 | bhv_vname_t *dentry, | 48 | struct xfs_name *name, |
| 71 | xfs_ino_t *inum, | 49 | xfs_ino_t *inum, |
| 72 | xfs_inode_t **ipp) | 50 | xfs_inode_t **ipp) |
| 73 | { | 51 | { |
| @@ -75,7 +53,7 @@ xfs_dir_lookup_int( | |||
| 75 | 53 | ||
| 76 | xfs_itrace_entry(dp); | 54 | xfs_itrace_entry(dp); |
| 77 | 55 | ||
| 78 | error = xfs_dir_lookup(NULL, dp, VNAME(dentry), VNAMELEN(dentry), inum); | 56 | error = xfs_dir_lookup(NULL, dp, name, inum); |
| 79 | if (!error) { | 57 | if (!error) { |
| 80 | /* | 58 | /* |
| 81 | * Unlock the directory. We do this because we can't | 59 | * Unlock the directory. We do this because we can't |
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h index f857fcccb723..175b126d2cab 100644 --- a/fs/xfs/xfs_utils.h +++ b/fs/xfs/xfs_utils.h | |||
| @@ -21,15 +21,14 @@ | |||
| 21 | #define IRELE(ip) VN_RELE(XFS_ITOV(ip)) | 21 | #define IRELE(ip) VN_RELE(XFS_ITOV(ip)) |
| 22 | #define IHOLD(ip) VN_HOLD(XFS_ITOV(ip)) | 22 | #define IHOLD(ip) VN_HOLD(XFS_ITOV(ip)) |
| 23 | 23 | ||
| 24 | extern int xfs_get_dir_entry (bhv_vname_t *, xfs_inode_t **); | 24 | extern int xfs_dir_lookup_int(xfs_inode_t *, uint, struct xfs_name *, |
| 25 | extern int xfs_dir_lookup_int (xfs_inode_t *, uint, bhv_vname_t *, xfs_ino_t *, | 25 | xfs_ino_t *, xfs_inode_t **); |
| 26 | xfs_inode_t **); | 26 | extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *); |
| 27 | extern int xfs_truncate_file (xfs_mount_t *, xfs_inode_t *); | 27 | extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, |
| 28 | extern int xfs_dir_ialloc (xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, | ||
| 29 | xfs_dev_t, cred_t *, prid_t, int, | 28 | xfs_dev_t, cred_t *, prid_t, int, |
| 30 | xfs_inode_t **, int *); | 29 | xfs_inode_t **, int *); |
| 31 | extern int xfs_droplink (xfs_trans_t *, xfs_inode_t *); | 30 | extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *); |
| 32 | extern int xfs_bumplink (xfs_trans_t *, xfs_inode_t *); | 31 | extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *); |
| 33 | extern void xfs_bump_ino_vers2 (xfs_trans_t *, xfs_inode_t *); | 32 | extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *); |
| 34 | 33 | ||
| 35 | #endif /* __XFS_UTILS_H__ */ | 34 | #endif /* __XFS_UTILS_H__ */ |
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 7094caff13cf..fc48158fe479 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c | |||
| @@ -43,7 +43,6 @@ | |||
| 43 | #include "xfs_error.h" | 43 | #include "xfs_error.h" |
| 44 | #include "xfs_bmap.h" | 44 | #include "xfs_bmap.h" |
| 45 | #include "xfs_rw.h" | 45 | #include "xfs_rw.h" |
| 46 | #include "xfs_refcache.h" | ||
| 47 | #include "xfs_buf_item.h" | 46 | #include "xfs_buf_item.h" |
| 48 | #include "xfs_log_priv.h" | 47 | #include "xfs_log_priv.h" |
| 49 | #include "xfs_dir2_trace.h" | 48 | #include "xfs_dir2_trace.h" |
| @@ -56,6 +55,7 @@ | |||
| 56 | #include "xfs_fsops.h" | 55 | #include "xfs_fsops.h" |
| 57 | #include "xfs_vnodeops.h" | 56 | #include "xfs_vnodeops.h" |
| 58 | #include "xfs_vfsops.h" | 57 | #include "xfs_vfsops.h" |
| 58 | #include "xfs_utils.h" | ||
| 59 | 59 | ||
| 60 | 60 | ||
| 61 | int __init | 61 | int __init |
| @@ -69,15 +69,17 @@ xfs_init(void) | |||
| 69 | /* | 69 | /* |
| 70 | * Initialize all of the zone allocators we use. | 70 | * Initialize all of the zone allocators we use. |
| 71 | */ | 71 | */ |
| 72 | xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), | ||
| 73 | "xfs_log_ticket"); | ||
| 72 | xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), | 74 | xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), |
| 73 | "xfs_bmap_free_item"); | 75 | "xfs_bmap_free_item"); |
| 74 | xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), | 76 | xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), |
| 75 | "xfs_btree_cur"); | 77 | "xfs_btree_cur"); |
| 76 | xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); | 78 | xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t), |
| 77 | xfs_da_state_zone = | 79 | "xfs_da_state"); |
| 78 | kmem_zone_init(sizeof(xfs_da_state_t), "xfs_da_state"); | ||
| 79 | xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); | 80 | xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); |
| 80 | xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); | 81 | xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); |
| 82 | xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); | ||
| 81 | xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); | 83 | xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); |
| 82 | xfs_mru_cache_init(); | 84 | xfs_mru_cache_init(); |
| 83 | xfs_filestream_init(); | 85 | xfs_filestream_init(); |
| @@ -113,9 +115,6 @@ xfs_init(void) | |||
| 113 | xfs_ili_zone = | 115 | xfs_ili_zone = |
| 114 | kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", | 116 | kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", |
| 115 | KM_ZONE_SPREAD, NULL); | 117 | KM_ZONE_SPREAD, NULL); |
| 116 | xfs_icluster_zone = | ||
| 117 | kmem_zone_init_flags(sizeof(xfs_icluster_t), "xfs_icluster", | ||
| 118 | KM_ZONE_SPREAD, NULL); | ||
| 119 | 118 | ||
| 120 | /* | 119 | /* |
| 121 | * Allocate global trace buffers. | 120 | * Allocate global trace buffers. |
| @@ -153,11 +152,9 @@ xfs_cleanup(void) | |||
| 153 | extern kmem_zone_t *xfs_inode_zone; | 152 | extern kmem_zone_t *xfs_inode_zone; |
| 154 | extern kmem_zone_t *xfs_efd_zone; | 153 | extern kmem_zone_t *xfs_efd_zone; |
| 155 | extern kmem_zone_t *xfs_efi_zone; | 154 | extern kmem_zone_t *xfs_efi_zone; |
| 156 | extern kmem_zone_t *xfs_icluster_zone; | ||
| 157 | 155 | ||
| 158 | xfs_cleanup_procfs(); | 156 | xfs_cleanup_procfs(); |
| 159 | xfs_sysctl_unregister(); | 157 | xfs_sysctl_unregister(); |
| 160 | xfs_refcache_destroy(); | ||
| 161 | xfs_filestream_uninit(); | 158 | xfs_filestream_uninit(); |
| 162 | xfs_mru_cache_uninit(); | 159 | xfs_mru_cache_uninit(); |
| 163 | xfs_acl_zone_destroy(xfs_acl_zone); | 160 | xfs_acl_zone_destroy(xfs_acl_zone); |
| @@ -189,7 +186,6 @@ xfs_cleanup(void) | |||
| 189 | kmem_zone_destroy(xfs_efi_zone); | 186 | kmem_zone_destroy(xfs_efi_zone); |
| 190 | kmem_zone_destroy(xfs_ifork_zone); | 187 | kmem_zone_destroy(xfs_ifork_zone); |
| 191 | kmem_zone_destroy(xfs_ili_zone); | 188 | kmem_zone_destroy(xfs_ili_zone); |
| 192 | kmem_zone_destroy(xfs_icluster_zone); | ||
| 193 | } | 189 | } |
| 194 | 190 | ||
| 195 | /* | 191 | /* |
| @@ -573,7 +569,7 @@ xfs_unmount( | |||
| 573 | #ifdef HAVE_DMAPI | 569 | #ifdef HAVE_DMAPI |
| 574 | if (mp->m_flags & XFS_MOUNT_DMAPI) { | 570 | if (mp->m_flags & XFS_MOUNT_DMAPI) { |
| 575 | error = XFS_SEND_PREUNMOUNT(mp, | 571 | error = XFS_SEND_PREUNMOUNT(mp, |
| 576 | rvp, DM_RIGHT_NULL, rvp, DM_RIGHT_NULL, | 572 | rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL, |
| 577 | NULL, NULL, 0, 0, | 573 | NULL, NULL, 0, 0, |
| 578 | (mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))? | 574 | (mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))? |
| 579 | 0:DM_FLAGS_UNWANTED); | 575 | 0:DM_FLAGS_UNWANTED); |
| @@ -584,11 +580,6 @@ xfs_unmount( | |||
| 584 | 0 : DM_FLAGS_UNWANTED; | 580 | 0 : DM_FLAGS_UNWANTED; |
| 585 | } | 581 | } |
| 586 | #endif | 582 | #endif |
| 587 | /* | ||
| 588 | * First blow any referenced inode from this file system | ||
| 589 | * out of the reference cache, and delete the timer. | ||
| 590 | */ | ||
| 591 | xfs_refcache_purge_mp(mp); | ||
| 592 | 583 | ||
| 593 | /* | 584 | /* |
| 594 | * Blow away any referenced inode in the filestreams cache. | 585 | * Blow away any referenced inode in the filestreams cache. |
| @@ -607,7 +598,7 @@ xfs_unmount( | |||
| 607 | /* | 598 | /* |
| 608 | * Drop the reference count | 599 | * Drop the reference count |
| 609 | */ | 600 | */ |
| 610 | VN_RELE(rvp); | 601 | IRELE(rip); |
| 611 | 602 | ||
| 612 | /* | 603 | /* |
| 613 | * If we're forcing a shutdown, typically because of a media error, | 604 | * If we're forcing a shutdown, typically because of a media error, |
| @@ -629,7 +620,7 @@ out: | |||
| 629 | /* Note: mp structure must still exist for | 620 | /* Note: mp structure must still exist for |
| 630 | * XFS_SEND_UNMOUNT() call. | 621 | * XFS_SEND_UNMOUNT() call. |
| 631 | */ | 622 | */ |
| 632 | XFS_SEND_UNMOUNT(mp, error == 0 ? rvp : NULL, | 623 | XFS_SEND_UNMOUNT(mp, error == 0 ? rip : NULL, |
| 633 | DM_RIGHT_NULL, 0, error, unmount_event_flags); | 624 | DM_RIGHT_NULL, 0, error, unmount_event_flags); |
| 634 | } | 625 | } |
| 635 | if (xfs_unmountfs_needed) { | 626 | if (xfs_unmountfs_needed) { |
| @@ -646,13 +637,12 @@ out: | |||
| 646 | return XFS_ERROR(error); | 637 | return XFS_ERROR(error); |
| 647 | } | 638 | } |
| 648 | 639 | ||
| 649 | STATIC int | 640 | STATIC void |
| 650 | xfs_quiesce_fs( | 641 | xfs_quiesce_fs( |
| 651 | xfs_mount_t *mp) | 642 | xfs_mount_t *mp) |
| 652 | { | 643 | { |
| 653 | int count = 0, pincount; | 644 | int count = 0, pincount; |
| 654 | 645 | ||
| 655 | xfs_refcache_purge_mp(mp); | ||
| 656 | xfs_flush_buftarg(mp->m_ddev_targp, 0); | 646 | xfs_flush_buftarg(mp->m_ddev_targp, 0); |
| 657 | xfs_finish_reclaim_all(mp, 0); | 647 | xfs_finish_reclaim_all(mp, 0); |
| 658 | 648 | ||
| @@ -671,8 +661,6 @@ xfs_quiesce_fs( | |||
| 671 | count++; | 661 | count++; |
| 672 | } | 662 | } |
| 673 | } while (count < 2); | 663 | } while (count < 2); |
| 674 | |||
| 675 | return 0; | ||
| 676 | } | 664 | } |
| 677 | 665 | ||
| 678 | /* | 666 | /* |
| @@ -684,6 +672,8 @@ void | |||
| 684 | xfs_attr_quiesce( | 672 | xfs_attr_quiesce( |
| 685 | xfs_mount_t *mp) | 673 | xfs_mount_t *mp) |
| 686 | { | 674 | { |
| 675 | int error = 0; | ||
| 676 | |||
| 687 | /* wait for all modifications to complete */ | 677 | /* wait for all modifications to complete */ |
| 688 | while (atomic_read(&mp->m_active_trans) > 0) | 678 | while (atomic_read(&mp->m_active_trans) > 0) |
| 689 | delay(100); | 679 | delay(100); |
| @@ -694,7 +684,11 @@ xfs_attr_quiesce( | |||
| 694 | ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); | 684 | ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); |
| 695 | 685 | ||
| 696 | /* Push the superblock and write an unmount record */ | 686 | /* Push the superblock and write an unmount record */ |
| 697 | xfs_log_sbcount(mp, 1); | 687 | error = xfs_log_sbcount(mp, 1); |
| 688 | if (error) | ||
| 689 | xfs_fs_cmn_err(CE_WARN, mp, | ||
| 690 | "xfs_attr_quiesce: failed to log sb changes. " | ||
| 691 | "Frozen image may not be consistent."); | ||
| 698 | xfs_log_unmount_write(mp); | 692 | xfs_log_unmount_write(mp); |
| 699 | xfs_unmountfs_writesb(mp); | 693 | xfs_unmountfs_writesb(mp); |
| 700 | } | 694 | } |
| @@ -790,8 +784,8 @@ xfs_unmount_flush( | |||
| 790 | goto fscorrupt_out2; | 784 | goto fscorrupt_out2; |
| 791 | 785 | ||
| 792 | if (rbmip) { | 786 | if (rbmip) { |
| 793 | VN_RELE(XFS_ITOV(rbmip)); | 787 | IRELE(rbmip); |
| 794 | VN_RELE(XFS_ITOV(rsumip)); | 788 | IRELE(rsumip); |
| 795 | } | 789 | } |
| 796 | 790 | ||
| 797 | xfs_iunlock(rip, XFS_ILOCK_EXCL); | 791 | xfs_iunlock(rip, XFS_ILOCK_EXCL); |
| @@ -1169,10 +1163,10 @@ xfs_sync_inodes( | |||
| 1169 | * above, then wait until after we've unlocked | 1163 | * above, then wait until after we've unlocked |
| 1170 | * the inode to release the reference. This is | 1164 | * the inode to release the reference. This is |
| 1171 | * because we can be already holding the inode | 1165 | * because we can be already holding the inode |
| 1172 | * lock when VN_RELE() calls xfs_inactive(). | 1166 | * lock when IRELE() calls xfs_inactive(). |
| 1173 | * | 1167 | * |
| 1174 | * Make sure to drop the mount lock before calling | 1168 | * Make sure to drop the mount lock before calling |
| 1175 | * VN_RELE() so that we don't trip over ourselves if | 1169 | * IRELE() so that we don't trip over ourselves if |
| 1176 | * we have to go for the mount lock again in the | 1170 | * we have to go for the mount lock again in the |
| 1177 | * inactive code. | 1171 | * inactive code. |
| 1178 | */ | 1172 | */ |
| @@ -1180,7 +1174,7 @@ xfs_sync_inodes( | |||
| 1180 | IPOINTER_INSERT(ip, mp); | 1174 | IPOINTER_INSERT(ip, mp); |
| 1181 | } | 1175 | } |
| 1182 | 1176 | ||
| 1183 | VN_RELE(vp); | 1177 | IRELE(ip); |
| 1184 | 1178 | ||
| 1185 | vnode_refed = B_FALSE; | 1179 | vnode_refed = B_FALSE; |
| 1186 | } | 1180 | } |
| @@ -1323,30 +1317,8 @@ xfs_syncsub( | |||
| 1323 | } | 1317 | } |
| 1324 | 1318 | ||
| 1325 | /* | 1319 | /* |
| 1326 | * If this is the periodic sync, then kick some entries out of | ||
| 1327 | * the reference cache. This ensures that idle entries are | ||
| 1328 | * eventually kicked out of the cache. | ||
| 1329 | */ | ||
| 1330 | if (flags & SYNC_REFCACHE) { | ||
| 1331 | if (flags & SYNC_WAIT) | ||
| 1332 | xfs_refcache_purge_mp(mp); | ||
| 1333 | else | ||
| 1334 | xfs_refcache_purge_some(mp); | ||
| 1335 | } | ||
| 1336 | |||
| 1337 | /* | ||
| 1338 | * If asked, update the disk superblock with incore counter values if we | ||
| 1339 | * are using non-persistent counters so that they don't get too far out | ||
| 1340 | * of sync if we crash or get a forced shutdown. We don't want to force | ||
| 1341 | * this to disk, just get a transaction into the iclogs.... | ||
| 1342 | */ | ||
| 1343 | if (flags & SYNC_SUPER) | ||
| 1344 | xfs_log_sbcount(mp, 0); | ||
| 1345 | |||
| 1346 | /* | ||
| 1347 | * Now check to see if the log needs a "dummy" transaction. | 1320 | * Now check to see if the log needs a "dummy" transaction. |
| 1348 | */ | 1321 | */ |
| 1349 | |||
| 1350 | if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { | 1322 | if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { |
| 1351 | xfs_trans_t *tp; | 1323 | xfs_trans_t *tp; |
| 1352 | xfs_inode_t *ip; | 1324 | xfs_inode_t *ip; |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 64c5953feca4..6650601c64f7 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
| @@ -48,7 +48,6 @@ | |||
| 48 | #include "xfs_quota.h" | 48 | #include "xfs_quota.h" |
| 49 | #include "xfs_utils.h" | 49 | #include "xfs_utils.h" |
| 50 | #include "xfs_rtalloc.h" | 50 | #include "xfs_rtalloc.h" |
| 51 | #include "xfs_refcache.h" | ||
| 52 | #include "xfs_trans_space.h" | 51 | #include "xfs_trans_space.h" |
| 53 | #include "xfs_log_priv.h" | 52 | #include "xfs_log_priv.h" |
| 54 | #include "xfs_filestream.h" | 53 | #include "xfs_filestream.h" |
| @@ -327,7 +326,7 @@ xfs_setattr( | |||
| 327 | if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && | 326 | if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && |
| 328 | !(flags & ATTR_DMI)) { | 327 | !(flags & ATTR_DMI)) { |
| 329 | int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; | 328 | int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; |
| 330 | code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp, | 329 | code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip, |
| 331 | vap->va_size, 0, dmflags, NULL); | 330 | vap->va_size, 0, dmflags, NULL); |
| 332 | if (code) { | 331 | if (code) { |
| 333 | lock_flags = 0; | 332 | lock_flags = 0; |
| @@ -634,6 +633,15 @@ xfs_setattr( | |||
| 634 | * Truncate file. Must have write permission and not be a directory. | 633 | * Truncate file. Must have write permission and not be a directory. |
| 635 | */ | 634 | */ |
| 636 | if (mask & XFS_AT_SIZE) { | 635 | if (mask & XFS_AT_SIZE) { |
| 636 | /* | ||
| 637 | * Only change the c/mtime if we are changing the size | ||
| 638 | * or we are explicitly asked to change it. This handles | ||
| 639 | * the semantic difference between truncate() and ftruncate() | ||
| 640 | * as implemented in the VFS. | ||
| 641 | */ | ||
| 642 | if (vap->va_size != ip->i_size || (mask & XFS_AT_CTIME)) | ||
| 643 | timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; | ||
| 644 | |||
| 637 | if (vap->va_size > ip->i_size) { | 645 | if (vap->va_size > ip->i_size) { |
| 638 | xfs_igrow_finish(tp, ip, vap->va_size, | 646 | xfs_igrow_finish(tp, ip, vap->va_size, |
| 639 | !(flags & ATTR_DMI)); | 647 | !(flags & ATTR_DMI)); |
| @@ -662,10 +670,6 @@ xfs_setattr( | |||
| 662 | */ | 670 | */ |
| 663 | xfs_iflags_set(ip, XFS_ITRUNCATED); | 671 | xfs_iflags_set(ip, XFS_ITRUNCATED); |
| 664 | } | 672 | } |
| 665 | /* | ||
| 666 | * Have to do this even if the file's size doesn't change. | ||
| 667 | */ | ||
| 668 | timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; | ||
| 669 | } | 673 | } |
| 670 | 674 | ||
| 671 | /* | 675 | /* |
| @@ -877,7 +881,7 @@ xfs_setattr( | |||
| 877 | 881 | ||
| 878 | if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) && | 882 | if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) && |
| 879 | !(flags & ATTR_DMI)) { | 883 | !(flags & ATTR_DMI)) { |
| 880 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL, | 884 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL, |
| 881 | NULL, DM_RIGHT_NULL, NULL, NULL, | 885 | NULL, DM_RIGHT_NULL, NULL, NULL, |
| 882 | 0, 0, AT_DELAY_FLAG(flags)); | 886 | 0, 0, AT_DELAY_FLAG(flags)); |
| 883 | } | 887 | } |
| @@ -1443,28 +1447,22 @@ xfs_inactive_attrs( | |||
| 1443 | tp = *tpp; | 1447 | tp = *tpp; |
| 1444 | mp = ip->i_mount; | 1448 | mp = ip->i_mount; |
| 1445 | ASSERT(ip->i_d.di_forkoff != 0); | 1449 | ASSERT(ip->i_d.di_forkoff != 0); |
| 1446 | xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 1450 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
| 1447 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 1451 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
| 1452 | if (error) | ||
| 1453 | goto error_unlock; | ||
| 1448 | 1454 | ||
| 1449 | error = xfs_attr_inactive(ip); | 1455 | error = xfs_attr_inactive(ip); |
| 1450 | if (error) { | 1456 | if (error) |
| 1451 | *tpp = NULL; | 1457 | goto error_unlock; |
| 1452 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
| 1453 | return error; /* goto out */ | ||
| 1454 | } | ||
| 1455 | 1458 | ||
| 1456 | tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); | 1459 | tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); |
| 1457 | error = xfs_trans_reserve(tp, 0, | 1460 | error = xfs_trans_reserve(tp, 0, |
| 1458 | XFS_IFREE_LOG_RES(mp), | 1461 | XFS_IFREE_LOG_RES(mp), |
| 1459 | 0, XFS_TRANS_PERM_LOG_RES, | 1462 | 0, XFS_TRANS_PERM_LOG_RES, |
| 1460 | XFS_INACTIVE_LOG_COUNT); | 1463 | XFS_INACTIVE_LOG_COUNT); |
| 1461 | if (error) { | 1464 | if (error) |
| 1462 | ASSERT(XFS_FORCED_SHUTDOWN(mp)); | 1465 | goto error_cancel; |
| 1463 | xfs_trans_cancel(tp, 0); | ||
| 1464 | *tpp = NULL; | ||
| 1465 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
| 1466 | return error; | ||
| 1467 | } | ||
| 1468 | 1466 | ||
| 1469 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 1467 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
| 1470 | xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 1468 | xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); |
| @@ -1475,6 +1473,14 @@ xfs_inactive_attrs( | |||
| 1475 | 1473 | ||
| 1476 | *tpp = tp; | 1474 | *tpp = tp; |
| 1477 | return 0; | 1475 | return 0; |
| 1476 | |||
| 1477 | error_cancel: | ||
| 1478 | ASSERT(XFS_FORCED_SHUTDOWN(mp)); | ||
| 1479 | xfs_trans_cancel(tp, 0); | ||
| 1480 | error_unlock: | ||
| 1481 | *tpp = NULL; | ||
| 1482 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | ||
| 1483 | return error; | ||
| 1478 | } | 1484 | } |
| 1479 | 1485 | ||
| 1480 | int | 1486 | int |
| @@ -1520,12 +1526,6 @@ xfs_release( | |||
| 1520 | xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); | 1526 | xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); |
| 1521 | } | 1527 | } |
| 1522 | 1528 | ||
| 1523 | #ifdef HAVE_REFCACHE | ||
| 1524 | /* If we are in the NFS reference cache then don't do this now */ | ||
| 1525 | if (ip->i_refcache) | ||
| 1526 | return 0; | ||
| 1527 | #endif | ||
| 1528 | |||
| 1529 | if (ip->i_d.di_nlink != 0) { | 1529 | if (ip->i_d.di_nlink != 0) { |
| 1530 | if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && | 1530 | if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && |
| 1531 | ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || | 1531 | ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || |
| @@ -1588,9 +1588,8 @@ xfs_inactive( | |||
| 1588 | 1588 | ||
| 1589 | mp = ip->i_mount; | 1589 | mp = ip->i_mount; |
| 1590 | 1590 | ||
| 1591 | if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY)) { | 1591 | if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY)) |
| 1592 | (void) XFS_SEND_DESTROY(mp, vp, DM_RIGHT_NULL); | 1592 | XFS_SEND_DESTROY(mp, ip, DM_RIGHT_NULL); |
| 1593 | } | ||
| 1594 | 1593 | ||
| 1595 | error = 0; | 1594 | error = 0; |
| 1596 | 1595 | ||
| @@ -1744,11 +1743,18 @@ xfs_inactive( | |||
| 1744 | XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1); | 1743 | XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1); |
| 1745 | 1744 | ||
| 1746 | /* | 1745 | /* |
| 1747 | * Just ignore errors at this point. There is | 1746 | * Just ignore errors at this point. There is nothing we can |
| 1748 | * nothing we can do except to try to keep going. | 1747 | * do except to try to keep going. Make sure it's not a silent |
| 1748 | * error. | ||
| 1749 | */ | 1749 | */ |
| 1750 | (void) xfs_bmap_finish(&tp, &free_list, &committed); | 1750 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
| 1751 | (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 1751 | if (error) |
| 1752 | xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " | ||
| 1753 | "xfs_bmap_finish() returned error %d", error); | ||
| 1754 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
| 1755 | if (error) | ||
| 1756 | xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " | ||
| 1757 | "xfs_trans_commit() returned error %d", error); | ||
| 1752 | } | 1758 | } |
| 1753 | /* | 1759 | /* |
| 1754 | * Release the dquots held by inode, if any. | 1760 | * Release the dquots held by inode, if any. |
| @@ -1765,8 +1771,8 @@ xfs_inactive( | |||
| 1765 | int | 1771 | int |
| 1766 | xfs_lookup( | 1772 | xfs_lookup( |
| 1767 | xfs_inode_t *dp, | 1773 | xfs_inode_t *dp, |
| 1768 | bhv_vname_t *dentry, | 1774 | struct xfs_name *name, |
| 1769 | bhv_vnode_t **vpp) | 1775 | xfs_inode_t **ipp) |
| 1770 | { | 1776 | { |
| 1771 | xfs_inode_t *ip; | 1777 | xfs_inode_t *ip; |
| 1772 | xfs_ino_t e_inum; | 1778 | xfs_ino_t e_inum; |
| @@ -1779,9 +1785,9 @@ xfs_lookup( | |||
| 1779 | return XFS_ERROR(EIO); | 1785 | return XFS_ERROR(EIO); |
| 1780 | 1786 | ||
| 1781 | lock_mode = xfs_ilock_map_shared(dp); | 1787 | lock_mode = xfs_ilock_map_shared(dp); |
| 1782 | error = xfs_dir_lookup_int(dp, lock_mode, dentry, &e_inum, &ip); | 1788 | error = xfs_dir_lookup_int(dp, lock_mode, name, &e_inum, &ip); |
| 1783 | if (!error) { | 1789 | if (!error) { |
| 1784 | *vpp = XFS_ITOV(ip); | 1790 | *ipp = ip; |
| 1785 | xfs_itrace_ref(ip); | 1791 | xfs_itrace_ref(ip); |
| 1786 | } | 1792 | } |
| 1787 | xfs_iunlock_map_shared(dp, lock_mode); | 1793 | xfs_iunlock_map_shared(dp, lock_mode); |
| @@ -1791,19 +1797,16 @@ xfs_lookup( | |||
| 1791 | int | 1797 | int |
| 1792 | xfs_create( | 1798 | xfs_create( |
| 1793 | xfs_inode_t *dp, | 1799 | xfs_inode_t *dp, |
| 1794 | bhv_vname_t *dentry, | 1800 | struct xfs_name *name, |
| 1795 | mode_t mode, | 1801 | mode_t mode, |
| 1796 | xfs_dev_t rdev, | 1802 | xfs_dev_t rdev, |
| 1797 | bhv_vnode_t **vpp, | 1803 | xfs_inode_t **ipp, |
| 1798 | cred_t *credp) | 1804 | cred_t *credp) |
| 1799 | { | 1805 | { |
| 1800 | char *name = VNAME(dentry); | 1806 | xfs_mount_t *mp = dp->i_mount; |
| 1801 | xfs_mount_t *mp = dp->i_mount; | ||
| 1802 | bhv_vnode_t *dir_vp = XFS_ITOV(dp); | ||
| 1803 | xfs_inode_t *ip; | 1807 | xfs_inode_t *ip; |
| 1804 | bhv_vnode_t *vp = NULL; | ||
| 1805 | xfs_trans_t *tp; | 1808 | xfs_trans_t *tp; |
| 1806 | int error; | 1809 | int error; |
| 1807 | xfs_bmap_free_t free_list; | 1810 | xfs_bmap_free_t free_list; |
| 1808 | xfs_fsblock_t first_block; | 1811 | xfs_fsblock_t first_block; |
| 1809 | boolean_t unlock_dp_on_error = B_FALSE; | 1812 | boolean_t unlock_dp_on_error = B_FALSE; |
| @@ -1813,17 +1816,14 @@ xfs_create( | |||
| 1813 | xfs_prid_t prid; | 1816 | xfs_prid_t prid; |
| 1814 | struct xfs_dquot *udqp, *gdqp; | 1817 | struct xfs_dquot *udqp, *gdqp; |
| 1815 | uint resblks; | 1818 | uint resblks; |
| 1816 | int namelen; | ||
| 1817 | 1819 | ||
| 1818 | ASSERT(!*vpp); | 1820 | ASSERT(!*ipp); |
| 1819 | xfs_itrace_entry(dp); | 1821 | xfs_itrace_entry(dp); |
| 1820 | 1822 | ||
| 1821 | namelen = VNAMELEN(dentry); | ||
| 1822 | |||
| 1823 | if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { | 1823 | if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { |
| 1824 | error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, | 1824 | error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, |
| 1825 | dir_vp, DM_RIGHT_NULL, NULL, | 1825 | dp, DM_RIGHT_NULL, NULL, |
| 1826 | DM_RIGHT_NULL, name, NULL, | 1826 | DM_RIGHT_NULL, name->name, NULL, |
| 1827 | mode, 0, 0); | 1827 | mode, 0, 0); |
| 1828 | 1828 | ||
| 1829 | if (error) | 1829 | if (error) |
| @@ -1855,7 +1855,7 @@ xfs_create( | |||
| 1855 | 1855 | ||
| 1856 | tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); | 1856 | tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE); |
| 1857 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | 1857 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; |
| 1858 | resblks = XFS_CREATE_SPACE_RES(mp, namelen); | 1858 | resblks = XFS_CREATE_SPACE_RES(mp, name->len); |
| 1859 | /* | 1859 | /* |
| 1860 | * Initially assume that the file does not exist and | 1860 | * Initially assume that the file does not exist and |
| 1861 | * reserve the resources for that case. If that is not | 1861 | * reserve the resources for that case. If that is not |
| @@ -1888,7 +1888,8 @@ xfs_create( | |||
| 1888 | if (error) | 1888 | if (error) |
| 1889 | goto error_return; | 1889 | goto error_return; |
| 1890 | 1890 | ||
| 1891 | if (resblks == 0 && (error = xfs_dir_canenter(tp, dp, name, namelen))) | 1891 | error = xfs_dir_canenter(tp, dp, name, resblks); |
| 1892 | if (error) | ||
| 1892 | goto error_return; | 1893 | goto error_return; |
| 1893 | error = xfs_dir_ialloc(&tp, dp, mode, 1, | 1894 | error = xfs_dir_ialloc(&tp, dp, mode, 1, |
| 1894 | rdev, credp, prid, resblks > 0, | 1895 | rdev, credp, prid, resblks > 0, |
| @@ -1914,11 +1915,11 @@ xfs_create( | |||
| 1914 | * the transaction cancel unlocking dp so don't do it explicitly in the | 1915 | * the transaction cancel unlocking dp so don't do it explicitly in the |
| 1915 | * error path. | 1916 | * error path. |
| 1916 | */ | 1917 | */ |
| 1917 | VN_HOLD(dir_vp); | 1918 | IHOLD(dp); |
| 1918 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); | 1919 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); |
| 1919 | unlock_dp_on_error = B_FALSE; | 1920 | unlock_dp_on_error = B_FALSE; |
| 1920 | 1921 | ||
| 1921 | error = xfs_dir_createname(tp, dp, name, namelen, ip->i_ino, | 1922 | error = xfs_dir_createname(tp, dp, name, ip->i_ino, |
| 1922 | &first_block, &free_list, resblks ? | 1923 | &first_block, &free_list, resblks ? |
| 1923 | resblks - XFS_IALLOC_SPACE_RES(mp) : 0); | 1924 | resblks - XFS_IALLOC_SPACE_RES(mp) : 0); |
| 1924 | if (error) { | 1925 | if (error) { |
| @@ -1952,7 +1953,6 @@ xfs_create( | |||
| 1952 | * vnode to the caller, we bump the vnode ref count now. | 1953 | * vnode to the caller, we bump the vnode ref count now. |
| 1953 | */ | 1954 | */ |
| 1954 | IHOLD(ip); | 1955 | IHOLD(ip); |
| 1955 | vp = XFS_ITOV(ip); | ||
| 1956 | 1956 | ||
| 1957 | error = xfs_bmap_finish(&tp, &free_list, &committed); | 1957 | error = xfs_bmap_finish(&tp, &free_list, &committed); |
| 1958 | if (error) { | 1958 | if (error) { |
| @@ -1970,17 +1970,17 @@ xfs_create( | |||
| 1970 | XFS_QM_DQRELE(mp, udqp); | 1970 | XFS_QM_DQRELE(mp, udqp); |
| 1971 | XFS_QM_DQRELE(mp, gdqp); | 1971 | XFS_QM_DQRELE(mp, gdqp); |
| 1972 | 1972 | ||
| 1973 | *vpp = vp; | 1973 | *ipp = ip; |
| 1974 | 1974 | ||
| 1975 | /* Fallthrough to std_return with error = 0 */ | 1975 | /* Fallthrough to std_return with error = 0 */ |
| 1976 | 1976 | ||
| 1977 | std_return: | 1977 | std_return: |
| 1978 | if ((*vpp || (error != 0 && dm_event_sent != 0)) && | 1978 | if ((*ipp || (error != 0 && dm_event_sent != 0)) && |
| 1979 | DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { | 1979 | DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { |
| 1980 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, | 1980 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, |
| 1981 | dir_vp, DM_RIGHT_NULL, | 1981 | dp, DM_RIGHT_NULL, |
| 1982 | *vpp ? vp:NULL, | 1982 | *ipp ? ip : NULL, |
| 1983 | DM_RIGHT_NULL, name, NULL, | 1983 | DM_RIGHT_NULL, name->name, NULL, |
| 1984 | mode, error, 0); | 1984 | mode, error, 0); |
| 1985 | } | 1985 | } |
| 1986 | return error; | 1986 | return error; |
| @@ -2272,46 +2272,32 @@ int remove_which_error_return = 0; | |||
| 2272 | int | 2272 | int |
| 2273 | xfs_remove( | 2273 | xfs_remove( |
| 2274 | xfs_inode_t *dp, | 2274 | xfs_inode_t *dp, |
| 2275 | bhv_vname_t *dentry) | 2275 | struct xfs_name *name, |
| 2276 | xfs_inode_t *ip) | ||
| 2276 | { | 2277 | { |
| 2277 | bhv_vnode_t *dir_vp = XFS_ITOV(dp); | ||
| 2278 | char *name = VNAME(dentry); | ||
| 2279 | xfs_mount_t *mp = dp->i_mount; | 2278 | xfs_mount_t *mp = dp->i_mount; |
| 2280 | xfs_inode_t *ip; | ||
| 2281 | xfs_trans_t *tp = NULL; | 2279 | xfs_trans_t *tp = NULL; |
| 2282 | int error = 0; | 2280 | int error = 0; |
| 2283 | xfs_bmap_free_t free_list; | 2281 | xfs_bmap_free_t free_list; |
| 2284 | xfs_fsblock_t first_block; | 2282 | xfs_fsblock_t first_block; |
| 2285 | int cancel_flags; | 2283 | int cancel_flags; |
| 2286 | int committed; | 2284 | int committed; |
| 2287 | int dm_di_mode = 0; | ||
| 2288 | int link_zero; | 2285 | int link_zero; |
| 2289 | uint resblks; | 2286 | uint resblks; |
| 2290 | int namelen; | ||
| 2291 | 2287 | ||
| 2292 | xfs_itrace_entry(dp); | 2288 | xfs_itrace_entry(dp); |
| 2293 | 2289 | ||
| 2294 | if (XFS_FORCED_SHUTDOWN(mp)) | 2290 | if (XFS_FORCED_SHUTDOWN(mp)) |
| 2295 | return XFS_ERROR(EIO); | 2291 | return XFS_ERROR(EIO); |
| 2296 | 2292 | ||
| 2297 | namelen = VNAMELEN(dentry); | ||
| 2298 | |||
| 2299 | if (!xfs_get_dir_entry(dentry, &ip)) { | ||
| 2300 | dm_di_mode = ip->i_d.di_mode; | ||
| 2301 | IRELE(ip); | ||
| 2302 | } | ||
| 2303 | |||
| 2304 | if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { | 2293 | if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { |
| 2305 | error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp, | 2294 | error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dp, DM_RIGHT_NULL, |
| 2306 | DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, | 2295 | NULL, DM_RIGHT_NULL, name->name, NULL, |
| 2307 | name, NULL, dm_di_mode, 0, 0); | 2296 | ip->i_d.di_mode, 0, 0); |
| 2308 | if (error) | 2297 | if (error) |
| 2309 | return error; | 2298 | return error; |
| 2310 | } | 2299 | } |
| 2311 | 2300 | ||
| 2312 | /* From this point on, return through std_return */ | ||
| 2313 | ip = NULL; | ||
| 2314 | |||
| 2315 | /* | 2301 | /* |
| 2316 | * We need to get a reference to ip before we get our log | 2302 | * We need to get a reference to ip before we get our log |
| 2317 | * reservation. The reason for this is that we cannot call | 2303 | * reservation. The reason for this is that we cannot call |
| @@ -2324,13 +2310,7 @@ xfs_remove( | |||
| 2324 | * when we call xfs_iget. Instead we get an unlocked reference | 2310 | * when we call xfs_iget. Instead we get an unlocked reference |
| 2325 | * to the inode before getting our log reservation. | 2311 | * to the inode before getting our log reservation. |
| 2326 | */ | 2312 | */ |
| 2327 | error = xfs_get_dir_entry(dentry, &ip); | 2313 | IHOLD(ip); |
| 2328 | if (error) { | ||
| 2329 | REMOVE_DEBUG_TRACE(__LINE__); | ||
| 2330 | goto std_return; | ||
| 2331 | } | ||
| 2332 | |||
| 2333 | dm_di_mode = ip->i_d.di_mode; | ||
| 2334 | 2314 | ||
| 2335 | xfs_itrace_entry(ip); | 2315 | xfs_itrace_entry(ip); |
| 2336 | xfs_itrace_ref(ip); | 2316 | xfs_itrace_ref(ip); |
| @@ -2398,7 +2378,7 @@ xfs_remove( | |||
| 2398 | * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. | 2378 | * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. |
| 2399 | */ | 2379 | */ |
| 2400 | XFS_BMAP_INIT(&free_list, &first_block); | 2380 | XFS_BMAP_INIT(&free_list, &first_block); |
| 2401 | error = xfs_dir_removename(tp, dp, name, namelen, ip->i_ino, | 2381 | error = xfs_dir_removename(tp, dp, name, ip->i_ino, |
| 2402 | &first_block, &free_list, 0); | 2382 | &first_block, &free_list, 0); |
| 2403 | if (error) { | 2383 | if (error) { |
| 2404 | ASSERT(error != ENOENT); | 2384 | ASSERT(error != ENOENT); |
| @@ -2449,14 +2429,6 @@ xfs_remove( | |||
| 2449 | } | 2429 | } |
| 2450 | 2430 | ||
| 2451 | /* | 2431 | /* |
| 2452 | * Before we drop our extra reference to the inode, purge it | ||
| 2453 | * from the refcache if it is there. By waiting until afterwards | ||
| 2454 | * to do the IRELE, we ensure that we won't go inactive in the | ||
| 2455 | * xfs_refcache_purge_ip routine (although that would be OK). | ||
| 2456 | */ | ||
| 2457 | xfs_refcache_purge_ip(ip); | ||
| 2458 | |||
| 2459 | /* | ||
| 2460 | * If we are using filestreams, kill the stream association. | 2432 | * If we are using filestreams, kill the stream association. |
| 2461 | * If the file is still open it may get a new one but that | 2433 | * If the file is still open it may get a new one but that |
| 2462 | * will get killed on last close in xfs_close() so we don't | 2434 | * will get killed on last close in xfs_close() so we don't |
| @@ -2472,9 +2444,9 @@ xfs_remove( | |||
| 2472 | std_return: | 2444 | std_return: |
| 2473 | if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { | 2445 | if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { |
| 2474 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, | 2446 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, |
| 2475 | dir_vp, DM_RIGHT_NULL, | 2447 | dp, DM_RIGHT_NULL, |
| 2476 | NULL, DM_RIGHT_NULL, | 2448 | NULL, DM_RIGHT_NULL, |
| 2477 | name, NULL, dm_di_mode, error, 0); | 2449 | name->name, NULL, ip->i_d.di_mode, error, 0); |
| 2478 | } | 2450 | } |
| 2479 | return error; | 2451 | return error; |
| 2480 | 2452 | ||
| @@ -2495,14 +2467,6 @@ xfs_remove( | |||
| 2495 | cancel_flags |= XFS_TRANS_ABORT; | 2467 | cancel_flags |= XFS_TRANS_ABORT; |
| 2496 | xfs_trans_cancel(tp, cancel_flags); | 2468 | xfs_trans_cancel(tp, cancel_flags); |
| 2497 | 2469 | ||
| 2498 | /* | ||
| 2499 | * Before we drop our extra reference to the inode, purge it | ||
| 2500 | * from the refcache if it is there. By waiting until afterwards | ||
| 2501 | * to do the IRELE, we ensure that we won't go inactive in the | ||
| 2502 | * xfs_refcache_purge_ip routine (although that would be OK). | ||
| 2503 | */ | ||
| 2504 | xfs_refcache_purge_ip(ip); | ||
| 2505 | |||
| 2506 | IRELE(ip); | 2470 | IRELE(ip); |
| 2507 | 2471 | ||
| 2508 | goto std_return; | 2472 | goto std_return; |
| @@ -2511,12 +2475,10 @@ xfs_remove( | |||
| 2511 | int | 2475 | int |
| 2512 | xfs_link( | 2476 | xfs_link( |
| 2513 | xfs_inode_t *tdp, | 2477 | xfs_inode_t *tdp, |
| 2514 | bhv_vnode_t *src_vp, | 2478 | xfs_inode_t *sip, |
| 2515 | bhv_vname_t *dentry) | 2479 | struct xfs_name *target_name) |
| 2516 | { | 2480 | { |
| 2517 | bhv_vnode_t *target_dir_vp = XFS_ITOV(tdp); | ||
| 2518 | xfs_mount_t *mp = tdp->i_mount; | 2481 | xfs_mount_t *mp = tdp->i_mount; |
| 2519 | xfs_inode_t *sip = xfs_vtoi(src_vp); | ||
| 2520 | xfs_trans_t *tp; | 2482 | xfs_trans_t *tp; |
| 2521 | xfs_inode_t *ips[2]; | 2483 | xfs_inode_t *ips[2]; |
| 2522 | int error; | 2484 | int error; |
| @@ -2525,23 +2487,20 @@ xfs_link( | |||
| 2525 | int cancel_flags; | 2487 | int cancel_flags; |
| 2526 | int committed; | 2488 | int committed; |
| 2527 | int resblks; | 2489 | int resblks; |
| 2528 | char *target_name = VNAME(dentry); | ||
| 2529 | int target_namelen; | ||
| 2530 | 2490 | ||
| 2531 | xfs_itrace_entry(tdp); | 2491 | xfs_itrace_entry(tdp); |
| 2532 | xfs_itrace_entry(xfs_vtoi(src_vp)); | 2492 | xfs_itrace_entry(sip); |
| 2533 | 2493 | ||
| 2534 | target_namelen = VNAMELEN(dentry); | 2494 | ASSERT(!S_ISDIR(sip->i_d.di_mode)); |
| 2535 | ASSERT(!VN_ISDIR(src_vp)); | ||
| 2536 | 2495 | ||
| 2537 | if (XFS_FORCED_SHUTDOWN(mp)) | 2496 | if (XFS_FORCED_SHUTDOWN(mp)) |
| 2538 | return XFS_ERROR(EIO); | 2497 | return XFS_ERROR(EIO); |
| 2539 | 2498 | ||
| 2540 | if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) { | 2499 | if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) { |
| 2541 | error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK, | 2500 | error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK, |
| 2542 | target_dir_vp, DM_RIGHT_NULL, | 2501 | tdp, DM_RIGHT_NULL, |
| 2543 | src_vp, DM_RIGHT_NULL, | 2502 | sip, DM_RIGHT_NULL, |
| 2544 | target_name, NULL, 0, 0, 0); | 2503 | target_name->name, NULL, 0, 0, 0); |
| 2545 | if (error) | 2504 | if (error) |
| 2546 | return error; | 2505 | return error; |
| 2547 | } | 2506 | } |
| @@ -2556,7 +2515,7 @@ xfs_link( | |||
| 2556 | 2515 | ||
| 2557 | tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); | 2516 | tp = xfs_trans_alloc(mp, XFS_TRANS_LINK); |
| 2558 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | 2517 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; |
| 2559 | resblks = XFS_LINK_SPACE_RES(mp, target_namelen); | 2518 | resblks = XFS_LINK_SPACE_RES(mp, target_name->len); |
| 2560 | error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0, | 2519 | error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0, |
| 2561 | XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); | 2520 | XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT); |
| 2562 | if (error == ENOSPC) { | 2521 | if (error == ENOSPC) { |
| @@ -2584,8 +2543,8 @@ xfs_link( | |||
| 2584 | * xfs_trans_cancel will both unlock the inodes and | 2543 | * xfs_trans_cancel will both unlock the inodes and |
| 2585 | * decrement the associated ref counts. | 2544 | * decrement the associated ref counts. |
| 2586 | */ | 2545 | */ |
| 2587 | VN_HOLD(src_vp); | 2546 | IHOLD(sip); |
| 2588 | VN_HOLD(target_dir_vp); | 2547 | IHOLD(tdp); |
| 2589 | xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); | 2548 | xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); |
| 2590 | xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); | 2549 | xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); |
| 2591 | 2550 | ||
| @@ -2608,15 +2567,14 @@ xfs_link( | |||
| 2608 | goto error_return; | 2567 | goto error_return; |
| 2609 | } | 2568 | } |
| 2610 | 2569 | ||
| 2611 | if (resblks == 0 && | 2570 | error = xfs_dir_canenter(tp, tdp, target_name, resblks); |
| 2612 | (error = xfs_dir_canenter(tp, tdp, target_name, target_namelen))) | 2571 | if (error) |
| 2613 | goto error_return; | 2572 | goto error_return; |
| 2614 | 2573 | ||
| 2615 | XFS_BMAP_INIT(&free_list, &first_block); | 2574 | XFS_BMAP_INIT(&free_list, &first_block); |
| 2616 | 2575 | ||
| 2617 | error = xfs_dir_createname(tp, tdp, target_name, target_namelen, | 2576 | error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, |
| 2618 | sip->i_ino, &first_block, &free_list, | 2577 | &first_block, &free_list, resblks); |
| 2619 | resblks); | ||
| 2620 | if (error) | 2578 | if (error) |
| 2621 | goto abort_return; | 2579 | goto abort_return; |
| 2622 | xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 2580 | xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
| @@ -2650,9 +2608,9 @@ xfs_link( | |||
| 2650 | std_return: | 2608 | std_return: |
| 2651 | if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) { | 2609 | if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) { |
| 2652 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK, | 2610 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK, |
| 2653 | target_dir_vp, DM_RIGHT_NULL, | 2611 | tdp, DM_RIGHT_NULL, |
| 2654 | src_vp, DM_RIGHT_NULL, | 2612 | sip, DM_RIGHT_NULL, |
| 2655 | target_name, NULL, 0, error, 0); | 2613 | target_name->name, NULL, 0, error, 0); |
| 2656 | } | 2614 | } |
| 2657 | return error; | 2615 | return error; |
| 2658 | 2616 | ||
| @@ -2669,17 +2627,13 @@ std_return: | |||
| 2669 | int | 2627 | int |
| 2670 | xfs_mkdir( | 2628 | xfs_mkdir( |
| 2671 | xfs_inode_t *dp, | 2629 | xfs_inode_t *dp, |
| 2672 | bhv_vname_t *dentry, | 2630 | struct xfs_name *dir_name, |
| 2673 | mode_t mode, | 2631 | mode_t mode, |
| 2674 | bhv_vnode_t **vpp, | 2632 | xfs_inode_t **ipp, |
| 2675 | cred_t *credp) | 2633 | cred_t *credp) |
| 2676 | { | 2634 | { |
| 2677 | bhv_vnode_t *dir_vp = XFS_ITOV(dp); | ||
| 2678 | char *dir_name = VNAME(dentry); | ||
| 2679 | int dir_namelen = VNAMELEN(dentry); | ||
| 2680 | xfs_mount_t *mp = dp->i_mount; | 2635 | xfs_mount_t *mp = dp->i_mount; |
| 2681 | xfs_inode_t *cdp; /* inode of created dir */ | 2636 | xfs_inode_t *cdp; /* inode of created dir */ |
| 2682 | bhv_vnode_t *cvp; /* vnode of created dir */ | ||
| 2683 | xfs_trans_t *tp; | 2637 | xfs_trans_t *tp; |
| 2684 | int cancel_flags; | 2638 | int cancel_flags; |
| 2685 | int error; | 2639 | int error; |
| @@ -2700,8 +2654,8 @@ xfs_mkdir( | |||
| 2700 | 2654 | ||
| 2701 | if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { | 2655 | if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) { |
| 2702 | error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, | 2656 | error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE, |
| 2703 | dir_vp, DM_RIGHT_NULL, NULL, | 2657 | dp, DM_RIGHT_NULL, NULL, |
| 2704 | DM_RIGHT_NULL, dir_name, NULL, | 2658 | DM_RIGHT_NULL, dir_name->name, NULL, |
| 2705 | mode, 0, 0); | 2659 | mode, 0, 0); |
| 2706 | if (error) | 2660 | if (error) |
| 2707 | return error; | 2661 | return error; |
| @@ -2730,7 +2684,7 @@ xfs_mkdir( | |||
| 2730 | 2684 | ||
| 2731 | tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); | 2685 | tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR); |
| 2732 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | 2686 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; |
| 2733 | resblks = XFS_MKDIR_SPACE_RES(mp, dir_namelen); | 2687 | resblks = XFS_MKDIR_SPACE_RES(mp, dir_name->len); |
| 2734 | error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0, | 2688 | error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0, |
| 2735 | XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT); | 2689 | XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT); |
| 2736 | if (error == ENOSPC) { | 2690 | if (error == ENOSPC) { |
| @@ -2762,8 +2716,8 @@ xfs_mkdir( | |||
| 2762 | if (error) | 2716 | if (error) |
| 2763 | goto error_return; | 2717 | goto error_return; |
| 2764 | 2718 | ||
| 2765 | if (resblks == 0 && | 2719 | error = xfs_dir_canenter(tp, dp, dir_name, resblks); |
| 2766 | (error = xfs_dir_canenter(tp, dp, dir_name, dir_namelen))) | 2720 | if (error) |
| 2767 | goto error_return; | 2721 | goto error_return; |
| 2768 | /* | 2722 | /* |
| 2769 | * create the directory inode. | 2723 | * create the directory inode. |
| @@ -2786,15 +2740,15 @@ xfs_mkdir( | |||
| 2786 | * from here on will result in the transaction cancel | 2740 | * from here on will result in the transaction cancel |
| 2787 | * unlocking dp so don't do it explicitly in the error path. | 2741 | * unlocking dp so don't do it explicitly in the error path. |
| 2788 | */ | 2742 | */ |
| 2789 | VN_HOLD(dir_vp); | 2743 | IHOLD(dp); |
| 2790 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); | 2744 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); |
| 2791 | unlock_dp_on_error = B_FALSE; | 2745 | unlock_dp_on_error = B_FALSE; |
| 2792 | 2746 | ||
| 2793 | XFS_BMAP_INIT(&free_list, &first_block); | 2747 | XFS_BMAP_INIT(&free_list, &first_block); |
| 2794 | 2748 | ||
| 2795 | error = xfs_dir_createname(tp, dp, dir_name, dir_namelen, cdp->i_ino, | 2749 | error = xfs_dir_createname(tp, dp, dir_name, cdp->i_ino, |
| 2796 | &first_block, &free_list, resblks ? | 2750 | &first_block, &free_list, resblks ? |
| 2797 | resblks - XFS_IALLOC_SPACE_RES(mp) : 0); | 2751 | resblks - XFS_IALLOC_SPACE_RES(mp) : 0); |
| 2798 | if (error) { | 2752 | if (error) { |
| 2799 | ASSERT(error != ENOSPC); | 2753 | ASSERT(error != ENOSPC); |
| 2800 | goto error1; | 2754 | goto error1; |
| @@ -2817,11 +2771,9 @@ xfs_mkdir( | |||
| 2817 | if (error) | 2771 | if (error) |
| 2818 | goto error2; | 2772 | goto error2; |
| 2819 | 2773 | ||
| 2820 | cvp = XFS_ITOV(cdp); | ||
| 2821 | |||
| 2822 | created = B_TRUE; | 2774 | created = B_TRUE; |
| 2823 | 2775 | ||
| 2824 | *vpp = cvp; | 2776 | *ipp = cdp; |
| 2825 | IHOLD(cdp); | 2777 | IHOLD(cdp); |
| 2826 | 2778 | ||
| 2827 | /* | 2779 | /* |
| @@ -2858,10 +2810,10 @@ std_return: | |||
| 2858 | if ((created || (error != 0 && dm_event_sent != 0)) && | 2810 | if ((created || (error != 0 && dm_event_sent != 0)) && |
| 2859 | DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { | 2811 | DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) { |
| 2860 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, | 2812 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE, |
| 2861 | dir_vp, DM_RIGHT_NULL, | 2813 | dp, DM_RIGHT_NULL, |
| 2862 | created ? XFS_ITOV(cdp):NULL, | 2814 | created ? cdp : NULL, |
| 2863 | DM_RIGHT_NULL, | 2815 | DM_RIGHT_NULL, |
| 2864 | dir_name, NULL, | 2816 | dir_name->name, NULL, |
| 2865 | mode, error, 0); | 2817 | mode, error, 0); |
| 2866 | } | 2818 | } |
| 2867 | return error; | 2819 | return error; |
| @@ -2885,20 +2837,17 @@ std_return: | |||
| 2885 | int | 2837 | int |
| 2886 | xfs_rmdir( | 2838 | xfs_rmdir( |
| 2887 | xfs_inode_t *dp, | 2839 | xfs_inode_t *dp, |
| 2888 | bhv_vname_t *dentry) | 2840 | struct xfs_name *name, |
| 2841 | xfs_inode_t *cdp) | ||
| 2889 | { | 2842 | { |
| 2890 | bhv_vnode_t *dir_vp = XFS_ITOV(dp); | 2843 | bhv_vnode_t *dir_vp = XFS_ITOV(dp); |
| 2891 | char *name = VNAME(dentry); | ||
| 2892 | int namelen = VNAMELEN(dentry); | ||
| 2893 | xfs_mount_t *mp = dp->i_mount; | 2844 | xfs_mount_t *mp = dp->i_mount; |
| 2894 | xfs_inode_t *cdp; /* child directory */ | ||
| 2895 | xfs_trans_t *tp; | 2845 | xfs_trans_t *tp; |
| 2896 | int error; | 2846 | int error; |
| 2897 | xfs_bmap_free_t free_list; | 2847 | xfs_bmap_free_t free_list; |
| 2898 | xfs_fsblock_t first_block; | 2848 | xfs_fsblock_t first_block; |
| 2899 | int cancel_flags; | 2849 | int cancel_flags; |
| 2900 | int committed; | 2850 | int committed; |
| 2901 | int dm_di_mode = S_IFDIR; | ||
| 2902 | int last_cdp_link; | 2851 | int last_cdp_link; |
| 2903 | uint resblks; | 2852 | uint resblks; |
| 2904 | 2853 | ||
| @@ -2907,24 +2856,15 @@ xfs_rmdir( | |||
| 2907 | if (XFS_FORCED_SHUTDOWN(mp)) | 2856 | if (XFS_FORCED_SHUTDOWN(mp)) |
| 2908 | return XFS_ERROR(EIO); | 2857 | return XFS_ERROR(EIO); |
| 2909 | 2858 | ||
| 2910 | if (!xfs_get_dir_entry(dentry, &cdp)) { | ||
| 2911 | dm_di_mode = cdp->i_d.di_mode; | ||
| 2912 | IRELE(cdp); | ||
| 2913 | } | ||
| 2914 | |||
| 2915 | if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { | 2859 | if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { |
| 2916 | error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, | 2860 | error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, |
| 2917 | dir_vp, DM_RIGHT_NULL, | 2861 | dp, DM_RIGHT_NULL, |
| 2918 | NULL, DM_RIGHT_NULL, | 2862 | NULL, DM_RIGHT_NULL, name->name, |
| 2919 | name, NULL, dm_di_mode, 0, 0); | 2863 | NULL, cdp->i_d.di_mode, 0, 0); |
| 2920 | if (error) | 2864 | if (error) |
| 2921 | return XFS_ERROR(error); | 2865 | return XFS_ERROR(error); |
| 2922 | } | 2866 | } |
| 2923 | 2867 | ||
| 2924 | /* Return through std_return after this point. */ | ||
| 2925 | |||
| 2926 | cdp = NULL; | ||
| 2927 | |||
| 2928 | /* | 2868 | /* |
| 2929 | * We need to get a reference to cdp before we get our log | 2869 | * We need to get a reference to cdp before we get our log |
| 2930 | * reservation. The reason for this is that we cannot call | 2870 | * reservation. The reason for this is that we cannot call |
| @@ -2937,13 +2877,7 @@ xfs_rmdir( | |||
| 2937 | * when we call xfs_iget. Instead we get an unlocked reference | 2877 | * when we call xfs_iget. Instead we get an unlocked reference |
| 2938 | * to the inode before getting our log reservation. | 2878 | * to the inode before getting our log reservation. |
| 2939 | */ | 2879 | */ |
| 2940 | error = xfs_get_dir_entry(dentry, &cdp); | 2880 | IHOLD(cdp); |
| 2941 | if (error) { | ||
| 2942 | REMOVE_DEBUG_TRACE(__LINE__); | ||
| 2943 | goto std_return; | ||
| 2944 | } | ||
| 2945 | mp = dp->i_mount; | ||
| 2946 | dm_di_mode = cdp->i_d.di_mode; | ||
| 2947 | 2881 | ||
| 2948 | /* | 2882 | /* |
| 2949 | * Get the dquots for the inodes. | 2883 | * Get the dquots for the inodes. |
| @@ -3020,7 +2954,7 @@ xfs_rmdir( | |||
| 3020 | goto error_return; | 2954 | goto error_return; |
| 3021 | } | 2955 | } |
| 3022 | 2956 | ||
| 3023 | error = xfs_dir_removename(tp, dp, name, namelen, cdp->i_ino, | 2957 | error = xfs_dir_removename(tp, dp, name, cdp->i_ino, |
| 3024 | &first_block, &free_list, resblks); | 2958 | &first_block, &free_list, resblks); |
| 3025 | if (error) | 2959 | if (error) |
| 3026 | goto error1; | 2960 | goto error1; |
| @@ -3098,9 +3032,9 @@ xfs_rmdir( | |||
| 3098 | std_return: | 3032 | std_return: |
| 3099 | if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { | 3033 | if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { |
| 3100 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, | 3034 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, |
| 3101 | dir_vp, DM_RIGHT_NULL, | 3035 | dp, DM_RIGHT_NULL, |
| 3102 | NULL, DM_RIGHT_NULL, | 3036 | NULL, DM_RIGHT_NULL, |
| 3103 | name, NULL, dm_di_mode, | 3037 | name->name, NULL, cdp->i_d.di_mode, |
| 3104 | error, 0); | 3038 | error, 0); |
| 3105 | } | 3039 | } |
| 3106 | return error; | 3040 | return error; |
| @@ -3118,13 +3052,12 @@ xfs_rmdir( | |||
| 3118 | int | 3052 | int |
| 3119 | xfs_symlink( | 3053 | xfs_symlink( |
| 3120 | xfs_inode_t *dp, | 3054 | xfs_inode_t *dp, |
| 3121 | bhv_vname_t *dentry, | 3055 | struct xfs_name *link_name, |
| 3122 | char *target_path, | 3056 | const char *target_path, |
| 3123 | mode_t mode, | 3057 | mode_t mode, |
| 3124 | bhv_vnode_t **vpp, | 3058 | xfs_inode_t **ipp, |
| 3125 | cred_t *credp) | 3059 | cred_t *credp) |
| 3126 | { | 3060 | { |
| 3127 | bhv_vnode_t *dir_vp = XFS_ITOV(dp); | ||
| 3128 | xfs_mount_t *mp = dp->i_mount; | 3061 | xfs_mount_t *mp = dp->i_mount; |
| 3129 | xfs_trans_t *tp; | 3062 | xfs_trans_t *tp; |
| 3130 | xfs_inode_t *ip; | 3063 | xfs_inode_t *ip; |
| @@ -3140,17 +3073,15 @@ xfs_symlink( | |||
| 3140 | int nmaps; | 3073 | int nmaps; |
| 3141 | xfs_bmbt_irec_t mval[SYMLINK_MAPS]; | 3074 | xfs_bmbt_irec_t mval[SYMLINK_MAPS]; |
| 3142 | xfs_daddr_t d; | 3075 | xfs_daddr_t d; |
| 3143 | char *cur_chunk; | 3076 | const char *cur_chunk; |
| 3144 | int byte_cnt; | 3077 | int byte_cnt; |
| 3145 | int n; | 3078 | int n; |
| 3146 | xfs_buf_t *bp; | 3079 | xfs_buf_t *bp; |
| 3147 | xfs_prid_t prid; | 3080 | xfs_prid_t prid; |
| 3148 | struct xfs_dquot *udqp, *gdqp; | 3081 | struct xfs_dquot *udqp, *gdqp; |
| 3149 | uint resblks; | 3082 | uint resblks; |
| 3150 | char *link_name = VNAME(dentry); | ||
| 3151 | int link_namelen; | ||
| 3152 | 3083 | ||
| 3153 | *vpp = NULL; | 3084 | *ipp = NULL; |
| 3154 | error = 0; | 3085 | error = 0; |
| 3155 | ip = NULL; | 3086 | ip = NULL; |
| 3156 | tp = NULL; | 3087 | tp = NULL; |
| @@ -3160,44 +3091,17 @@ xfs_symlink( | |||
| 3160 | if (XFS_FORCED_SHUTDOWN(mp)) | 3091 | if (XFS_FORCED_SHUTDOWN(mp)) |
| 3161 | return XFS_ERROR(EIO); | 3092 | return XFS_ERROR(EIO); |
| 3162 | 3093 | ||
| 3163 | link_namelen = VNAMELEN(dentry); | ||
| 3164 | |||
| 3165 | /* | 3094 | /* |
| 3166 | * Check component lengths of the target path name. | 3095 | * Check component lengths of the target path name. |
| 3167 | */ | 3096 | */ |
| 3168 | pathlen = strlen(target_path); | 3097 | pathlen = strlen(target_path); |
| 3169 | if (pathlen >= MAXPATHLEN) /* total string too long */ | 3098 | if (pathlen >= MAXPATHLEN) /* total string too long */ |
| 3170 | return XFS_ERROR(ENAMETOOLONG); | 3099 | return XFS_ERROR(ENAMETOOLONG); |
| 3171 | if (pathlen >= MAXNAMELEN) { /* is any component too long? */ | ||
| 3172 | int len, total; | ||
| 3173 | char *path; | ||
| 3174 | |||
| 3175 | for (total = 0, path = target_path; total < pathlen;) { | ||
| 3176 | /* | ||
| 3177 | * Skip any slashes. | ||
| 3178 | */ | ||
| 3179 | while(*path == '/') { | ||
| 3180 | total++; | ||
| 3181 | path++; | ||
| 3182 | } | ||
| 3183 | |||
| 3184 | /* | ||
| 3185 | * Count up to the next slash or end of path. | ||
| 3186 | * Error out if the component is bigger than MAXNAMELEN. | ||
| 3187 | */ | ||
| 3188 | for(len = 0; *path != '/' && total < pathlen;total++, path++) { | ||
| 3189 | if (++len >= MAXNAMELEN) { | ||
| 3190 | error = ENAMETOOLONG; | ||
| 3191 | return error; | ||
| 3192 | } | ||
| 3193 | } | ||
| 3194 | } | ||
| 3195 | } | ||
| 3196 | 3100 | ||
| 3197 | if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) { | 3101 | if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) { |
| 3198 | error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dir_vp, | 3102 | error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp, |
| 3199 | DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, | 3103 | DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, |
| 3200 | link_name, target_path, 0, 0, 0); | 3104 | link_name->name, target_path, 0, 0, 0); |
| 3201 | if (error) | 3105 | if (error) |
| 3202 | return error; | 3106 | return error; |
| 3203 | } | 3107 | } |
| @@ -3229,7 +3133,7 @@ xfs_symlink( | |||
| 3229 | fs_blocks = 0; | 3133 | fs_blocks = 0; |
| 3230 | else | 3134 | else |
| 3231 | fs_blocks = XFS_B_TO_FSB(mp, pathlen); | 3135 | fs_blocks = XFS_B_TO_FSB(mp, pathlen); |
| 3232 | resblks = XFS_SYMLINK_SPACE_RES(mp, link_namelen, fs_blocks); | 3136 | resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); |
| 3233 | error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0, | 3137 | error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0, |
| 3234 | XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); | 3138 | XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); |
| 3235 | if (error == ENOSPC && fs_blocks == 0) { | 3139 | if (error == ENOSPC && fs_blocks == 0) { |
| @@ -3263,8 +3167,8 @@ xfs_symlink( | |||
| 3263 | /* | 3167 | /* |
| 3264 | * Check for ability to enter directory entry, if no space reserved. | 3168 | * Check for ability to enter directory entry, if no space reserved. |
| 3265 | */ | 3169 | */ |
| 3266 | if (resblks == 0 && | 3170 | error = xfs_dir_canenter(tp, dp, link_name, resblks); |
| 3267 | (error = xfs_dir_canenter(tp, dp, link_name, link_namelen))) | 3171 | if (error) |
| 3268 | goto error_return; | 3172 | goto error_return; |
| 3269 | /* | 3173 | /* |
| 3270 | * Initialize the bmap freelist prior to calling either | 3174 | * Initialize the bmap freelist prior to calling either |
| @@ -3289,7 +3193,7 @@ xfs_symlink( | |||
| 3289 | * transaction cancel unlocking dp so don't do it explicitly in the | 3193 | * transaction cancel unlocking dp so don't do it explicitly in the |
| 3290 | * error path. | 3194 | * error path. |
| 3291 | */ | 3195 | */ |
| 3292 | VN_HOLD(dir_vp); | 3196 | IHOLD(dp); |
| 3293 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); | 3197 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); |
| 3294 | unlock_dp_on_error = B_FALSE; | 3198 | unlock_dp_on_error = B_FALSE; |
| 3295 | 3199 | ||
| @@ -3356,8 +3260,8 @@ xfs_symlink( | |||
| 3356 | /* | 3260 | /* |
| 3357 | * Create the directory entry for the symlink. | 3261 | * Create the directory entry for the symlink. |
| 3358 | */ | 3262 | */ |
| 3359 | error = xfs_dir_createname(tp, dp, link_name, link_namelen, ip->i_ino, | 3263 | error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, |
| 3360 | &first_block, &free_list, resblks); | 3264 | &first_block, &free_list, resblks); |
| 3361 | if (error) | 3265 | if (error) |
| 3362 | goto error1; | 3266 | goto error1; |
| 3363 | xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 3267 | xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
| @@ -3399,19 +3303,14 @@ xfs_symlink( | |||
| 3399 | std_return: | 3303 | std_return: |
| 3400 | if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) { | 3304 | if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) { |
| 3401 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK, | 3305 | (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK, |
| 3402 | dir_vp, DM_RIGHT_NULL, | 3306 | dp, DM_RIGHT_NULL, |
| 3403 | error ? NULL : XFS_ITOV(ip), | 3307 | error ? NULL : ip, |
| 3404 | DM_RIGHT_NULL, link_name, target_path, | 3308 | DM_RIGHT_NULL, link_name->name, |
| 3405 | 0, error, 0); | 3309 | target_path, 0, error, 0); |
| 3406 | } | 3310 | } |
| 3407 | 3311 | ||
| 3408 | if (!error) { | 3312 | if (!error) |
| 3409 | bhv_vnode_t *vp; | 3313 | *ipp = ip; |
| 3410 | |||
| 3411 | ASSERT(ip); | ||
| 3412 | vp = XFS_ITOV(ip); | ||
| 3413 | *vpp = vp; | ||
| 3414 | } | ||
| 3415 | return error; | 3314 | return error; |
| 3416 | 3315 | ||
| 3417 | error2: | 3316 | error2: |
| @@ -3431,60 +3330,11 @@ std_return: | |||
| 3431 | } | 3330 | } |
| 3432 | 3331 | ||
| 3433 | int | 3332 | int |
| 3434 | xfs_rwlock( | ||
| 3435 | xfs_inode_t *ip, | ||
| 3436 | bhv_vrwlock_t locktype) | ||
| 3437 | { | ||
| 3438 | if (S_ISDIR(ip->i_d.di_mode)) | ||
| 3439 | return 1; | ||
| 3440 | if (locktype == VRWLOCK_WRITE) { | ||
| 3441 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | ||
| 3442 | } else if (locktype == VRWLOCK_TRY_READ) { | ||
| 3443 | return xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED); | ||
| 3444 | } else if (locktype == VRWLOCK_TRY_WRITE) { | ||
| 3445 | return xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL); | ||
| 3446 | } else { | ||
| 3447 | ASSERT((locktype == VRWLOCK_READ) || | ||
| 3448 | (locktype == VRWLOCK_WRITE_DIRECT)); | ||
| 3449 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | ||
| 3450 | } | ||
| 3451 | |||
| 3452 | return 1; | ||
| 3453 | } | ||
| 3454 | |||
| 3455 | |||
| 3456 | void | ||
| 3457 | xfs_rwunlock( | ||
| 3458 | xfs_inode_t *ip, | ||
| 3459 | bhv_vrwlock_t locktype) | ||
| 3460 | { | ||
| 3461 | if (S_ISDIR(ip->i_d.di_mode)) | ||
| 3462 | return; | ||
| 3463 | if (locktype == VRWLOCK_WRITE) { | ||
| 3464 | /* | ||
| 3465 | * In the write case, we may have added a new entry to | ||
| 3466 | * the reference cache. This might store a pointer to | ||
| 3467 | * an inode to be released in this inode. If it is there, | ||
| 3468 | * clear the pointer and release the inode after unlocking | ||
| 3469 | * this one. | ||
| 3470 | */ | ||
| 3471 | xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL); | ||
| 3472 | } else { | ||
| 3473 | ASSERT((locktype == VRWLOCK_READ) || | ||
| 3474 | (locktype == VRWLOCK_WRITE_DIRECT)); | ||
| 3475 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | ||
| 3476 | } | ||
| 3477 | return; | ||
| 3478 | } | ||
| 3479 | |||
| 3480 | |||
| 3481 | int | ||
| 3482 | xfs_inode_flush( | 3333 | xfs_inode_flush( |
| 3483 | xfs_inode_t *ip, | 3334 | xfs_inode_t *ip, |
| 3484 | int flags) | 3335 | int flags) |
| 3485 | { | 3336 | { |
| 3486 | xfs_mount_t *mp = ip->i_mount; | 3337 | xfs_mount_t *mp = ip->i_mount; |
| 3487 | xfs_inode_log_item_t *iip = ip->i_itemp; | ||
| 3488 | int error = 0; | 3338 | int error = 0; |
| 3489 | 3339 | ||
| 3490 | if (XFS_FORCED_SHUTDOWN(mp)) | 3340 | if (XFS_FORCED_SHUTDOWN(mp)) |
| @@ -3494,33 +3344,9 @@ xfs_inode_flush( | |||
| 3494 | * Bypass inodes which have already been cleaned by | 3344 | * Bypass inodes which have already been cleaned by |
| 3495 | * the inode flush clustering code inside xfs_iflush | 3345 | * the inode flush clustering code inside xfs_iflush |
| 3496 | */ | 3346 | */ |
| 3497 | if ((ip->i_update_core == 0) && | 3347 | if (xfs_inode_clean(ip)) |
| 3498 | ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) | ||
| 3499 | return 0; | 3348 | return 0; |
| 3500 | 3349 | ||
| 3501 | if (flags & FLUSH_LOG) { | ||
| 3502 | if (iip && iip->ili_last_lsn) { | ||
| 3503 | xlog_t *log = mp->m_log; | ||
| 3504 | xfs_lsn_t sync_lsn; | ||
| 3505 | int log_flags = XFS_LOG_FORCE; | ||
| 3506 | |||
| 3507 | spin_lock(&log->l_grant_lock); | ||
| 3508 | sync_lsn = log->l_last_sync_lsn; | ||
| 3509 | spin_unlock(&log->l_grant_lock); | ||
| 3510 | |||
| 3511 | if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) { | ||
| 3512 | if (flags & FLUSH_SYNC) | ||
| 3513 | log_flags |= XFS_LOG_SYNC; | ||
| 3514 | error = xfs_log_force(mp, iip->ili_last_lsn, log_flags); | ||
| 3515 | if (error) | ||
| 3516 | return error; | ||
| 3517 | } | ||
| 3518 | |||
| 3519 | if (ip->i_update_core == 0) | ||
| 3520 | return 0; | ||
| 3521 | } | ||
| 3522 | } | ||
| 3523 | |||
| 3524 | /* | 3350 | /* |
| 3525 | * We make this non-blocking if the inode is contended, | 3351 | * We make this non-blocking if the inode is contended, |
| 3526 | * return EAGAIN to indicate to the caller that they | 3352 | * return EAGAIN to indicate to the caller that they |
| @@ -3528,30 +3354,22 @@ xfs_inode_flush( | |||
| 3528 | * blocking on inodes inside another operation right | 3354 | * blocking on inodes inside another operation right |
| 3529 | * now, they get caught later by xfs_sync. | 3355 | * now, they get caught later by xfs_sync. |
| 3530 | */ | 3356 | */ |
| 3531 | if (flags & FLUSH_INODE) { | 3357 | if (flags & FLUSH_SYNC) { |
| 3532 | int flush_flags; | 3358 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
| 3533 | 3359 | xfs_iflock(ip); | |
| 3534 | if (flags & FLUSH_SYNC) { | 3360 | } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { |
| 3535 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 3361 | if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { |
| 3536 | xfs_iflock(ip); | 3362 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
| 3537 | } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { | ||
| 3538 | if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { | ||
| 3539 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
| 3540 | return EAGAIN; | ||
| 3541 | } | ||
| 3542 | } else { | ||
| 3543 | return EAGAIN; | 3363 | return EAGAIN; |
| 3544 | } | 3364 | } |
| 3545 | 3365 | } else { | |
| 3546 | if (flags & FLUSH_SYNC) | 3366 | return EAGAIN; |
| 3547 | flush_flags = XFS_IFLUSH_SYNC; | ||
| 3548 | else | ||
| 3549 | flush_flags = XFS_IFLUSH_ASYNC; | ||
| 3550 | |||
| 3551 | error = xfs_iflush(ip, flush_flags); | ||
| 3552 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
| 3553 | } | 3367 | } |
| 3554 | 3368 | ||
| 3369 | error = xfs_iflush(ip, (flags & FLUSH_SYNC) ? XFS_IFLUSH_SYNC | ||
| 3370 | : XFS_IFLUSH_ASYNC_NOBLOCK); | ||
| 3371 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
| 3372 | |||
| 3555 | return error; | 3373 | return error; |
| 3556 | } | 3374 | } |
| 3557 | 3375 | ||
| @@ -3694,12 +3512,12 @@ xfs_finish_reclaim( | |||
| 3694 | * We get the flush lock regardless, though, just to make sure | 3512 | * We get the flush lock regardless, though, just to make sure |
| 3695 | * we don't free it while it is being flushed. | 3513 | * we don't free it while it is being flushed. |
| 3696 | */ | 3514 | */ |
| 3697 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 3515 | if (!locked) { |
| 3698 | if (!locked) { | 3516 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
| 3699 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 3517 | xfs_iflock(ip); |
| 3700 | xfs_iflock(ip); | 3518 | } |
| 3701 | } | ||
| 3702 | 3519 | ||
| 3520 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
| 3703 | if (ip->i_update_core || | 3521 | if (ip->i_update_core || |
| 3704 | ((ip->i_itemp != NULL) && | 3522 | ((ip->i_itemp != NULL) && |
| 3705 | (ip->i_itemp->ili_format.ilf_fields != 0))) { | 3523 | (ip->i_itemp->ili_format.ilf_fields != 0))) { |
| @@ -3719,17 +3537,11 @@ xfs_finish_reclaim( | |||
| 3719 | ASSERT(ip->i_update_core == 0); | 3537 | ASSERT(ip->i_update_core == 0); |
| 3720 | ASSERT(ip->i_itemp == NULL || | 3538 | ASSERT(ip->i_itemp == NULL || |
| 3721 | ip->i_itemp->ili_format.ilf_fields == 0); | 3539 | ip->i_itemp->ili_format.ilf_fields == 0); |
| 3722 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 3723 | } else if (locked) { | ||
| 3724 | /* | ||
| 3725 | * We are not interested in doing an iflush if we're | ||
| 3726 | * in the process of shutting down the filesystem forcibly. | ||
| 3727 | * So, just reclaim the inode. | ||
| 3728 | */ | ||
| 3729 | xfs_ifunlock(ip); | ||
| 3730 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 3731 | } | 3540 | } |
| 3732 | 3541 | ||
| 3542 | xfs_ifunlock(ip); | ||
| 3543 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 3544 | |||
| 3733 | reclaim: | 3545 | reclaim: |
| 3734 | xfs_ireclaim(ip); | 3546 | xfs_ireclaim(ip); |
| 3735 | return 0; | 3547 | return 0; |
| @@ -3845,9 +3657,8 @@ xfs_alloc_file_space( | |||
| 3845 | end_dmi_offset = offset+len; | 3657 | end_dmi_offset = offset+len; |
| 3846 | if (end_dmi_offset > ip->i_size) | 3658 | if (end_dmi_offset > ip->i_size) |
| 3847 | end_dmi_offset = ip->i_size; | 3659 | end_dmi_offset = ip->i_size; |
| 3848 | error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip), | 3660 | error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, offset, |
| 3849 | offset, end_dmi_offset - offset, | 3661 | end_dmi_offset - offset, 0, NULL); |
| 3850 | 0, NULL); | ||
| 3851 | if (error) | 3662 | if (error) |
| 3852 | return error; | 3663 | return error; |
| 3853 | } | 3664 | } |
| @@ -3956,8 +3767,8 @@ dmapi_enospc_check: | |||
| 3956 | if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 && | 3767 | if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 && |
| 3957 | DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) { | 3768 | DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) { |
| 3958 | error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, | 3769 | error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, |
| 3959 | XFS_ITOV(ip), DM_RIGHT_NULL, | 3770 | ip, DM_RIGHT_NULL, |
| 3960 | XFS_ITOV(ip), DM_RIGHT_NULL, | 3771 | ip, DM_RIGHT_NULL, |
| 3961 | NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ | 3772 | NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ |
| 3962 | if (error == 0) | 3773 | if (error == 0) |
| 3963 | goto retry; /* Maybe DMAPI app. has made space */ | 3774 | goto retry; /* Maybe DMAPI app. has made space */ |
| @@ -4021,7 +3832,8 @@ xfs_zero_remaining_bytes( | |||
| 4021 | XFS_BUF_READ(bp); | 3832 | XFS_BUF_READ(bp); |
| 4022 | XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DB(ip, imap.br_startblock)); | 3833 | XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DB(ip, imap.br_startblock)); |
| 4023 | xfsbdstrat(mp, bp); | 3834 | xfsbdstrat(mp, bp); |
| 4024 | if ((error = xfs_iowait(bp))) { | 3835 | error = xfs_iowait(bp); |
| 3836 | if (error) { | ||
| 4025 | xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", | 3837 | xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", |
| 4026 | mp, bp, XFS_BUF_ADDR(bp)); | 3838 | mp, bp, XFS_BUF_ADDR(bp)); |
| 4027 | break; | 3839 | break; |
| @@ -4033,7 +3845,8 @@ xfs_zero_remaining_bytes( | |||
| 4033 | XFS_BUF_UNREAD(bp); | 3845 | XFS_BUF_UNREAD(bp); |
| 4034 | XFS_BUF_WRITE(bp); | 3846 | XFS_BUF_WRITE(bp); |
| 4035 | xfsbdstrat(mp, bp); | 3847 | xfsbdstrat(mp, bp); |
| 4036 | if ((error = xfs_iowait(bp))) { | 3848 | error = xfs_iowait(bp); |
| 3849 | if (error) { | ||
| 4037 | xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", | 3850 | xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", |
| 4038 | mp, bp, XFS_BUF_ADDR(bp)); | 3851 | mp, bp, XFS_BUF_ADDR(bp)); |
| 4039 | break; | 3852 | break; |
| @@ -4102,7 +3915,7 @@ xfs_free_file_space( | |||
| 4102 | DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { | 3915 | DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { |
| 4103 | if (end_dmi_offset > ip->i_size) | 3916 | if (end_dmi_offset > ip->i_size) |
| 4104 | end_dmi_offset = ip->i_size; | 3917 | end_dmi_offset = ip->i_size; |
| 4105 | error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, | 3918 | error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, |
| 4106 | offset, end_dmi_offset - offset, | 3919 | offset, end_dmi_offset - offset, |
| 4107 | AT_DELAY_FLAG(attr_flags), NULL); | 3920 | AT_DELAY_FLAG(attr_flags), NULL); |
| 4108 | if (error) | 3921 | if (error) |
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 4e3970f0e5e3..24c53923dc2c 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h | |||
| @@ -23,31 +23,32 @@ int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start, | |||
| 23 | xfs_off_t stop); | 23 | xfs_off_t stop); |
| 24 | int xfs_release(struct xfs_inode *ip); | 24 | int xfs_release(struct xfs_inode *ip); |
| 25 | int xfs_inactive(struct xfs_inode *ip); | 25 | int xfs_inactive(struct xfs_inode *ip); |
| 26 | int xfs_lookup(struct xfs_inode *dp, bhv_vname_t *dentry, | 26 | int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, |
| 27 | bhv_vnode_t **vpp); | 27 | struct xfs_inode **ipp); |
| 28 | int xfs_create(struct xfs_inode *dp, bhv_vname_t *dentry, mode_t mode, | 28 | int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, |
| 29 | xfs_dev_t rdev, bhv_vnode_t **vpp, struct cred *credp); | 29 | xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp); |
| 30 | int xfs_remove(struct xfs_inode *dp, bhv_vname_t *dentry); | 30 | int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, |
| 31 | int xfs_link(struct xfs_inode *tdp, bhv_vnode_t *src_vp, | 31 | struct xfs_inode *ip); |
| 32 | bhv_vname_t *dentry); | 32 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, |
| 33 | int xfs_mkdir(struct xfs_inode *dp, bhv_vname_t *dentry, | 33 | struct xfs_name *target_name); |
| 34 | mode_t mode, bhv_vnode_t **vpp, struct cred *credp); | 34 | int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name, |
| 35 | int xfs_rmdir(struct xfs_inode *dp, bhv_vname_t *dentry); | 35 | mode_t mode, struct xfs_inode **ipp, struct cred *credp); |
| 36 | int xfs_rmdir(struct xfs_inode *dp, struct xfs_name *name, | ||
| 37 | struct xfs_inode *cdp); | ||
| 36 | int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, | 38 | int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, |
| 37 | xfs_off_t *offset, filldir_t filldir); | 39 | xfs_off_t *offset, filldir_t filldir); |
| 38 | int xfs_symlink(struct xfs_inode *dp, bhv_vname_t *dentry, | 40 | int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, |
| 39 | char *target_path, mode_t mode, bhv_vnode_t **vpp, | 41 | const char *target_path, mode_t mode, struct xfs_inode **ipp, |
| 40 | struct cred *credp); | 42 | struct cred *credp); |
| 41 | int xfs_rwlock(struct xfs_inode *ip, bhv_vrwlock_t locktype); | ||
| 42 | void xfs_rwunlock(struct xfs_inode *ip, bhv_vrwlock_t locktype); | ||
| 43 | int xfs_inode_flush(struct xfs_inode *ip, int flags); | 43 | int xfs_inode_flush(struct xfs_inode *ip, int flags); |
| 44 | int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); | 44 | int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); |
| 45 | int xfs_reclaim(struct xfs_inode *ip); | 45 | int xfs_reclaim(struct xfs_inode *ip); |
| 46 | int xfs_change_file_space(struct xfs_inode *ip, int cmd, | 46 | int xfs_change_file_space(struct xfs_inode *ip, int cmd, |
| 47 | xfs_flock64_t *bf, xfs_off_t offset, | 47 | xfs_flock64_t *bf, xfs_off_t offset, |
| 48 | struct cred *credp, int attr_flags); | 48 | struct cred *credp, int attr_flags); |
| 49 | int xfs_rename(struct xfs_inode *src_dp, bhv_vname_t *src_vname, | 49 | int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, |
| 50 | bhv_vnode_t *target_dir_vp, bhv_vname_t *target_vname); | 50 | struct xfs_inode *src_ip, struct xfs_inode *target_dp, |
| 51 | struct xfs_name *target_name); | ||
| 51 | int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value, | 52 | int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value, |
| 52 | int *valuelenp, int flags, cred_t *cred); | 53 | int *valuelenp, int flags, cred_t *cred); |
| 53 | int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value, | 54 | int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value, |
