author     Linus Torvalds <torvalds@linux-foundation.org>  2011-05-26 13:55:15 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2011-05-26 13:55:15 -0400
commit     a74b81b0aff4a01e0816df5915c854fb52c5e87f
tree       98364cec6a9e0c0fd510e5fe9af46f1ddb28956b  /fs/ocfs2
parent     f8d613e2a665bf1be9628a3c3f9bafe7599b32c0
parent     ece928df16494becd43f999aff9bd530182e7e81
Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2
* 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2: (28 commits)
Ocfs2: Teach local-mounted ocfs2 to handle unwritten_extents correctly.
ocfs2/dlm: Do not migrate resource to a node that is leaving the domain
ocfs2/dlm: Add new dlm message DLM_BEGIN_EXIT_DOMAIN_MSG
Ocfs2/move_extents: Set several trivial constraints for threshold.
Ocfs2/move_extents: Let defrag handle partial extent moving.
Ocfs2/move_extents: move/defrag extents within a certain range.
Ocfs2/move_extents: helper to calculate the defraging length in one run.
Ocfs2/move_extents: move entire/partial extent.
Ocfs2/move_extents: helpers to update the group descriptor and global bitmap inode.
Ocfs2/move_extents: helper to probe a proper region to move in an alloc group.
Ocfs2/move_extents: helper to validate and adjust moving goal.
Ocfs2/move_extents: find the victim alloc group, where the given #blk fits.
Ocfs2/move_extents: defrag a range of extent.
Ocfs2/move_extents: move a range of extent.
Ocfs2/move_extents: lock allocators and reserve metadata blocks and data clusters for extents moving.
Ocfs2/move_extents: Add basic framework and source files for extent moving.
Ocfs2/move_extents: Adding new ioctl code 'OCFS2_IOC_MOVE_EXT' to ocfs2.
Ocfs2/refcounttree: Publicize couple of funcs from refcounttree.c
Ocfs2: Add a new code 'OCFS2_INFO_FREEFRAG' for o2info ioctl.
Ocfs2: Add a new code 'OCFS2_INFO_FREEINODE' for o2info ioctl.
...
Diffstat (limited to 'fs/ocfs2')
 fs/ocfs2/Makefile           |    1
 fs/ocfs2/alloc.c            |  166
 fs/ocfs2/alloc.h            |    1
 fs/ocfs2/cluster/sys.c      |    9
 fs/ocfs2/dlm/dlmcommon.h    |   14
 fs/ocfs2/dlm/dlmdebug.c     |    6
 fs/ocfs2/dlm/dlmdomain.c    |   94
 fs/ocfs2/dlm/dlmmaster.c    |  255
 fs/ocfs2/dlm/dlmrecovery.c  |    1
 fs/ocfs2/dlmfs/dlmfs.c      |    2
 fs/ocfs2/file.c             |    1
 fs/ocfs2/ioctl.c            |  492
 fs/ocfs2/move_extents.c     | 1153
 fs/ocfs2/move_extents.h     |   22
 fs/ocfs2/ocfs2_ioctl.h      |   68
 fs/ocfs2/ocfs2_trace.h      |   25
 fs/ocfs2/refcounttree.c     |   58
 fs/ocfs2/refcounttree.h     |   11
 fs/ocfs2/super.c            |    2
 19 files changed, 2135 insertions(+), 246 deletions(-)
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index d8a0313e99e6..f17e58b32989 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
| @@ -30,6 +30,7 @@ ocfs2-objs := \ | |||
| 30 | namei.o \ | 30 | namei.o \ |
| 31 | refcounttree.o \ | 31 | refcounttree.o \ |
| 32 | reservations.o \ | 32 | reservations.o \ |
| 33 | move_extents.o \ | ||
| 33 | resize.o \ | 34 | resize.o \ |
| 34 | slot_map.o \ | 35 | slot_map.o \ |
| 35 | suballoc.o \ | 36 | suballoc.o \ |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 48aa9c7401c7..ed553c60de82 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
| @@ -29,6 +29,7 @@ | |||
| 29 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
| 30 | #include <linux/swap.h> | 30 | #include <linux/swap.h> |
| 31 | #include <linux/quotaops.h> | 31 | #include <linux/quotaops.h> |
| 32 | #include <linux/blkdev.h> | ||
| 32 | 33 | ||
| 33 | #include <cluster/masklog.h> | 34 | #include <cluster/masklog.h> |
| 34 | 35 | ||
| @@ -7184,3 +7185,168 @@ out_commit: | |||
| 7184 | out: | 7185 | out: |
| 7185 | return ret; | 7186 | return ret; |
| 7186 | } | 7187 | } |
| 7188 | |||
| 7189 | static int ocfs2_trim_extent(struct super_block *sb, | ||
| 7190 | struct ocfs2_group_desc *gd, | ||
| 7191 | u32 start, u32 count) | ||
| 7192 | { | ||
| 7193 | u64 discard, bcount; | ||
| 7194 | |||
| 7195 | bcount = ocfs2_clusters_to_blocks(sb, count); | ||
| 7196 | discard = le64_to_cpu(gd->bg_blkno) + | ||
| 7197 | ocfs2_clusters_to_blocks(sb, start); | ||
| 7198 | |||
| 7199 | trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount); | ||
| 7200 | |||
| 7201 | return sb_issue_discard(sb, discard, bcount, GFP_NOFS, 0); | ||
| 7202 | } | ||
| 7203 | |||
| 7204 | static int ocfs2_trim_group(struct super_block *sb, | ||
| 7205 | struct ocfs2_group_desc *gd, | ||
| 7206 | u32 start, u32 max, u32 minbits) | ||
| 7207 | { | ||
| 7208 | int ret = 0, count = 0, next; | ||
| 7209 | void *bitmap = gd->bg_bitmap; | ||
| 7210 | |||
| 7211 | if (le16_to_cpu(gd->bg_free_bits_count) < minbits) | ||
| 7212 | return 0; | ||
| 7213 | |||
| 7214 | trace_ocfs2_trim_group((unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
| 7215 | start, max, minbits); | ||
| 7216 | |||
| 7217 | while (start < max) { | ||
| 7218 | start = ocfs2_find_next_zero_bit(bitmap, max, start); | ||
| 7219 | if (start >= max) | ||
| 7220 | break; | ||
| 7221 | next = ocfs2_find_next_bit(bitmap, max, start); | ||
| 7222 | |||
| 7223 | if ((next - start) >= minbits) { | ||
| 7224 | ret = ocfs2_trim_extent(sb, gd, | ||
| 7225 | start, next - start); | ||
| 7226 | if (ret < 0) { | ||
| 7227 | mlog_errno(ret); | ||
| 7228 | break; | ||
| 7229 | } | ||
| 7230 | count += next - start; | ||
| 7231 | } | ||
| 7232 | start = next + 1; | ||
| 7233 | |||
| 7234 | if (fatal_signal_pending(current)) { | ||
| 7235 | count = -ERESTARTSYS; | ||
| 7236 | break; | ||
| 7237 | } | ||
| 7238 | |||
| 7239 | if ((le16_to_cpu(gd->bg_free_bits_count) - count) < minbits) | ||
| 7240 | break; | ||
| 7241 | } | ||
| 7242 | |||
| 7243 | if (ret < 0) | ||
| 7244 | count = ret; | ||
| 7245 | |||
| 7246 | return count; | ||
| 7247 | } | ||
| 7248 | |||
| 7249 | int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) | ||
| 7250 | { | ||
| 7251 | struct ocfs2_super *osb = OCFS2_SB(sb); | ||
| 7252 | u64 start, len, trimmed, first_group, last_group, group; | ||
| 7253 | int ret, cnt; | ||
| 7254 | u32 first_bit, last_bit, minlen; | ||
| 7255 | struct buffer_head *main_bm_bh = NULL; | ||
| 7256 | struct inode *main_bm_inode = NULL; | ||
| 7257 | struct buffer_head *gd_bh = NULL; | ||
| 7258 | struct ocfs2_dinode *main_bm; | ||
| 7259 | struct ocfs2_group_desc *gd = NULL; | ||
| 7260 | |||
| 7261 | start = range->start >> osb->s_clustersize_bits; | ||
| 7262 | len = range->len >> osb->s_clustersize_bits; | ||
| 7263 | minlen = range->minlen >> osb->s_clustersize_bits; | ||
| 7264 | trimmed = 0; | ||
| 7265 | |||
| 7266 | if (!len) { | ||
| 7267 | range->len = 0; | ||
| 7268 | return 0; | ||
| 7269 | } | ||
| 7270 | |||
| 7271 | if (minlen >= osb->bitmap_cpg) | ||
| 7272 | return -EINVAL; | ||
| 7273 | |||
| 7274 | main_bm_inode = ocfs2_get_system_file_inode(osb, | ||
| 7275 | GLOBAL_BITMAP_SYSTEM_INODE, | ||
| 7276 | OCFS2_INVALID_SLOT); | ||
| 7277 | if (!main_bm_inode) { | ||
| 7278 | ret = -EIO; | ||
| 7279 | mlog_errno(ret); | ||
| 7280 | goto out; | ||
| 7281 | } | ||
| 7282 | |||
| 7283 | mutex_lock(&main_bm_inode->i_mutex); | ||
| 7284 | |||
| 7285 | ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0); | ||
| 7286 | if (ret < 0) { | ||
| 7287 | mlog_errno(ret); | ||
| 7288 | goto out_mutex; | ||
| 7289 | } | ||
| 7290 | main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data; | ||
| 7291 | |||
| 7292 | if (start >= le32_to_cpu(main_bm->i_clusters)) { | ||
| 7293 | ret = -EINVAL; | ||
| 7294 | goto out_unlock; | ||
| 7295 | } | ||
| 7296 | |||
| 7297 | if (start + len > le32_to_cpu(main_bm->i_clusters)) | ||
| 7298 | len = le32_to_cpu(main_bm->i_clusters) - start; | ||
| 7299 | |||
| 7300 | trace_ocfs2_trim_fs(start, len, minlen); | ||
| 7301 | |||
| 7302 | /* Determine first and last group to examine based on start and len */ | ||
| 7303 | first_group = ocfs2_which_cluster_group(main_bm_inode, start); | ||
| 7304 | if (first_group == osb->first_cluster_group_blkno) | ||
| 7305 | first_bit = start; | ||
| 7306 | else | ||
| 7307 | first_bit = start - ocfs2_blocks_to_clusters(sb, first_group); | ||
| 7308 | last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1); | ||
| 7309 | last_bit = osb->bitmap_cpg; | ||
| 7310 | |||
| 7311 | for (group = first_group; group <= last_group;) { | ||
| 7312 | if (first_bit + len >= osb->bitmap_cpg) | ||
| 7313 | last_bit = osb->bitmap_cpg; | ||
| 7314 | else | ||
| 7315 | last_bit = first_bit + len; | ||
| 7316 | |||
| 7317 | ret = ocfs2_read_group_descriptor(main_bm_inode, | ||
| 7318 | main_bm, group, | ||
| 7319 | &gd_bh); | ||
| 7320 | if (ret < 0) { | ||
| 7321 | mlog_errno(ret); | ||
| 7322 | break; | ||
| 7323 | } | ||
| 7324 | |||
| 7325 | gd = (struct ocfs2_group_desc *)gd_bh->b_data; | ||
| 7326 | cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen); | ||
| 7327 | brelse(gd_bh); | ||
| 7328 | gd_bh = NULL; | ||
| 7329 | if (cnt < 0) { | ||
| 7330 | ret = cnt; | ||
| 7331 | mlog_errno(ret); | ||
| 7332 | break; | ||
| 7333 | } | ||
| 7334 | |||
| 7335 | trimmed += cnt; | ||
| 7336 | len -= osb->bitmap_cpg - first_bit; | ||
| 7337 | first_bit = 0; | ||
| 7338 | if (group == osb->first_cluster_group_blkno) | ||
| 7339 | group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg); | ||
| 7340 | else | ||
| 7341 | group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg); | ||
| 7342 | } | ||
| 7343 | range->len = trimmed * sb->s_blocksize; | ||
| 7344 | out_unlock: | ||
| 7345 | ocfs2_inode_unlock(main_bm_inode, 0); | ||
| 7346 | brelse(main_bm_bh); | ||
| 7347 | out_mutex: | ||
| 7348 | mutex_unlock(&main_bm_inode->i_mutex); | ||
| 7349 | iput(main_bm_inode); | ||
| 7350 | out: | ||
| 7351 | return ret; | ||
| 7352 | } | ||
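
The ocfs2_trim_fs() helper added above walks the global bitmap one cluster group at a time and discards free runs of at least minlen clusters via sb_issue_discard(). It takes the same struct fstrim_range that userspace hands to the FITRIM ioctl; assuming it is hooked up to FITRIM (the ioctl.c changes in this diff are truncated, so that wiring is not shown here), a minimal userspace sketch looks like this. The mount-point path is hypothetical.

/* Minimal sketch: requesting a discard of free space via FITRIM.
 * Assumes the ocfs2 volume is mounted at the hypothetical path
 * "/mnt/ocfs2" and routes FITRIM to a helper like ocfs2_trim_fs(). */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>           /* FITRIM, struct fstrim_range */

int main(void)
{
	struct fstrim_range range;
	int fd = open("/mnt/ocfs2", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&range, 0, sizeof(range));
	range.start = 0;                /* byte offset into the filesystem */
	range.len = UINT64_MAX;         /* trim everything after start */
	range.minlen = 1024 * 1024;     /* skip free runs shorter than 1 MiB */

	/* The kernel rounds these values to cluster granularity and writes
	 * back into range.len how many bytes it actually discarded. */
	if (ioctl(fd, FITRIM, &range) < 0) {
		perror("ioctl(FITRIM)");
		close(fd);
		return 1;
	}

	printf("trimmed %llu bytes\n", (unsigned long long)range.len);
	close(fd);
	return 0;
}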
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 3bd08a03251c..ca381c584127 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
| @@ -239,6 +239,7 @@ int ocfs2_find_leaf(struct ocfs2_caching_info *ci, | |||
| 239 | struct buffer_head **leaf_bh); | 239 | struct buffer_head **leaf_bh); |
| 240 | int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); | 240 | int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); |
| 241 | 241 | ||
| 242 | int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range); | ||
| 242 | /* | 243 | /* |
| 243 | * Helper function to look at the # of clusters in an extent record. | 244 | * Helper function to look at the # of clusters in an extent record. |
| 244 | */ | 245 | */ |
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c
index bc702dab5d1f..a4b07730b2e1 100644
--- a/fs/ocfs2/cluster/sys.c
+++ b/fs/ocfs2/cluster/sys.c
| @@ -57,7 +57,6 @@ static struct kset *o2cb_kset; | |||
| 57 | void o2cb_sys_shutdown(void) | 57 | void o2cb_sys_shutdown(void) |
| 58 | { | 58 | { |
| 59 | mlog_sys_shutdown(); | 59 | mlog_sys_shutdown(); |
| 60 | sysfs_remove_link(NULL, "o2cb"); | ||
| 61 | kset_unregister(o2cb_kset); | 60 | kset_unregister(o2cb_kset); |
| 62 | } | 61 | } |
| 63 | 62 | ||
| @@ -69,14 +68,6 @@ int o2cb_sys_init(void) | |||
| 69 | if (!o2cb_kset) | 68 | if (!o2cb_kset) |
| 70 | return -ENOMEM; | 69 | return -ENOMEM; |
| 71 | 70 | ||
| 72 | /* | ||
| 73 | * Create this symlink for backwards compatibility with old | ||
| 74 | * versions of ocfs2-tools which look for things in /sys/o2cb. | ||
| 75 | */ | ||
| 76 | ret = sysfs_create_link(NULL, &o2cb_kset->kobj, "o2cb"); | ||
| 77 | if (ret) | ||
| 78 | goto error; | ||
| 79 | |||
| 80 | ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); | 71 | ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); |
| 81 | if (ret) | 72 | if (ret) |
| 82 | goto error; | 73 | goto error; |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 4bdf7baee344..d602abb51b61 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
| @@ -144,6 +144,7 @@ struct dlm_ctxt | |||
| 144 | wait_queue_head_t dlm_join_events; | 144 | wait_queue_head_t dlm_join_events; |
| 145 | unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 145 | unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
| 146 | unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 146 | unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
| 147 | unsigned long exit_domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 147 | unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 148 | unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
| 148 | struct dlm_recovery_ctxt reco; | 149 | struct dlm_recovery_ctxt reco; |
| 149 | spinlock_t master_lock; | 150 | spinlock_t master_lock; |
| @@ -401,6 +402,18 @@ static inline int dlm_lvb_is_empty(char *lvb) | |||
| 401 | return 1; | 402 | return 1; |
| 402 | } | 403 | } |
| 403 | 404 | ||
| 405 | static inline char *dlm_list_in_text(enum dlm_lockres_list idx) | ||
| 406 | { | ||
| 407 | if (idx == DLM_GRANTED_LIST) | ||
| 408 | return "granted"; | ||
| 409 | else if (idx == DLM_CONVERTING_LIST) | ||
| 410 | return "converting"; | ||
| 411 | else if (idx == DLM_BLOCKED_LIST) | ||
| 412 | return "blocked"; | ||
| 413 | else | ||
| 414 | return "unknown"; | ||
| 415 | } | ||
| 416 | |||
| 404 | static inline struct list_head * | 417 | static inline struct list_head * |
| 405 | dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx) | 418 | dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx) |
| 406 | { | 419 | { |
| @@ -448,6 +461,7 @@ enum { | |||
| 448 | DLM_FINALIZE_RECO_MSG = 518, | 461 | DLM_FINALIZE_RECO_MSG = 518, |
| 449 | DLM_QUERY_REGION = 519, | 462 | DLM_QUERY_REGION = 519, |
| 450 | DLM_QUERY_NODEINFO = 520, | 463 | DLM_QUERY_NODEINFO = 520, |
| 464 | DLM_BEGIN_EXIT_DOMAIN_MSG = 521, | ||
| 451 | }; | 465 | }; |
| 452 | 466 | ||
| 453 | struct dlm_reco_node_data | 467 | struct dlm_reco_node_data |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 04a32be0aeb9..56f82cb912e3 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
| @@ -756,6 +756,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) | |||
| 756 | buf + out, len - out); | 756 | buf + out, len - out); |
| 757 | out += snprintf(buf + out, len - out, "\n"); | 757 | out += snprintf(buf + out, len - out, "\n"); |
| 758 | 758 | ||
| 759 | /* Exit Domain Map: xx xx xx */ | ||
| 760 | out += snprintf(buf + out, len - out, "Exit Domain Map: "); | ||
| 761 | out += stringify_nodemap(dlm->exit_domain_map, O2NM_MAX_NODES, | ||
| 762 | buf + out, len - out); | ||
| 763 | out += snprintf(buf + out, len - out, "\n"); | ||
| 764 | |||
| 759 | /* Live Map: xx xx xx */ | 765 | /* Live Map: xx xx xx */ |
| 760 | out += snprintf(buf + out, len - out, "Live Map: "); | 766 | out += snprintf(buf + out, len - out, "Live Map: "); |
| 761 | out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, | 767 | out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 3b179d6cbde0..6ed6b95dcf93 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
| @@ -132,10 +132,12 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); | |||
| 132 | * New in version 1.1: | 132 | * New in version 1.1: |
| 133 | * - Message DLM_QUERY_REGION added to support global heartbeat | 133 | * - Message DLM_QUERY_REGION added to support global heartbeat |
| 134 | * - Message DLM_QUERY_NODEINFO added to allow online node removes | 134 | * - Message DLM_QUERY_NODEINFO added to allow online node removes |
| 135 | * New in version 1.2: | ||
| 136 | * - Message DLM_BEGIN_EXIT_DOMAIN_MSG added to mark start of exit domain | ||
| 135 | */ | 137 | */ |
| 136 | static const struct dlm_protocol_version dlm_protocol = { | 138 | static const struct dlm_protocol_version dlm_protocol = { |
| 137 | .pv_major = 1, | 139 | .pv_major = 1, |
| 138 | .pv_minor = 1, | 140 | .pv_minor = 2, |
| 139 | }; | 141 | }; |
| 140 | 142 | ||
| 141 | #define DLM_DOMAIN_BACKOFF_MS 200 | 143 | #define DLM_DOMAIN_BACKOFF_MS 200 |
| @@ -449,14 +451,18 @@ redo_bucket: | |||
| 449 | dropped = dlm_empty_lockres(dlm, res); | 451 | dropped = dlm_empty_lockres(dlm, res); |
| 450 | 452 | ||
| 451 | spin_lock(&res->spinlock); | 453 | spin_lock(&res->spinlock); |
| 452 | __dlm_lockres_calc_usage(dlm, res); | 454 | if (dropped) |
| 453 | iter = res->hash_node.next; | 455 | __dlm_lockres_calc_usage(dlm, res); |
| 456 | else | ||
| 457 | iter = res->hash_node.next; | ||
| 454 | spin_unlock(&res->spinlock); | 458 | spin_unlock(&res->spinlock); |
| 455 | 459 | ||
| 456 | dlm_lockres_put(res); | 460 | dlm_lockres_put(res); |
| 457 | 461 | ||
| 458 | if (dropped) | 462 | if (dropped) { |
| 463 | cond_resched_lock(&dlm->spinlock); | ||
| 459 | goto redo_bucket; | 464 | goto redo_bucket; |
| 465 | } | ||
| 460 | } | 466 | } |
| 461 | cond_resched_lock(&dlm->spinlock); | 467 | cond_resched_lock(&dlm->spinlock); |
| 462 | num += n; | 468 | num += n; |
| @@ -486,6 +492,28 @@ static int dlm_no_joining_node(struct dlm_ctxt *dlm) | |||
| 486 | return ret; | 492 | return ret; |
| 487 | } | 493 | } |
| 488 | 494 | ||
| 495 | static int dlm_begin_exit_domain_handler(struct o2net_msg *msg, u32 len, | ||
| 496 | void *data, void **ret_data) | ||
| 497 | { | ||
| 498 | struct dlm_ctxt *dlm = data; | ||
| 499 | unsigned int node; | ||
| 500 | struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf; | ||
| 501 | |||
| 502 | if (!dlm_grab(dlm)) | ||
| 503 | return 0; | ||
| 504 | |||
| 505 | node = exit_msg->node_idx; | ||
| 506 | mlog(0, "%s: Node %u sent a begin exit domain message\n", dlm->name, node); | ||
| 507 | |||
| 508 | spin_lock(&dlm->spinlock); | ||
| 509 | set_bit(node, dlm->exit_domain_map); | ||
| 510 | spin_unlock(&dlm->spinlock); | ||
| 511 | |||
| 512 | dlm_put(dlm); | ||
| 513 | |||
| 514 | return 0; | ||
| 515 | } | ||
| 516 | |||
| 489 | static void dlm_mark_domain_leaving(struct dlm_ctxt *dlm) | 517 | static void dlm_mark_domain_leaving(struct dlm_ctxt *dlm) |
| 490 | { | 518 | { |
| 491 | /* Yikes, a double spinlock! I need domain_lock for the dlm | 519 | /* Yikes, a double spinlock! I need domain_lock for the dlm |
| @@ -542,6 +570,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 542 | 570 | ||
| 543 | spin_lock(&dlm->spinlock); | 571 | spin_lock(&dlm->spinlock); |
| 544 | clear_bit(node, dlm->domain_map); | 572 | clear_bit(node, dlm->domain_map); |
| 573 | clear_bit(node, dlm->exit_domain_map); | ||
| 545 | __dlm_print_nodes(dlm); | 574 | __dlm_print_nodes(dlm); |
| 546 | 575 | ||
| 547 | /* notify anything attached to the heartbeat events */ | 576 | /* notify anything attached to the heartbeat events */ |
| @@ -554,29 +583,56 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 554 | return 0; | 583 | return 0; |
| 555 | } | 584 | } |
| 556 | 585 | ||
| 557 | static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, | 586 | static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, u32 msg_type, |
| 558 | unsigned int node) | 587 | unsigned int node) |
| 559 | { | 588 | { |
| 560 | int status; | 589 | int status; |
| 561 | struct dlm_exit_domain leave_msg; | 590 | struct dlm_exit_domain leave_msg; |
| 562 | 591 | ||
| 563 | mlog(0, "Asking node %u if we can leave the domain %s me = %u\n", | 592 | mlog(0, "%s: Sending domain exit message %u to node %u\n", dlm->name, |
| 564 | node, dlm->name, dlm->node_num); | 593 | msg_type, node); |
| 565 | 594 | ||
| 566 | memset(&leave_msg, 0, sizeof(leave_msg)); | 595 | memset(&leave_msg, 0, sizeof(leave_msg)); |
| 567 | leave_msg.node_idx = dlm->node_num; | 596 | leave_msg.node_idx = dlm->node_num; |
| 568 | 597 | ||
| 569 | status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key, | 598 | status = o2net_send_message(msg_type, dlm->key, &leave_msg, |
| 570 | &leave_msg, sizeof(leave_msg), node, | 599 | sizeof(leave_msg), node, NULL); |
| 571 | NULL); | ||
| 572 | if (status < 0) | 600 | if (status < 0) |
| 573 | mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " | 601 | mlog(ML_ERROR, "Error %d sending domain exit message %u " |
| 574 | "node %u\n", status, DLM_EXIT_DOMAIN_MSG, dlm->key, node); | 602 | "to node %u on domain %s\n", status, msg_type, node, |
| 575 | mlog(0, "status return %d from o2net_send_message\n", status); | 603 | dlm->name); |
| 576 | 604 | ||
| 577 | return status; | 605 | return status; |
| 578 | } | 606 | } |
| 579 | 607 | ||
| 608 | static void dlm_begin_exit_domain(struct dlm_ctxt *dlm) | ||
| 609 | { | ||
| 610 | int node = -1; | ||
| 611 | |||
| 612 | /* Support for begin exit domain was added in 1.2 */ | ||
| 613 | if (dlm->dlm_locking_proto.pv_major == 1 && | ||
| 614 | dlm->dlm_locking_proto.pv_minor < 2) | ||
| 615 | return; | ||
| 616 | |||
| 617 | /* | ||
| 618 | * Unlike DLM_EXIT_DOMAIN_MSG, DLM_BEGIN_EXIT_DOMAIN_MSG is purely | ||
| 619 | * informational. Meaning if a node does not receive the message, | ||
| 620 | * so be it. | ||
| 621 | */ | ||
| 622 | spin_lock(&dlm->spinlock); | ||
| 623 | while (1) { | ||
| 624 | node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, node + 1); | ||
| 625 | if (node >= O2NM_MAX_NODES) | ||
| 626 | break; | ||
| 627 | if (node == dlm->node_num) | ||
| 628 | continue; | ||
| 629 | |||
| 630 | spin_unlock(&dlm->spinlock); | ||
| 631 | dlm_send_one_domain_exit(dlm, DLM_BEGIN_EXIT_DOMAIN_MSG, node); | ||
| 632 | spin_lock(&dlm->spinlock); | ||
| 633 | } | ||
| 634 | spin_unlock(&dlm->spinlock); | ||
| 635 | } | ||
| 580 | 636 | ||
| 581 | static void dlm_leave_domain(struct dlm_ctxt *dlm) | 637 | static void dlm_leave_domain(struct dlm_ctxt *dlm) |
| 582 | { | 638 | { |
| @@ -602,7 +658,8 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm) | |||
| 602 | 658 | ||
| 603 | clear_node = 1; | 659 | clear_node = 1; |
| 604 | 660 | ||
| 605 | status = dlm_send_one_domain_exit(dlm, node); | 661 | status = dlm_send_one_domain_exit(dlm, DLM_EXIT_DOMAIN_MSG, |
| 662 | node); | ||
| 606 | if (status < 0 && | 663 | if (status < 0 && |
| 607 | status != -ENOPROTOOPT && | 664 | status != -ENOPROTOOPT && |
| 608 | status != -ENOTCONN) { | 665 | status != -ENOTCONN) { |
| @@ -677,6 +734,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) | |||
| 677 | 734 | ||
| 678 | if (leave) { | 735 | if (leave) { |
| 679 | mlog(0, "shutting down domain %s\n", dlm->name); | 736 | mlog(0, "shutting down domain %s\n", dlm->name); |
| 737 | dlm_begin_exit_domain(dlm); | ||
| 680 | 738 | ||
| 681 | /* We changed dlm state, notify the thread */ | 739 | /* We changed dlm state, notify the thread */ |
| 682 | dlm_kick_thread(dlm, NULL); | 740 | dlm_kick_thread(dlm, NULL); |
| @@ -909,6 +967,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 909 | * leftover join state. */ | 967 | * leftover join state. */ |
| 910 | BUG_ON(dlm->joining_node != assert->node_idx); | 968 | BUG_ON(dlm->joining_node != assert->node_idx); |
| 911 | set_bit(assert->node_idx, dlm->domain_map); | 969 | set_bit(assert->node_idx, dlm->domain_map); |
| 970 | clear_bit(assert->node_idx, dlm->exit_domain_map); | ||
| 912 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); | 971 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); |
| 913 | 972 | ||
| 914 | printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n", | 973 | printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n", |
| @@ -1793,6 +1852,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) | |||
| 1793 | if (status) | 1852 | if (status) |
| 1794 | goto bail; | 1853 | goto bail; |
| 1795 | 1854 | ||
| 1855 | status = o2net_register_handler(DLM_BEGIN_EXIT_DOMAIN_MSG, dlm->key, | ||
| 1856 | sizeof(struct dlm_exit_domain), | ||
| 1857 | dlm_begin_exit_domain_handler, | ||
| 1858 | dlm, NULL, &dlm->dlm_domain_handlers); | ||
| 1859 | if (status) | ||
| 1860 | goto bail; | ||
| 1861 | |||
| 1796 | bail: | 1862 | bail: |
| 1797 | if (status) | 1863 | if (status) |
| 1798 | dlm_unregister_domain_handlers(dlm); | 1864 | dlm_unregister_domain_handlers(dlm); |
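
The dlmdomain.c changes above introduce DLM_BEGIN_EXIT_DOMAIN_MSG as a purely informational, best-effort broadcast: each peer that receives it sets the sender's bit in exit_domain_map, and the migration-target picker (in dlmmaster.c, below) then skips such nodes. One detail worth noting in dlm_begin_exit_domain() is that the spinlock protecting domain_map cannot be held across the blocking o2net send, so the loop drops it around each send and resumes the bitmap scan afterwards. The following is an illustrative kernel-style sketch of that pattern only, with hypothetical names (exit_domain, peer_map, MAX_PEERS, send_exit_notice), not the o2dlm code itself.

/* Illustrative sketch: broadcast a best-effort notification to every
 * peer set in a bitmap, never holding the bitmap's spinlock across the
 * blocking send. All names here are hypothetical. */
#include <linux/spinlock.h>
#include <linux/bitops.h>

#define MAX_PEERS 255

struct exit_domain {
	spinlock_t lock;                              /* protects peer_map */
	unsigned long peer_map[BITS_TO_LONGS(MAX_PEERS)];
	int my_id;
};

/* Hypothetical transport call; it may sleep, so it must run unlocked. */
int send_exit_notice(struct exit_domain *dom, int node);

static void broadcast_exit_notice(struct exit_domain *dom)
{
	int node = -1;

	spin_lock(&dom->lock);
	for (;;) {
		node = find_next_bit(dom->peer_map, MAX_PEERS, node + 1);
		if (node >= MAX_PEERS)
			break;
		if (node == dom->my_id)
			continue;

		/* Drop the lock around the blocking send and pick the scan
		 * back up at node + 1; a peer that joins or leaves in the
		 * meantime is simply missed or skipped, which is acceptable
		 * for a message that is only advisory. */
		spin_unlock(&dom->lock);
		send_exit_notice(dom, node);
		spin_lock(&dom->lock);
	}
	spin_unlock(&dom->lock);
}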
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 84d166328cf7..11eefb8c12e9 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
| @@ -2339,65 +2339,55 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) | |||
| 2339 | dlm_lockres_put(res); | 2339 | dlm_lockres_put(res); |
| 2340 | } | 2340 | } |
| 2341 | 2341 | ||
| 2342 | /* Checks whether the lockres can be migrated. Returns 0 if yes, < 0 | 2342 | /* |
| 2343 | * if not. If 0, numlocks is set to the number of locks in the lockres. | 2343 | * A migrateable resource is one that is : |
| 2344 | * 1. locally mastered, and, | ||
| 2345 | * 2. zero local locks, and, | ||
| 2346 | * 3. one or more non-local locks, or, one or more references | ||
| 2347 | * Returns 1 if yes, 0 if not. | ||
| 2344 | */ | 2348 | */ |
| 2345 | static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, | 2349 | static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, |
| 2346 | struct dlm_lock_resource *res, | 2350 | struct dlm_lock_resource *res) |
| 2347 | int *numlocks, | ||
| 2348 | int *hasrefs) | ||
| 2349 | { | 2351 | { |
| 2350 | int ret; | 2352 | enum dlm_lockres_list idx; |
| 2351 | int i; | 2353 | int nonlocal = 0, node_ref; |
| 2352 | int count = 0; | ||
| 2353 | struct list_head *queue; | 2354 | struct list_head *queue; |
| 2354 | struct dlm_lock *lock; | 2355 | struct dlm_lock *lock; |
| 2356 | u64 cookie; | ||
| 2355 | 2357 | ||
| 2356 | assert_spin_locked(&res->spinlock); | 2358 | assert_spin_locked(&res->spinlock); |
| 2357 | 2359 | ||
| 2358 | *numlocks = 0; | 2360 | if (res->owner != dlm->node_num) |
| 2359 | *hasrefs = 0; | 2361 | return 0; |
| 2360 | |||
| 2361 | ret = -EINVAL; | ||
| 2362 | if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { | ||
| 2363 | mlog(0, "cannot migrate lockres with unknown owner!\n"); | ||
| 2364 | goto leave; | ||
| 2365 | } | ||
| 2366 | |||
| 2367 | if (res->owner != dlm->node_num) { | ||
| 2368 | mlog(0, "cannot migrate lockres this node doesn't own!\n"); | ||
| 2369 | goto leave; | ||
| 2370 | } | ||
| 2371 | 2362 | ||
| 2372 | ret = 0; | 2363 | for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) { |
| 2373 | queue = &res->granted; | 2364 | queue = dlm_list_idx_to_ptr(res, idx); |
| 2374 | for (i = 0; i < 3; i++) { | ||
| 2375 | list_for_each_entry(lock, queue, list) { | 2365 | list_for_each_entry(lock, queue, list) { |
| 2376 | ++count; | 2366 | if (lock->ml.node != dlm->node_num) { |
| 2377 | if (lock->ml.node == dlm->node_num) { | 2367 | nonlocal++; |
| 2378 | mlog(0, "found a lock owned by this node still " | 2368 | continue; |
| 2379 | "on the %s queue! will not migrate this " | ||
| 2380 | "lockres\n", (i == 0 ? "granted" : | ||
| 2381 | (i == 1 ? "converting" : | ||
| 2382 | "blocked"))); | ||
| 2383 | ret = -ENOTEMPTY; | ||
| 2384 | goto leave; | ||
| 2385 | } | 2369 | } |
| 2370 | cookie = be64_to_cpu(lock->ml.cookie); | ||
| 2371 | mlog(0, "%s: Not migrateable res %.*s, lock %u:%llu on " | ||
| 2372 | "%s list\n", dlm->name, res->lockname.len, | ||
| 2373 | res->lockname.name, | ||
| 2374 | dlm_get_lock_cookie_node(cookie), | ||
| 2375 | dlm_get_lock_cookie_seq(cookie), | ||
| 2376 | dlm_list_in_text(idx)); | ||
| 2377 | return 0; | ||
| 2386 | } | 2378 | } |
| 2387 | queue++; | ||
| 2388 | } | 2379 | } |
| 2389 | 2380 | ||
| 2390 | *numlocks = count; | 2381 | if (!nonlocal) { |
| 2391 | 2382 | node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | |
| 2392 | count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | 2383 | if (node_ref >= O2NM_MAX_NODES) |
| 2393 | if (count < O2NM_MAX_NODES) | 2384 | return 0; |
| 2394 | *hasrefs = 1; | 2385 | } |
| 2395 | 2386 | ||
| 2396 | mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name, | 2387 | mlog(0, "%s: res %.*s, Migrateable\n", dlm->name, res->lockname.len, |
| 2397 | res->lockname.len, res->lockname.name, *numlocks, *hasrefs); | 2388 | res->lockname.name); |
| 2398 | 2389 | ||
| 2399 | leave: | 2390 | return 1; |
| 2400 | return ret; | ||
| 2401 | } | 2391 | } |
| 2402 | 2392 | ||
| 2403 | /* | 2393 | /* |
| @@ -2406,8 +2396,7 @@ leave: | |||
| 2406 | 2396 | ||
| 2407 | 2397 | ||
| 2408 | static int dlm_migrate_lockres(struct dlm_ctxt *dlm, | 2398 | static int dlm_migrate_lockres(struct dlm_ctxt *dlm, |
| 2409 | struct dlm_lock_resource *res, | 2399 | struct dlm_lock_resource *res, u8 target) |
| 2410 | u8 target) | ||
| 2411 | { | 2400 | { |
| 2412 | struct dlm_master_list_entry *mle = NULL; | 2401 | struct dlm_master_list_entry *mle = NULL; |
| 2413 | struct dlm_master_list_entry *oldmle = NULL; | 2402 | struct dlm_master_list_entry *oldmle = NULL; |
| @@ -2416,37 +2405,20 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, | |||
| 2416 | const char *name; | 2405 | const char *name; |
| 2417 | unsigned int namelen; | 2406 | unsigned int namelen; |
| 2418 | int mle_added = 0; | 2407 | int mle_added = 0; |
| 2419 | int numlocks, hasrefs; | ||
| 2420 | int wake = 0; | 2408 | int wake = 0; |
| 2421 | 2409 | ||
| 2422 | if (!dlm_grab(dlm)) | 2410 | if (!dlm_grab(dlm)) |
| 2423 | return -EINVAL; | 2411 | return -EINVAL; |
| 2424 | 2412 | ||
| 2413 | BUG_ON(target == O2NM_MAX_NODES); | ||
| 2414 | |||
| 2425 | name = res->lockname.name; | 2415 | name = res->lockname.name; |
| 2426 | namelen = res->lockname.len; | 2416 | namelen = res->lockname.len; |
| 2427 | 2417 | ||
| 2428 | mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target); | 2418 | mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name, |
| 2429 | 2419 | target); | |
| 2430 | /* | ||
| 2431 | * ensure this lockres is a proper candidate for migration | ||
| 2432 | */ | ||
| 2433 | spin_lock(&res->spinlock); | ||
| 2434 | ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs); | ||
| 2435 | if (ret < 0) { | ||
| 2436 | spin_unlock(&res->spinlock); | ||
| 2437 | goto leave; | ||
| 2438 | } | ||
| 2439 | spin_unlock(&res->spinlock); | ||
| 2440 | |||
| 2441 | /* no work to do */ | ||
| 2442 | if (numlocks == 0 && !hasrefs) | ||
| 2443 | goto leave; | ||
| 2444 | |||
| 2445 | /* | ||
| 2446 | * preallocate up front | ||
| 2447 | * if this fails, abort | ||
| 2448 | */ | ||
| 2449 | 2420 | ||
| 2421 | /* preallocate up front. if this fails, abort */ | ||
| 2450 | ret = -ENOMEM; | 2422 | ret = -ENOMEM; |
| 2451 | mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS); | 2423 | mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS); |
| 2452 | if (!mres) { | 2424 | if (!mres) { |
| @@ -2462,35 +2434,10 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, | |||
| 2462 | ret = 0; | 2434 | ret = 0; |
| 2463 | 2435 | ||
| 2464 | /* | 2436 | /* |
| 2465 | * find a node to migrate the lockres to | ||
| 2466 | */ | ||
| 2467 | |||
| 2468 | spin_lock(&dlm->spinlock); | ||
| 2469 | /* pick a new node */ | ||
| 2470 | if (!test_bit(target, dlm->domain_map) || | ||
| 2471 | target >= O2NM_MAX_NODES) { | ||
| 2472 | target = dlm_pick_migration_target(dlm, res); | ||
| 2473 | } | ||
| 2474 | mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name, | ||
| 2475 | namelen, name, target); | ||
| 2476 | |||
| 2477 | if (target >= O2NM_MAX_NODES || | ||
| 2478 | !test_bit(target, dlm->domain_map)) { | ||
| 2479 | /* target chosen is not alive */ | ||
| 2480 | ret = -EINVAL; | ||
| 2481 | } | ||
| 2482 | |||
| 2483 | if (ret) { | ||
| 2484 | spin_unlock(&dlm->spinlock); | ||
| 2485 | goto fail; | ||
| 2486 | } | ||
| 2487 | |||
| 2488 | mlog(0, "continuing with target = %u\n", target); | ||
| 2489 | |||
| 2490 | /* | ||
| 2491 | * clear any existing master requests and | 2437 | * clear any existing master requests and |
| 2492 | * add the migration mle to the list | 2438 | * add the migration mle to the list |
| 2493 | */ | 2439 | */ |
| 2440 | spin_lock(&dlm->spinlock); | ||
| 2494 | spin_lock(&dlm->master_lock); | 2441 | spin_lock(&dlm->master_lock); |
| 2495 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, | 2442 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, |
| 2496 | namelen, target, dlm->node_num); | 2443 | namelen, target, dlm->node_num); |
| @@ -2531,6 +2478,7 @@ fail: | |||
| 2531 | dlm_put_mle(mle); | 2478 | dlm_put_mle(mle); |
| 2532 | } else if (mle) { | 2479 | } else if (mle) { |
| 2533 | kmem_cache_free(dlm_mle_cache, mle); | 2480 | kmem_cache_free(dlm_mle_cache, mle); |
| 2481 | mle = NULL; | ||
| 2534 | } | 2482 | } |
| 2535 | goto leave; | 2483 | goto leave; |
| 2536 | } | 2484 | } |
| @@ -2652,69 +2600,52 @@ leave: | |||
| 2652 | if (wake) | 2600 | if (wake) |
| 2653 | wake_up(&res->wq); | 2601 | wake_up(&res->wq); |
| 2654 | 2602 | ||
| 2655 | /* TODO: cleanup */ | ||
| 2656 | if (mres) | 2603 | if (mres) |
| 2657 | free_page((unsigned long)mres); | 2604 | free_page((unsigned long)mres); |
| 2658 | 2605 | ||
| 2659 | dlm_put(dlm); | 2606 | dlm_put(dlm); |
| 2660 | 2607 | ||
| 2661 | mlog(0, "returning %d\n", ret); | 2608 | mlog(0, "%s: Migrating %.*s to %u, returns %d\n", dlm->name, namelen, |
| 2609 | name, target, ret); | ||
| 2662 | return ret; | 2610 | return ret; |
| 2663 | } | 2611 | } |
| 2664 | 2612 | ||
| 2665 | #define DLM_MIGRATION_RETRY_MS 100 | 2613 | #define DLM_MIGRATION_RETRY_MS 100 |
| 2666 | 2614 | ||
| 2667 | /* Should be called only after beginning the domain leave process. | 2615 | /* |
| 2616 | * Should be called only after beginning the domain leave process. | ||
| 2668 | * There should not be any remaining locks on nonlocal lock resources, | 2617 | * There should not be any remaining locks on nonlocal lock resources, |
| 2669 | * and there should be no local locks left on locally mastered resources. | 2618 | * and there should be no local locks left on locally mastered resources. |
| 2670 | * | 2619 | * |
| 2671 | * Called with the dlm spinlock held, may drop it to do migration, but | 2620 | * Called with the dlm spinlock held, may drop it to do migration, but |
| 2672 | * will re-acquire before exit. | 2621 | * will re-acquire before exit. |
| 2673 | * | 2622 | * |
| 2674 | * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped */ | 2623 | * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped |
| 2624 | */ | ||
| 2675 | int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | 2625 | int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) |
| 2676 | { | 2626 | { |
| 2677 | int ret; | 2627 | int ret; |
| 2678 | int lock_dropped = 0; | 2628 | int lock_dropped = 0; |
| 2679 | int numlocks, hasrefs; | 2629 | u8 target = O2NM_MAX_NODES; |
| 2630 | |||
| 2631 | assert_spin_locked(&dlm->spinlock); | ||
| 2680 | 2632 | ||
| 2681 | spin_lock(&res->spinlock); | 2633 | spin_lock(&res->spinlock); |
| 2682 | if (res->owner != dlm->node_num) { | 2634 | if (dlm_is_lockres_migrateable(dlm, res)) |
| 2683 | if (!__dlm_lockres_unused(res)) { | 2635 | target = dlm_pick_migration_target(dlm, res); |
| 2684 | mlog(ML_ERROR, "%s:%.*s: this node is not master, " | 2636 | spin_unlock(&res->spinlock); |
| 2685 | "trying to free this but locks remain\n", | ||
| 2686 | dlm->name, res->lockname.len, res->lockname.name); | ||
| 2687 | } | ||
| 2688 | spin_unlock(&res->spinlock); | ||
| 2689 | goto leave; | ||
| 2690 | } | ||
| 2691 | 2637 | ||
| 2692 | /* No need to migrate a lockres having no locks */ | 2638 | if (target == O2NM_MAX_NODES) |
| 2693 | ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs); | ||
| 2694 | if (ret >= 0 && numlocks == 0 && !hasrefs) { | ||
| 2695 | spin_unlock(&res->spinlock); | ||
| 2696 | goto leave; | 2639 | goto leave; |
| 2697 | } | ||
| 2698 | spin_unlock(&res->spinlock); | ||
| 2699 | 2640 | ||
| 2700 | /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ | 2641 | /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ |
| 2701 | spin_unlock(&dlm->spinlock); | 2642 | spin_unlock(&dlm->spinlock); |
| 2702 | lock_dropped = 1; | 2643 | lock_dropped = 1; |
| 2703 | while (1) { | 2644 | ret = dlm_migrate_lockres(dlm, res, target); |
| 2704 | ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES); | 2645 | if (ret) |
| 2705 | if (ret >= 0) | 2646 | mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n", |
| 2706 | break; | 2647 | dlm->name, res->lockname.len, res->lockname.name, |
| 2707 | if (ret == -ENOTEMPTY) { | 2648 | target, ret); |
| 2708 | mlog(ML_ERROR, "lockres %.*s still has local locks!\n", | ||
| 2709 | res->lockname.len, res->lockname.name); | ||
| 2710 | BUG(); | ||
| 2711 | } | ||
| 2712 | |||
| 2713 | mlog(0, "lockres %.*s: migrate failed, " | ||
| 2714 | "retrying\n", res->lockname.len, | ||
| 2715 | res->lockname.name); | ||
| 2716 | msleep(DLM_MIGRATION_RETRY_MS); | ||
| 2717 | } | ||
| 2718 | spin_lock(&dlm->spinlock); | 2649 | spin_lock(&dlm->spinlock); |
| 2719 | leave: | 2650 | leave: |
| 2720 | return lock_dropped; | 2651 | return lock_dropped; |
| @@ -2898,61 +2829,55 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, | |||
| 2898 | } | 2829 | } |
| 2899 | } | 2830 | } |
| 2900 | 2831 | ||
| 2901 | /* for now this is not too intelligent. we will | 2832 | /* |
| 2902 | * need stats to make this do the right thing. | 2833 | * Pick a node to migrate the lock resource to. This function selects a |
| 2903 | * this just finds the first lock on one of the | 2834 | * potential target based first on the locks and then on refmap. It skips |
| 2904 | * queues and uses that node as the target. */ | 2835 | * nodes that are in the process of exiting the domain. |
| 2836 | */ | ||
| 2905 | static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, | 2837 | static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, |
| 2906 | struct dlm_lock_resource *res) | 2838 | struct dlm_lock_resource *res) |
| 2907 | { | 2839 | { |
| 2908 | int i; | 2840 | enum dlm_lockres_list idx; |
| 2909 | struct list_head *queue = &res->granted; | 2841 | struct list_head *queue = &res->granted; |
| 2910 | struct dlm_lock *lock; | 2842 | struct dlm_lock *lock; |
| 2911 | int nodenum; | 2843 | int noderef; |
| 2844 | u8 nodenum = O2NM_MAX_NODES; | ||
| 2912 | 2845 | ||
| 2913 | assert_spin_locked(&dlm->spinlock); | 2846 | assert_spin_locked(&dlm->spinlock); |
| 2847 | assert_spin_locked(&res->spinlock); | ||
| 2914 | 2848 | ||
| 2915 | spin_lock(&res->spinlock); | 2849 | /* Go through all the locks */ |
| 2916 | for (i=0; i<3; i++) { | 2850 | for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) { |
| 2851 | queue = dlm_list_idx_to_ptr(res, idx); | ||
| 2917 | list_for_each_entry(lock, queue, list) { | 2852 | list_for_each_entry(lock, queue, list) { |
| 2918 | /* up to the caller to make sure this node | 2853 | if (lock->ml.node == dlm->node_num) |
| 2919 | * is alive */ | 2854 | continue; |
| 2920 | if (lock->ml.node != dlm->node_num) { | 2855 | if (test_bit(lock->ml.node, dlm->exit_domain_map)) |
| 2921 | spin_unlock(&res->spinlock); | 2856 | continue; |
| 2922 | return lock->ml.node; | 2857 | nodenum = lock->ml.node; |
| 2923 | } | 2858 | goto bail; |
| 2924 | } | 2859 | } |
| 2925 | queue++; | ||
| 2926 | } | ||
| 2927 | |||
| 2928 | nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | ||
| 2929 | if (nodenum < O2NM_MAX_NODES) { | ||
| 2930 | spin_unlock(&res->spinlock); | ||
| 2931 | return nodenum; | ||
| 2932 | } | 2860 | } |
| 2933 | spin_unlock(&res->spinlock); | ||
| 2934 | mlog(0, "have not found a suitable target yet! checking domain map\n"); | ||
| 2935 | 2861 | ||
| 2936 | /* ok now we're getting desperate. pick anyone alive. */ | 2862 | /* Go thru the refmap */ |
| 2937 | nodenum = -1; | 2863 | noderef = -1; |
| 2938 | while (1) { | 2864 | while (1) { |
| 2939 | nodenum = find_next_bit(dlm->domain_map, | 2865 | noderef = find_next_bit(res->refmap, O2NM_MAX_NODES, |
| 2940 | O2NM_MAX_NODES, nodenum+1); | 2866 | noderef + 1); |
| 2941 | mlog(0, "found %d in domain map\n", nodenum); | 2867 | if (noderef >= O2NM_MAX_NODES) |
| 2942 | if (nodenum >= O2NM_MAX_NODES) | ||
| 2943 | break; | 2868 | break; |
| 2944 | if (nodenum != dlm->node_num) { | 2869 | if (noderef == dlm->node_num) |
| 2945 | mlog(0, "picking %d\n", nodenum); | 2870 | continue; |
| 2946 | return nodenum; | 2871 | if (test_bit(noderef, dlm->exit_domain_map)) |
| 2947 | } | 2872 | continue; |
| 2873 | nodenum = noderef; | ||
| 2874 | goto bail; | ||
| 2948 | } | 2875 | } |
| 2949 | 2876 | ||
| 2950 | mlog(0, "giving up. no master to migrate to\n"); | 2877 | bail: |
| 2951 | return DLM_LOCK_RES_OWNER_UNKNOWN; | 2878 | return nodenum; |
| 2952 | } | 2879 | } |
| 2953 | 2880 | ||
| 2954 | |||
| 2955 | |||
| 2956 | /* this is called by the new master once all lockres | 2881 | /* this is called by the new master once all lockres |
| 2957 | * data has been received */ | 2882 | * data has been received */ |
| 2958 | static int dlm_do_migrate_request(struct dlm_ctxt *dlm, | 2883 | static int dlm_do_migrate_request(struct dlm_ctxt *dlm, |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index f1beb6fc254d..7efab6d28a21 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
| @@ -2393,6 +2393,7 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx) | |||
| 2393 | 2393 | ||
| 2394 | mlog(0, "node %u being removed from domain map!\n", idx); | 2394 | mlog(0, "node %u being removed from domain map!\n", idx); |
| 2395 | clear_bit(idx, dlm->domain_map); | 2395 | clear_bit(idx, dlm->domain_map); |
| 2396 | clear_bit(idx, dlm->exit_domain_map); | ||
| 2396 | /* wake up migration waiters if a node goes down. | 2397 | /* wake up migration waiters if a node goes down. |
| 2397 | * perhaps later we can genericize this for other waiters. */ | 2398 | * perhaps later we can genericize this for other waiters. */ |
| 2398 | wake_up(&dlm->migration_wq); | 2399 | wake_up(&dlm->migration_wq); |
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 8c5c0eddc365..b42076797049 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
| @@ -88,7 +88,7 @@ struct workqueue_struct *user_dlm_worker; | |||
| 88 | * signifies a bast fired on the lock. | 88 | * signifies a bast fired on the lock. |
| 89 | */ | 89 | */ |
| 90 | #define DLMFS_CAPABILITIES "bast stackglue" | 90 | #define DLMFS_CAPABILITIES "bast stackglue" |
| 91 | extern int param_set_dlmfs_capabilities(const char *val, | 91 | static int param_set_dlmfs_capabilities(const char *val, |
| 92 | struct kernel_param *kp) | 92 | struct kernel_param *kp) |
| 93 | { | 93 | { |
| 94 | printk(KERN_ERR "%s: readonly parameter\n", kp->name); | 94 | printk(KERN_ERR "%s: readonly parameter\n", kp->name); |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 89659d6dc206..b1e35a392ca5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
| @@ -2670,6 +2670,7 @@ const struct file_operations ocfs2_fops_no_plocks = { | |||
| 2670 | .flock = ocfs2_flock, | 2670 | .flock = ocfs2_flock, |
| 2671 | .splice_read = ocfs2_file_splice_read, | 2671 | .splice_read = ocfs2_file_splice_read, |
| 2672 | .splice_write = ocfs2_file_splice_write, | 2672 | .splice_write = ocfs2_file_splice_write, |
| 2673 | .fallocate = ocfs2_fallocate, | ||
| 2673 | }; | 2674 | }; |
| 2674 | 2675 | ||
| 2675 | const struct file_operations ocfs2_dops_no_plocks = { | 2676 | const struct file_operations ocfs2_dops_no_plocks = { |
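
The one-line file.c change hooks ocfs2_fallocate() into ocfs2_fops_no_plocks, so preallocation also works on local (no-plock) mounts, matching the "Teach local-mounted ocfs2 to handle unwritten_extents correctly" commit in the shortlog. From userspace, preallocating unwritten extents is just fallocate(2); a minimal sketch follows, with a hypothetical file path.

/* Minimal sketch: preallocating space with fallocate(2). With
 * FALLOC_FL_KEEP_SIZE the filesystem allocates unwritten extents
 * without changing i_size. The path below is hypothetical. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/falloc.h>       /* FALLOC_FL_KEEP_SIZE */

int main(void)
{
	int fd = open("/mnt/ocfs2/prealloc.dat", O_RDWR | O_CREAT, 0644);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Reserve 64 MiB starting at offset 0 as unwritten extents. */
	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 64 << 20) < 0)
		perror("fallocate");

	close(fd);
	return 0;
}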
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 8f13c5989eae..bc91072b7219 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
| @@ -22,6 +22,11 @@ | |||
| 22 | #include "ioctl.h" | 22 | #include "ioctl.h" |
| 23 | #include "resize.h" | 23 | #include "resize.h" |
| 24 | #include "refcounttree.h" | 24 | #include "refcounttree.h" |
| 25 | #include "sysfile.h" | ||
| 26 | #include "dir.h" | ||
| 27 | #include "buffer_head_io.h" | ||
| 28 | #include "suballoc.h" | ||
| 29 | #include "move_extents.h" | ||
| 25 | 30 | ||
| 26 | #include <linux/ext2_fs.h> | 31 | #include <linux/ext2_fs.h> |
| 27 | 32 | ||
| @@ -35,31 +40,27 @@ | |||
| 35 | * be -EFAULT. The error will be returned from the ioctl(2) call. It's | 40 | * be -EFAULT. The error will be returned from the ioctl(2) call. It's |
| 36 | * just a best-effort to tell userspace that this request caused the error. | 41 | * just a best-effort to tell userspace that this request caused the error. |
| 37 | */ | 42 | */ |
| 38 | static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq, | 43 | static inline void o2info_set_request_error(struct ocfs2_info_request *kreq, |
| 39 | struct ocfs2_info_request __user *req) | 44 | struct ocfs2_info_request __user *req) |
| 40 | { | 45 | { |
| 41 | kreq->ir_flags |= OCFS2_INFO_FL_ERROR; | 46 | kreq->ir_flags |= OCFS2_INFO_FL_ERROR; |
| 42 | (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags)); | 47 | (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags)); |
| 43 | } | 48 | } |
| 44 | 49 | ||
| 45 | #define o2info_set_request_error(a, b) \ | 50 | static inline void o2info_set_request_filled(struct ocfs2_info_request *req) |
| 46 | __o2info_set_request_error((struct ocfs2_info_request *)&(a), b) | ||
| 47 | |||
| 48 | static inline void __o2info_set_request_filled(struct ocfs2_info_request *req) | ||
| 49 | { | 51 | { |
| 50 | req->ir_flags |= OCFS2_INFO_FL_FILLED; | 52 | req->ir_flags |= OCFS2_INFO_FL_FILLED; |
| 51 | } | 53 | } |
| 52 | 54 | ||
| 53 | #define o2info_set_request_filled(a) \ | 55 | static inline void o2info_clear_request_filled(struct ocfs2_info_request *req) |
| 54 | __o2info_set_request_filled((struct ocfs2_info_request *)&(a)) | ||
| 55 | |||
| 56 | static inline void __o2info_clear_request_filled(struct ocfs2_info_request *req) | ||
| 57 | { | 56 | { |
| 58 | req->ir_flags &= ~OCFS2_INFO_FL_FILLED; | 57 | req->ir_flags &= ~OCFS2_INFO_FL_FILLED; |
| 59 | } | 58 | } |
| 60 | 59 | ||
| 61 | #define o2info_clear_request_filled(a) \ | 60 | static inline int o2info_coherent(struct ocfs2_info_request *req) |
| 62 | __o2info_clear_request_filled((struct ocfs2_info_request *)&(a)) | 61 | { |
| 62 | return (!(req->ir_flags & OCFS2_INFO_FL_NON_COHERENT)); | ||
| 63 | } | ||
| 63 | 64 | ||
| 64 | static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) | 65 | static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) |
| 65 | { | 66 | { |
| @@ -153,7 +154,7 @@ int ocfs2_info_handle_blocksize(struct inode *inode, | |||
| 153 | 154 | ||
| 154 | oib.ib_blocksize = inode->i_sb->s_blocksize; | 155 | oib.ib_blocksize = inode->i_sb->s_blocksize; |
| 155 | 156 | ||
| 156 | o2info_set_request_filled(oib); | 157 | o2info_set_request_filled(&oib.ib_req); |
| 157 | 158 | ||
| 158 | if (o2info_to_user(oib, req)) | 159 | if (o2info_to_user(oib, req)) |
| 159 | goto bail; | 160 | goto bail; |
| @@ -161,7 +162,7 @@ int ocfs2_info_handle_blocksize(struct inode *inode, | |||
| 161 | status = 0; | 162 | status = 0; |
| 162 | bail: | 163 | bail: |
| 163 | if (status) | 164 | if (status) |
| 164 | o2info_set_request_error(oib, req); | 165 | o2info_set_request_error(&oib.ib_req, req); |
| 165 | 166 | ||
| 166 | return status; | 167 | return status; |
| 167 | } | 168 | } |
| @@ -178,7 +179,7 @@ int ocfs2_info_handle_clustersize(struct inode *inode, | |||
| 178 | 179 | ||
| 179 | oic.ic_clustersize = osb->s_clustersize; | 180 | oic.ic_clustersize = osb->s_clustersize; |
| 180 | 181 | ||
| 181 | o2info_set_request_filled(oic); | 182 | o2info_set_request_filled(&oic.ic_req); |
| 182 | 183 | ||
| 183 | if (o2info_to_user(oic, req)) | 184 | if (o2info_to_user(oic, req)) |
| 184 | goto bail; | 185 | goto bail; |
| @@ -186,7 +187,7 @@ int ocfs2_info_handle_clustersize(struct inode *inode, | |||
| 186 | status = 0; | 187 | status = 0; |
| 187 | bail: | 188 | bail: |
| 188 | if (status) | 189 | if (status) |
| 189 | o2info_set_request_error(oic, req); | 190 | o2info_set_request_error(&oic.ic_req, req); |
| 190 | 191 | ||
| 191 | return status; | 192 | return status; |
| 192 | } | 193 | } |
| @@ -203,7 +204,7 @@ int ocfs2_info_handle_maxslots(struct inode *inode, | |||
| 203 | 204 | ||
| 204 | oim.im_max_slots = osb->max_slots; | 205 | oim.im_max_slots = osb->max_slots; |
| 205 | 206 | ||
| 206 | o2info_set_request_filled(oim); | 207 | o2info_set_request_filled(&oim.im_req); |
| 207 | 208 | ||
| 208 | if (o2info_to_user(oim, req)) | 209 | if (o2info_to_user(oim, req)) |
| 209 | goto bail; | 210 | goto bail; |
| @@ -211,7 +212,7 @@ int ocfs2_info_handle_maxslots(struct inode *inode, | |||
| 211 | status = 0; | 212 | status = 0; |
| 212 | bail: | 213 | bail: |
| 213 | if (status) | 214 | if (status) |
| 214 | o2info_set_request_error(oim, req); | 215 | o2info_set_request_error(&oim.im_req, req); |
| 215 | 216 | ||
| 216 | return status; | 217 | return status; |
| 217 | } | 218 | } |
| @@ -228,7 +229,7 @@ int ocfs2_info_handle_label(struct inode *inode, | |||
| 228 | 229 | ||
| 229 | memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); | 230 | memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); |
| 230 | 231 | ||
| 231 | o2info_set_request_filled(oil); | 232 | o2info_set_request_filled(&oil.il_req); |
| 232 | 233 | ||
| 233 | if (o2info_to_user(oil, req)) | 234 | if (o2info_to_user(oil, req)) |
| 234 | goto bail; | 235 | goto bail; |
| @@ -236,7 +237,7 @@ int ocfs2_info_handle_label(struct inode *inode, | |||
| 236 | status = 0; | 237 | status = 0; |
| 237 | bail: | 238 | bail: |
| 238 | if (status) | 239 | if (status) |
| 239 | o2info_set_request_error(oil, req); | 240 | o2info_set_request_error(&oil.il_req, req); |
| 240 | 241 | ||
| 241 | return status; | 242 | return status; |
| 242 | } | 243 | } |
| @@ -253,7 +254,7 @@ int ocfs2_info_handle_uuid(struct inode *inode, | |||
| 253 | 254 | ||
| 254 | memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); | 255 | memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); |
| 255 | 256 | ||
| 256 | o2info_set_request_filled(oiu); | 257 | o2info_set_request_filled(&oiu.iu_req); |
| 257 | 258 | ||
| 258 | if (o2info_to_user(oiu, req)) | 259 | if (o2info_to_user(oiu, req)) |
| 259 | goto bail; | 260 | goto bail; |
| @@ -261,7 +262,7 @@ int ocfs2_info_handle_uuid(struct inode *inode, | |||
| 261 | status = 0; | 262 | status = 0; |
| 262 | bail: | 263 | bail: |
| 263 | if (status) | 264 | if (status) |
| 264 | o2info_set_request_error(oiu, req); | 265 | o2info_set_request_error(&oiu.iu_req, req); |
| 265 | 266 | ||
| 266 | return status; | 267 | return status; |
| 267 | } | 268 | } |
| @@ -280,7 +281,7 @@ int ocfs2_info_handle_fs_features(struct inode *inode, | |||
| 280 | oif.if_incompat_features = osb->s_feature_incompat; | 281 | oif.if_incompat_features = osb->s_feature_incompat; |
| 281 | oif.if_ro_compat_features = osb->s_feature_ro_compat; | 282 | oif.if_ro_compat_features = osb->s_feature_ro_compat; |
| 282 | 283 | ||
| 283 | o2info_set_request_filled(oif); | 284 | o2info_set_request_filled(&oif.if_req); |
| 284 | 285 | ||
| 285 | if (o2info_to_user(oif, req)) | 286 | if (o2info_to_user(oif, req)) |
| 286 | goto bail; | 287 | goto bail; |
| @@ -288,7 +289,7 @@ int ocfs2_info_handle_fs_features(struct inode *inode, | |||
| 288 | status = 0; | 289 | status = 0; |
| 289 | bail: | 290 | bail: |
| 290 | if (status) | 291 | if (status) |
| 291 | o2info_set_request_error(oif, req); | 292 | o2info_set_request_error(&oif.if_req, req); |
| 292 | 293 | ||
| 293 | return status; | 294 | return status; |
| 294 | } | 295 | } |
| @@ -305,7 +306,7 @@ int ocfs2_info_handle_journal_size(struct inode *inode, | |||
| 305 | 306 | ||
| 306 | oij.ij_journal_size = osb->journal->j_inode->i_size; | 307 | oij.ij_journal_size = osb->journal->j_inode->i_size; |
| 307 | 308 | ||
| 308 | o2info_set_request_filled(oij); | 309 | o2info_set_request_filled(&oij.ij_req); |
| 309 | 310 | ||
| 310 | if (o2info_to_user(oij, req)) | 311 | if (o2info_to_user(oij, req)) |
| 311 | goto bail; | 312 | goto bail; |
| @@ -313,7 +314,408 @@ int ocfs2_info_handle_journal_size(struct inode *inode, | |||
| 313 | status = 0; | 314 | status = 0; |
| 314 | bail: | 315 | bail: |
| 315 | if (status) | 316 | if (status) |
| 316 | o2info_set_request_error(oij, req); | 317 | o2info_set_request_error(&oij.ij_req, req); |
| 318 | |||
| 319 | return status; | ||
| 320 | } | ||
| 321 | |||
| 322 | int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb, | ||
| 323 | struct inode *inode_alloc, u64 blkno, | ||
| 324 | struct ocfs2_info_freeinode *fi, u32 slot) | ||
| 325 | { | ||
| 326 | int status = 0, unlock = 0; | ||
| 327 | |||
| 328 | struct buffer_head *bh = NULL; | ||
| 329 | struct ocfs2_dinode *dinode_alloc = NULL; | ||
| 330 | |||
| 331 | if (inode_alloc) | ||
| 332 | mutex_lock(&inode_alloc->i_mutex); | ||
| 333 | |||
| 334 | if (o2info_coherent(&fi->ifi_req)) { | ||
| 335 | status = ocfs2_inode_lock(inode_alloc, &bh, 0); | ||
| 336 | if (status < 0) { | ||
| 337 | mlog_errno(status); | ||
| 338 | goto bail; | ||
| 339 | } | ||
| 340 | unlock = 1; | ||
| 341 | } else { | ||
| 342 | status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh); | ||
| 343 | if (status < 0) { | ||
| 344 | mlog_errno(status); | ||
| 345 | goto bail; | ||
| 346 | } | ||
| 347 | } | ||
| 348 | |||
| 349 | dinode_alloc = (struct ocfs2_dinode *)bh->b_data; | ||
| 350 | |||
| 351 | fi->ifi_stat[slot].lfi_total = | ||
| 352 | le32_to_cpu(dinode_alloc->id1.bitmap1.i_total); | ||
| 353 | fi->ifi_stat[slot].lfi_free = | ||
| 354 | le32_to_cpu(dinode_alloc->id1.bitmap1.i_total) - | ||
| 355 | le32_to_cpu(dinode_alloc->id1.bitmap1.i_used); | ||
| 356 | |||
| 357 | bail: | ||
| 358 | if (unlock) | ||
| 359 | ocfs2_inode_unlock(inode_alloc, 0); | ||
| 360 | |||
| 361 | if (inode_alloc) | ||
| 362 | mutex_unlock(&inode_alloc->i_mutex); | ||
| 363 | |||
| 364 | brelse(bh); | ||
| 365 | |||
| 366 | return status; | ||
| 367 | } | ||
| 368 | |||
| 369 | int ocfs2_info_handle_freeinode(struct inode *inode, | ||
| 370 | struct ocfs2_info_request __user *req) | ||
| 371 | { | ||
| 372 | u32 i; | ||
| 373 | u64 blkno = -1; | ||
| 374 | char namebuf[40]; | ||
| 375 | int status = -EFAULT, type = INODE_ALLOC_SYSTEM_INODE; | ||
| 376 | struct ocfs2_info_freeinode *oifi = NULL; | ||
| 377 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 378 | struct inode *inode_alloc = NULL; | ||
| 379 | |||
| 380 | oifi = kzalloc(sizeof(struct ocfs2_info_freeinode), GFP_KERNEL); | ||
| 381 | if (!oifi) { | ||
| 382 | status = -ENOMEM; | ||
| 383 | mlog_errno(status); | ||
| 384 | goto bail; | ||
| 385 | } | ||
| 386 | |||
| 387 | if (o2info_from_user(*oifi, req)) | ||
| 388 | goto bail; | ||
| 389 | |||
| 390 | oifi->ifi_slotnum = osb->max_slots; | ||
| 391 | |||
| 392 | for (i = 0; i < oifi->ifi_slotnum; i++) { | ||
| 393 | if (o2info_coherent(&oifi->ifi_req)) { | ||
| 394 | inode_alloc = ocfs2_get_system_file_inode(osb, type, i); | ||
| 395 | if (!inode_alloc) { | ||
| 396 | mlog(ML_ERROR, "unable to get alloc inode in " | ||
| 397 | "slot %u\n", i); | ||
| 398 | status = -EIO; | ||
| 399 | goto bail; | ||
| 400 | } | ||
| 401 | } else { | ||
| 402 | ocfs2_sprintf_system_inode_name(namebuf, | ||
| 403 | sizeof(namebuf), | ||
| 404 | type, i); | ||
| 405 | status = ocfs2_lookup_ino_from_name(osb->sys_root_inode, | ||
| 406 | namebuf, | ||
| 407 | strlen(namebuf), | ||
| 408 | &blkno); | ||
| 409 | if (status < 0) { | ||
| 410 | status = -ENOENT; | ||
| 411 | goto bail; | ||
| 412 | } | ||
| 413 | } | ||
| 414 | |||
| 415 | status = ocfs2_info_scan_inode_alloc(osb, inode_alloc, blkno, oifi, i); | ||
| 416 | if (status < 0) | ||
| 417 | goto bail; | ||
| 418 | |||
| 419 | iput(inode_alloc); | ||
| 420 | inode_alloc = NULL; | ||
| 421 | } | ||
| 422 | |||
| 423 | o2info_set_request_filled(&oifi->ifi_req); | ||
| 424 | |||
| 425 | if (o2info_to_user(*oifi, req)) | ||
| 426 | goto bail; | ||
| 427 | |||
| 428 | status = 0; | ||
| 429 | bail: | ||
| 430 | if (status) | ||
| 431 | o2info_set_request_error(&oifi->ifi_req, req); | ||
| 432 | |||
| 433 | kfree(oifi); | ||
| 434 | |||
| 435 | return status; | ||
| 436 | } | ||
| 437 | |||
| 438 | static void o2ffg_update_histogram(struct ocfs2_info_free_chunk_list *hist, | ||
| 439 | unsigned int chunksize) | ||
| 440 | { | ||
| 441 | int index; | ||
| 442 | |||
| 443 | index = __ilog2_u32(chunksize); | ||
| 444 | if (index >= OCFS2_INFO_MAX_HIST) | ||
| 445 | index = OCFS2_INFO_MAX_HIST - 1; | ||
| 446 | |||
| 447 | hist->fc_chunks[index]++; | ||
| 448 | hist->fc_clusters[index] += chunksize; | ||
| 449 | } | ||
| 450 | |||
| 451 | static void o2ffg_update_stats(struct ocfs2_info_freefrag_stats *stats, | ||
| 452 | unsigned int chunksize) | ||
| 453 | { | ||
| 454 | if (chunksize > stats->ffs_max) | ||
| 455 | stats->ffs_max = chunksize; | ||
| 456 | |||
| 457 | if (chunksize < stats->ffs_min) | ||
| 458 | stats->ffs_min = chunksize; | ||
| 459 | |||
| 460 | stats->ffs_avg += chunksize; | ||
| 461 | stats->ffs_free_chunks_real++; | ||
| 462 | } | ||
| 463 | |||
| 464 | void ocfs2_info_update_ffg(struct ocfs2_info_freefrag *ffg, | ||
| 465 | unsigned int chunksize) | ||
| 466 | { | ||
| 467 | o2ffg_update_histogram(&(ffg->iff_ffs.ffs_fc_hist), chunksize); | ||
| 468 | o2ffg_update_stats(&(ffg->iff_ffs), chunksize); | ||
| 469 | } | ||
| 470 | |||
| 471 | int ocfs2_info_freefrag_scan_chain(struct ocfs2_super *osb, | ||
| 472 | struct inode *gb_inode, | ||
| 473 | struct ocfs2_dinode *gb_dinode, | ||
| 474 | struct ocfs2_chain_rec *rec, | ||
| 475 | struct ocfs2_info_freefrag *ffg, | ||
| 476 | u32 chunks_in_group) | ||
| 477 | { | ||
| 478 | int status = 0, used; | ||
| 479 | u64 blkno; | ||
| 480 | |||
| 481 | struct buffer_head *bh = NULL; | ||
| 482 | struct ocfs2_group_desc *bg = NULL; | ||
| 483 | |||
| 484 | unsigned int max_bits, num_clusters; | ||
| 485 | unsigned int offset = 0, cluster, chunk; | ||
| 486 | unsigned int chunk_free, last_chunksize = 0; | ||
| 487 | |||
| 488 | if (!le32_to_cpu(rec->c_free)) | ||
| 489 | goto bail; | ||
| 490 | |||
| 491 | do { | ||
| 492 | if (!bg) | ||
| 493 | blkno = le64_to_cpu(rec->c_blkno); | ||
| 494 | else | ||
| 495 | blkno = le64_to_cpu(bg->bg_next_group); | ||
| 496 | |||
| 497 | if (bh) { | ||
| 498 | brelse(bh); | ||
| 499 | bh = NULL; | ||
| 500 | } | ||
| 501 | |||
| 502 | if (o2info_coherent(&ffg->iff_req)) | ||
| 503 | status = ocfs2_read_group_descriptor(gb_inode, | ||
| 504 | gb_dinode, | ||
| 505 | blkno, &bh); | ||
| 506 | else | ||
| 507 | status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh); | ||
| 508 | |||
| 509 | if (status < 0) { | ||
| 510 | mlog(ML_ERROR, "Can't read the group descriptor # " | ||
| 511 | "%llu from device.", (unsigned long long)blkno); | ||
| 512 | status = -EIO; | ||
| 513 | goto bail; | ||
| 514 | } | ||
| 515 | |||
| 516 | bg = (struct ocfs2_group_desc *)bh->b_data; | ||
| 517 | |||
| 518 | if (!le16_to_cpu(bg->bg_free_bits_count)) | ||
| 519 | continue; | ||
| 520 | |||
| 521 | max_bits = le16_to_cpu(bg->bg_bits); | ||
| 522 | offset = 0; | ||
| 523 | |||
| 524 | for (chunk = 0; chunk < chunks_in_group; chunk++) { | ||
| 525 | /* | ||
| 526 | * the last chunk may not be an entire one. | ||
| 527 | */ | ||
| 528 | if ((offset + ffg->iff_chunksize) > max_bits) | ||
| 529 | num_clusters = max_bits - offset; | ||
| 530 | else | ||
| 531 | num_clusters = ffg->iff_chunksize; | ||
| 532 | |||
| 533 | chunk_free = 0; | ||
| 534 | for (cluster = 0; cluster < num_clusters; cluster++) { | ||
| 535 | used = ocfs2_test_bit(offset, | ||
| 536 | (unsigned long *)bg->bg_bitmap); | ||
| 537 | /* | ||
| 538 | * - chunk_free counts free clusters in #N chunk. | ||
| 539 | * - last_chunksize records the size (in clusters) | ||
| 540 | * of the last real free chunk being counted. | ||
| 541 | */ | ||
| 542 | if (!used) { | ||
| 543 | last_chunksize++; | ||
| 544 | chunk_free++; | ||
| 545 | } | ||
| 546 | |||
| 547 | if (used && last_chunksize) { | ||
| 548 | ocfs2_info_update_ffg(ffg, | ||
| 549 | last_chunksize); | ||
| 550 | last_chunksize = 0; | ||
| 551 | } | ||
| 552 | |||
| 553 | offset++; | ||
| 554 | } | ||
| 555 | |||
| 556 | if (chunk_free == ffg->iff_chunksize) | ||
| 557 | ffg->iff_ffs.ffs_free_chunks++; | ||
| 558 | } | ||
| 559 | |||
| 560 | /* | ||
| 561 | * need to update the info for the last free chunk. | ||
| 562 | */ | ||
| 563 | if (last_chunksize) | ||
| 564 | ocfs2_info_update_ffg(ffg, last_chunksize); | ||
| 565 | |||
| 566 | } while (le64_to_cpu(bg->bg_next_group)); | ||
| 567 | |||
| 568 | bail: | ||
| 569 | brelse(bh); | ||
| 570 | |||
| 571 | return status; | ||
| 572 | } | ||
| 573 | |||
| 574 | int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb, | ||
| 575 | struct inode *gb_inode, u64 blkno, | ||
| 576 | struct ocfs2_info_freefrag *ffg) | ||
| 577 | { | ||
| 578 | u32 chunks_in_group; | ||
| 579 | int status = 0, unlock = 0, i; | ||
| 580 | |||
| 581 | struct buffer_head *bh = NULL; | ||
| 582 | struct ocfs2_chain_list *cl = NULL; | ||
| 583 | struct ocfs2_chain_rec *rec = NULL; | ||
| 584 | struct ocfs2_dinode *gb_dinode = NULL; | ||
| 585 | |||
| 586 | if (gb_inode) | ||
| 587 | mutex_lock(&gb_inode->i_mutex); | ||
| 588 | |||
| 589 | if (o2info_coherent(&ffg->iff_req)) { | ||
| 590 | status = ocfs2_inode_lock(gb_inode, &bh, 0); | ||
| 591 | if (status < 0) { | ||
| 592 | mlog_errno(status); | ||
| 593 | goto bail; | ||
| 594 | } | ||
| 595 | unlock = 1; | ||
| 596 | } else { | ||
| 597 | status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh); | ||
| 598 | if (status < 0) { | ||
| 599 | mlog_errno(status); | ||
| 600 | goto bail; | ||
| 601 | } | ||
| 602 | } | ||
| 603 | |||
| 604 | gb_dinode = (struct ocfs2_dinode *)bh->b_data; | ||
| 605 | cl = &(gb_dinode->id2.i_chain); | ||
| 606 | |||
| 607 | /* | ||
| 608 | * Chunksize (in clusters) from userspace must not | ||
| 609 | * exceed the number of clusters in a group. | ||
| 610 | */ | ||
| 611 | if (ffg->iff_chunksize > le16_to_cpu(cl->cl_cpg)) { | ||
| 612 | status = -EINVAL; | ||
| 613 | goto bail; | ||
| 614 | } | ||
| 615 | |||
| 616 | memset(&ffg->iff_ffs, 0, sizeof(struct ocfs2_info_freefrag_stats)); | ||
| 617 | |||
| 618 | ffg->iff_ffs.ffs_min = ~0U; | ||
| 619 | ffg->iff_ffs.ffs_clusters = | ||
| 620 | le32_to_cpu(gb_dinode->id1.bitmap1.i_total); | ||
| 621 | ffg->iff_ffs.ffs_free_clusters = ffg->iff_ffs.ffs_clusters - | ||
| 622 | le32_to_cpu(gb_dinode->id1.bitmap1.i_used); | ||
| 623 | |||
| 624 | chunks_in_group = le16_to_cpu(cl->cl_cpg) / ffg->iff_chunksize + 1; | ||
| 625 | |||
| 626 | for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) { | ||
| 627 | rec = &(cl->cl_recs[i]); | ||
| 628 | status = ocfs2_info_freefrag_scan_chain(osb, gb_inode, | ||
| 629 | gb_dinode, | ||
| 630 | rec, ffg, | ||
| 631 | chunks_in_group); | ||
| 632 | if (status) | ||
| 633 | goto bail; | ||
| 634 | } | ||
| 635 | |||
| 636 | if (ffg->iff_ffs.ffs_free_chunks_real) | ||
| 637 | ffg->iff_ffs.ffs_avg = (ffg->iff_ffs.ffs_avg / | ||
| 638 | ffg->iff_ffs.ffs_free_chunks_real); | ||
| 639 | bail: | ||
| 640 | if (unlock) | ||
| 641 | ocfs2_inode_unlock(gb_inode, 0); | ||
| 642 | |||
| 643 | if (gb_inode) | ||
| 644 | mutex_unlock(&gb_inode->i_mutex); | ||
| 645 | |||
| 646 | if (gb_inode) | ||
| 647 | iput(gb_inode); | ||
| 648 | |||
| 649 | brelse(bh); | ||
| 650 | |||
| 651 | return status; | ||
| 652 | } | ||
| 653 | |||
| 654 | int ocfs2_info_handle_freefrag(struct inode *inode, | ||
| 655 | struct ocfs2_info_request __user *req) | ||
| 656 | { | ||
| 657 | u64 blkno = -1; | ||
| 658 | char namebuf[40]; | ||
| 659 | int status = -EFAULT, type = GLOBAL_BITMAP_SYSTEM_INODE; | ||
| 660 | |||
| 661 | struct ocfs2_info_freefrag *oiff; | ||
| 662 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 663 | struct inode *gb_inode = NULL; | ||
| 664 | |||
| 665 | oiff = kzalloc(sizeof(struct ocfs2_info_freefrag), GFP_KERNEL); | ||
| 666 | if (!oiff) { | ||
| 667 | status = -ENOMEM; | ||
| 668 | mlog_errno(status); | ||
| 669 | goto bail; | ||
| 670 | } | ||
| 671 | |||
| 672 | if (o2info_from_user(*oiff, req)) | ||
| 673 | goto bail; | ||
| 674 | /* | ||
| 675 | * chunksize from userspace must be a power of 2. | ||
| 676 | */ | ||
| 677 | if ((oiff->iff_chunksize & (oiff->iff_chunksize - 1)) || | ||
| 678 | (!oiff->iff_chunksize)) { | ||
| 679 | status = -EINVAL; | ||
| 680 | goto bail; | ||
| 681 | } | ||
| 682 | |||
| 683 | if (o2info_coherent(&oiff->iff_req)) { | ||
| 684 | gb_inode = ocfs2_get_system_file_inode(osb, type, | ||
| 685 | OCFS2_INVALID_SLOT); | ||
| 686 | if (!gb_inode) { | ||
| 687 | mlog(ML_ERROR, "unable to get global_bitmap inode\n"); | ||
| 688 | status = -EIO; | ||
| 689 | goto bail; | ||
| 690 | } | ||
| 691 | } else { | ||
| 692 | ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, | ||
| 693 | OCFS2_INVALID_SLOT); | ||
| 694 | status = ocfs2_lookup_ino_from_name(osb->sys_root_inode, | ||
| 695 | namebuf, | ||
| 696 | strlen(namebuf), | ||
| 697 | &blkno); | ||
| 698 | if (status < 0) { | ||
| 699 | status = -ENOENT; | ||
| 700 | goto bail; | ||
| 701 | } | ||
| 702 | } | ||
| 703 | |||
| 704 | status = ocfs2_info_freefrag_scan_bitmap(osb, gb_inode, blkno, oiff); | ||
| 705 | if (status < 0) | ||
| 706 | goto bail; | ||
| 707 | |||
| 708 | o2info_set_request_filled(&oiff->iff_req); | ||
| 709 | |||
| 710 | if (o2info_to_user(*oiff, req)) | ||
| 711 | goto bail; | ||
| 712 | |||
| 713 | status = 0; | ||
| 714 | bail: | ||
| 715 | if (status) | ||
| 716 | o2info_set_request_error(&oiff->iff_req, req); | ||
| 717 | |||
| 718 | kfree(oiff); | ||
| 317 | 719 | ||
| 318 | return status; | 720 | return status; |
| 319 | } | 721 | } |
| @@ -327,7 +729,7 @@ int ocfs2_info_handle_unknown(struct inode *inode, | |||
| 327 | if (o2info_from_user(oir, req)) | 729 | if (o2info_from_user(oir, req)) |
| 328 | goto bail; | 730 | goto bail; |
| 329 | 731 | ||
| 330 | o2info_clear_request_filled(oir); | 732 | o2info_clear_request_filled(&oir); |
| 331 | 733 | ||
| 332 | if (o2info_to_user(oir, req)) | 734 | if (o2info_to_user(oir, req)) |
| 333 | goto bail; | 735 | goto bail; |
| @@ -335,7 +737,7 @@ int ocfs2_info_handle_unknown(struct inode *inode, | |||
| 335 | status = 0; | 737 | status = 0; |
| 336 | bail: | 738 | bail: |
| 337 | if (status) | 739 | if (status) |
| 338 | o2info_set_request_error(oir, req); | 740 | o2info_set_request_error(&oir, req); |
| 339 | 741 | ||
| 340 | return status; | 742 | return status; |
| 341 | } | 743 | } |
| @@ -389,6 +791,14 @@ int ocfs2_info_handle_request(struct inode *inode, | |||
| 389 | if (oir.ir_size == sizeof(struct ocfs2_info_journal_size)) | 791 | if (oir.ir_size == sizeof(struct ocfs2_info_journal_size)) |
| 390 | status = ocfs2_info_handle_journal_size(inode, req); | 792 | status = ocfs2_info_handle_journal_size(inode, req); |
| 391 | break; | 793 | break; |
| 794 | case OCFS2_INFO_FREEINODE: | ||
| 795 | if (oir.ir_size == sizeof(struct ocfs2_info_freeinode)) | ||
| 796 | status = ocfs2_info_handle_freeinode(inode, req); | ||
| 797 | break; | ||
| 798 | case OCFS2_INFO_FREEFRAG: | ||
| 799 | if (oir.ir_size == sizeof(struct ocfs2_info_freefrag)) | ||
| 800 | status = ocfs2_info_handle_freefrag(inode, req); | ||
| 801 | break; | ||
| 392 | default: | 802 | default: |
| 393 | status = ocfs2_info_handle_unknown(inode, req); | 803 | status = ocfs2_info_handle_unknown(inode, req); |
| 394 | break; | 804 | break; |
| @@ -542,6 +952,31 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
| 542 | return -EFAULT; | 952 | return -EFAULT; |
| 543 | 953 | ||
| 544 | return ocfs2_info_handle(inode, &info, 0); | 954 | return ocfs2_info_handle(inode, &info, 0); |
| 955 | case FITRIM: | ||
| 956 | { | ||
| 957 | struct super_block *sb = inode->i_sb; | ||
| 958 | struct fstrim_range range; | ||
| 959 | int ret = 0; | ||
| 960 | |||
| 961 | if (!capable(CAP_SYS_ADMIN)) | ||
| 962 | return -EPERM; | ||
| 963 | |||
| 964 | if (copy_from_user(&range, (struct fstrim_range *)arg, | ||
| 965 | sizeof(range))) | ||
| 966 | return -EFAULT; | ||
| 967 | |||
| 968 | ret = ocfs2_trim_fs(sb, &range); | ||
| 969 | if (ret < 0) | ||
| 970 | return ret; | ||
| 971 | |||
| 972 | if (copy_to_user((struct fstrim_range *)arg, &range, | ||
| 973 | sizeof(range))) | ||
| 974 | return -EFAULT; | ||
| 975 | |||
| 976 | return 0; | ||
| 977 | } | ||
| 978 | case OCFS2_IOC_MOVE_EXT: | ||
| 979 | return ocfs2_ioctl_move_extents(filp, (void __user *)arg); | ||
| 545 | default: | 980 | default: |
| 546 | return -ENOTTY; | 981 | return -ENOTTY; |
| 547 | } | 982 | } |
| @@ -569,6 +1004,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
| 569 | case OCFS2_IOC_GROUP_EXTEND: | 1004 | case OCFS2_IOC_GROUP_EXTEND: |
| 570 | case OCFS2_IOC_GROUP_ADD: | 1005 | case OCFS2_IOC_GROUP_ADD: |
| 571 | case OCFS2_IOC_GROUP_ADD64: | 1006 | case OCFS2_IOC_GROUP_ADD64: |
| 1007 | case FITRIM: | ||
| 572 | break; | 1008 | break; |
| 573 | case OCFS2_IOC_REFLINK: | 1009 | case OCFS2_IOC_REFLINK: |
| 574 | if (copy_from_user(&args, (struct reflink_arguments *)arg, | 1010 | if (copy_from_user(&args, (struct reflink_arguments *)arg, |
| @@ -584,6 +1020,8 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
| 584 | return -EFAULT; | 1020 | return -EFAULT; |
| 585 | 1021 | ||
| 586 | return ocfs2_info_handle(inode, &info, 1); | 1022 | return ocfs2_info_handle(inode, &info, 1); |
| 1023 | case OCFS2_IOC_MOVE_EXT: | ||
| 1024 | break; | ||
| 587 | default: | 1025 | default: |
| 588 | return -ENOIOCTLCMD; | 1026 | return -ENOIOCTLCMD; |
| 589 | } | 1027 | } |
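
The FITRIM case added above is driven from userspace with the generic fstrim_range structure from linux/fs.h. Below is an illustrative sketch, not part of this patch: the mount point path and the choice of trimming the entire filesystem are assumptions for the example. The handler requires CAP_SYS_ADMIN and returns the trimmed byte count in range.len.

/* Illustrative userspace sketch (not part of this patch): trim free space
 * on an ocfs2 mount through the FITRIM case added above. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/fs.h>			/* FITRIM, struct fstrim_range */

int main(void)
{
	struct fstrim_range range;
	int fd = open("/mnt/ocfs2", O_RDONLY);	/* assumed mount point */

	if (fd < 0)
		return 1;

	memset(&range, 0, sizeof(range));
	range.len = (unsigned long long)-1;	/* trim the whole filesystem */
	range.minlen = 0;			/* no minimum free-extent length */

	if (ioctl(fd, FITRIM, &range) < 0)	/* needs CAP_SYS_ADMIN */
		perror("FITRIM");
	else
		printf("trimmed %llu bytes\n", (unsigned long long)range.len);
	return 0;
}
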
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c new file mode 100644 index 000000000000..4c5488468c14 --- /dev/null +++ b/fs/ocfs2/move_extents.c | |||
| @@ -0,0 +1,1153 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * move_extents.c | ||
| 5 | * | ||
| 6 | * Copyright (C) 2011 Oracle. All rights reserved. | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or | ||
| 9 | * modify it under the terms of the GNU General Public | ||
| 10 | * License version 2 as published by the Free Software Foundation. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 15 | * General Public License for more details. | ||
| 16 | */ | ||
| 17 | #include <linux/fs.h> | ||
| 18 | #include <linux/types.h> | ||
| 19 | #include <linux/mount.h> | ||
| 20 | #include <linux/swap.h> | ||
| 21 | |||
| 22 | #include <cluster/masklog.h> | ||
| 23 | |||
| 24 | #include "ocfs2.h" | ||
| 25 | #include "ocfs2_ioctl.h" | ||
| 26 | |||
| 27 | #include "alloc.h" | ||
| 28 | #include "aops.h" | ||
| 29 | #include "dlmglue.h" | ||
| 30 | #include "extent_map.h" | ||
| 31 | #include "inode.h" | ||
| 32 | #include "journal.h" | ||
| 33 | #include "suballoc.h" | ||
| 34 | #include "uptodate.h" | ||
| 35 | #include "super.h" | ||
| 36 | #include "dir.h" | ||
| 37 | #include "buffer_head_io.h" | ||
| 38 | #include "sysfile.h" | ||
| 39 | #include "suballoc.h" | ||
| 40 | #include "refcounttree.h" | ||
| 41 | #include "move_extents.h" | ||
| 42 | |||
| 43 | struct ocfs2_move_extents_context { | ||
| 44 | struct inode *inode; | ||
| 45 | struct file *file; | ||
| 46 | int auto_defrag; | ||
| 47 | int partial; | ||
| 48 | int credits; | ||
| 49 | u32 new_phys_cpos; | ||
| 50 | u32 clusters_moved; | ||
| 51 | u64 refcount_loc; | ||
| 52 | struct ocfs2_move_extents *range; | ||
| 53 | struct ocfs2_extent_tree et; | ||
| 54 | struct ocfs2_alloc_context *meta_ac; | ||
| 55 | struct ocfs2_alloc_context *data_ac; | ||
| 56 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
| 57 | }; | ||
| 58 | |||
| 59 | static int __ocfs2_move_extent(handle_t *handle, | ||
| 60 | struct ocfs2_move_extents_context *context, | ||
| 61 | u32 cpos, u32 len, u32 p_cpos, u32 new_p_cpos, | ||
| 62 | int ext_flags) | ||
| 63 | { | ||
| 64 | int ret = 0, index; | ||
| 65 | struct inode *inode = context->inode; | ||
| 66 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 67 | struct ocfs2_extent_rec *rec, replace_rec; | ||
| 68 | struct ocfs2_path *path = NULL; | ||
| 69 | struct ocfs2_extent_list *el; | ||
| 70 | u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci); | ||
| 71 | u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos); | ||
| 72 | |||
| 73 | ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos, | ||
| 74 | p_cpos, new_p_cpos, len); | ||
| 75 | if (ret) { | ||
| 76 | mlog_errno(ret); | ||
| 77 | goto out; | ||
| 78 | } | ||
| 79 | |||
| 80 | memset(&replace_rec, 0, sizeof(replace_rec)); | ||
| 81 | replace_rec.e_cpos = cpu_to_le32(cpos); | ||
| 82 | replace_rec.e_leaf_clusters = cpu_to_le16(len); | ||
| 83 | replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb, | ||
| 84 | new_p_cpos)); | ||
| 85 | |||
| 86 | path = ocfs2_new_path_from_et(&context->et); | ||
| 87 | if (!path) { | ||
| 88 | ret = -ENOMEM; | ||
| 89 | mlog_errno(ret); | ||
| 90 | goto out; | ||
| 91 | } | ||
| 92 | |||
| 93 | ret = ocfs2_find_path(INODE_CACHE(inode), path, cpos); | ||
| 94 | if (ret) { | ||
| 95 | mlog_errno(ret); | ||
| 96 | goto out; | ||
| 97 | } | ||
| 98 | |||
| 99 | el = path_leaf_el(path); | ||
| 100 | |||
| 101 | index = ocfs2_search_extent_list(el, cpos); | ||
| 102 | if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { | ||
| 103 | ocfs2_error(inode->i_sb, | ||
| 104 | "Inode %llu has an extent at cpos %u which can no " | ||
| 105 | "longer be found.\n", | ||
| 106 | (unsigned long long)ino, cpos); | ||
| 107 | ret = -EROFS; | ||
| 108 | goto out; | ||
| 109 | } | ||
| 110 | |||
| 111 | rec = &el->l_recs[index]; | ||
| 112 | |||
| 113 | BUG_ON(ext_flags != rec->e_flags); | ||
| 114 | /* | ||
| 115 | * after moving/defraging to new location, the extent is not going | ||
| 116 | * to be refcounted anymore. | ||
| 117 | */ | ||
| 118 | replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED; | ||
| 119 | |||
| 120 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), | ||
| 121 | context->et.et_root_bh, | ||
| 122 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 123 | if (ret) { | ||
| 124 | mlog_errno(ret); | ||
| 125 | goto out; | ||
| 126 | } | ||
| 127 | |||
| 128 | ret = ocfs2_split_extent(handle, &context->et, path, index, | ||
| 129 | &replace_rec, context->meta_ac, | ||
| 130 | &context->dealloc); | ||
| 131 | if (ret) { | ||
| 132 | mlog_errno(ret); | ||
| 133 | goto out; | ||
| 134 | } | ||
| 135 | |||
| 136 | ocfs2_journal_dirty(handle, context->et.et_root_bh); | ||
| 137 | |||
| 138 | context->new_phys_cpos = new_p_cpos; | ||
| 139 | |||
| 140 | /* | ||
| 141 | * do we need to append the old clusters to the truncate log? | ||
| 142 | */ | ||
| 143 | if (old_blkno) { | ||
| 144 | if (ext_flags & OCFS2_EXT_REFCOUNTED) | ||
| 145 | ret = ocfs2_decrease_refcount(inode, handle, | ||
| 146 | ocfs2_blocks_to_clusters(osb->sb, | ||
| 147 | old_blkno), | ||
| 148 | len, context->meta_ac, | ||
| 149 | &context->dealloc, 1); | ||
| 150 | else | ||
| 151 | ret = ocfs2_truncate_log_append(osb, handle, | ||
| 152 | old_blkno, len); | ||
| 153 | } | ||
| 154 | |||
| 155 | out: | ||
| 156 | return ret; | ||
| 157 | } | ||
| 158 | |||
| 159 | /* | ||
| 160 | * Lock the allocators and reserve an appropriate number of bits for | ||
| 161 | * metadata blocks and data clusters. | ||
| 162 | * | ||
| 163 | * In some cases we don't need to reserve clusters; just pass data_ac | ||
| 164 | * as NULL. | ||
| 165 | */ | ||
| 166 | static int ocfs2_lock_allocators_move_extents(struct inode *inode, | ||
| 167 | struct ocfs2_extent_tree *et, | ||
| 168 | u32 clusters_to_move, | ||
| 169 | u32 extents_to_split, | ||
| 170 | struct ocfs2_alloc_context **meta_ac, | ||
| 171 | struct ocfs2_alloc_context **data_ac, | ||
| 172 | int extra_blocks, | ||
| 173 | int *credits) | ||
| 174 | { | ||
| 175 | int ret, num_free_extents; | ||
| 176 | unsigned int max_recs_needed = 2 * extents_to_split + clusters_to_move; | ||
| 177 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 178 | |||
| 179 | num_free_extents = ocfs2_num_free_extents(osb, et); | ||
| 180 | if (num_free_extents < 0) { | ||
| 181 | ret = num_free_extents; | ||
| 182 | mlog_errno(ret); | ||
| 183 | goto out; | ||
| 184 | } | ||
| 185 | |||
| 186 | if (!num_free_extents || | ||
| 187 | (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) | ||
| 188 | extra_blocks += ocfs2_extend_meta_needed(et->et_root_el); | ||
| 189 | |||
| 190 | ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, meta_ac); | ||
| 191 | if (ret) { | ||
| 192 | mlog_errno(ret); | ||
| 193 | goto out; | ||
| 194 | } | ||
| 195 | |||
| 196 | if (data_ac) { | ||
| 197 | ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac); | ||
| 198 | if (ret) { | ||
| 199 | mlog_errno(ret); | ||
| 200 | goto out; | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | *credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el, | ||
| 205 | clusters_to_move + 2); | ||
| 206 | |||
| 207 | mlog(0, "reserve metadata_blocks: %d, data_clusters: %u, credits: %d\n", | ||
| 208 | extra_blocks, clusters_to_move, *credits); | ||
| 209 | out: | ||
| 210 | if (ret) { | ||
| 211 | if (*meta_ac) { | ||
| 212 | ocfs2_free_alloc_context(*meta_ac); | ||
| 213 | *meta_ac = NULL; | ||
| 214 | } | ||
| 215 | } | ||
| 216 | |||
| 217 | return ret; | ||
| 218 | } | ||
| 219 | |||
| 220 | /* | ||
| 221 | * Use one journal handle to guarantee data consistency in case a | ||
| 222 | * crash happens anywhere. | ||
| 223 | * | ||
| 224 | * XXX: defrag can end up moving only part of the requested extent | ||
| 225 | * when not enough contiguous clusters can be found in the allocator. | ||
| 226 | */ | ||
| 227 | static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, | ||
| 228 | u32 cpos, u32 phys_cpos, u32 *len, int ext_flags) | ||
| 229 | { | ||
| 230 | int ret, credits = 0, extra_blocks = 0, partial = context->partial; | ||
| 231 | handle_t *handle; | ||
| 232 | struct inode *inode = context->inode; | ||
| 233 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 234 | struct inode *tl_inode = osb->osb_tl_inode; | ||
| 235 | struct ocfs2_refcount_tree *ref_tree = NULL; | ||
| 236 | u32 new_phys_cpos, new_len; | ||
| 237 | u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
| 238 | |||
| 239 | if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) { | ||
| 240 | |||
| 241 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & | ||
| 242 | OCFS2_HAS_REFCOUNT_FL)); | ||
| 243 | |||
| 244 | BUG_ON(!context->refcount_loc); | ||
| 245 | |||
| 246 | ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, | ||
| 247 | &ref_tree, NULL); | ||
| 248 | if (ret) { | ||
| 249 | mlog_errno(ret); | ||
| 250 | return ret; | ||
| 251 | } | ||
| 252 | |||
| 253 | ret = ocfs2_prepare_refcount_change_for_del(inode, | ||
| 254 | context->refcount_loc, | ||
| 255 | phys_blkno, | ||
| 256 | *len, | ||
| 257 | &credits, | ||
| 258 | &extra_blocks); | ||
| 259 | if (ret) { | ||
| 260 | mlog_errno(ret); | ||
| 261 | goto out; | ||
| 262 | } | ||
| 263 | } | ||
| 264 | |||
| 265 | ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1, | ||
| 266 | &context->meta_ac, | ||
| 267 | &context->data_ac, | ||
| 268 | extra_blocks, &credits); | ||
| 269 | if (ret) { | ||
| 270 | mlog_errno(ret); | ||
| 271 | goto out; | ||
| 272 | } | ||
| 273 | |||
| 274 | /* | ||
| 275 | * should we be using the allocation reservation strategy here? | ||
| 276 | * | ||
| 277 | * if (context->data_ac) | ||
| 278 | * context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv; | ||
| 279 | */ | ||
| 280 | |||
| 281 | mutex_lock(&tl_inode->i_mutex); | ||
| 282 | |||
| 283 | if (ocfs2_truncate_log_needs_flush(osb)) { | ||
| 284 | ret = __ocfs2_flush_truncate_log(osb); | ||
| 285 | if (ret < 0) { | ||
| 286 | mlog_errno(ret); | ||
| 287 | goto out_unlock_mutex; | ||
| 288 | } | ||
| 289 | } | ||
| 290 | |||
| 291 | handle = ocfs2_start_trans(osb, credits); | ||
| 292 | if (IS_ERR(handle)) { | ||
| 293 | ret = PTR_ERR(handle); | ||
| 294 | mlog_errno(ret); | ||
| 295 | goto out_unlock_mutex; | ||
| 296 | } | ||
| 297 | |||
| 298 | ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, *len, | ||
| 299 | &new_phys_cpos, &new_len); | ||
| 300 | if (ret) { | ||
| 301 | mlog_errno(ret); | ||
| 302 | goto out_commit; | ||
| 303 | } | ||
| 304 | |||
| 305 | /* | ||
| 306 | * allowing partial extent moving is a trade-off: it makes the whole | ||
| 307 | * defragmentation less likely to fail, but on the other hand it may | ||
| 308 | * leave the fs even more fragmented after moving, so let userspace | ||
| 309 | * make the decision here. | ||
| 310 | */ | ||
| 311 | if (new_len != *len) { | ||
| 312 | mlog(0, "len_claimed: %u, len: %u\n", new_len, *len); | ||
| 313 | if (!partial) { | ||
| 314 | context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; | ||
| 315 | ret = -ENOSPC; | ||
| 316 | goto out_commit; | ||
| 317 | } | ||
| 318 | } | ||
| 319 | |||
| 320 | mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos, | ||
| 321 | phys_cpos, new_phys_cpos); | ||
| 322 | |||
| 323 | ret = __ocfs2_move_extent(handle, context, cpos, new_len, phys_cpos, | ||
| 324 | new_phys_cpos, ext_flags); | ||
| 325 | if (ret) | ||
| 326 | mlog_errno(ret); | ||
| 327 | |||
| 328 | if (partial && (new_len != *len)) | ||
| 329 | *len = new_len; | ||
| 330 | |||
| 331 | /* | ||
| 332 | * Here we should write the new page out first if we are | ||
| 333 | * in write-back mode. | ||
| 334 | */ | ||
| 335 | ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, *len); | ||
| 336 | if (ret) | ||
| 337 | mlog_errno(ret); | ||
| 338 | |||
| 339 | out_commit: | ||
| 340 | ocfs2_commit_trans(osb, handle); | ||
| 341 | |||
| 342 | out_unlock_mutex: | ||
| 343 | mutex_unlock(&tl_inode->i_mutex); | ||
| 344 | |||
| 345 | if (context->data_ac) { | ||
| 346 | ocfs2_free_alloc_context(context->data_ac); | ||
| 347 | context->data_ac = NULL; | ||
| 348 | } | ||
| 349 | |||
| 350 | if (context->meta_ac) { | ||
| 351 | ocfs2_free_alloc_context(context->meta_ac); | ||
| 352 | context->meta_ac = NULL; | ||
| 353 | } | ||
| 354 | |||
| 355 | out: | ||
| 356 | if (ref_tree) | ||
| 357 | ocfs2_unlock_refcount_tree(osb, ref_tree, 1); | ||
| 358 | |||
| 359 | return ret; | ||
| 360 | } | ||
| 361 | |||
| 362 | /* | ||
| 363 | * find the victim alloc group, where #blkno fits. | ||
| 364 | */ | ||
| 365 | static int ocfs2_find_victim_alloc_group(struct inode *inode, | ||
| 366 | u64 vict_blkno, | ||
| 367 | int type, int slot, | ||
| 368 | int *vict_bit, | ||
| 369 | struct buffer_head **ret_bh) | ||
| 370 | { | ||
| 371 | int ret, i, blocks_per_unit = 1; | ||
| 372 | u64 blkno; | ||
| 373 | char namebuf[40]; | ||
| 374 | |||
| 375 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 376 | struct buffer_head *ac_bh = NULL, *gd_bh = NULL; | ||
| 377 | struct ocfs2_chain_list *cl; | ||
| 378 | struct ocfs2_chain_rec *rec; | ||
| 379 | struct ocfs2_dinode *ac_dinode; | ||
| 380 | struct ocfs2_group_desc *bg; | ||
| 381 | |||
| 382 | ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot); | ||
| 383 | ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf, | ||
| 384 | strlen(namebuf), &blkno); | ||
| 385 | if (ret) { | ||
| 386 | ret = -ENOENT; | ||
| 387 | goto out; | ||
| 388 | } | ||
| 389 | |||
| 390 | ret = ocfs2_read_blocks_sync(osb, blkno, 1, &ac_bh); | ||
| 391 | if (ret) { | ||
| 392 | mlog_errno(ret); | ||
| 393 | goto out; | ||
| 394 | } | ||
| 395 | |||
| 396 | ac_dinode = (struct ocfs2_dinode *)ac_bh->b_data; | ||
| 397 | cl = &(ac_dinode->id2.i_chain); | ||
| 398 | rec = &(cl->cl_recs[0]); | ||
| 399 | |||
| 400 | if (type == GLOBAL_BITMAP_SYSTEM_INODE) | ||
| 401 | blocks_per_unit <<= (osb->s_clustersize_bits - | ||
| 402 | inode->i_sb->s_blocksize_bits); | ||
| 403 | /* | ||
| 404 | * reject 'vict_blkno' if it is out of the valid range. | ||
| 405 | */ | ||
| 406 | if ((vict_blkno < le64_to_cpu(rec->c_blkno)) || | ||
| 407 | (vict_blkno >= (le32_to_cpu(ac_dinode->id1.bitmap1.i_total) * | ||
| 408 | blocks_per_unit))) { | ||
| 409 | ret = -EINVAL; | ||
| 410 | goto out; | ||
| 411 | } | ||
| 412 | |||
| 413 | for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i++) { | ||
| 414 | |||
| 415 | rec = &(cl->cl_recs[i]); | ||
| 416 | if (!rec) | ||
| 417 | continue; | ||
| 418 | |||
| 419 | bg = NULL; | ||
| 420 | |||
| 421 | do { | ||
| 422 | if (!bg) | ||
| 423 | blkno = le64_to_cpu(rec->c_blkno); | ||
| 424 | else | ||
| 425 | blkno = le64_to_cpu(bg->bg_next_group); | ||
| 426 | |||
| 427 | if (gd_bh) { | ||
| 428 | brelse(gd_bh); | ||
| 429 | gd_bh = NULL; | ||
| 430 | } | ||
| 431 | |||
| 432 | ret = ocfs2_read_blocks_sync(osb, blkno, 1, &gd_bh); | ||
| 433 | if (ret) { | ||
| 434 | mlog_errno(ret); | ||
| 435 | goto out; | ||
| 436 | } | ||
| 437 | |||
| 438 | bg = (struct ocfs2_group_desc *)gd_bh->b_data; | ||
| 439 | |||
| 440 | if (vict_blkno < (le64_to_cpu(bg->bg_blkno) + | ||
| 441 | le16_to_cpu(bg->bg_bits))) { | ||
| 442 | |||
| 443 | *ret_bh = gd_bh; | ||
| 444 | *vict_bit = (vict_blkno - blkno) / | ||
| 445 | blocks_per_unit; | ||
| 446 | mlog(0, "find the victim group: #%llu, " | ||
| 447 | "total_bits: %u, vict_bit: %u\n", | ||
| 448 | blkno, le16_to_cpu(bg->bg_bits), | ||
| 449 | *vict_bit); | ||
| 450 | goto out; | ||
| 451 | } | ||
| 452 | |||
| 453 | } while (le64_to_cpu(bg->bg_next_group)); | ||
| 454 | } | ||
| 455 | |||
| 456 | ret = -EINVAL; | ||
| 457 | out: | ||
| 458 | brelse(ac_bh); | ||
| 459 | |||
| 460 | /* | ||
| 461 | * caller has to release the gd_bh properly. | ||
| 462 | */ | ||
| 463 | return ret; | ||
| 464 | } | ||
| 465 | |||
| 466 | /* | ||
| 467 | * XXX: helper to validate and adjust moving goal. | ||
| 468 | */ | ||
| 469 | static int ocfs2_validate_and_adjust_move_goal(struct inode *inode, | ||
| 470 | struct ocfs2_move_extents *range) | ||
| 471 | { | ||
| 472 | int ret, goal_bit = 0; | ||
| 473 | |||
| 474 | struct buffer_head *gd_bh = NULL; | ||
| 475 | struct ocfs2_group_desc *bg; | ||
| 476 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 477 | int c_to_b = 1 << (osb->s_clustersize_bits - | ||
| 478 | inode->i_sb->s_blocksize_bits); | ||
| 479 | |||
| 480 | /* | ||
| 481 | * validate that the goal sits within the global_bitmap, and return | ||
| 482 | * the victim group desc. | ||
| 483 | */ | ||
| 484 | ret = ocfs2_find_victim_alloc_group(inode, range->me_goal, | ||
| 485 | GLOBAL_BITMAP_SYSTEM_INODE, | ||
| 486 | OCFS2_INVALID_SLOT, | ||
| 487 | &goal_bit, &gd_bh); | ||
| 488 | if (ret) | ||
| 489 | goto out; | ||
| 490 | |||
| 491 | bg = (struct ocfs2_group_desc *)gd_bh->b_data; | ||
| 492 | |||
| 493 | /* | ||
| 494 | * make the goal cluster aligned. | ||
| 495 | */ | ||
| 496 | if (range->me_goal % c_to_b) | ||
| 497 | range->me_goal = range->me_goal / c_to_b * c_to_b; | ||
| 498 | |||
| 499 | /* | ||
| 500 | * the moving goal is not allowed to start at a group desc block (#0 blk), | ||
| 501 | * so compromise to the next cluster instead. | ||
| 502 | */ | ||
| 503 | if (range->me_goal == le64_to_cpu(bg->bg_blkno)) | ||
| 504 | range->me_goal += c_to_b; | ||
| 505 | |||
| 506 | /* | ||
| 507 | * the movement is not allowed to cross group boundaries. | ||
| 508 | */ | ||
| 509 | if ((le16_to_cpu(bg->bg_bits) - goal_bit) * osb->s_clustersize < | ||
| 510 | range->me_len) { | ||
| 511 | ret = -EINVAL; | ||
| 512 | goto out; | ||
| 513 | } | ||
| 514 | /* | ||
| 515 | * more exact validations/adjustments will be performed later during | ||
| 516 | * moving operation for each extent range. | ||
| 517 | */ | ||
| 518 | mlog(0, "extents get ready to be moved to #%llu block\n", | ||
| 519 | range->me_goal); | ||
| 520 | |||
| 521 | out: | ||
| 522 | brelse(gd_bh); | ||
| 523 | |||
| 524 | return ret; | ||
| 525 | } | ||
| 526 | |||
| 527 | static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh, | ||
| 528 | int *goal_bit, u32 move_len, u32 max_hop, | ||
| 529 | u32 *phys_cpos) | ||
| 530 | { | ||
| 531 | int i, used, last_free_bits = 0, base_bit = *goal_bit; | ||
| 532 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; | ||
| 533 | u32 base_cpos = ocfs2_blocks_to_clusters(inode->i_sb, | ||
| 534 | le64_to_cpu(gd->bg_blkno)); | ||
| 535 | |||
| 536 | for (i = base_bit; i < le16_to_cpu(gd->bg_bits); i++) { | ||
| 537 | |||
| 538 | used = ocfs2_test_bit(i, (unsigned long *)gd->bg_bitmap); | ||
| 539 | if (used) { | ||
| 540 | /* | ||
| 541 | * we even tried searching the free chunk by jumping | ||
| 542 | * a 'max_hop' distance, but still failed. | ||
| 543 | */ | ||
| 544 | if ((i - base_bit) > max_hop) { | ||
| 545 | *phys_cpos = 0; | ||
| 546 | break; | ||
| 547 | } | ||
| 548 | |||
| 549 | if (last_free_bits) | ||
| 550 | last_free_bits = 0; | ||
| 551 | |||
| 552 | continue; | ||
| 553 | } else | ||
| 554 | last_free_bits++; | ||
| 555 | |||
| 556 | if (last_free_bits == move_len) { | ||
| 557 | *goal_bit = i; | ||
| 558 | *phys_cpos = base_cpos + i; | ||
| 559 | break; | ||
| 560 | } | ||
| 561 | } | ||
| 562 | |||
| 563 | mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos); | ||
| 564 | } | ||
| 565 | |||
| 566 | static int ocfs2_alloc_dinode_update_counts(struct inode *inode, | ||
| 567 | handle_t *handle, | ||
| 568 | struct buffer_head *di_bh, | ||
| 569 | u32 num_bits, | ||
| 570 | u16 chain) | ||
| 571 | { | ||
| 572 | int ret; | ||
| 573 | u32 tmp_used; | ||
| 574 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; | ||
| 575 | struct ocfs2_chain_list *cl = | ||
| 576 | (struct ocfs2_chain_list *) &di->id2.i_chain; | ||
| 577 | |||
| 578 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, | ||
| 579 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 580 | if (ret < 0) { | ||
| 581 | mlog_errno(ret); | ||
| 582 | goto out; | ||
| 583 | } | ||
| 584 | |||
| 585 | tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); | ||
| 586 | di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); | ||
| 587 | le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); | ||
| 588 | ocfs2_journal_dirty(handle, di_bh); | ||
| 589 | |||
| 590 | out: | ||
| 591 | return ret; | ||
| 592 | } | ||
| 593 | |||
| 594 | static inline int ocfs2_block_group_set_bits(handle_t *handle, | ||
| 595 | struct inode *alloc_inode, | ||
| 596 | struct ocfs2_group_desc *bg, | ||
| 597 | struct buffer_head *group_bh, | ||
| 598 | unsigned int bit_off, | ||
| 599 | unsigned int num_bits) | ||
| 600 | { | ||
| 601 | int status; | ||
| 602 | void *bitmap = bg->bg_bitmap; | ||
| 603 | int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; | ||
| 604 | |||
| 605 | /* All callers get the descriptor via | ||
| 606 | * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ | ||
| 607 | BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); | ||
| 608 | BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); | ||
| 609 | |||
| 610 | mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, | ||
| 611 | num_bits); | ||
| 612 | |||
| 613 | if (ocfs2_is_cluster_bitmap(alloc_inode)) | ||
| 614 | journal_type = OCFS2_JOURNAL_ACCESS_UNDO; | ||
| 615 | |||
| 616 | status = ocfs2_journal_access_gd(handle, | ||
| 617 | INODE_CACHE(alloc_inode), | ||
| 618 | group_bh, | ||
| 619 | journal_type); | ||
| 620 | if (status < 0) { | ||
| 621 | mlog_errno(status); | ||
| 622 | goto bail; | ||
| 623 | } | ||
| 624 | |||
| 625 | le16_add_cpu(&bg->bg_free_bits_count, -num_bits); | ||
| 626 | if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { | ||
| 627 | ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" | ||
| 628 | " count %u but claims %u are freed. num_bits %d", | ||
| 629 | (unsigned long long)le64_to_cpu(bg->bg_blkno), | ||
| 630 | le16_to_cpu(bg->bg_bits), | ||
| 631 | le16_to_cpu(bg->bg_free_bits_count), num_bits); | ||
| 632 | return -EROFS; | ||
| 633 | } | ||
| 634 | while (num_bits--) | ||
| 635 | ocfs2_set_bit(bit_off++, bitmap); | ||
| 636 | |||
| 637 | ocfs2_journal_dirty(handle, group_bh); | ||
| 638 | |||
| 639 | bail: | ||
| 640 | return status; | ||
| 641 | } | ||
| 642 | |||
| 643 | static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, | ||
| 644 | u32 cpos, u32 phys_cpos, u32 *new_phys_cpos, | ||
| 645 | u32 len, int ext_flags) | ||
| 646 | { | ||
| 647 | int ret, credits = 0, extra_blocks = 0, goal_bit = 0; | ||
| 648 | handle_t *handle; | ||
| 649 | struct inode *inode = context->inode; | ||
| 650 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 651 | struct inode *tl_inode = osb->osb_tl_inode; | ||
| 652 | struct inode *gb_inode = NULL; | ||
| 653 | struct buffer_head *gb_bh = NULL; | ||
| 654 | struct buffer_head *gd_bh = NULL; | ||
| 655 | struct ocfs2_group_desc *gd; | ||
| 656 | struct ocfs2_refcount_tree *ref_tree = NULL; | ||
| 657 | u32 move_max_hop = ocfs2_blocks_to_clusters(inode->i_sb, | ||
| 658 | context->range->me_threshold); | ||
| 659 | u64 phys_blkno, new_phys_blkno; | ||
| 660 | |||
| 661 | phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
| 662 | |||
| 663 | if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) { | ||
| 664 | |||
| 665 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & | ||
| 666 | OCFS2_HAS_REFCOUNT_FL)); | ||
| 667 | |||
| 668 | BUG_ON(!context->refcount_loc); | ||
| 669 | |||
| 670 | ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1, | ||
| 671 | &ref_tree, NULL); | ||
| 672 | if (ret) { | ||
| 673 | mlog_errno(ret); | ||
| 674 | return ret; | ||
| 675 | } | ||
| 676 | |||
| 677 | ret = ocfs2_prepare_refcount_change_for_del(inode, | ||
| 678 | context->refcount_loc, | ||
| 679 | phys_blkno, | ||
| 680 | len, | ||
| 681 | &credits, | ||
| 682 | &extra_blocks); | ||
| 683 | if (ret) { | ||
| 684 | mlog_errno(ret); | ||
| 685 | goto out; | ||
| 686 | } | ||
| 687 | } | ||
| 688 | |||
| 689 | ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1, | ||
| 690 | &context->meta_ac, | ||
| 691 | NULL, extra_blocks, &credits); | ||
| 692 | if (ret) { | ||
| 693 | mlog_errno(ret); | ||
| 694 | goto out; | ||
| 695 | } | ||
| 696 | |||
| 697 | /* | ||
| 698 | * need to count 2 extra credits for global_bitmap inode and | ||
| 699 | * group descriptor. | ||
| 700 | */ | ||
| 701 | credits += OCFS2_INODE_UPDATE_CREDITS + 1; | ||
| 702 | |||
| 703 | /* | ||
| 704 | * ocfs2_move_extent() doesn't reserve any clusters in the | ||
| 705 | * lock_allocators() logic, but we still need to lock the global_bitmap. | ||
| 706 | */ | ||
| 707 | gb_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, | ||
| 708 | OCFS2_INVALID_SLOT); | ||
| 709 | if (!gb_inode) { | ||
| 710 | mlog(ML_ERROR, "unable to get global_bitmap inode\n"); | ||
| 711 | ret = -EIO; | ||
| 712 | goto out; | ||
| 713 | } | ||
| 714 | |||
| 715 | mutex_lock(&gb_inode->i_mutex); | ||
| 716 | |||
| 717 | ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1); | ||
| 718 | if (ret) { | ||
| 719 | mlog_errno(ret); | ||
| 720 | goto out_unlock_gb_mutex; | ||
| 721 | } | ||
| 722 | |||
| 723 | mutex_lock(&tl_inode->i_mutex); | ||
| 724 | |||
| 725 | handle = ocfs2_start_trans(osb, credits); | ||
| 726 | if (IS_ERR(handle)) { | ||
| 727 | ret = PTR_ERR(handle); | ||
| 728 | mlog_errno(ret); | ||
| 729 | goto out_unlock_tl_inode; | ||
| 730 | } | ||
| 731 | |||
| 732 | new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos); | ||
| 733 | ret = ocfs2_find_victim_alloc_group(inode, new_phys_blkno, | ||
| 734 | GLOBAL_BITMAP_SYSTEM_INODE, | ||
| 735 | OCFS2_INVALID_SLOT, | ||
| 736 | &goal_bit, &gd_bh); | ||
| 737 | if (ret) { | ||
| 738 | mlog_errno(ret); | ||
| 739 | goto out_commit; | ||
| 740 | } | ||
| 741 | |||
| 742 | /* | ||
| 743 | * probe the victim cluster group to find a proper | ||
| 744 | * region to fit the wanted movement; it will even perform | ||
| 745 | * a best-effort attempt by compromising to a threshold | ||
| 746 | * around the goal. | ||
| 747 | */ | ||
| 748 | ocfs2_probe_alloc_group(inode, gd_bh, &goal_bit, len, move_max_hop, | ||
| 749 | new_phys_cpos); | ||
| 750 | if (!new_phys_cpos) { | ||
| 751 | ret = -ENOSPC; | ||
| 752 | goto out_commit; | ||
| 753 | } | ||
| 754 | |||
| 755 | ret = __ocfs2_move_extent(handle, context, cpos, len, phys_cpos, | ||
| 756 | *new_phys_cpos, ext_flags); | ||
| 757 | if (ret) { | ||
| 758 | mlog_errno(ret); | ||
| 759 | goto out_commit; | ||
| 760 | } | ||
| 761 | |||
| 762 | gd = (struct ocfs2_group_desc *)gd_bh->b_data; | ||
| 763 | ret = ocfs2_alloc_dinode_update_counts(gb_inode, handle, gb_bh, len, | ||
| 764 | le16_to_cpu(gd->bg_chain)); | ||
| 765 | if (ret) { | ||
| 766 | mlog_errno(ret); | ||
| 767 | goto out_commit; | ||
| 768 | } | ||
| 769 | |||
| 770 | ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh, | ||
| 771 | goal_bit, len); | ||
| 772 | if (ret) | ||
| 773 | mlog_errno(ret); | ||
| 774 | |||
| 775 | /* | ||
| 776 | * Here we should write the new page out first if we are | ||
| 777 | * in write-back mode. | ||
| 778 | */ | ||
| 779 | ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, len); | ||
| 780 | if (ret) | ||
| 781 | mlog_errno(ret); | ||
| 782 | |||
| 783 | out_commit: | ||
| 784 | ocfs2_commit_trans(osb, handle); | ||
| 785 | brelse(gd_bh); | ||
| 786 | |||
| 787 | out_unlock_tl_inode: | ||
| 788 | mutex_unlock(&tl_inode->i_mutex); | ||
| 789 | |||
| 790 | ocfs2_inode_unlock(gb_inode, 1); | ||
| 791 | out_unlock_gb_mutex: | ||
| 792 | mutex_unlock(&gb_inode->i_mutex); | ||
| 793 | brelse(gb_bh); | ||
| 794 | iput(gb_inode); | ||
| 795 | |||
| 796 | out: | ||
| 797 | if (context->meta_ac) { | ||
| 798 | ocfs2_free_alloc_context(context->meta_ac); | ||
| 799 | context->meta_ac = NULL; | ||
| 800 | } | ||
| 801 | |||
| 802 | if (ref_tree) | ||
| 803 | ocfs2_unlock_refcount_tree(osb, ref_tree, 1); | ||
| 804 | |||
| 805 | return ret; | ||
| 806 | } | ||
| 807 | |||
| 808 | /* | ||
| 809 | * Helper to calculate the defragging length in one run according to the threshold. | ||
| 810 | */ | ||
| 811 | static void ocfs2_calc_extent_defrag_len(u32 *alloc_size, u32 *len_defraged, | ||
| 812 | u32 threshold, int *skip) | ||
| 813 | { | ||
| 814 | if ((*alloc_size + *len_defraged) < threshold) { | ||
| 815 | /* | ||
| 816 | * proceed with defragmentation until we meet the threshold | ||
| 817 | */ | ||
| 818 | *len_defraged += *alloc_size; | ||
| 819 | } else if (*len_defraged == 0) { | ||
| 820 | /* | ||
| 821 | * XXX: skip a large extent. | ||
| 822 | */ | ||
| 823 | *skip = 1; | ||
| 824 | } else { | ||
| 825 | /* | ||
| 826 | * split this extent so that it coalesces with the former | ||
| 827 | * pieces to reach the threshold. | ||
| 828 | * | ||
| 829 | * we're done with one cycle of defragmentation at a size | ||
| 830 | * of 'thresh'; resetting 'len_defraged' forces a new | ||
| 831 | * round of defragmentation. | ||
| 832 | */ | ||
| 833 | *alloc_size = threshold - *len_defraged; | ||
| 834 | *len_defraged = 0; | ||
| 835 | } | ||
| 836 | } | ||
| 837 | |||
| 838 | static int __ocfs2_move_extents_range(struct buffer_head *di_bh, | ||
| 839 | struct ocfs2_move_extents_context *context) | ||
| 840 | { | ||
| 841 | int ret = 0, flags, do_defrag, skip = 0; | ||
| 842 | u32 cpos, phys_cpos, move_start, len_to_move, alloc_size; | ||
| 843 | u32 len_defraged = 0, defrag_thresh = 0, new_phys_cpos = 0; | ||
| 844 | |||
| 845 | struct inode *inode = context->inode; | ||
| 846 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 847 | struct ocfs2_move_extents *range = context->range; | ||
| 848 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 849 | |||
| 850 | if ((inode->i_size == 0) || (range->me_len == 0)) | ||
| 851 | return 0; | ||
| 852 | |||
| 853 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | ||
| 854 | return 0; | ||
| 855 | |||
| 856 | context->refcount_loc = le64_to_cpu(di->i_refcount_loc); | ||
| 857 | |||
| 858 | ocfs2_init_dinode_extent_tree(&context->et, INODE_CACHE(inode), di_bh); | ||
| 859 | ocfs2_init_dealloc_ctxt(&context->dealloc); | ||
| 860 | |||
| 861 | /* | ||
| 862 | * TO-DO XXX: | ||
| 863 | * | ||
| 864 | * - xattr extents. | ||
| 865 | */ | ||
| 866 | |||
| 867 | do_defrag = context->auto_defrag; | ||
| 868 | |||
| 869 | /* | ||
| 870 | * extent moving happens in units of clusters; for the sake | ||
| 871 | * of simplicity, we may ignore the two clusters in which | ||
| 872 | * 'byte_start' and 'byte_start + len' fall. | ||
| 873 | */ | ||
| 874 | move_start = ocfs2_clusters_for_bytes(osb->sb, range->me_start); | ||
| 875 | len_to_move = (range->me_start + range->me_len) >> | ||
| 876 | osb->s_clustersize_bits; | ||
| 877 | if (len_to_move >= move_start) | ||
| 878 | len_to_move -= move_start; | ||
| 879 | else | ||
| 880 | len_to_move = 0; | ||
| 881 | |||
| 882 | if (do_defrag) { | ||
| 883 | defrag_thresh = range->me_threshold >> osb->s_clustersize_bits; | ||
| 884 | if (defrag_thresh <= 1) | ||
| 885 | goto done; | ||
| 886 | } else | ||
| 887 | new_phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, | ||
| 888 | range->me_goal); | ||
| 889 | |||
| 890 | mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u, " | ||
| 891 | "thresh: %u\n", | ||
| 892 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
| 893 | (unsigned long long)range->me_start, | ||
| 894 | (unsigned long long)range->me_len, | ||
| 895 | move_start, len_to_move, defrag_thresh); | ||
| 896 | |||
| 897 | cpos = move_start; | ||
| 898 | while (len_to_move) { | ||
| 899 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &alloc_size, | ||
| 900 | &flags); | ||
| 901 | if (ret) { | ||
| 902 | mlog_errno(ret); | ||
| 903 | goto out; | ||
| 904 | } | ||
| 905 | |||
| 906 | if (alloc_size > len_to_move) | ||
| 907 | alloc_size = len_to_move; | ||
| 908 | |||
| 909 | /* | ||
| 910 | * XXX: how to deal with a hole: | ||
| 911 | * | ||
| 912 | * - skip the hole of course | ||
| 913 | * - force a new defragmentation | ||
| 914 | */ | ||
| 915 | if (!phys_cpos) { | ||
| 916 | if (do_defrag) | ||
| 917 | len_defraged = 0; | ||
| 918 | |||
| 919 | goto next; | ||
| 920 | } | ||
| 921 | |||
| 922 | if (do_defrag) { | ||
| 923 | ocfs2_calc_extent_defrag_len(&alloc_size, &len_defraged, | ||
| 924 | defrag_thresh, &skip); | ||
| 925 | /* | ||
| 926 | * skip large extents | ||
| 927 | */ | ||
| 928 | if (skip) { | ||
| 929 | skip = 0; | ||
| 930 | goto next; | ||
| 931 | } | ||
| 932 | |||
| 933 | mlog(0, "#Defrag: cpos: %u, phys_cpos: %u, " | ||
| 934 | "alloc_size: %u, len_defraged: %u\n", | ||
| 935 | cpos, phys_cpos, alloc_size, len_defraged); | ||
| 936 | |||
| 937 | ret = ocfs2_defrag_extent(context, cpos, phys_cpos, | ||
| 938 | &alloc_size, flags); | ||
| 939 | } else { | ||
| 940 | ret = ocfs2_move_extent(context, cpos, phys_cpos, | ||
| 941 | &new_phys_cpos, alloc_size, | ||
| 942 | flags); | ||
| 943 | |||
| 944 | new_phys_cpos += alloc_size; | ||
| 945 | } | ||
| 946 | |||
| 947 | if (ret < 0) { | ||
| 948 | mlog_errno(ret); | ||
| 949 | goto out; | ||
| 950 | } | ||
| 951 | |||
| 952 | context->clusters_moved += alloc_size; | ||
| 953 | next: | ||
| 954 | cpos += alloc_size; | ||
| 955 | len_to_move -= alloc_size; | ||
| 956 | } | ||
| 957 | |||
| 958 | done: | ||
| 959 | range->me_flags |= OCFS2_MOVE_EXT_FL_COMPLETE; | ||
| 960 | |||
| 961 | out: | ||
| 962 | range->me_moved_len = ocfs2_clusters_to_bytes(osb->sb, | ||
| 963 | context->clusters_moved); | ||
| 964 | range->me_new_offset = ocfs2_clusters_to_bytes(osb->sb, | ||
| 965 | context->new_phys_cpos); | ||
| 966 | |||
| 967 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
| 968 | ocfs2_run_deallocs(osb, &context->dealloc); | ||
| 969 | |||
| 970 | return ret; | ||
| 971 | } | ||
| 972 | |||
| 973 | static int ocfs2_move_extents(struct ocfs2_move_extents_context *context) | ||
| 974 | { | ||
| 975 | int status; | ||
| 976 | handle_t *handle; | ||
| 977 | struct inode *inode = context->inode; | ||
| 978 | struct ocfs2_dinode *di; | ||
| 979 | struct buffer_head *di_bh = NULL; | ||
| 980 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 981 | |||
| 982 | if (!inode) | ||
| 983 | return -ENOENT; | ||
| 984 | |||
| 985 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) | ||
| 986 | return -EROFS; | ||
| 987 | |||
| 988 | mutex_lock(&inode->i_mutex); | ||
| 989 | |||
| 990 | /* | ||
| 991 | * This prevents concurrent writes from other nodes | ||
| 992 | */ | ||
| 993 | status = ocfs2_rw_lock(inode, 1); | ||
| 994 | if (status) { | ||
| 995 | mlog_errno(status); | ||
| 996 | goto out; | ||
| 997 | } | ||
| 998 | |||
| 999 | status = ocfs2_inode_lock(inode, &di_bh, 1); | ||
| 1000 | if (status) { | ||
| 1001 | mlog_errno(status); | ||
| 1002 | goto out_rw_unlock; | ||
| 1003 | } | ||
| 1004 | |||
| 1005 | /* | ||
| 1006 | * remember that ip_xattr_sem also needs to be held if necessary | ||
| 1007 | */ | ||
| 1008 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 1009 | |||
| 1010 | status = __ocfs2_move_extents_range(di_bh, context); | ||
| 1011 | |||
| 1012 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 1013 | if (status) { | ||
| 1014 | mlog_errno(status); | ||
| 1015 | goto out_inode_unlock; | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | /* | ||
| 1019 | * We update ctime for these changes | ||
| 1020 | */ | ||
| 1021 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
| 1022 | if (IS_ERR(handle)) { | ||
| 1023 | status = PTR_ERR(handle); | ||
| 1024 | mlog_errno(status); | ||
| 1025 | goto out_inode_unlock; | ||
| 1026 | } | ||
| 1027 | |||
| 1028 | status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, | ||
| 1029 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 1030 | if (status) { | ||
| 1031 | mlog_errno(status); | ||
| 1032 | goto out_commit; | ||
| 1033 | } | ||
| 1034 | |||
| 1035 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 1036 | inode->i_ctime = CURRENT_TIME; | ||
| 1037 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | ||
| 1038 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | ||
| 1039 | |||
| 1040 | ocfs2_journal_dirty(handle, di_bh); | ||
| 1041 | |||
| 1042 | out_commit: | ||
| 1043 | ocfs2_commit_trans(osb, handle); | ||
| 1044 | |||
| 1045 | out_inode_unlock: | ||
| 1046 | brelse(di_bh); | ||
| 1047 | ocfs2_inode_unlock(inode, 1); | ||
| 1048 | out_rw_unlock: | ||
| 1049 | ocfs2_rw_unlock(inode, 1); | ||
| 1050 | out: | ||
| 1051 | mutex_unlock(&inode->i_mutex); | ||
| 1052 | |||
| 1053 | return status; | ||
| 1054 | } | ||
| 1055 | |||
| 1056 | int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) | ||
| 1057 | { | ||
| 1058 | int status; | ||
| 1059 | |||
| 1060 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
| 1061 | struct ocfs2_move_extents range; | ||
| 1062 | struct ocfs2_move_extents_context *context = NULL; | ||
| 1063 | |||
| 1064 | status = mnt_want_write(filp->f_path.mnt); | ||
| 1065 | if (status) | ||
| 1066 | return status; | ||
| 1067 | |||
| 1068 | if ((!S_ISREG(inode->i_mode)) || !(filp->f_mode & FMODE_WRITE)) | ||
| 1069 | goto out; | ||
| 1070 | |||
| 1071 | if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { | ||
| 1072 | status = -EPERM; | ||
| 1073 | goto out; | ||
| 1074 | } | ||
| 1075 | |||
| 1076 | context = kzalloc(sizeof(struct ocfs2_move_extents_context), GFP_NOFS); | ||
| 1077 | if (!context) { | ||
| 1078 | status = -ENOMEM; | ||
| 1079 | mlog_errno(status); | ||
| 1080 | goto out; | ||
| 1081 | } | ||
| 1082 | |||
| 1083 | context->inode = inode; | ||
| 1084 | context->file = filp; | ||
| 1085 | |||
| 1086 | if (argp) { | ||
| 1087 | if (copy_from_user(&range, (struct ocfs2_move_extents *)argp, | ||
| 1088 | sizeof(range))) { | ||
| 1089 | status = -EFAULT; | ||
| 1090 | goto out; | ||
| 1091 | } | ||
| 1092 | } else { | ||
| 1093 | status = -EINVAL; | ||
| 1094 | goto out; | ||
| 1095 | } | ||
| 1096 | |||
| 1097 | if (range.me_start > i_size_read(inode)) | ||
| 1098 | goto out; | ||
| 1099 | |||
| 1100 | if (range.me_start + range.me_len > i_size_read(inode)) | ||
| 1101 | range.me_len = i_size_read(inode) - range.me_start; | ||
| 1102 | |||
| 1103 | context->range = ⦥ | ||
| 1104 | |||
| 1105 | if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) { | ||
| 1106 | context->auto_defrag = 1; | ||
| 1107 | /* | ||
| 1108 | * ok, the default threshold for defragmentation | ||
| 1109 | * is 1M, since our maximum clustersize is also 1M. | ||
| 1110 | * any thoughts? | ||
| 1111 | */ | ||
| 1112 | if (!range.me_threshold) | ||
| 1113 | range.me_threshold = 1024 * 1024; | ||
| 1114 | |||
| 1115 | if (range.me_threshold > i_size_read(inode)) | ||
| 1116 | range.me_threshold = i_size_read(inode); | ||
| 1117 | |||
| 1118 | if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG) | ||
| 1119 | context->partial = 1; | ||
| 1120 | } else { | ||
| 1121 | /* | ||
| 1122 | * first make a best-effort attempt to validate and adjust the goal | ||
| 1123 | * (a physical address in blocks); this can't guarantee the later | ||
| 1124 | * operation will always succeed, since the global_bitmap may | ||
| 1125 | * change a bit over time. | ||
| 1126 | */ | ||
| 1127 | |||
| 1128 | status = ocfs2_validate_and_adjust_move_goal(inode, &range); | ||
| 1129 | if (status) | ||
| 1130 | goto out; | ||
| 1131 | } | ||
| 1132 | |||
| 1133 | status = ocfs2_move_extents(context); | ||
| 1134 | if (status) | ||
| 1135 | mlog_errno(status); | ||
| 1136 | out: | ||
| 1137 | /* | ||
| 1138 | * movement/defragmentation may end up only partially completed; | ||
| 1139 | * that's why we need to return the finished length and new_offset | ||
| 1140 | * to userspace even if a failure happens somewhere. | ||
| 1141 | */ | ||
| 1142 | if (argp) { | ||
| 1143 | if (copy_to_user((struct ocfs2_move_extents *)argp, &range, | ||
| 1144 | sizeof(range))) | ||
| 1145 | status = -EFAULT; | ||
| 1146 | } | ||
| 1147 | |||
| 1148 | kfree(context); | ||
| 1149 | |||
| 1150 | mnt_drop_write(filp->f_path.mnt); | ||
| 1151 | |||
| 1152 | return status; | ||
| 1153 | } | ||
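
ocfs2_ioctl_move_extents() above is reached through the OCFS2_IOC_MOVE_EXT ioctl defined in ocfs2_ioctl.h further down. The following is a hedged userspace sketch of calling it in auto-defrag mode; the file path, request sizes, and the header include path are assumptions for illustration, not taken from this patch. The handler requires the file to be opened for writing and copies the range back even on partial failure, so me_moved_len and the COMPLETE flag can be inspected afterwards.

/* Illustrative sketch (not part of this patch): defragment the first 8MB
 * of a file via OCFS2_IOC_MOVE_EXT in auto-defrag mode. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include "ocfs2_ioctl.h"	/* assumed local copy of the header below */

int main(void)
{
	struct ocfs2_move_extents me;
	int fd = open("/mnt/ocfs2/bigfile", O_RDWR);	/* needs FMODE_WRITE */

	if (fd < 0)
		return 1;

	memset(&me, 0, sizeof(me));
	me.me_start = 0;			/* virtual start in the file, bytes */
	me.me_len = 8ULL * 1024 * 1024;		/* length to defragment, bytes */
	me.me_threshold = 1024 * 1024;		/* 1M defrag chunk (the default) */
	me.me_flags = OCFS2_MOVE_EXT_FL_AUTO_DEFRAG |
		      OCFS2_MOVE_EXT_FL_PART_DEFRAG;

	if (ioctl(fd, OCFS2_IOC_MOVE_EXT, &me) < 0)
		perror("OCFS2_IOC_MOVE_EXT");

	printf("moved %llu bytes, complete: %s\n",
	       (unsigned long long)me.me_moved_len,
	       (me.me_flags & OCFS2_MOVE_EXT_FL_COMPLETE) ? "yes" : "no");
	return 0;
}
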
diff --git a/fs/ocfs2/move_extents.h b/fs/ocfs2/move_extents.h new file mode 100644 index 000000000000..4e143e811441 --- /dev/null +++ b/fs/ocfs2/move_extents.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * move_extents.h | ||
| 5 | * | ||
| 6 | * Copyright (C) 2011 Oracle. All rights reserved. | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or | ||
| 9 | * modify it under the terms of the GNU General Public | ||
| 10 | * License version 2 as published by the Free Software Foundation. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 15 | * General Public License for more details. | ||
| 16 | */ | ||
| 17 | #ifndef OCFS2_MOVE_EXTENTS_H | ||
| 18 | #define OCFS2_MOVE_EXTENTS_H | ||
| 19 | |||
| 20 | int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp); | ||
| 21 | |||
| 22 | #endif /* OCFS2_MOVE_EXTENTS_H */ | ||
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index b46f39bf7438..5b27ff1fa577 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h | |||
| @@ -142,6 +142,38 @@ struct ocfs2_info_journal_size { | |||
| 142 | __u64 ij_journal_size; | 142 | __u64 ij_journal_size; |
| 143 | }; | 143 | }; |
| 144 | 144 | ||
| 145 | struct ocfs2_info_freeinode { | ||
| 146 | struct ocfs2_info_request ifi_req; | ||
| 147 | struct ocfs2_info_local_freeinode { | ||
| 148 | __u64 lfi_total; | ||
| 149 | __u64 lfi_free; | ||
| 150 | } ifi_stat[OCFS2_MAX_SLOTS]; | ||
| 151 | __u32 ifi_slotnum; /* out */ | ||
| 152 | __u32 ifi_pad; | ||
| 153 | }; | ||
| 154 | |||
| 155 | #define OCFS2_INFO_MAX_HIST (32) | ||
| 156 | |||
| 157 | struct ocfs2_info_freefrag { | ||
| 158 | struct ocfs2_info_request iff_req; | ||
| 159 | struct ocfs2_info_freefrag_stats { /* (out) */ | ||
| 160 | struct ocfs2_info_free_chunk_list { | ||
| 161 | __u32 fc_chunks[OCFS2_INFO_MAX_HIST]; | ||
| 162 | __u32 fc_clusters[OCFS2_INFO_MAX_HIST]; | ||
| 163 | } ffs_fc_hist; | ||
| 164 | __u32 ffs_clusters; | ||
| 165 | __u32 ffs_free_clusters; | ||
| 166 | __u32 ffs_free_chunks; | ||
| 167 | __u32 ffs_free_chunks_real; | ||
| 168 | __u32 ffs_min; /* Minimum free chunksize in clusters */ | ||
| 169 | __u32 ffs_max; | ||
| 170 | __u32 ffs_avg; | ||
| 171 | __u32 ffs_pad; | ||
| 172 | } iff_ffs; | ||
| 173 | __u32 iff_chunksize; /* chunksize in clusters(in) */ | ||
| 174 | __u32 iff_pad; | ||
| 175 | }; | ||
| 176 | |||
| 145 | /* Codes for ocfs2_info_request */ | 177 | /* Codes for ocfs2_info_request */ |
| 146 | enum ocfs2_info_type { | 178 | enum ocfs2_info_type { |
| 147 | OCFS2_INFO_CLUSTERSIZE = 1, | 179 | OCFS2_INFO_CLUSTERSIZE = 1, |
| @@ -151,6 +183,8 @@ enum ocfs2_info_type { | |||
| 151 | OCFS2_INFO_UUID, | 183 | OCFS2_INFO_UUID, |
| 152 | OCFS2_INFO_FS_FEATURES, | 184 | OCFS2_INFO_FS_FEATURES, |
| 153 | OCFS2_INFO_JOURNAL_SIZE, | 185 | OCFS2_INFO_JOURNAL_SIZE, |
| 186 | OCFS2_INFO_FREEINODE, | ||
| 187 | OCFS2_INFO_FREEFRAG, | ||
| 154 | OCFS2_INFO_NUM_TYPES | 188 | OCFS2_INFO_NUM_TYPES |
| 155 | }; | 189 | }; |
| 156 | 190 | ||
| @@ -171,4 +205,38 @@ enum ocfs2_info_type { | |||
| 171 | 205 | ||
| 172 | #define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info) | 206 | #define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info) |
| 173 | 207 | ||
| 208 | struct ocfs2_move_extents { | ||
| 209 | /* All values are in bytes */ | ||
| 210 | /* in */ | ||
| 211 | __u64 me_start; /* Virtual start in the file to move */ | ||
| 212 | __u64 me_len; /* Length of the extents to be moved */ | ||
| 213 | __u64 me_goal; /* Physical offset of the goal, | ||
| 214 | in block units */ | ||
| 215 | __u64 me_threshold; /* Maximum distance from goal or threshold | ||
| 216 | for auto defragmentation */ | ||
| 217 | __u64 me_flags; /* Flags for the operation: | ||
| 218 | * - auto defragmentation. | ||
| 219 | * - refcount,xattr cases. | ||
| 220 | */ | ||
| 221 | /* out */ | ||
| 222 | __u64 me_moved_len; /* Moved/defragged length */ | ||
| 223 | __u64 me_new_offset; /* Resulting physical location */ | ||
| 224 | __u32 me_reserved[2]; /* Reserved for future use */ | ||
| 225 | }; | ||
| 226 | |||
| 227 | #define OCFS2_MOVE_EXT_FL_AUTO_DEFRAG (0x00000001) /* Kernel claims the | ||
| 228 | new clusters itself | ||
| 229 | and uses them as the | ||
| 230 | goal for the move */ | ||
| 231 | #define OCFS2_MOVE_EXT_FL_PART_DEFRAG (0x00000002) /* Allow partial extent | ||
| 232 | moving; this makes the | ||
| 233 | move less likely to | ||
| 234 | fail, but may leave the | ||
| 235 | fs even more fragmented */ | ||
| 236 | #define OCFS2_MOVE_EXT_FL_COMPLETE (0x00000004) /* Move or defragmentation | ||
| 237 | was fully completed. | ||
| 238 | */ | ||
| 239 | |||
| 240 | #define OCFS2_IOC_MOVE_EXT _IOW('o', 6, struct ocfs2_move_extents) | ||
| 241 | |||
| 174 | #endif /* OCFS2_IOCTL_H */ | 242 | #endif /* OCFS2_IOCTL_H */ |
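For illustration only, a hedged userspace sketch of driving the new OCFS2_IOC_MOVE_EXT call: it asks the kernel to auto-defragment the first megabyte of an open file and then inspects the out fields. The struct fields and flag names come from the definitions above; treating OCFS2_MOVE_EXT_FL_COMPLETE as an out flag in me_flags is an assumption based on its comment.

#include <string.h>
#include <sys/ioctl.h>
/* plus this header (ocfs2_ioctl.h) for struct ocfs2_move_extents
 * and OCFS2_IOC_MOVE_EXT */

static int defrag_first_mb(int fd)
{
	struct ocfs2_move_extents range;

	memset(&range, 0, sizeof(range));
	range.me_start = 0;			/* byte offset into the file */
	range.me_len   = 1024 * 1024;		/* bytes to defragment */
	range.me_flags = OCFS2_MOVE_EXT_FL_AUTO_DEFRAG;	/* kernel picks the goal */

	if (ioctl(fd, OCFS2_IOC_MOVE_EXT, &range) < 0)
		return -1;

	/* me_moved_len and me_new_offset report what was actually done */
	return (range.me_flags & OCFS2_MOVE_EXT_FL_COMPLETE) ? 0 : 1;
}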
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index a1dae5bb54ac..3b481f490633 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h | |||
| @@ -688,6 +688,31 @@ TRACE_EVENT(ocfs2_cache_block_dealloc, | |||
| 688 | __entry->blkno, __entry->bit) | 688 | __entry->blkno, __entry->bit) |
| 689 | ); | 689 | ); |
| 690 | 690 | ||
| 691 | TRACE_EVENT(ocfs2_trim_extent, | ||
| 692 | TP_PROTO(struct super_block *sb, unsigned long long blk, | ||
| 693 | unsigned long long count), | ||
| 694 | TP_ARGS(sb, blk, count), | ||
| 695 | TP_STRUCT__entry( | ||
| 696 | __field(int, dev_major) | ||
| 697 | __field(int, dev_minor) | ||
| 698 | __field(unsigned long long, blk) | ||
| 699 | __field(__u64, count) | ||
| 700 | ), | ||
| 701 | TP_fast_assign( | ||
| 702 | __entry->dev_major = MAJOR(sb->s_dev); | ||
| 703 | __entry->dev_minor = MINOR(sb->s_dev); | ||
| 704 | __entry->blk = blk; | ||
| 705 | __entry->count = count; | ||
| 706 | ), | ||
| 707 | TP_printk("%d %d %llu %llu", | ||
| 708 | __entry->dev_major, __entry->dev_minor, | ||
| 709 | __entry->blk, __entry->count) | ||
| 710 | ); | ||
| 711 | |||
| 712 | DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_trim_group); | ||
| 713 | |||
| 714 | DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_trim_fs); | ||
| 715 | |||
| 691 | /* End of trace events for fs/ocfs2/alloc.c. */ | 716 | /* End of trace events for fs/ocfs2/alloc.c. */ |
| 692 | 717 | ||
| 693 | /* Trace events for fs/ocfs2/localalloc.c. */ | 718 | /* Trace events for fs/ocfs2/localalloc.c. */ |
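The TRACE_EVENT above generates a trace_ocfs2_trim_extent() hook that the trim path in fs/ocfs2/alloc.c can fire for each discarded extent; a hedged sketch of such a call site (the local variable names start_blk and count are assumptions, not taken from this diff):

	trace_ocfs2_trim_extent(sb, (unsigned long long)start_blk,
				(unsigned long long)count);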
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 3c7606cff1ab..ebfd3825f12a 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
| @@ -66,7 +66,7 @@ struct ocfs2_cow_context { | |||
| 66 | u32 *num_clusters, | 66 | u32 *num_clusters, |
| 67 | unsigned int *extent_flags); | 67 | unsigned int *extent_flags); |
| 68 | int (*cow_duplicate_clusters)(handle_t *handle, | 68 | int (*cow_duplicate_clusters)(handle_t *handle, |
| 69 | struct ocfs2_cow_context *context, | 69 | struct file *file, |
| 70 | u32 cpos, u32 old_cluster, | 70 | u32 cpos, u32 old_cluster, |
| 71 | u32 new_cluster, u32 new_len); | 71 | u32 new_cluster, u32 new_len); |
| 72 | }; | 72 | }; |
| @@ -2921,20 +2921,21 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh) | |||
| 2921 | return 0; | 2921 | return 0; |
| 2922 | } | 2922 | } |
| 2923 | 2923 | ||
| 2924 | static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | 2924 | int ocfs2_duplicate_clusters_by_page(handle_t *handle, |
| 2925 | struct ocfs2_cow_context *context, | 2925 | struct file *file, |
| 2926 | u32 cpos, u32 old_cluster, | 2926 | u32 cpos, u32 old_cluster, |
| 2927 | u32 new_cluster, u32 new_len) | 2927 | u32 new_cluster, u32 new_len) |
| 2928 | { | 2928 | { |
| 2929 | int ret = 0, partial; | 2929 | int ret = 0, partial; |
| 2930 | struct ocfs2_caching_info *ci = context->data_et.et_ci; | 2930 | struct inode *inode = file->f_path.dentry->d_inode; |
| 2931 | struct ocfs2_caching_info *ci = INODE_CACHE(inode); | ||
| 2931 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); | 2932 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); |
| 2932 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); | 2933 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); |
| 2933 | struct page *page; | 2934 | struct page *page; |
| 2934 | pgoff_t page_index; | 2935 | pgoff_t page_index; |
| 2935 | unsigned int from, to, readahead_pages; | 2936 | unsigned int from, to, readahead_pages; |
| 2936 | loff_t offset, end, map_end; | 2937 | loff_t offset, end, map_end; |
| 2937 | struct address_space *mapping = context->inode->i_mapping; | 2938 | struct address_space *mapping = inode->i_mapping; |
| 2938 | 2939 | ||
| 2939 | trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster, | 2940 | trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster, |
| 2940 | new_cluster, new_len); | 2941 | new_cluster, new_len); |
| @@ -2948,8 +2949,8 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
| 2948 | * We only duplicate pages until we reach the page contains i_size - 1. | 2949 | * We only duplicate pages until we reach the page contains i_size - 1. |
| 2949 | * So trim 'end' to i_size. | 2950 | * So trim 'end' to i_size. |
| 2950 | */ | 2951 | */ |
| 2951 | if (end > i_size_read(context->inode)) | 2952 | if (end > i_size_read(inode)) |
| 2952 | end = i_size_read(context->inode); | 2953 | end = i_size_read(inode); |
| 2953 | 2954 | ||
| 2954 | while (offset < end) { | 2955 | while (offset < end) { |
| 2955 | page_index = offset >> PAGE_CACHE_SHIFT; | 2956 | page_index = offset >> PAGE_CACHE_SHIFT; |
| @@ -2972,10 +2973,9 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
| 2972 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) | 2973 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) |
| 2973 | BUG_ON(PageDirty(page)); | 2974 | BUG_ON(PageDirty(page)); |
| 2974 | 2975 | ||
| 2975 | if (PageReadahead(page) && context->file) { | 2976 | if (PageReadahead(page)) { |
| 2976 | page_cache_async_readahead(mapping, | 2977 | page_cache_async_readahead(mapping, |
| 2977 | &context->file->f_ra, | 2978 | &file->f_ra, file, |
| 2978 | context->file, | ||
| 2979 | page, page_index, | 2979 | page, page_index, |
| 2980 | readahead_pages); | 2980 | readahead_pages); |
| 2981 | } | 2981 | } |
| @@ -2999,8 +2999,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
| 2999 | } | 2999 | } |
| 3000 | } | 3000 | } |
| 3001 | 3001 | ||
| 3002 | ocfs2_map_and_dirty_page(context->inode, | 3002 | ocfs2_map_and_dirty_page(inode, handle, from, to, |
| 3003 | handle, from, to, | ||
| 3004 | page, 0, &new_block); | 3003 | page, 0, &new_block); |
| 3005 | mark_page_accessed(page); | 3004 | mark_page_accessed(page); |
| 3006 | unlock: | 3005 | unlock: |
| @@ -3015,14 +3014,15 @@ unlock: | |||
| 3015 | return ret; | 3014 | return ret; |
| 3016 | } | 3015 | } |
| 3017 | 3016 | ||
| 3018 | static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, | 3017 | int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, |
| 3019 | struct ocfs2_cow_context *context, | 3018 | struct file *file, |
| 3020 | u32 cpos, u32 old_cluster, | 3019 | u32 cpos, u32 old_cluster, |
| 3021 | u32 new_cluster, u32 new_len) | 3020 | u32 new_cluster, u32 new_len) |
| 3022 | { | 3021 | { |
| 3023 | int ret = 0; | 3022 | int ret = 0; |
| 3024 | struct super_block *sb = context->inode->i_sb; | 3023 | struct inode *inode = file->f_path.dentry->d_inode; |
| 3025 | struct ocfs2_caching_info *ci = context->data_et.et_ci; | 3024 | struct super_block *sb = inode->i_sb; |
| 3025 | struct ocfs2_caching_info *ci = INODE_CACHE(inode); | ||
| 3026 | int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); | 3026 | int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); |
| 3027 | u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster); | 3027 | u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster); |
| 3028 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); | 3028 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); |
| @@ -3145,8 +3145,8 @@ static int ocfs2_replace_clusters(handle_t *handle, | |||
| 3145 | 3145 | ||
| 3146 | /*If the old clusters is unwritten, no need to duplicate. */ | 3146 | /*If the old clusters is unwritten, no need to duplicate. */ |
| 3147 | if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { | 3147 | if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { |
| 3148 | ret = context->cow_duplicate_clusters(handle, context, cpos, | 3148 | ret = context->cow_duplicate_clusters(handle, context->file, |
| 3149 | old, new, len); | 3149 | cpos, old, new, len); |
| 3150 | if (ret) { | 3150 | if (ret) { |
| 3151 | mlog_errno(ret); | 3151 | mlog_errno(ret); |
| 3152 | goto out; | 3152 | goto out; |
| @@ -3162,22 +3162,22 @@ out: | |||
| 3162 | return ret; | 3162 | return ret; |
| 3163 | } | 3163 | } |
| 3164 | 3164 | ||
| 3165 | static int ocfs2_cow_sync_writeback(struct super_block *sb, | 3165 | int ocfs2_cow_sync_writeback(struct super_block *sb, |
| 3166 | struct ocfs2_cow_context *context, | 3166 | struct inode *inode, |
| 3167 | u32 cpos, u32 num_clusters) | 3167 | u32 cpos, u32 num_clusters) |
| 3168 | { | 3168 | { |
| 3169 | int ret = 0; | 3169 | int ret = 0; |
| 3170 | loff_t offset, end, map_end; | 3170 | loff_t offset, end, map_end; |
| 3171 | pgoff_t page_index; | 3171 | pgoff_t page_index; |
| 3172 | struct page *page; | 3172 | struct page *page; |
| 3173 | 3173 | ||
| 3174 | if (ocfs2_should_order_data(context->inode)) | 3174 | if (ocfs2_should_order_data(inode)) |
| 3175 | return 0; | 3175 | return 0; |
| 3176 | 3176 | ||
| 3177 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; | 3177 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; |
| 3178 | end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits); | 3178 | end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits); |
| 3179 | 3179 | ||
| 3180 | ret = filemap_fdatawrite_range(context->inode->i_mapping, | 3180 | ret = filemap_fdatawrite_range(inode->i_mapping, |
| 3181 | offset, end - 1); | 3181 | offset, end - 1); |
| 3182 | if (ret < 0) { | 3182 | if (ret < 0) { |
| 3183 | mlog_errno(ret); | 3183 | mlog_errno(ret); |
| @@ -3190,7 +3190,7 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb, | |||
| 3190 | if (map_end > end) | 3190 | if (map_end > end) |
| 3191 | map_end = end; | 3191 | map_end = end; |
| 3192 | 3192 | ||
| 3193 | page = find_or_create_page(context->inode->i_mapping, | 3193 | page = find_or_create_page(inode->i_mapping, |
| 3194 | page_index, GFP_NOFS); | 3194 | page_index, GFP_NOFS); |
| 3195 | BUG_ON(!page); | 3195 | BUG_ON(!page); |
| 3196 | 3196 | ||
| @@ -3349,7 +3349,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb, | |||
| 3349 | * in write-back mode. | 3349 | * in write-back mode. |
| 3350 | */ | 3350 | */ |
| 3351 | if (context->get_clusters == ocfs2_di_get_clusters) { | 3351 | if (context->get_clusters == ocfs2_di_get_clusters) { |
| 3352 | ret = ocfs2_cow_sync_writeback(sb, context, cpos, | 3352 | ret = ocfs2_cow_sync_writeback(sb, context->inode, cpos, |
| 3353 | orig_num_clusters); | 3353 | orig_num_clusters); |
| 3354 | if (ret) | 3354 | if (ret) |
| 3355 | mlog_errno(ret); | 3355 | mlog_errno(ret); |
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index c8ce46f7d8e3..7754608c83a4 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h | |||
| @@ -84,6 +84,17 @@ int ocfs2_refcount_cow_xattr(struct inode *inode, | |||
| 84 | struct buffer_head *ref_root_bh, | 84 | struct buffer_head *ref_root_bh, |
| 85 | u32 cpos, u32 write_len, | 85 | u32 cpos, u32 write_len, |
| 86 | struct ocfs2_post_refcount *post); | 86 | struct ocfs2_post_refcount *post); |
| 87 | int ocfs2_duplicate_clusters_by_page(handle_t *handle, | ||
| 88 | struct file *file, | ||
| 89 | u32 cpos, u32 old_cluster, | ||
| 90 | u32 new_cluster, u32 new_len); | ||
| 91 | int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, | ||
| 92 | struct file *file, | ||
| 93 | u32 cpos, u32 old_cluster, | ||
| 94 | u32 new_cluster, u32 new_len); | ||
| 95 | int ocfs2_cow_sync_writeback(struct super_block *sb, | ||
| 96 | struct inode *inode, | ||
| 97 | u32 cpos, u32 num_clusters); | ||
| 87 | int ocfs2_add_refcount_flag(struct inode *inode, | 98 | int ocfs2_add_refcount_flag(struct inode *inode, |
| 88 | struct ocfs2_extent_tree *data_et, | 99 | struct ocfs2_extent_tree *data_et, |
| 89 | struct ocfs2_caching_info *ref_ci, | 100 | struct ocfs2_caching_info *ref_ci, |
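With these prototypes exported, callers outside refcounttree.c (such as the new extent-moving code) can reuse the CoW helpers directly. A minimal sketch of one such call, assuming the caller already holds the relevant inode locks and a running journal handle:

	ret = ocfs2_duplicate_clusters_by_page(handle, filp, cpos,
					       old_cluster, new_cluster,
					       new_len);
	if (ret)
		mlog_errno(ret);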
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 4129fb671d71..cdbaf5e97308 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -1567,7 +1567,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
| 1567 | if (osb->preferred_slot != OCFS2_INVALID_SLOT) | 1567 | if (osb->preferred_slot != OCFS2_INVALID_SLOT) |
| 1568 | seq_printf(s, ",preferred_slot=%d", osb->preferred_slot); | 1568 | seq_printf(s, ",preferred_slot=%d", osb->preferred_slot); |
| 1569 | 1569 | ||
| 1570 | if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM) | 1570 | if (!(mnt->mnt_flags & MNT_NOATIME) && !(mnt->mnt_flags & MNT_RELATIME)) |
| 1571 | seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); | 1571 | seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); |
| 1572 | 1572 | ||
| 1573 | if (osb->osb_commit_interval) | 1573 | if (osb->osb_commit_interval) |
