diff options
| -rw-r--r-- | Documentation/ABI/removed/o2cb (renamed from Documentation/ABI/obsolete/o2cb) | 9 | ||||
| -rw-r--r-- | Documentation/feature-removal-schedule.txt | 10 | ||||
| -rw-r--r-- | Documentation/filesystems/ocfs2.txt | 8 | ||||
| -rw-r--r-- | fs/ocfs2/alloc.c | 166 | ||||
| -rw-r--r-- | fs/ocfs2/alloc.h | 1 | ||||
| -rw-r--r-- | fs/ocfs2/cluster/sys.c | 9 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmcommon.h | 14 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmdebug.c | 6 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmdomain.c | 94 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 255 | ||||
| -rw-r--r-- | fs/ocfs2/dlm/dlmrecovery.c | 1 | ||||
| -rw-r--r-- | fs/ocfs2/dlmfs/dlmfs.c | 2 | ||||
| -rw-r--r-- | fs/ocfs2/file.c | 1 | ||||
| -rw-r--r-- | fs/ocfs2/ioctl.c | 24 | ||||
| -rw-r--r-- | fs/ocfs2/ocfs2_trace.h | 25 | ||||
| -rw-r--r-- | fs/ocfs2/super.c | 2 |
16 files changed, 421 insertions, 206 deletions
diff --git a/Documentation/ABI/obsolete/o2cb b/Documentation/ABI/removed/o2cb index 9c49d8e6c0c..7f5daa46509 100644 --- a/Documentation/ABI/obsolete/o2cb +++ b/Documentation/ABI/removed/o2cb | |||
| @@ -1,11 +1,10 @@ | |||
| 1 | What: /sys/o2cb symlink | 1 | What: /sys/o2cb symlink |
| 2 | Date: Dec 2005 | 2 | Date: May 2011 |
| 3 | KernelVersion: 2.6.16 | 3 | KernelVersion: 2.6.40 |
| 4 | Contact: ocfs2-devel@oss.oracle.com | 4 | Contact: ocfs2-devel@oss.oracle.com |
| 5 | Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink will | 5 | Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink is |
| 6 | be removed when new versions of ocfs2-tools which know to look | 6 | removed when new versions of ocfs2-tools which know to look |
| 7 | in /sys/fs/o2cb are sufficiently prevalent. Don't code new | 7 | in /sys/fs/o2cb are sufficiently prevalent. Don't code new |
| 8 | software to look here, it should try /sys/fs/o2cb instead. | 8 | software to look here, it should try /sys/fs/o2cb instead. |
| 9 | See Documentation/ABI/stable/o2cb for more information on usage. | ||
| 10 | Users: ocfs2-tools. It's sufficient to mail proposed changes to | 9 | Users: ocfs2-tools. It's sufficient to mail proposed changes to |
| 11 | ocfs2-devel@oss.oracle.com. | 10 | ocfs2-devel@oss.oracle.com. |
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 95788ad2506..ff31b1cc50a 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
| @@ -262,16 +262,6 @@ Who: Michael Buesch <mb@bu3sch.de> | |||
| 262 | 262 | ||
| 263 | --------------------------- | 263 | --------------------------- |
| 264 | 264 | ||
| 265 | What: /sys/o2cb symlink | ||
| 266 | When: January 2010 | ||
| 267 | Why: /sys/fs/o2cb is the proper location for this information - /sys/o2cb | ||
| 268 | exists as a symlink for backwards compatibility for old versions of | ||
| 269 | ocfs2-tools. 2 years should be sufficient time to phase in new versions | ||
| 270 | which know to look in /sys/fs/o2cb. | ||
| 271 | Who: ocfs2-devel@oss.oracle.com | ||
| 272 | |||
| 273 | --------------------------- | ||
| 274 | |||
| 275 | What: Ability for non root users to shm_get hugetlb pages based on mlock | 265 | What: Ability for non root users to shm_get hugetlb pages based on mlock |
| 276 | resource limits | 266 | resource limits |
| 277 | When: 2.6.31 | 267 | When: 2.6.31 |
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index 9ed920a8cd7..7618a287aa4 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt | |||
| @@ -46,9 +46,15 @@ errors=panic Panic and halt the machine if an error occurs. | |||
| 46 | intr (*) Allow signals to interrupt cluster operations. | 46 | intr (*) Allow signals to interrupt cluster operations. |
| 47 | nointr Do not allow signals to interrupt cluster | 47 | nointr Do not allow signals to interrupt cluster |
| 48 | operations. | 48 | operations. |
| 49 | noatime Do not update access time. | ||
| 50 | relatime(*) Update atime if the previous atime is older than | ||
| 51 | mtime or ctime | ||
| 52 | strictatime Always update atime, but the minimum update interval | ||
| 53 | is specified by atime_quantum. | ||
| 49 | atime_quantum=60(*) OCFS2 will not update atime unless this number | 54 | atime_quantum=60(*) OCFS2 will not update atime unless this number |
| 50 | of seconds has passed since the last update. | 55 | of seconds has passed since the last update. |
| 51 | Set to zero to always update atime. | 56 | Set to zero to always update atime. This option needs |
| 57 | to be used with strictatime. | ||
| 52 | data=ordered (*) All data are forced directly out to the main file | 58 | data=ordered (*) All data are forced directly out to the main file |
| 53 | system prior to its metadata being committed to the | 59 | system prior to its metadata being committed to the |
| 54 | journal. | 60 | journal. |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 48aa9c7401c..ed553c60de8 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
| 30 | #include <linux/swap.h> | 30 | #include <linux/swap.h> |
| 31 | #include <linux/quotaops.h> | 31 | #include <linux/quotaops.h> |
| 32 | #include <linux/blkdev.h> | ||
| 32 | 33 | ||
| 33 | #include <cluster/masklog.h> | 34 | #include <cluster/masklog.h> |
| 34 | 35 | ||
| @@ -7184,3 +7185,168 @@ out_commit: | |||
| 7184 | out: | 7185 | out: |
| 7185 | return ret; | 7186 | return ret; |
| 7186 | } | 7187 | } |
| 7188 | |||
| 7189 | static int ocfs2_trim_extent(struct super_block *sb, | ||
| 7190 | struct ocfs2_group_desc *gd, | ||
| 7191 | u32 start, u32 count) | ||
| 7192 | { | ||
| 7193 | u64 discard, bcount; | ||
| 7194 | |||
| 7195 | bcount = ocfs2_clusters_to_blocks(sb, count); | ||
| 7196 | discard = le64_to_cpu(gd->bg_blkno) + | ||
| 7197 | ocfs2_clusters_to_blocks(sb, start); | ||
| 7198 | |||
| 7199 | trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount); | ||
| 7200 | |||
| 7201 | return sb_issue_discard(sb, discard, bcount, GFP_NOFS, 0); | ||
| 7202 | } | ||
| 7203 | |||
| 7204 | static int ocfs2_trim_group(struct super_block *sb, | ||
| 7205 | struct ocfs2_group_desc *gd, | ||
| 7206 | u32 start, u32 max, u32 minbits) | ||
| 7207 | { | ||
| 7208 | int ret = 0, count = 0, next; | ||
| 7209 | void *bitmap = gd->bg_bitmap; | ||
| 7210 | |||
| 7211 | if (le16_to_cpu(gd->bg_free_bits_count) < minbits) | ||
| 7212 | return 0; | ||
| 7213 | |||
| 7214 | trace_ocfs2_trim_group((unsigned long long)le64_to_cpu(gd->bg_blkno), | ||
| 7215 | start, max, minbits); | ||
| 7216 | |||
| 7217 | while (start < max) { | ||
| 7218 | start = ocfs2_find_next_zero_bit(bitmap, max, start); | ||
| 7219 | if (start >= max) | ||
| 7220 | break; | ||
| 7221 | next = ocfs2_find_next_bit(bitmap, max, start); | ||
| 7222 | |||
| 7223 | if ((next - start) >= minbits) { | ||
| 7224 | ret = ocfs2_trim_extent(sb, gd, | ||
| 7225 | start, next - start); | ||
| 7226 | if (ret < 0) { | ||
| 7227 | mlog_errno(ret); | ||
| 7228 | break; | ||
| 7229 | } | ||
| 7230 | count += next - start; | ||
| 7231 | } | ||
| 7232 | start = next + 1; | ||
| 7233 | |||
| 7234 | if (fatal_signal_pending(current)) { | ||
| 7235 | count = -ERESTARTSYS; | ||
| 7236 | break; | ||
| 7237 | } | ||
| 7238 | |||
| 7239 | if ((le16_to_cpu(gd->bg_free_bits_count) - count) < minbits) | ||
| 7240 | break; | ||
| 7241 | } | ||
| 7242 | |||
| 7243 | if (ret < 0) | ||
| 7244 | count = ret; | ||
| 7245 | |||
| 7246 | return count; | ||
| 7247 | } | ||
| 7248 | |||
| 7249 | int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) | ||
| 7250 | { | ||
| 7251 | struct ocfs2_super *osb = OCFS2_SB(sb); | ||
| 7252 | u64 start, len, trimmed, first_group, last_group, group; | ||
| 7253 | int ret, cnt; | ||
| 7254 | u32 first_bit, last_bit, minlen; | ||
| 7255 | struct buffer_head *main_bm_bh = NULL; | ||
| 7256 | struct inode *main_bm_inode = NULL; | ||
| 7257 | struct buffer_head *gd_bh = NULL; | ||
| 7258 | struct ocfs2_dinode *main_bm; | ||
| 7259 | struct ocfs2_group_desc *gd = NULL; | ||
| 7260 | |||
| 7261 | start = range->start >> osb->s_clustersize_bits; | ||
| 7262 | len = range->len >> osb->s_clustersize_bits; | ||
| 7263 | minlen = range->minlen >> osb->s_clustersize_bits; | ||
| 7264 | trimmed = 0; | ||
| 7265 | |||
| 7266 | if (!len) { | ||
| 7267 | range->len = 0; | ||
| 7268 | return 0; | ||
| 7269 | } | ||
| 7270 | |||
| 7271 | if (minlen >= osb->bitmap_cpg) | ||
| 7272 | return -EINVAL; | ||
| 7273 | |||
| 7274 | main_bm_inode = ocfs2_get_system_file_inode(osb, | ||
| 7275 | GLOBAL_BITMAP_SYSTEM_INODE, | ||
| 7276 | OCFS2_INVALID_SLOT); | ||
| 7277 | if (!main_bm_inode) { | ||
| 7278 | ret = -EIO; | ||
| 7279 | mlog_errno(ret); | ||
| 7280 | goto out; | ||
| 7281 | } | ||
| 7282 | |||
| 7283 | mutex_lock(&main_bm_inode->i_mutex); | ||
| 7284 | |||
| 7285 | ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0); | ||
| 7286 | if (ret < 0) { | ||
| 7287 | mlog_errno(ret); | ||
| 7288 | goto out_mutex; | ||
| 7289 | } | ||
| 7290 | main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data; | ||
| 7291 | |||
| 7292 | if (start >= le32_to_cpu(main_bm->i_clusters)) { | ||
| 7293 | ret = -EINVAL; | ||
| 7294 | goto out_unlock; | ||
| 7295 | } | ||
| 7296 | |||
| 7297 | if (start + len > le32_to_cpu(main_bm->i_clusters)) | ||
| 7298 | len = le32_to_cpu(main_bm->i_clusters) - start; | ||
| 7299 | |||
| 7300 | trace_ocfs2_trim_fs(start, len, minlen); | ||
| 7301 | |||
| 7302 | /* Determine first and last group to examine based on start and len */ | ||
| 7303 | first_group = ocfs2_which_cluster_group(main_bm_inode, start); | ||
| 7304 | if (first_group == osb->first_cluster_group_blkno) | ||
| 7305 | first_bit = start; | ||
| 7306 | else | ||
| 7307 | first_bit = start - ocfs2_blocks_to_clusters(sb, first_group); | ||
| 7308 | last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1); | ||
| 7309 | last_bit = osb->bitmap_cpg; | ||
| 7310 | |||
| 7311 | for (group = first_group; group <= last_group;) { | ||
| 7312 | if (first_bit + len >= osb->bitmap_cpg) | ||
| 7313 | last_bit = osb->bitmap_cpg; | ||
| 7314 | else | ||
| 7315 | last_bit = first_bit + len; | ||
| 7316 | |||
| 7317 | ret = ocfs2_read_group_descriptor(main_bm_inode, | ||
| 7318 | main_bm, group, | ||
| 7319 | &gd_bh); | ||
| 7320 | if (ret < 0) { | ||
| 7321 | mlog_errno(ret); | ||
| 7322 | break; | ||
| 7323 | } | ||
| 7324 | |||
| 7325 | gd = (struct ocfs2_group_desc *)gd_bh->b_data; | ||
| 7326 | cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen); | ||
| 7327 | brelse(gd_bh); | ||
| 7328 | gd_bh = NULL; | ||
| 7329 | if (cnt < 0) { | ||
| 7330 | ret = cnt; | ||
| 7331 | mlog_errno(ret); | ||
| 7332 | break; | ||
| 7333 | } | ||
| 7334 | |||
| 7335 | trimmed += cnt; | ||
| 7336 | len -= osb->bitmap_cpg - first_bit; | ||
| 7337 | first_bit = 0; | ||
| 7338 | if (group == osb->first_cluster_group_blkno) | ||
| 7339 | group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg); | ||
| 7340 | else | ||
| 7341 | group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg); | ||
| 7342 | } | ||
| 7343 | range->len = trimmed * sb->s_blocksize; | ||
| 7344 | out_unlock: | ||
| 7345 | ocfs2_inode_unlock(main_bm_inode, 0); | ||
| 7346 | brelse(main_bm_bh); | ||
| 7347 | out_mutex: | ||
| 7348 | mutex_unlock(&main_bm_inode->i_mutex); | ||
| 7349 | iput(main_bm_inode); | ||
| 7350 | out: | ||
| 7351 | return ret; | ||
| 7352 | } | ||
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 3bd08a03251..ca381c58412 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h | |||
| @@ -239,6 +239,7 @@ int ocfs2_find_leaf(struct ocfs2_caching_info *ci, | |||
| 239 | struct buffer_head **leaf_bh); | 239 | struct buffer_head **leaf_bh); |
| 240 | int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); | 240 | int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); |
| 241 | 241 | ||
| 242 | int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range); | ||
| 242 | /* | 243 | /* |
| 243 | * Helper function to look at the # of clusters in an extent record. | 244 | * Helper function to look at the # of clusters in an extent record. |
| 244 | */ | 245 | */ |
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c index bc702dab5d1..a4b07730b2e 100644 --- a/fs/ocfs2/cluster/sys.c +++ b/fs/ocfs2/cluster/sys.c | |||
| @@ -57,7 +57,6 @@ static struct kset *o2cb_kset; | |||
| 57 | void o2cb_sys_shutdown(void) | 57 | void o2cb_sys_shutdown(void) |
| 58 | { | 58 | { |
| 59 | mlog_sys_shutdown(); | 59 | mlog_sys_shutdown(); |
| 60 | sysfs_remove_link(NULL, "o2cb"); | ||
| 61 | kset_unregister(o2cb_kset); | 60 | kset_unregister(o2cb_kset); |
| 62 | } | 61 | } |
| 63 | 62 | ||
| @@ -69,14 +68,6 @@ int o2cb_sys_init(void) | |||
| 69 | if (!o2cb_kset) | 68 | if (!o2cb_kset) |
| 70 | return -ENOMEM; | 69 | return -ENOMEM; |
| 71 | 70 | ||
| 72 | /* | ||
| 73 | * Create this symlink for backwards compatibility with old | ||
| 74 | * versions of ocfs2-tools which look for things in /sys/o2cb. | ||
| 75 | */ | ||
| 76 | ret = sysfs_create_link(NULL, &o2cb_kset->kobj, "o2cb"); | ||
| 77 | if (ret) | ||
| 78 | goto error; | ||
| 79 | |||
| 80 | ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); | 71 | ret = sysfs_create_group(&o2cb_kset->kobj, &o2cb_attr_group); |
| 81 | if (ret) | 72 | if (ret) |
| 82 | goto error; | 73 | goto error; |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 4bdf7baee34..d602abb51b6 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
| @@ -144,6 +144,7 @@ struct dlm_ctxt | |||
| 144 | wait_queue_head_t dlm_join_events; | 144 | wait_queue_head_t dlm_join_events; |
| 145 | unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 145 | unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
| 146 | unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 146 | unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
| 147 | unsigned long exit_domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 147 | unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 148 | unsigned long recovery_map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
| 148 | struct dlm_recovery_ctxt reco; | 149 | struct dlm_recovery_ctxt reco; |
| 149 | spinlock_t master_lock; | 150 | spinlock_t master_lock; |
| @@ -401,6 +402,18 @@ static inline int dlm_lvb_is_empty(char *lvb) | |||
| 401 | return 1; | 402 | return 1; |
| 402 | } | 403 | } |
| 403 | 404 | ||
| 405 | static inline char *dlm_list_in_text(enum dlm_lockres_list idx) | ||
| 406 | { | ||
| 407 | if (idx == DLM_GRANTED_LIST) | ||
| 408 | return "granted"; | ||
| 409 | else if (idx == DLM_CONVERTING_LIST) | ||
| 410 | return "converting"; | ||
| 411 | else if (idx == DLM_BLOCKED_LIST) | ||
| 412 | return "blocked"; | ||
| 413 | else | ||
| 414 | return "unknown"; | ||
| 415 | } | ||
| 416 | |||
| 404 | static inline struct list_head * | 417 | static inline struct list_head * |
| 405 | dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx) | 418 | dlm_list_idx_to_ptr(struct dlm_lock_resource *res, enum dlm_lockres_list idx) |
| 406 | { | 419 | { |
| @@ -448,6 +461,7 @@ enum { | |||
| 448 | DLM_FINALIZE_RECO_MSG = 518, | 461 | DLM_FINALIZE_RECO_MSG = 518, |
| 449 | DLM_QUERY_REGION = 519, | 462 | DLM_QUERY_REGION = 519, |
| 450 | DLM_QUERY_NODEINFO = 520, | 463 | DLM_QUERY_NODEINFO = 520, |
| 464 | DLM_BEGIN_EXIT_DOMAIN_MSG = 521, | ||
| 451 | }; | 465 | }; |
| 452 | 466 | ||
| 453 | struct dlm_reco_node_data | 467 | struct dlm_reco_node_data |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 04a32be0aeb..56f82cb912e 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
| @@ -756,6 +756,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len) | |||
| 756 | buf + out, len - out); | 756 | buf + out, len - out); |
| 757 | out += snprintf(buf + out, len - out, "\n"); | 757 | out += snprintf(buf + out, len - out, "\n"); |
| 758 | 758 | ||
| 759 | /* Exit Domain Map: xx xx xx */ | ||
| 760 | out += snprintf(buf + out, len - out, "Exit Domain Map: "); | ||
| 761 | out += stringify_nodemap(dlm->exit_domain_map, O2NM_MAX_NODES, | ||
| 762 | buf + out, len - out); | ||
| 763 | out += snprintf(buf + out, len - out, "\n"); | ||
| 764 | |||
| 759 | /* Live Map: xx xx xx */ | 765 | /* Live Map: xx xx xx */ |
| 760 | out += snprintf(buf + out, len - out, "Live Map: "); | 766 | out += snprintf(buf + out, len - out, "Live Map: "); |
| 761 | out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, | 767 | out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 3b179d6cbde..6ed6b95dcf9 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
| @@ -132,10 +132,12 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); | |||
| 132 | * New in version 1.1: | 132 | * New in version 1.1: |
| 133 | * - Message DLM_QUERY_REGION added to support global heartbeat | 133 | * - Message DLM_QUERY_REGION added to support global heartbeat |
| 134 | * - Message DLM_QUERY_NODEINFO added to allow online node removes | 134 | * - Message DLM_QUERY_NODEINFO added to allow online node removes |
| 135 | * New in version 1.2: | ||
| 136 | * - Message DLM_BEGIN_EXIT_DOMAIN_MSG added to mark start of exit domain | ||
| 135 | */ | 137 | */ |
| 136 | static const struct dlm_protocol_version dlm_protocol = { | 138 | static const struct dlm_protocol_version dlm_protocol = { |
| 137 | .pv_major = 1, | 139 | .pv_major = 1, |
| 138 | .pv_minor = 1, | 140 | .pv_minor = 2, |
| 139 | }; | 141 | }; |
| 140 | 142 | ||
| 141 | #define DLM_DOMAIN_BACKOFF_MS 200 | 143 | #define DLM_DOMAIN_BACKOFF_MS 200 |
| @@ -449,14 +451,18 @@ redo_bucket: | |||
| 449 | dropped = dlm_empty_lockres(dlm, res); | 451 | dropped = dlm_empty_lockres(dlm, res); |
| 450 | 452 | ||
| 451 | spin_lock(&res->spinlock); | 453 | spin_lock(&res->spinlock); |
| 452 | __dlm_lockres_calc_usage(dlm, res); | 454 | if (dropped) |
| 453 | iter = res->hash_node.next; | 455 | __dlm_lockres_calc_usage(dlm, res); |
| 456 | else | ||
| 457 | iter = res->hash_node.next; | ||
| 454 | spin_unlock(&res->spinlock); | 458 | spin_unlock(&res->spinlock); |
| 455 | 459 | ||
| 456 | dlm_lockres_put(res); | 460 | dlm_lockres_put(res); |
| 457 | 461 | ||
| 458 | if (dropped) | 462 | if (dropped) { |
| 463 | cond_resched_lock(&dlm->spinlock); | ||
| 459 | goto redo_bucket; | 464 | goto redo_bucket; |
| 465 | } | ||
| 460 | } | 466 | } |
| 461 | cond_resched_lock(&dlm->spinlock); | 467 | cond_resched_lock(&dlm->spinlock); |
| 462 | num += n; | 468 | num += n; |
| @@ -486,6 +492,28 @@ static int dlm_no_joining_node(struct dlm_ctxt *dlm) | |||
| 486 | return ret; | 492 | return ret; |
| 487 | } | 493 | } |
| 488 | 494 | ||
| 495 | static int dlm_begin_exit_domain_handler(struct o2net_msg *msg, u32 len, | ||
| 496 | void *data, void **ret_data) | ||
| 497 | { | ||
| 498 | struct dlm_ctxt *dlm = data; | ||
| 499 | unsigned int node; | ||
| 500 | struct dlm_exit_domain *exit_msg = (struct dlm_exit_domain *) msg->buf; | ||
| 501 | |||
| 502 | if (!dlm_grab(dlm)) | ||
| 503 | return 0; | ||
| 504 | |||
| 505 | node = exit_msg->node_idx; | ||
| 506 | mlog(0, "%s: Node %u sent a begin exit domain message\n", dlm->name, node); | ||
| 507 | |||
| 508 | spin_lock(&dlm->spinlock); | ||
| 509 | set_bit(node, dlm->exit_domain_map); | ||
| 510 | spin_unlock(&dlm->spinlock); | ||
| 511 | |||
| 512 | dlm_put(dlm); | ||
| 513 | |||
| 514 | return 0; | ||
| 515 | } | ||
| 516 | |||
| 489 | static void dlm_mark_domain_leaving(struct dlm_ctxt *dlm) | 517 | static void dlm_mark_domain_leaving(struct dlm_ctxt *dlm) |
| 490 | { | 518 | { |
| 491 | /* Yikes, a double spinlock! I need domain_lock for the dlm | 519 | /* Yikes, a double spinlock! I need domain_lock for the dlm |
| @@ -542,6 +570,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 542 | 570 | ||
| 543 | spin_lock(&dlm->spinlock); | 571 | spin_lock(&dlm->spinlock); |
| 544 | clear_bit(node, dlm->domain_map); | 572 | clear_bit(node, dlm->domain_map); |
| 573 | clear_bit(node, dlm->exit_domain_map); | ||
| 545 | __dlm_print_nodes(dlm); | 574 | __dlm_print_nodes(dlm); |
| 546 | 575 | ||
| 547 | /* notify anything attached to the heartbeat events */ | 576 | /* notify anything attached to the heartbeat events */ |
| @@ -554,29 +583,56 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 554 | return 0; | 583 | return 0; |
| 555 | } | 584 | } |
| 556 | 585 | ||
| 557 | static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, | 586 | static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm, u32 msg_type, |
| 558 | unsigned int node) | 587 | unsigned int node) |
| 559 | { | 588 | { |
| 560 | int status; | 589 | int status; |
| 561 | struct dlm_exit_domain leave_msg; | 590 | struct dlm_exit_domain leave_msg; |
| 562 | 591 | ||
| 563 | mlog(0, "Asking node %u if we can leave the domain %s me = %u\n", | 592 | mlog(0, "%s: Sending domain exit message %u to node %u\n", dlm->name, |
| 564 | node, dlm->name, dlm->node_num); | 593 | msg_type, node); |
| 565 | 594 | ||
| 566 | memset(&leave_msg, 0, sizeof(leave_msg)); | 595 | memset(&leave_msg, 0, sizeof(leave_msg)); |
| 567 | leave_msg.node_idx = dlm->node_num; | 596 | leave_msg.node_idx = dlm->node_num; |
| 568 | 597 | ||
| 569 | status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key, | 598 | status = o2net_send_message(msg_type, dlm->key, &leave_msg, |
| 570 | &leave_msg, sizeof(leave_msg), node, | 599 | sizeof(leave_msg), node, NULL); |
| 571 | NULL); | ||
| 572 | if (status < 0) | 600 | if (status < 0) |
| 573 | mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " | 601 | mlog(ML_ERROR, "Error %d sending domain exit message %u " |
| 574 | "node %u\n", status, DLM_EXIT_DOMAIN_MSG, dlm->key, node); | 602 | "to node %u on domain %s\n", status, msg_type, node, |
| 575 | mlog(0, "status return %d from o2net_send_message\n", status); | 603 | dlm->name); |
| 576 | 604 | ||
| 577 | return status; | 605 | return status; |
| 578 | } | 606 | } |
| 579 | 607 | ||
| 608 | static void dlm_begin_exit_domain(struct dlm_ctxt *dlm) | ||
| 609 | { | ||
| 610 | int node = -1; | ||
| 611 | |||
| 612 | /* Support for begin exit domain was added in 1.2 */ | ||
| 613 | if (dlm->dlm_locking_proto.pv_major == 1 && | ||
| 614 | dlm->dlm_locking_proto.pv_minor < 2) | ||
| 615 | return; | ||
| 616 | |||
| 617 | /* | ||
| 618 | * Unlike DLM_EXIT_DOMAIN_MSG, DLM_BEGIN_EXIT_DOMAIN_MSG is purely | ||
| 619 | * informational. Meaning if a node does not receive the message, | ||
| 620 | * so be it. | ||
| 621 | */ | ||
| 622 | spin_lock(&dlm->spinlock); | ||
| 623 | while (1) { | ||
| 624 | node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, node + 1); | ||
| 625 | if (node >= O2NM_MAX_NODES) | ||
| 626 | break; | ||
| 627 | if (node == dlm->node_num) | ||
| 628 | continue; | ||
| 629 | |||
| 630 | spin_unlock(&dlm->spinlock); | ||
| 631 | dlm_send_one_domain_exit(dlm, DLM_BEGIN_EXIT_DOMAIN_MSG, node); | ||
| 632 | spin_lock(&dlm->spinlock); | ||
| 633 | } | ||
| 634 | spin_unlock(&dlm->spinlock); | ||
| 635 | } | ||
| 580 | 636 | ||
| 581 | static void dlm_leave_domain(struct dlm_ctxt *dlm) | 637 | static void dlm_leave_domain(struct dlm_ctxt *dlm) |
| 582 | { | 638 | { |
| @@ -602,7 +658,8 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm) | |||
| 602 | 658 | ||
| 603 | clear_node = 1; | 659 | clear_node = 1; |
| 604 | 660 | ||
| 605 | status = dlm_send_one_domain_exit(dlm, node); | 661 | status = dlm_send_one_domain_exit(dlm, DLM_EXIT_DOMAIN_MSG, |
| 662 | node); | ||
| 606 | if (status < 0 && | 663 | if (status < 0 && |
| 607 | status != -ENOPROTOOPT && | 664 | status != -ENOPROTOOPT && |
| 608 | status != -ENOTCONN) { | 665 | status != -ENOTCONN) { |
| @@ -677,6 +734,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) | |||
| 677 | 734 | ||
| 678 | if (leave) { | 735 | if (leave) { |
| 679 | mlog(0, "shutting down domain %s\n", dlm->name); | 736 | mlog(0, "shutting down domain %s\n", dlm->name); |
| 737 | dlm_begin_exit_domain(dlm); | ||
| 680 | 738 | ||
| 681 | /* We changed dlm state, notify the thread */ | 739 | /* We changed dlm state, notify the thread */ |
| 682 | dlm_kick_thread(dlm, NULL); | 740 | dlm_kick_thread(dlm, NULL); |
| @@ -909,6 +967,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 909 | * leftover join state. */ | 967 | * leftover join state. */ |
| 910 | BUG_ON(dlm->joining_node != assert->node_idx); | 968 | BUG_ON(dlm->joining_node != assert->node_idx); |
| 911 | set_bit(assert->node_idx, dlm->domain_map); | 969 | set_bit(assert->node_idx, dlm->domain_map); |
| 970 | clear_bit(assert->node_idx, dlm->exit_domain_map); | ||
| 912 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); | 971 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); |
| 913 | 972 | ||
| 914 | printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n", | 973 | printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n", |
| @@ -1793,6 +1852,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm) | |||
| 1793 | if (status) | 1852 | if (status) |
| 1794 | goto bail; | 1853 | goto bail; |
| 1795 | 1854 | ||
| 1855 | status = o2net_register_handler(DLM_BEGIN_EXIT_DOMAIN_MSG, dlm->key, | ||
| 1856 | sizeof(struct dlm_exit_domain), | ||
| 1857 | dlm_begin_exit_domain_handler, | ||
| 1858 | dlm, NULL, &dlm->dlm_domain_handlers); | ||
| 1859 | if (status) | ||
| 1860 | goto bail; | ||
| 1861 | |||
| 1796 | bail: | 1862 | bail: |
| 1797 | if (status) | 1863 | if (status) |
| 1798 | dlm_unregister_domain_handlers(dlm); | 1864 | dlm_unregister_domain_handlers(dlm); |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 84d166328cf..11eefb8c12e 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
| @@ -2339,65 +2339,55 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) | |||
| 2339 | dlm_lockres_put(res); | 2339 | dlm_lockres_put(res); |
| 2340 | } | 2340 | } |
| 2341 | 2341 | ||
| 2342 | /* Checks whether the lockres can be migrated. Returns 0 if yes, < 0 | 2342 | /* |
| 2343 | * if not. If 0, numlocks is set to the number of locks in the lockres. | 2343 | * A migrateable resource is one that: |
| 2344 | * 1. is locally mastered, and, | ||
| 2345 | * 2. has zero local locks, and, | ||
| 2346 | * 3. has one or more non-local locks, or, one or more references | ||
| 2347 | * Returns 1 if yes, 0 if not. | ||
| 2344 | */ | 2348 | */ |
| 2345 | static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, | 2349 | static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, |
| 2346 | struct dlm_lock_resource *res, | 2350 | struct dlm_lock_resource *res) |
| 2347 | int *numlocks, | ||
| 2348 | int *hasrefs) | ||
| 2349 | { | 2351 | { |
| 2350 | int ret; | 2352 | enum dlm_lockres_list idx; |
| 2351 | int i; | 2353 | int nonlocal = 0, node_ref; |
| 2352 | int count = 0; | ||
| 2353 | struct list_head *queue; | 2354 | struct list_head *queue; |
| 2354 | struct dlm_lock *lock; | 2355 | struct dlm_lock *lock; |
| 2356 | u64 cookie; | ||
| 2355 | 2357 | ||
| 2356 | assert_spin_locked(&res->spinlock); | 2358 | assert_spin_locked(&res->spinlock); |
| 2357 | 2359 | ||
| 2358 | *numlocks = 0; | 2360 | if (res->owner != dlm->node_num) |
| 2359 | *hasrefs = 0; | 2361 | return 0; |
| 2360 | |||
| 2361 | ret = -EINVAL; | ||
| 2362 | if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { | ||
| 2363 | mlog(0, "cannot migrate lockres with unknown owner!\n"); | ||
| 2364 | goto leave; | ||
| 2365 | } | ||
| 2366 | |||
| 2367 | if (res->owner != dlm->node_num) { | ||
| 2368 | mlog(0, "cannot migrate lockres this node doesn't own!\n"); | ||
| 2369 | goto leave; | ||
| 2370 | } | ||
| 2371 | 2362 | ||
| 2372 | ret = 0; | 2363 | for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) { |
| 2373 | queue = &res->granted; | 2364 | queue = dlm_list_idx_to_ptr(res, idx); |
| 2374 | for (i = 0; i < 3; i++) { | ||
| 2375 | list_for_each_entry(lock, queue, list) { | 2365 | list_for_each_entry(lock, queue, list) { |
| 2376 | ++count; | 2366 | if (lock->ml.node != dlm->node_num) { |
| 2377 | if (lock->ml.node == dlm->node_num) { | 2367 | nonlocal++; |
| 2378 | mlog(0, "found a lock owned by this node still " | 2368 | continue; |
| 2379 | "on the %s queue! will not migrate this " | ||
| 2380 | "lockres\n", (i == 0 ? "granted" : | ||
| 2381 | (i == 1 ? "converting" : | ||
| 2382 | "blocked"))); | ||
| 2383 | ret = -ENOTEMPTY; | ||
| 2384 | goto leave; | ||
| 2385 | } | 2369 | } |
| 2370 | cookie = be64_to_cpu(lock->ml.cookie); | ||
| 2371 | mlog(0, "%s: Not migrateable res %.*s, lock %u:%llu on " | ||
| 2372 | "%s list\n", dlm->name, res->lockname.len, | ||
| 2373 | res->lockname.name, | ||
| 2374 | dlm_get_lock_cookie_node(cookie), | ||
| 2375 | dlm_get_lock_cookie_seq(cookie), | ||
| 2376 | dlm_list_in_text(idx)); | ||
| 2377 | return 0; | ||
| 2386 | } | 2378 | } |
| 2387 | queue++; | ||
| 2388 | } | 2379 | } |
| 2389 | 2380 | ||
| 2390 | *numlocks = count; | 2381 | if (!nonlocal) { |
| 2391 | 2382 | node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | |
| 2392 | count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | 2383 | if (node_ref >= O2NM_MAX_NODES) |
| 2393 | if (count < O2NM_MAX_NODES) | 2384 | return 0; |
| 2394 | *hasrefs = 1; | 2385 | } |
| 2395 | 2386 | ||
| 2396 | mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name, | 2387 | mlog(0, "%s: res %.*s, Migrateable\n", dlm->name, res->lockname.len, |
| 2397 | res->lockname.len, res->lockname.name, *numlocks, *hasrefs); | 2388 | res->lockname.name); |
| 2398 | 2389 | ||
| 2399 | leave: | 2390 | return 1; |
| 2400 | return ret; | ||
| 2401 | } | 2391 | } |
| 2402 | 2392 | ||
| 2403 | /* | 2393 | /* |
| @@ -2406,8 +2396,7 @@ leave: | |||
| 2406 | 2396 | ||
| 2407 | 2397 | ||
| 2408 | static int dlm_migrate_lockres(struct dlm_ctxt *dlm, | 2398 | static int dlm_migrate_lockres(struct dlm_ctxt *dlm, |
| 2409 | struct dlm_lock_resource *res, | 2399 | struct dlm_lock_resource *res, u8 target) |
| 2410 | u8 target) | ||
| 2411 | { | 2400 | { |
| 2412 | struct dlm_master_list_entry *mle = NULL; | 2401 | struct dlm_master_list_entry *mle = NULL; |
| 2413 | struct dlm_master_list_entry *oldmle = NULL; | 2402 | struct dlm_master_list_entry *oldmle = NULL; |
| @@ -2416,37 +2405,20 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, | |||
| 2416 | const char *name; | 2405 | const char *name; |
| 2417 | unsigned int namelen; | 2406 | unsigned int namelen; |
| 2418 | int mle_added = 0; | 2407 | int mle_added = 0; |
| 2419 | int numlocks, hasrefs; | ||
| 2420 | int wake = 0; | 2408 | int wake = 0; |
| 2421 | 2409 | ||
| 2422 | if (!dlm_grab(dlm)) | 2410 | if (!dlm_grab(dlm)) |
| 2423 | return -EINVAL; | 2411 | return -EINVAL; |
| 2424 | 2412 | ||
| 2413 | BUG_ON(target == O2NM_MAX_NODES); | ||
| 2414 | |||
| 2425 | name = res->lockname.name; | 2415 | name = res->lockname.name; |
| 2426 | namelen = res->lockname.len; | 2416 | namelen = res->lockname.len; |
| 2427 | 2417 | ||
| 2428 | mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target); | 2418 | mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name, |
| 2429 | 2419 | target); | |
| 2430 | /* | ||
| 2431 | * ensure this lockres is a proper candidate for migration | ||
| 2432 | */ | ||
| 2433 | spin_lock(&res->spinlock); | ||
| 2434 | ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs); | ||
| 2435 | if (ret < 0) { | ||
| 2436 | spin_unlock(&res->spinlock); | ||
| 2437 | goto leave; | ||
| 2438 | } | ||
| 2439 | spin_unlock(&res->spinlock); | ||
| 2440 | |||
| 2441 | /* no work to do */ | ||
| 2442 | if (numlocks == 0 && !hasrefs) | ||
| 2443 | goto leave; | ||
| 2444 | |||
| 2445 | /* | ||
| 2446 | * preallocate up front | ||
| 2447 | * if this fails, abort | ||
| 2448 | */ | ||
| 2449 | 2420 | ||
| 2421 | /* preallocate up front. if this fails, abort */ | ||
| 2450 | ret = -ENOMEM; | 2422 | ret = -ENOMEM; |
| 2451 | mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS); | 2423 | mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS); |
| 2452 | if (!mres) { | 2424 | if (!mres) { |
| @@ -2462,35 +2434,10 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, | |||
| 2462 | ret = 0; | 2434 | ret = 0; |
| 2463 | 2435 | ||
| 2464 | /* | 2436 | /* |
| 2465 | * find a node to migrate the lockres to | ||
| 2466 | */ | ||
| 2467 | |||
| 2468 | spin_lock(&dlm->spinlock); | ||
| 2469 | /* pick a new node */ | ||
| 2470 | if (!test_bit(target, dlm->domain_map) || | ||
| 2471 | target >= O2NM_MAX_NODES) { | ||
| 2472 | target = dlm_pick_migration_target(dlm, res); | ||
| 2473 | } | ||
| 2474 | mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name, | ||
| 2475 | namelen, name, target); | ||
| 2476 | |||
| 2477 | if (target >= O2NM_MAX_NODES || | ||
| 2478 | !test_bit(target, dlm->domain_map)) { | ||
| 2479 | /* target chosen is not alive */ | ||
| 2480 | ret = -EINVAL; | ||
| 2481 | } | ||
| 2482 | |||
| 2483 | if (ret) { | ||
| 2484 | spin_unlock(&dlm->spinlock); | ||
| 2485 | goto fail; | ||
| 2486 | } | ||
| 2487 | |||
| 2488 | mlog(0, "continuing with target = %u\n", target); | ||
| 2489 | |||
| 2490 | /* | ||
| 2491 | * clear any existing master requests and | 2437 | * clear any existing master requests and |
| 2492 | * add the migration mle to the list | 2438 | * add the migration mle to the list |
| 2493 | */ | 2439 | */ |
| 2440 | spin_lock(&dlm->spinlock); | ||
| 2494 | spin_lock(&dlm->master_lock); | 2441 | spin_lock(&dlm->master_lock); |
| 2495 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, | 2442 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, |
| 2496 | namelen, target, dlm->node_num); | 2443 | namelen, target, dlm->node_num); |
| @@ -2531,6 +2478,7 @@ fail: | |||
| 2531 | dlm_put_mle(mle); | 2478 | dlm_put_mle(mle); |
| 2532 | } else if (mle) { | 2479 | } else if (mle) { |
| 2533 | kmem_cache_free(dlm_mle_cache, mle); | 2480 | kmem_cache_free(dlm_mle_cache, mle); |
| 2481 | mle = NULL; | ||
| 2534 | } | 2482 | } |
| 2535 | goto leave; | 2483 | goto leave; |
| 2536 | } | 2484 | } |
| @@ -2652,69 +2600,52 @@ leave: | |||
| 2652 | if (wake) | 2600 | if (wake) |
| 2653 | wake_up(&res->wq); | 2601 | wake_up(&res->wq); |
| 2654 | 2602 | ||
| 2655 | /* TODO: cleanup */ | ||
| 2656 | if (mres) | 2603 | if (mres) |
| 2657 | free_page((unsigned long)mres); | 2604 | free_page((unsigned long)mres); |
| 2658 | 2605 | ||
| 2659 | dlm_put(dlm); | 2606 | dlm_put(dlm); |
| 2660 | 2607 | ||
| 2661 | mlog(0, "returning %d\n", ret); | 2608 | mlog(0, "%s: Migrating %.*s to %u, returns %d\n", dlm->name, namelen, |
| 2609 | name, target, ret); | ||
| 2662 | return ret; | 2610 | return ret; |
| 2663 | } | 2611 | } |
| 2664 | 2612 | ||
| 2665 | #define DLM_MIGRATION_RETRY_MS 100 | 2613 | #define DLM_MIGRATION_RETRY_MS 100 |
| 2666 | 2614 | ||
| 2667 | /* Should be called only after beginning the domain leave process. | 2615 | /* |
| 2616 | * Should be called only after beginning the domain leave process. | ||
| 2668 | * There should not be any remaining locks on nonlocal lock resources, | 2617 | * There should not be any remaining locks on nonlocal lock resources, |
| 2669 | * and there should be no local locks left on locally mastered resources. | 2618 | * and there should be no local locks left on locally mastered resources. |
| 2670 | * | 2619 | * |
| 2671 | * Called with the dlm spinlock held, may drop it to do migration, but | 2620 | * Called with the dlm spinlock held, may drop it to do migration, but |
| 2672 | * will re-acquire before exit. | 2621 | * will re-acquire before exit. |
| 2673 | * | 2622 | * |
| 2674 | * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped */ | 2623 | * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped |
| 2624 | */ | ||
| 2675 | int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | 2625 | int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) |
| 2676 | { | 2626 | { |
| 2677 | int ret; | 2627 | int ret; |
| 2678 | int lock_dropped = 0; | 2628 | int lock_dropped = 0; |
| 2679 | int numlocks, hasrefs; | 2629 | u8 target = O2NM_MAX_NODES; |
| 2630 | |||
| 2631 | assert_spin_locked(&dlm->spinlock); | ||
| 2680 | 2632 | ||
| 2681 | spin_lock(&res->spinlock); | 2633 | spin_lock(&res->spinlock); |
| 2682 | if (res->owner != dlm->node_num) { | 2634 | if (dlm_is_lockres_migrateable(dlm, res)) |
| 2683 | if (!__dlm_lockres_unused(res)) { | 2635 | target = dlm_pick_migration_target(dlm, res); |
| 2684 | mlog(ML_ERROR, "%s:%.*s: this node is not master, " | 2636 | spin_unlock(&res->spinlock); |
| 2685 | "trying to free this but locks remain\n", | ||
| 2686 | dlm->name, res->lockname.len, res->lockname.name); | ||
| 2687 | } | ||
| 2688 | spin_unlock(&res->spinlock); | ||
| 2689 | goto leave; | ||
| 2690 | } | ||
| 2691 | 2637 | ||
| 2692 | /* No need to migrate a lockres having no locks */ | 2638 | if (target == O2NM_MAX_NODES) |
| 2693 | ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs); | ||
| 2694 | if (ret >= 0 && numlocks == 0 && !hasrefs) { | ||
| 2695 | spin_unlock(&res->spinlock); | ||
| 2696 | goto leave; | 2639 | goto leave; |
| 2697 | } | ||
| 2698 | spin_unlock(&res->spinlock); | ||
| 2699 | 2640 | ||
| 2700 | /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ | 2641 | /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ |
| 2701 | spin_unlock(&dlm->spinlock); | 2642 | spin_unlock(&dlm->spinlock); |
| 2702 | lock_dropped = 1; | 2643 | lock_dropped = 1; |
| 2703 | while (1) { | 2644 | ret = dlm_migrate_lockres(dlm, res, target); |
| 2704 | ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES); | 2645 | if (ret) |
| 2705 | if (ret >= 0) | 2646 | mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n", |
| 2706 | break; | 2647 | dlm->name, res->lockname.len, res->lockname.name, |
| 2707 | if (ret == -ENOTEMPTY) { | 2648 | target, ret); |
| 2708 | mlog(ML_ERROR, "lockres %.*s still has local locks!\n", | ||
| 2709 | res->lockname.len, res->lockname.name); | ||
| 2710 | BUG(); | ||
| 2711 | } | ||
| 2712 | |||
| 2713 | mlog(0, "lockres %.*s: migrate failed, " | ||
| 2714 | "retrying\n", res->lockname.len, | ||
| 2715 | res->lockname.name); | ||
| 2716 | msleep(DLM_MIGRATION_RETRY_MS); | ||
| 2717 | } | ||
| 2718 | spin_lock(&dlm->spinlock); | 2649 | spin_lock(&dlm->spinlock); |
| 2719 | leave: | 2650 | leave: |
| 2720 | return lock_dropped; | 2651 | return lock_dropped; |
| @@ -2898,61 +2829,55 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, | |||
| 2898 | } | 2829 | } |
| 2899 | } | 2830 | } |
| 2900 | 2831 | ||
| 2901 | /* for now this is not too intelligent. we will | 2832 | /* |
| 2902 | * need stats to make this do the right thing. | 2833 | * Pick a node to migrate the lock resource to. This function selects a |
| 2903 | * this just finds the first lock on one of the | 2834 | * potential target based first on the locks and then on refmap. It skips |
| 2904 | * queues and uses that node as the target. */ | 2835 | * nodes that are in the process of exiting the domain. |
| 2836 | */ | ||
| 2905 | static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, | 2837 | static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, |
| 2906 | struct dlm_lock_resource *res) | 2838 | struct dlm_lock_resource *res) |
| 2907 | { | 2839 | { |
| 2908 | int i; | 2840 | enum dlm_lockres_list idx; |
| 2909 | struct list_head *queue = &res->granted; | 2841 | struct list_head *queue = &res->granted; |
| 2910 | struct dlm_lock *lock; | 2842 | struct dlm_lock *lock; |
| 2911 | int nodenum; | 2843 | int noderef; |
| 2844 | u8 nodenum = O2NM_MAX_NODES; | ||
| 2912 | 2845 | ||
| 2913 | assert_spin_locked(&dlm->spinlock); | 2846 | assert_spin_locked(&dlm->spinlock); |
| 2847 | assert_spin_locked(&res->spinlock); | ||
| 2914 | 2848 | ||
| 2915 | spin_lock(&res->spinlock); | 2849 | /* Go through all the locks */ |
| 2916 | for (i=0; i<3; i++) { | 2850 | for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) { |
| 2851 | queue = dlm_list_idx_to_ptr(res, idx); | ||
| 2917 | list_for_each_entry(lock, queue, list) { | 2852 | list_for_each_entry(lock, queue, list) { |
| 2918 | /* up to the caller to make sure this node | 2853 | if (lock->ml.node == dlm->node_num) |
| 2919 | * is alive */ | 2854 | continue; |
| 2920 | if (lock->ml.node != dlm->node_num) { | 2855 | if (test_bit(lock->ml.node, dlm->exit_domain_map)) |
| 2921 | spin_unlock(&res->spinlock); | 2856 | continue; |
| 2922 | return lock->ml.node; | 2857 | nodenum = lock->ml.node; |
| 2923 | } | 2858 | goto bail; |
| 2924 | } | 2859 | } |
| 2925 | queue++; | ||
| 2926 | } | ||
| 2927 | |||
| 2928 | nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | ||
| 2929 | if (nodenum < O2NM_MAX_NODES) { | ||
| 2930 | spin_unlock(&res->spinlock); | ||
| 2931 | return nodenum; | ||
| 2932 | } | 2860 | } |
| 2933 | spin_unlock(&res->spinlock); | ||
| 2934 | mlog(0, "have not found a suitable target yet! checking domain map\n"); | ||
| 2935 | 2861 | ||
| 2936 | /* ok now we're getting desperate. pick anyone alive. */ | 2862 | /* Go thru the refmap */ |
| 2937 | nodenum = -1; | 2863 | noderef = -1; |
| 2938 | while (1) { | 2864 | while (1) { |
| 2939 | nodenum = find_next_bit(dlm->domain_map, | 2865 | noderef = find_next_bit(res->refmap, O2NM_MAX_NODES, |
| 2940 | O2NM_MAX_NODES, nodenum+1); | 2866 | noderef + 1); |
| 2941 | mlog(0, "found %d in domain map\n", nodenum); | 2867 | if (noderef >= O2NM_MAX_NODES) |
| 2942 | if (nodenum >= O2NM_MAX_NODES) | ||
| 2943 | break; | 2868 | break; |
| 2944 | if (nodenum != dlm->node_num) { | 2869 | if (noderef == dlm->node_num) |
| 2945 | mlog(0, "picking %d\n", nodenum); | 2870 | continue; |
| 2946 | return nodenum; | 2871 | if (test_bit(noderef, dlm->exit_domain_map)) |
| 2947 | } | 2872 | continue; |
| 2873 | nodenum = noderef; | ||
| 2874 | goto bail; | ||
| 2948 | } | 2875 | } |
| 2949 | 2876 | ||
| 2950 | mlog(0, "giving up. no master to migrate to\n"); | 2877 | bail: |
| 2951 | return DLM_LOCK_RES_OWNER_UNKNOWN; | 2878 | return nodenum; |
| 2952 | } | 2879 | } |
| 2953 | 2880 | ||
| 2954 | |||
| 2955 | |||
| 2956 | /* this is called by the new master once all lockres | 2881 | /* this is called by the new master once all lockres |
| 2957 | * data has been received */ | 2882 | * data has been received */ |
| 2958 | static int dlm_do_migrate_request(struct dlm_ctxt *dlm, | 2883 | static int dlm_do_migrate_request(struct dlm_ctxt *dlm, |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index f1beb6fc254..7efab6d28a2 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
| @@ -2393,6 +2393,7 @@ static void __dlm_hb_node_down(struct dlm_ctxt *dlm, int idx) | |||
| 2393 | 2393 | ||
| 2394 | mlog(0, "node %u being removed from domain map!\n", idx); | 2394 | mlog(0, "node %u being removed from domain map!\n", idx); |
| 2395 | clear_bit(idx, dlm->domain_map); | 2395 | clear_bit(idx, dlm->domain_map); |
| 2396 | clear_bit(idx, dlm->exit_domain_map); | ||
| 2396 | /* wake up migration waiters if a node goes down. | 2397 | /* wake up migration waiters if a node goes down. |
| 2397 | * perhaps later we can genericize this for other waiters. */ | 2398 | * perhaps later we can genericize this for other waiters. */ |
| 2398 | wake_up(&dlm->migration_wq); | 2399 | wake_up(&dlm->migration_wq); |
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 8c5c0eddc36..b4207679704 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c | |||
| @@ -88,7 +88,7 @@ struct workqueue_struct *user_dlm_worker; | |||
| 88 | * signifies a bast fired on the lock. | 88 | * signifies a bast fired on the lock. |
| 89 | */ | 89 | */ |
| 90 | #define DLMFS_CAPABILITIES "bast stackglue" | 90 | #define DLMFS_CAPABILITIES "bast stackglue" |
| 91 | extern int param_set_dlmfs_capabilities(const char *val, | 91 | static int param_set_dlmfs_capabilities(const char *val, |
| 92 | struct kernel_param *kp) | 92 | struct kernel_param *kp) |
| 93 | { | 93 | { |
| 94 | printk(KERN_ERR "%s: readonly parameter\n", kp->name); | 94 | printk(KERN_ERR "%s: readonly parameter\n", kp->name); |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 89659d6dc20..b1e35a392ca 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -2670,6 +2670,7 @@ const struct file_operations ocfs2_fops_no_plocks = { | |||
| 2670 | .flock = ocfs2_flock, | 2670 | .flock = ocfs2_flock, |
| 2671 | .splice_read = ocfs2_file_splice_read, | 2671 | .splice_read = ocfs2_file_splice_read, |
| 2672 | .splice_write = ocfs2_file_splice_write, | 2672 | .splice_write = ocfs2_file_splice_write, |
| 2673 | .fallocate = ocfs2_fallocate, | ||
| 2673 | }; | 2674 | }; |
| 2674 | 2675 | ||
| 2675 | const struct file_operations ocfs2_dops_no_plocks = { | 2676 | const struct file_operations ocfs2_dops_no_plocks = { |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 59100598b0c..bc91072b721 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
| @@ -952,6 +952,29 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
| 952 | return -EFAULT; | 952 | return -EFAULT; |
| 953 | 953 | ||
| 954 | return ocfs2_info_handle(inode, &info, 0); | 954 | return ocfs2_info_handle(inode, &info, 0); |
| 955 | case FITRIM: | ||
| 956 | { | ||
| 957 | struct super_block *sb = inode->i_sb; | ||
| 958 | struct fstrim_range range; | ||
| 959 | int ret = 0; | ||
| 960 | |||
| 961 | if (!capable(CAP_SYS_ADMIN)) | ||
| 962 | return -EPERM; | ||
| 963 | |||
| 964 | if (copy_from_user(&range, (struct fstrim_range *)arg, | ||
| 965 | sizeof(range))) | ||
| 966 | return -EFAULT; | ||
| 967 | |||
| 968 | ret = ocfs2_trim_fs(sb, &range); | ||
| 969 | if (ret < 0) | ||
| 970 | return ret; | ||
| 971 | |||
| 972 | if (copy_to_user((struct fstrim_range *)arg, &range, | ||
| 973 | sizeof(range))) | ||
| 974 | return -EFAULT; | ||
| 975 | |||
| 976 | return 0; | ||
| 977 | } | ||
| 955 | case OCFS2_IOC_MOVE_EXT: | 978 | case OCFS2_IOC_MOVE_EXT: |
| 956 | return ocfs2_ioctl_move_extents(filp, (void __user *)arg); | 979 | return ocfs2_ioctl_move_extents(filp, (void __user *)arg); |
| 957 | default: | 980 | default: |
| @@ -981,6 +1004,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
| 981 | case OCFS2_IOC_GROUP_EXTEND: | 1004 | case OCFS2_IOC_GROUP_EXTEND: |
| 982 | case OCFS2_IOC_GROUP_ADD: | 1005 | case OCFS2_IOC_GROUP_ADD: |
| 983 | case OCFS2_IOC_GROUP_ADD64: | 1006 | case OCFS2_IOC_GROUP_ADD64: |
| 1007 | case FITRIM: | ||
| 984 | break; | 1008 | break; |
| 985 | case OCFS2_IOC_REFLINK: | 1009 | case OCFS2_IOC_REFLINK: |
| 986 | if (copy_from_user(&args, (struct reflink_arguments *)arg, | 1010 | if (copy_from_user(&args, (struct reflink_arguments *)arg, |
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index a1dae5bb54a..3b481f49063 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h | |||
| @@ -688,6 +688,31 @@ TRACE_EVENT(ocfs2_cache_block_dealloc, | |||
| 688 | __entry->blkno, __entry->bit) | 688 | __entry->blkno, __entry->bit) |
| 689 | ); | 689 | ); |
| 690 | 690 | ||
| 691 | TRACE_EVENT(ocfs2_trim_extent, | ||
| 692 | TP_PROTO(struct super_block *sb, unsigned long long blk, | ||
| 693 | unsigned long long count), | ||
| 694 | TP_ARGS(sb, blk, count), | ||
| 695 | TP_STRUCT__entry( | ||
| 696 | __field(int, dev_major) | ||
| 697 | __field(int, dev_minor) | ||
| 698 | __field(unsigned long long, blk) | ||
| 699 | __field(__u64, count) | ||
| 700 | ), | ||
| 701 | TP_fast_assign( | ||
| 702 | __entry->dev_major = MAJOR(sb->s_dev); | ||
| 703 | __entry->dev_minor = MINOR(sb->s_dev); | ||
| 704 | __entry->blk = blk; | ||
| 705 | __entry->count = count; | ||
| 706 | ), | ||
| 707 | TP_printk("%d %d %llu %llu", | ||
| 708 | __entry->dev_major, __entry->dev_minor, | ||
| 709 | __entry->blk, __entry->count) | ||
| 710 | ); | ||
| 711 | |||
| 712 | DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_trim_group); | ||
| 713 | |||
| 714 | DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_trim_fs); | ||
| 715 | |||
| 691 | /* End of trace events for fs/ocfs2/alloc.c. */ | 716 | /* End of trace events for fs/ocfs2/alloc.c. */ |
| 692 | 717 | ||
| 693 | /* Trace events for fs/ocfs2/localalloc.c. */ | 718 | /* Trace events for fs/ocfs2/localalloc.c. */ |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 5a521c74885..823bc35334e 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -1566,7 +1566,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
| 1566 | if (osb->preferred_slot != OCFS2_INVALID_SLOT) | 1566 | if (osb->preferred_slot != OCFS2_INVALID_SLOT) |
| 1567 | seq_printf(s, ",preferred_slot=%d", osb->preferred_slot); | 1567 | seq_printf(s, ",preferred_slot=%d", osb->preferred_slot); |
| 1568 | 1568 | ||
| 1569 | if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM) | 1569 | if (!(mnt->mnt_flags & MNT_NOATIME) && !(mnt->mnt_flags & MNT_RELATIME)) |
| 1570 | seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); | 1570 | seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum); |
| 1571 | 1571 | ||
| 1572 | if (osb->osb_commit_interval) | 1572 | if (osb->osb_commit_interval) |
