diff options
Diffstat (limited to 'fs/ocfs2')
46 files changed, 2563 insertions, 421 deletions
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index da702294d7e7..391915093fe1 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
@@ -209,7 +209,10 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh, | |||
209 | } | 209 | } |
210 | 210 | ||
211 | inode->i_mode = new_mode; | 211 | inode->i_mode = new_mode; |
212 | inode->i_ctime = CURRENT_TIME; | ||
212 | di->i_mode = cpu_to_le16(inode->i_mode); | 213 | di->i_mode = cpu_to_le16(inode->i_mode); |
214 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | ||
215 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | ||
213 | 216 | ||
214 | ocfs2_journal_dirty(handle, di_bh); | 217 | ocfs2_journal_dirty(handle, di_bh); |
215 | 218 | ||
@@ -290,12 +293,30 @@ static int ocfs2_set_acl(handle_t *handle, | |||
290 | 293 | ||
291 | int ocfs2_check_acl(struct inode *inode, int mask) | 294 | int ocfs2_check_acl(struct inode *inode, int mask) |
292 | { | 295 | { |
293 | struct posix_acl *acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS); | 296 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
297 | struct buffer_head *di_bh = NULL; | ||
298 | struct posix_acl *acl; | ||
299 | int ret = -EAGAIN; | ||
294 | 300 | ||
295 | if (IS_ERR(acl)) | 301 | if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) |
302 | return ret; | ||
303 | |||
304 | ret = ocfs2_read_inode_block(inode, &di_bh); | ||
305 | if (ret < 0) { | ||
306 | mlog_errno(ret); | ||
307 | return ret; | ||
308 | } | ||
309 | |||
310 | acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, di_bh); | ||
311 | |||
312 | brelse(di_bh); | ||
313 | |||
314 | if (IS_ERR(acl)) { | ||
315 | mlog_errno(PTR_ERR(acl)); | ||
296 | return PTR_ERR(acl); | 316 | return PTR_ERR(acl); |
317 | } | ||
297 | if (acl) { | 318 | if (acl) { |
298 | int ret = posix_acl_permission(inode, acl, mask); | 319 | ret = posix_acl_permission(inode, acl, mask); |
299 | posix_acl_release(acl); | 320 | posix_acl_release(acl); |
300 | return ret; | 321 | return ret; |
301 | } | 322 | } |
@@ -344,7 +365,7 @@ int ocfs2_init_acl(handle_t *handle, | |||
344 | { | 365 | { |
345 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 366 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
346 | struct posix_acl *acl = NULL; | 367 | struct posix_acl *acl = NULL; |
347 | int ret = 0; | 368 | int ret = 0, ret2; |
348 | mode_t mode; | 369 | mode_t mode; |
349 | 370 | ||
350 | if (!S_ISLNK(inode->i_mode)) { | 371 | if (!S_ISLNK(inode->i_mode)) { |
@@ -381,7 +402,12 @@ int ocfs2_init_acl(handle_t *handle, | |||
381 | mode = inode->i_mode; | 402 | mode = inode->i_mode; |
382 | ret = posix_acl_create_masq(clone, &mode); | 403 | ret = posix_acl_create_masq(clone, &mode); |
383 | if (ret >= 0) { | 404 | if (ret >= 0) { |
384 | ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode); | 405 | ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode); |
406 | if (ret2) { | ||
407 | mlog_errno(ret2); | ||
408 | ret = ret2; | ||
409 | goto cleanup; | ||
410 | } | ||
385 | if (ret > 0) { | 411 | if (ret > 0) { |
386 | ret = ocfs2_set_acl(handle, inode, | 412 | ret = ocfs2_set_acl(handle, inode, |
387 | di_bh, ACL_TYPE_ACCESS, | 413 | di_bh, ACL_TYPE_ACCESS, |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 215e12ce1d85..592fae5007d1 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -6672,7 +6672,7 @@ int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end, | |||
6672 | last_page_bytes = PAGE_ALIGN(end); | 6672 | last_page_bytes = PAGE_ALIGN(end); |
6673 | index = start >> PAGE_CACHE_SHIFT; | 6673 | index = start >> PAGE_CACHE_SHIFT; |
6674 | do { | 6674 | do { |
6675 | pages[numpages] = grab_cache_page(mapping, index); | 6675 | pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS); |
6676 | if (!pages[numpages]) { | 6676 | if (!pages[numpages]) { |
6677 | ret = -ENOMEM; | 6677 | ret = -ENOMEM; |
6678 | mlog_errno(ret); | 6678 | mlog_errno(ret); |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 0de69c9a08be..5cfeee118158 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -883,8 +883,8 @@ struct ocfs2_write_ctxt { | |||
883 | * out in so that future reads from that region will get | 883 | * out in so that future reads from that region will get |
884 | * zero's. | 884 | * zero's. |
885 | */ | 885 | */ |
886 | struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; | ||
887 | unsigned int w_num_pages; | 886 | unsigned int w_num_pages; |
887 | struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; | ||
888 | struct page *w_target_page; | 888 | struct page *w_target_page; |
889 | 889 | ||
890 | /* | 890 | /* |
@@ -1642,7 +1642,8 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh, | |||
1642 | return ret; | 1642 | return ret; |
1643 | } | 1643 | } |
1644 | 1644 | ||
1645 | int ocfs2_write_begin_nolock(struct address_space *mapping, | 1645 | int ocfs2_write_begin_nolock(struct file *filp, |
1646 | struct address_space *mapping, | ||
1646 | loff_t pos, unsigned len, unsigned flags, | 1647 | loff_t pos, unsigned len, unsigned flags, |
1647 | struct page **pagep, void **fsdata, | 1648 | struct page **pagep, void **fsdata, |
1648 | struct buffer_head *di_bh, struct page *mmap_page) | 1649 | struct buffer_head *di_bh, struct page *mmap_page) |
@@ -1692,7 +1693,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1692 | mlog_errno(ret); | 1693 | mlog_errno(ret); |
1693 | goto out; | 1694 | goto out; |
1694 | } else if (ret == 1) { | 1695 | } else if (ret == 1) { |
1695 | ret = ocfs2_refcount_cow(inode, di_bh, | 1696 | ret = ocfs2_refcount_cow(inode, filp, di_bh, |
1696 | wc->w_cpos, wc->w_clen, UINT_MAX); | 1697 | wc->w_cpos, wc->w_clen, UINT_MAX); |
1697 | if (ret) { | 1698 | if (ret) { |
1698 | mlog_errno(ret); | 1699 | mlog_errno(ret); |
@@ -1854,7 +1855,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, | |||
1854 | */ | 1855 | */ |
1855 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 1856 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
1856 | 1857 | ||
1857 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, | 1858 | ret = ocfs2_write_begin_nolock(file, mapping, pos, len, flags, pagep, |
1858 | fsdata, di_bh, NULL); | 1859 | fsdata, di_bh, NULL); |
1859 | if (ret) { | 1860 | if (ret) { |
1860 | mlog_errno(ret); | 1861 | mlog_errno(ret); |
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index c48e93ffc513..7606f663da6d 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h | |||
@@ -48,7 +48,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping, | |||
48 | loff_t pos, unsigned len, unsigned copied, | 48 | loff_t pos, unsigned len, unsigned copied, |
49 | struct page *page, void *fsdata); | 49 | struct page *page, void *fsdata); |
50 | 50 | ||
51 | int ocfs2_write_begin_nolock(struct address_space *mapping, | 51 | int ocfs2_write_begin_nolock(struct file *filp, |
52 | struct address_space *mapping, | ||
52 | loff_t pos, unsigned len, unsigned flags, | 53 | loff_t pos, unsigned len, unsigned flags, |
53 | struct page **pagep, void **fsdata, | 54 | struct page **pagep, void **fsdata, |
54 | struct buffer_head *di_bh, struct page *mmap_page); | 55 | struct buffer_head *di_bh, struct page *mmap_page); |
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index ec6d12339593..c7ee03c22226 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c | |||
@@ -439,7 +439,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, | |||
439 | 439 | ||
440 | ocfs2_blockcheck_inc_failure(stats); | 440 | ocfs2_blockcheck_inc_failure(stats); |
441 | mlog(ML_ERROR, | 441 | mlog(ML_ERROR, |
442 | "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", | 442 | "CRC32 failed: stored: 0x%x, computed 0x%x. Applying ECC.\n", |
443 | (unsigned int)check.bc_crc32e, (unsigned int)crc); | 443 | (unsigned int)check.bc_crc32e, (unsigned int)crc); |
444 | 444 | ||
445 | /* Ok, try ECC fixups */ | 445 | /* Ok, try ECC fixups */ |
@@ -453,7 +453,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, | |||
453 | goto out; | 453 | goto out; |
454 | } | 454 | } |
455 | 455 | ||
456 | mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", | 456 | mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n", |
457 | (unsigned int)check.bc_crc32e, (unsigned int)crc); | 457 | (unsigned int)check.bc_crc32e, (unsigned int)crc); |
458 | 458 | ||
459 | rc = -EIO; | 459 | rc = -EIO; |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 41d5f1f92d56..52c7557f3e25 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -62,10 +62,51 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | |||
62 | static LIST_HEAD(o2hb_node_events); | 62 | static LIST_HEAD(o2hb_node_events); |
63 | static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); | 63 | static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); |
64 | 64 | ||
65 | /* | ||
66 | * In global heartbeat, we maintain a series of region bitmaps. | ||
67 | * - o2hb_region_bitmap allows us to limit the region number to max region. | ||
68 | * - o2hb_live_region_bitmap tracks live regions (seen steady iterations). | ||
69 | * - o2hb_quorum_region_bitmap tracks live regions that have seen all nodes | ||
70 | * heartbeat on it. | ||
71 | * - o2hb_failed_region_bitmap tracks the regions that have seen io timeouts. | ||
72 | */ | ||
73 | static unsigned long o2hb_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
74 | static unsigned long o2hb_live_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
75 | static unsigned long o2hb_quorum_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
76 | static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
77 | |||
78 | #define O2HB_DB_TYPE_LIVENODES 0 | ||
79 | #define O2HB_DB_TYPE_LIVEREGIONS 1 | ||
80 | #define O2HB_DB_TYPE_QUORUMREGIONS 2 | ||
81 | #define O2HB_DB_TYPE_FAILEDREGIONS 3 | ||
82 | #define O2HB_DB_TYPE_REGION_LIVENODES 4 | ||
83 | #define O2HB_DB_TYPE_REGION_NUMBER 5 | ||
84 | #define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6 | ||
85 | struct o2hb_debug_buf { | ||
86 | int db_type; | ||
87 | int db_size; | ||
88 | int db_len; | ||
89 | void *db_data; | ||
90 | }; | ||
91 | |||
92 | static struct o2hb_debug_buf *o2hb_db_livenodes; | ||
93 | static struct o2hb_debug_buf *o2hb_db_liveregions; | ||
94 | static struct o2hb_debug_buf *o2hb_db_quorumregions; | ||
95 | static struct o2hb_debug_buf *o2hb_db_failedregions; | ||
96 | |||
65 | #define O2HB_DEBUG_DIR "o2hb" | 97 | #define O2HB_DEBUG_DIR "o2hb" |
66 | #define O2HB_DEBUG_LIVENODES "livenodes" | 98 | #define O2HB_DEBUG_LIVENODES "livenodes" |
99 | #define O2HB_DEBUG_LIVEREGIONS "live_regions" | ||
100 | #define O2HB_DEBUG_QUORUMREGIONS "quorum_regions" | ||
101 | #define O2HB_DEBUG_FAILEDREGIONS "failed_regions" | ||
102 | #define O2HB_DEBUG_REGION_NUMBER "num" | ||
103 | #define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms" | ||
104 | |||
67 | static struct dentry *o2hb_debug_dir; | 105 | static struct dentry *o2hb_debug_dir; |
68 | static struct dentry *o2hb_debug_livenodes; | 106 | static struct dentry *o2hb_debug_livenodes; |
107 | static struct dentry *o2hb_debug_liveregions; | ||
108 | static struct dentry *o2hb_debug_quorumregions; | ||
109 | static struct dentry *o2hb_debug_failedregions; | ||
69 | 110 | ||
70 | static LIST_HEAD(o2hb_all_regions); | 111 | static LIST_HEAD(o2hb_all_regions); |
71 | 112 | ||
@@ -77,7 +118,19 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type); | |||
77 | 118 | ||
78 | #define O2HB_DEFAULT_BLOCK_BITS 9 | 119 | #define O2HB_DEFAULT_BLOCK_BITS 9 |
79 | 120 | ||
121 | enum o2hb_heartbeat_modes { | ||
122 | O2HB_HEARTBEAT_LOCAL = 0, | ||
123 | O2HB_HEARTBEAT_GLOBAL, | ||
124 | O2HB_HEARTBEAT_NUM_MODES, | ||
125 | }; | ||
126 | |||
127 | char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = { | ||
128 | "local", /* O2HB_HEARTBEAT_LOCAL */ | ||
129 | "global", /* O2HB_HEARTBEAT_GLOBAL */ | ||
130 | }; | ||
131 | |||
80 | unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; | 132 | unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; |
133 | unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; | ||
81 | 134 | ||
82 | /* Only sets a new threshold if there are no active regions. | 135 | /* Only sets a new threshold if there are no active regions. |
83 | * | 136 | * |
@@ -94,6 +147,22 @@ static void o2hb_dead_threshold_set(unsigned int threshold) | |||
94 | } | 147 | } |
95 | } | 148 | } |
96 | 149 | ||
150 | static int o2hb_global_hearbeat_mode_set(unsigned int hb_mode) | ||
151 | { | ||
152 | int ret = -1; | ||
153 | |||
154 | if (hb_mode < O2HB_HEARTBEAT_NUM_MODES) { | ||
155 | spin_lock(&o2hb_live_lock); | ||
156 | if (list_empty(&o2hb_all_regions)) { | ||
157 | o2hb_heartbeat_mode = hb_mode; | ||
158 | ret = 0; | ||
159 | } | ||
160 | spin_unlock(&o2hb_live_lock); | ||
161 | } | ||
162 | |||
163 | return ret; | ||
164 | } | ||
165 | |||
97 | struct o2hb_node_event { | 166 | struct o2hb_node_event { |
98 | struct list_head hn_item; | 167 | struct list_head hn_item; |
99 | enum o2hb_callback_type hn_event_type; | 168 | enum o2hb_callback_type hn_event_type; |
@@ -135,6 +204,18 @@ struct o2hb_region { | |||
135 | struct block_device *hr_bdev; | 204 | struct block_device *hr_bdev; |
136 | struct o2hb_disk_slot *hr_slots; | 205 | struct o2hb_disk_slot *hr_slots; |
137 | 206 | ||
207 | /* live node map of this region */ | ||
208 | unsigned long hr_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
209 | unsigned int hr_region_num; | ||
210 | |||
211 | struct dentry *hr_debug_dir; | ||
212 | struct dentry *hr_debug_livenodes; | ||
213 | struct dentry *hr_debug_regnum; | ||
214 | struct dentry *hr_debug_elapsed_time; | ||
215 | struct o2hb_debug_buf *hr_db_livenodes; | ||
216 | struct o2hb_debug_buf *hr_db_regnum; | ||
217 | struct o2hb_debug_buf *hr_db_elapsed_time; | ||
218 | |||
138 | /* let the person setting up hb wait for it to return until it | 219 | /* let the person setting up hb wait for it to return until it |
139 | * has reached a 'steady' state. This will be fixed when we have | 220 | * has reached a 'steady' state. This will be fixed when we have |
140 | * a more complete api that doesn't lead to this sort of fragility. */ | 221 | * a more complete api that doesn't lead to this sort of fragility. */ |
@@ -163,8 +244,19 @@ struct o2hb_bio_wait_ctxt { | |||
163 | int wc_error; | 244 | int wc_error; |
164 | }; | 245 | }; |
165 | 246 | ||
247 | static int o2hb_pop_count(void *map, int count) | ||
248 | { | ||
249 | int i = -1, pop = 0; | ||
250 | |||
251 | while ((i = find_next_bit(map, count, i + 1)) < count) | ||
252 | pop++; | ||
253 | return pop; | ||
254 | } | ||
255 | |||
166 | static void o2hb_write_timeout(struct work_struct *work) | 256 | static void o2hb_write_timeout(struct work_struct *work) |
167 | { | 257 | { |
258 | int failed, quorum; | ||
259 | unsigned long flags; | ||
168 | struct o2hb_region *reg = | 260 | struct o2hb_region *reg = |
169 | container_of(work, struct o2hb_region, | 261 | container_of(work, struct o2hb_region, |
170 | hr_write_timeout_work.work); | 262 | hr_write_timeout_work.work); |
@@ -172,6 +264,28 @@ static void o2hb_write_timeout(struct work_struct *work) | |||
172 | mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " | 264 | mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " |
173 | "milliseconds\n", reg->hr_dev_name, | 265 | "milliseconds\n", reg->hr_dev_name, |
174 | jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); | 266 | jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); |
267 | |||
268 | if (o2hb_global_heartbeat_active()) { | ||
269 | spin_lock_irqsave(&o2hb_live_lock, flags); | ||
270 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
271 | set_bit(reg->hr_region_num, o2hb_failed_region_bitmap); | ||
272 | failed = o2hb_pop_count(&o2hb_failed_region_bitmap, | ||
273 | O2NM_MAX_REGIONS); | ||
274 | quorum = o2hb_pop_count(&o2hb_quorum_region_bitmap, | ||
275 | O2NM_MAX_REGIONS); | ||
276 | spin_unlock_irqrestore(&o2hb_live_lock, flags); | ||
277 | |||
278 | mlog(ML_HEARTBEAT, "Number of regions %d, failed regions %d\n", | ||
279 | quorum, failed); | ||
280 | |||
281 | /* | ||
282 | * Fence if the number of failed regions >= half the number | ||
283 | * of quorum regions | ||
284 | */ | ||
285 | if ((failed << 1) < quorum) | ||
286 | return; | ||
287 | } | ||
288 | |||
175 | o2quo_disk_timeout(); | 289 | o2quo_disk_timeout(); |
176 | } | 290 | } |
177 | 291 | ||
@@ -180,6 +294,11 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg) | |||
180 | mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", | 294 | mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", |
181 | O2HB_MAX_WRITE_TIMEOUT_MS); | 295 | O2HB_MAX_WRITE_TIMEOUT_MS); |
182 | 296 | ||
297 | if (o2hb_global_heartbeat_active()) { | ||
298 | spin_lock(&o2hb_live_lock); | ||
299 | clear_bit(reg->hr_region_num, o2hb_failed_region_bitmap); | ||
300 | spin_unlock(&o2hb_live_lock); | ||
301 | } | ||
183 | cancel_delayed_work(®->hr_write_timeout_work); | 302 | cancel_delayed_work(®->hr_write_timeout_work); |
184 | reg->hr_last_timeout_start = jiffies; | 303 | reg->hr_last_timeout_start = jiffies; |
185 | schedule_delayed_work(®->hr_write_timeout_work, | 304 | schedule_delayed_work(®->hr_write_timeout_work, |
@@ -513,6 +632,8 @@ static void o2hb_queue_node_event(struct o2hb_node_event *event, | |||
513 | { | 632 | { |
514 | assert_spin_locked(&o2hb_live_lock); | 633 | assert_spin_locked(&o2hb_live_lock); |
515 | 634 | ||
635 | BUG_ON((!node) && (type != O2HB_NODE_DOWN_CB)); | ||
636 | |||
516 | event->hn_event_type = type; | 637 | event->hn_event_type = type; |
517 | event->hn_node = node; | 638 | event->hn_node = node; |
518 | event->hn_node_num = node_num; | 639 | event->hn_node_num = node_num; |
@@ -554,6 +675,35 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) | |||
554 | o2nm_node_put(node); | 675 | o2nm_node_put(node); |
555 | } | 676 | } |
556 | 677 | ||
678 | static void o2hb_set_quorum_device(struct o2hb_region *reg, | ||
679 | struct o2hb_disk_slot *slot) | ||
680 | { | ||
681 | assert_spin_locked(&o2hb_live_lock); | ||
682 | |||
683 | if (!o2hb_global_heartbeat_active()) | ||
684 | return; | ||
685 | |||
686 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
687 | return; | ||
688 | |||
689 | /* | ||
690 | * A region can be added to the quorum only when it sees all | ||
691 | * live nodes heartbeat on it. In other words, the region has been | ||
692 | * added to all nodes. | ||
693 | */ | ||
694 | if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap, | ||
695 | sizeof(o2hb_live_node_bitmap))) | ||
696 | return; | ||
697 | |||
698 | if (slot->ds_changed_samples < O2HB_LIVE_THRESHOLD) | ||
699 | return; | ||
700 | |||
701 | printk(KERN_NOTICE "o2hb: Region %s is now a quorum device\n", | ||
702 | config_item_name(®->hr_item)); | ||
703 | |||
704 | set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); | ||
705 | } | ||
706 | |||
557 | static int o2hb_check_slot(struct o2hb_region *reg, | 707 | static int o2hb_check_slot(struct o2hb_region *reg, |
558 | struct o2hb_disk_slot *slot) | 708 | struct o2hb_disk_slot *slot) |
559 | { | 709 | { |
@@ -565,14 +715,22 @@ static int o2hb_check_slot(struct o2hb_region *reg, | |||
565 | u64 cputime; | 715 | u64 cputime; |
566 | unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; | 716 | unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; |
567 | unsigned int slot_dead_ms; | 717 | unsigned int slot_dead_ms; |
718 | int tmp; | ||
568 | 719 | ||
569 | memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); | 720 | memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); |
570 | 721 | ||
571 | /* Is this correct? Do we assume that the node doesn't exist | 722 | /* |
572 | * if we're not configured for him? */ | 723 | * If a node is no longer configured but is still in the livemap, we |
724 | * may need to clear that bit from the livemap. | ||
725 | */ | ||
573 | node = o2nm_get_node_by_num(slot->ds_node_num); | 726 | node = o2nm_get_node_by_num(slot->ds_node_num); |
574 | if (!node) | 727 | if (!node) { |
575 | return 0; | 728 | spin_lock(&o2hb_live_lock); |
729 | tmp = test_bit(slot->ds_node_num, o2hb_live_node_bitmap); | ||
730 | spin_unlock(&o2hb_live_lock); | ||
731 | if (!tmp) | ||
732 | return 0; | ||
733 | } | ||
576 | 734 | ||
577 | if (!o2hb_verify_crc(reg, hb_block)) { | 735 | if (!o2hb_verify_crc(reg, hb_block)) { |
578 | /* all paths from here will drop o2hb_live_lock for | 736 | /* all paths from here will drop o2hb_live_lock for |
@@ -639,8 +797,12 @@ fire_callbacks: | |||
639 | mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n", | 797 | mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n", |
640 | slot->ds_node_num, (long long)slot->ds_last_generation); | 798 | slot->ds_node_num, (long long)slot->ds_last_generation); |
641 | 799 | ||
800 | set_bit(slot->ds_node_num, reg->hr_live_node_bitmap); | ||
801 | |||
642 | /* first on the list generates a callback */ | 802 | /* first on the list generates a callback */ |
643 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { | 803 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { |
804 | mlog(ML_HEARTBEAT, "o2hb: Add node %d to live nodes " | ||
805 | "bitmap\n", slot->ds_node_num); | ||
644 | set_bit(slot->ds_node_num, o2hb_live_node_bitmap); | 806 | set_bit(slot->ds_node_num, o2hb_live_node_bitmap); |
645 | 807 | ||
646 | o2hb_queue_node_event(&event, O2HB_NODE_UP_CB, node, | 808 | o2hb_queue_node_event(&event, O2HB_NODE_UP_CB, node, |
@@ -684,13 +846,18 @@ fire_callbacks: | |||
684 | mlog(ML_HEARTBEAT, "Node %d left my region\n", | 846 | mlog(ML_HEARTBEAT, "Node %d left my region\n", |
685 | slot->ds_node_num); | 847 | slot->ds_node_num); |
686 | 848 | ||
849 | clear_bit(slot->ds_node_num, reg->hr_live_node_bitmap); | ||
850 | |||
687 | /* last off the live_slot generates a callback */ | 851 | /* last off the live_slot generates a callback */ |
688 | list_del_init(&slot->ds_live_item); | 852 | list_del_init(&slot->ds_live_item); |
689 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { | 853 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { |
854 | mlog(ML_HEARTBEAT, "o2hb: Remove node %d from live " | ||
855 | "nodes bitmap\n", slot->ds_node_num); | ||
690 | clear_bit(slot->ds_node_num, o2hb_live_node_bitmap); | 856 | clear_bit(slot->ds_node_num, o2hb_live_node_bitmap); |
691 | 857 | ||
692 | o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node, | 858 | /* node can be null */ |
693 | slot->ds_node_num); | 859 | o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, |
860 | node, slot->ds_node_num); | ||
694 | 861 | ||
695 | changed = 1; | 862 | changed = 1; |
696 | } | 863 | } |
@@ -706,11 +873,14 @@ fire_callbacks: | |||
706 | slot->ds_equal_samples = 0; | 873 | slot->ds_equal_samples = 0; |
707 | } | 874 | } |
708 | out: | 875 | out: |
876 | o2hb_set_quorum_device(reg, slot); | ||
877 | |||
709 | spin_unlock(&o2hb_live_lock); | 878 | spin_unlock(&o2hb_live_lock); |
710 | 879 | ||
711 | o2hb_run_event_list(&event); | 880 | o2hb_run_event_list(&event); |
712 | 881 | ||
713 | o2nm_node_put(node); | 882 | if (node) |
883 | o2nm_node_put(node); | ||
714 | return changed; | 884 | return changed; |
715 | } | 885 | } |
716 | 886 | ||
@@ -737,6 +907,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
737 | { | 907 | { |
738 | int i, ret, highest_node, change = 0; | 908 | int i, ret, highest_node, change = 0; |
739 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 909 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
910 | unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
740 | struct o2hb_bio_wait_ctxt write_wc; | 911 | struct o2hb_bio_wait_ctxt write_wc; |
741 | 912 | ||
742 | ret = o2nm_configured_node_map(configured_nodes, | 913 | ret = o2nm_configured_node_map(configured_nodes, |
@@ -746,6 +917,17 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
746 | return ret; | 917 | return ret; |
747 | } | 918 | } |
748 | 919 | ||
920 | /* | ||
921 | * If a node is not configured but is in the livemap, we still need | ||
922 | * to read the slot so as to be able to remove it from the livemap. | ||
923 | */ | ||
924 | o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap)); | ||
925 | i = -1; | ||
926 | while ((i = find_next_bit(live_node_bitmap, | ||
927 | O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { | ||
928 | set_bit(i, configured_nodes); | ||
929 | } | ||
930 | |||
749 | highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); | 931 | highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); |
750 | if (highest_node >= O2NM_MAX_NODES) { | 932 | if (highest_node >= O2NM_MAX_NODES) { |
751 | mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); | 933 | mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); |
@@ -917,21 +1099,59 @@ static int o2hb_thread(void *data) | |||
917 | #ifdef CONFIG_DEBUG_FS | 1099 | #ifdef CONFIG_DEBUG_FS |
918 | static int o2hb_debug_open(struct inode *inode, struct file *file) | 1100 | static int o2hb_debug_open(struct inode *inode, struct file *file) |
919 | { | 1101 | { |
1102 | struct o2hb_debug_buf *db = inode->i_private; | ||
1103 | struct o2hb_region *reg; | ||
920 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 1104 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
921 | char *buf = NULL; | 1105 | char *buf = NULL; |
922 | int i = -1; | 1106 | int i = -1; |
923 | int out = 0; | 1107 | int out = 0; |
924 | 1108 | ||
1109 | /* max_nodes should be the largest bitmap we pass here */ | ||
1110 | BUG_ON(sizeof(map) < db->db_size); | ||
1111 | |||
925 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | 1112 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); |
926 | if (!buf) | 1113 | if (!buf) |
927 | goto bail; | 1114 | goto bail; |
928 | 1115 | ||
929 | o2hb_fill_node_map(map, sizeof(map)); | 1116 | switch (db->db_type) { |
1117 | case O2HB_DB_TYPE_LIVENODES: | ||
1118 | case O2HB_DB_TYPE_LIVEREGIONS: | ||
1119 | case O2HB_DB_TYPE_QUORUMREGIONS: | ||
1120 | case O2HB_DB_TYPE_FAILEDREGIONS: | ||
1121 | spin_lock(&o2hb_live_lock); | ||
1122 | memcpy(map, db->db_data, db->db_size); | ||
1123 | spin_unlock(&o2hb_live_lock); | ||
1124 | break; | ||
1125 | |||
1126 | case O2HB_DB_TYPE_REGION_LIVENODES: | ||
1127 | spin_lock(&o2hb_live_lock); | ||
1128 | reg = (struct o2hb_region *)db->db_data; | ||
1129 | memcpy(map, reg->hr_live_node_bitmap, db->db_size); | ||
1130 | spin_unlock(&o2hb_live_lock); | ||
1131 | break; | ||
1132 | |||
1133 | case O2HB_DB_TYPE_REGION_NUMBER: | ||
1134 | reg = (struct o2hb_region *)db->db_data; | ||
1135 | out += snprintf(buf + out, PAGE_SIZE - out, "%d\n", | ||
1136 | reg->hr_region_num); | ||
1137 | goto done; | ||
1138 | |||
1139 | case O2HB_DB_TYPE_REGION_ELAPSED_TIME: | ||
1140 | reg = (struct o2hb_region *)db->db_data; | ||
1141 | out += snprintf(buf + out, PAGE_SIZE - out, "%u\n", | ||
1142 | jiffies_to_msecs(jiffies - | ||
1143 | reg->hr_last_timeout_start)); | ||
1144 | goto done; | ||
1145 | |||
1146 | default: | ||
1147 | goto done; | ||
1148 | } | ||
930 | 1149 | ||
931 | while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) | 1150 | while ((i = find_next_bit(map, db->db_len, i + 1)) < db->db_len) |
932 | out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); | 1151 | out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); |
933 | out += snprintf(buf + out, PAGE_SIZE - out, "\n"); | 1152 | out += snprintf(buf + out, PAGE_SIZE - out, "\n"); |
934 | 1153 | ||
1154 | done: | ||
935 | i_size_write(inode, out); | 1155 | i_size_write(inode, out); |
936 | 1156 | ||
937 | file->private_data = buf; | 1157 | file->private_data = buf; |
@@ -978,10 +1198,104 @@ static const struct file_operations o2hb_debug_fops = { | |||
978 | 1198 | ||
979 | void o2hb_exit(void) | 1199 | void o2hb_exit(void) |
980 | { | 1200 | { |
981 | if (o2hb_debug_livenodes) | 1201 | kfree(o2hb_db_livenodes); |
982 | debugfs_remove(o2hb_debug_livenodes); | 1202 | kfree(o2hb_db_liveregions); |
983 | if (o2hb_debug_dir) | 1203 | kfree(o2hb_db_quorumregions); |
984 | debugfs_remove(o2hb_debug_dir); | 1204 | kfree(o2hb_db_failedregions); |
1205 | debugfs_remove(o2hb_debug_failedregions); | ||
1206 | debugfs_remove(o2hb_debug_quorumregions); | ||
1207 | debugfs_remove(o2hb_debug_liveregions); | ||
1208 | debugfs_remove(o2hb_debug_livenodes); | ||
1209 | debugfs_remove(o2hb_debug_dir); | ||
1210 | } | ||
1211 | |||
1212 | static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir, | ||
1213 | struct o2hb_debug_buf **db, int db_len, | ||
1214 | int type, int size, int len, void *data) | ||
1215 | { | ||
1216 | *db = kmalloc(db_len, GFP_KERNEL); | ||
1217 | if (!*db) | ||
1218 | return NULL; | ||
1219 | |||
1220 | (*db)->db_type = type; | ||
1221 | (*db)->db_size = size; | ||
1222 | (*db)->db_len = len; | ||
1223 | (*db)->db_data = data; | ||
1224 | |||
1225 | return debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db, | ||
1226 | &o2hb_debug_fops); | ||
1227 | } | ||
1228 | |||
1229 | static int o2hb_debug_init(void) | ||
1230 | { | ||
1231 | int ret = -ENOMEM; | ||
1232 | |||
1233 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); | ||
1234 | if (!o2hb_debug_dir) { | ||
1235 | mlog_errno(ret); | ||
1236 | goto bail; | ||
1237 | } | ||
1238 | |||
1239 | o2hb_debug_livenodes = o2hb_debug_create(O2HB_DEBUG_LIVENODES, | ||
1240 | o2hb_debug_dir, | ||
1241 | &o2hb_db_livenodes, | ||
1242 | sizeof(*o2hb_db_livenodes), | ||
1243 | O2HB_DB_TYPE_LIVENODES, | ||
1244 | sizeof(o2hb_live_node_bitmap), | ||
1245 | O2NM_MAX_NODES, | ||
1246 | o2hb_live_node_bitmap); | ||
1247 | if (!o2hb_debug_livenodes) { | ||
1248 | mlog_errno(ret); | ||
1249 | goto bail; | ||
1250 | } | ||
1251 | |||
1252 | o2hb_debug_liveregions = o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS, | ||
1253 | o2hb_debug_dir, | ||
1254 | &o2hb_db_liveregions, | ||
1255 | sizeof(*o2hb_db_liveregions), | ||
1256 | O2HB_DB_TYPE_LIVEREGIONS, | ||
1257 | sizeof(o2hb_live_region_bitmap), | ||
1258 | O2NM_MAX_REGIONS, | ||
1259 | o2hb_live_region_bitmap); | ||
1260 | if (!o2hb_debug_liveregions) { | ||
1261 | mlog_errno(ret); | ||
1262 | goto bail; | ||
1263 | } | ||
1264 | |||
1265 | o2hb_debug_quorumregions = | ||
1266 | o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS, | ||
1267 | o2hb_debug_dir, | ||
1268 | &o2hb_db_quorumregions, | ||
1269 | sizeof(*o2hb_db_quorumregions), | ||
1270 | O2HB_DB_TYPE_QUORUMREGIONS, | ||
1271 | sizeof(o2hb_quorum_region_bitmap), | ||
1272 | O2NM_MAX_REGIONS, | ||
1273 | o2hb_quorum_region_bitmap); | ||
1274 | if (!o2hb_debug_quorumregions) { | ||
1275 | mlog_errno(ret); | ||
1276 | goto bail; | ||
1277 | } | ||
1278 | |||
1279 | o2hb_debug_failedregions = | ||
1280 | o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS, | ||
1281 | o2hb_debug_dir, | ||
1282 | &o2hb_db_failedregions, | ||
1283 | sizeof(*o2hb_db_failedregions), | ||
1284 | O2HB_DB_TYPE_FAILEDREGIONS, | ||
1285 | sizeof(o2hb_failed_region_bitmap), | ||
1286 | O2NM_MAX_REGIONS, | ||
1287 | o2hb_failed_region_bitmap); | ||
1288 | if (!o2hb_debug_failedregions) { | ||
1289 | mlog_errno(ret); | ||
1290 | goto bail; | ||
1291 | } | ||
1292 | |||
1293 | ret = 0; | ||
1294 | bail: | ||
1295 | if (ret) | ||
1296 | o2hb_exit(); | ||
1297 | |||
1298 | return ret; | ||
985 | } | 1299 | } |
986 | 1300 | ||
987 | int o2hb_init(void) | 1301 | int o2hb_init(void) |
@@ -997,24 +1311,12 @@ int o2hb_init(void) | |||
997 | INIT_LIST_HEAD(&o2hb_node_events); | 1311 | INIT_LIST_HEAD(&o2hb_node_events); |
998 | 1312 | ||
999 | memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); | 1313 | memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); |
1314 | memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap)); | ||
1315 | memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap)); | ||
1316 | memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap)); | ||
1317 | memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap)); | ||
1000 | 1318 | ||
1001 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); | 1319 | return o2hb_debug_init(); |
1002 | if (!o2hb_debug_dir) { | ||
1003 | mlog_errno(-ENOMEM); | ||
1004 | return -ENOMEM; | ||
1005 | } | ||
1006 | |||
1007 | o2hb_debug_livenodes = debugfs_create_file(O2HB_DEBUG_LIVENODES, | ||
1008 | S_IFREG|S_IRUSR, | ||
1009 | o2hb_debug_dir, NULL, | ||
1010 | &o2hb_debug_fops); | ||
1011 | if (!o2hb_debug_livenodes) { | ||
1012 | mlog_errno(-ENOMEM); | ||
1013 | debugfs_remove(o2hb_debug_dir); | ||
1014 | return -ENOMEM; | ||
1015 | } | ||
1016 | |||
1017 | return 0; | ||
1018 | } | 1320 | } |
1019 | 1321 | ||
1020 | /* if we're already in a callback then we're already serialized by the sem */ | 1322 | /* if we're already in a callback then we're already serialized by the sem */ |
@@ -1078,6 +1380,13 @@ static void o2hb_region_release(struct config_item *item) | |||
1078 | if (reg->hr_slots) | 1380 | if (reg->hr_slots) |
1079 | kfree(reg->hr_slots); | 1381 | kfree(reg->hr_slots); |
1080 | 1382 | ||
1383 | kfree(reg->hr_db_regnum); | ||
1384 | kfree(reg->hr_db_livenodes); | ||
1385 | debugfs_remove(reg->hr_debug_livenodes); | ||
1386 | debugfs_remove(reg->hr_debug_regnum); | ||
1387 | debugfs_remove(reg->hr_debug_elapsed_time); | ||
1388 | debugfs_remove(reg->hr_debug_dir); | ||
1389 | |||
1081 | spin_lock(&o2hb_live_lock); | 1390 | spin_lock(&o2hb_live_lock); |
1082 | list_del(&reg->hr_all_item); | 1391 | list_del(&reg->hr_all_item); |
1083 | spin_unlock(&o2hb_live_lock); | 1392 | spin_unlock(&o2hb_live_lock); |
@@ -1441,6 +1750,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1441 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ | 1750 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ |
1442 | spin_lock(&o2hb_live_lock); | 1751 | spin_lock(&o2hb_live_lock); |
1443 | hb_task = reg->hr_task; | 1752 | hb_task = reg->hr_task; |
1753 | if (o2hb_global_heartbeat_active()) | ||
1754 | set_bit(reg->hr_region_num, o2hb_live_region_bitmap); | ||
1444 | spin_unlock(&o2hb_live_lock); | 1755 | spin_unlock(&o2hb_live_lock); |
1445 | 1756 | ||
1446 | if (hb_task) | 1757 | if (hb_task) |
@@ -1448,6 +1759,10 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1448 | else | 1759 | else |
1449 | ret = -EIO; | 1760 | ret = -EIO; |
1450 | 1761 | ||
1762 | if (hb_task && o2hb_global_heartbeat_active()) | ||
1763 | printk(KERN_NOTICE "o2hb: Heartbeat started on region %s\n", | ||
1764 | config_item_name(&reg->hr_item)); | ||
1765 | |||
1451 | out: | 1766 | out: |
1452 | if (filp) | 1767 | if (filp) |
1453 | fput(filp); | 1768 | fput(filp); |
@@ -1586,21 +1901,94 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group | |||
1586 | : NULL; | 1901 | : NULL; |
1587 | } | 1902 | } |
1588 | 1903 | ||
1904 | static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | ||
1905 | { | ||
1906 | int ret = -ENOMEM; | ||
1907 | |||
1908 | reg->hr_debug_dir = | ||
1909 | debugfs_create_dir(config_item_name(®->hr_item), dir); | ||
1910 | if (!reg->hr_debug_dir) { | ||
1911 | mlog_errno(ret); | ||
1912 | goto bail; | ||
1913 | } | ||
1914 | |||
1915 | reg->hr_debug_livenodes = | ||
1916 | o2hb_debug_create(O2HB_DEBUG_LIVENODES, | ||
1917 | reg->hr_debug_dir, | ||
1918 | &(reg->hr_db_livenodes), | ||
1919 | sizeof(*(reg->hr_db_livenodes)), | ||
1920 | O2HB_DB_TYPE_REGION_LIVENODES, | ||
1921 | sizeof(reg->hr_live_node_bitmap), | ||
1922 | O2NM_MAX_NODES, reg); | ||
1923 | if (!reg->hr_debug_livenodes) { | ||
1924 | mlog_errno(ret); | ||
1925 | goto bail; | ||
1926 | } | ||
1927 | |||
1928 | reg->hr_debug_regnum = | ||
1929 | o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER, | ||
1930 | reg->hr_debug_dir, | ||
1931 | &(reg->hr_db_regnum), | ||
1932 | sizeof(*(reg->hr_db_regnum)), | ||
1933 | O2HB_DB_TYPE_REGION_NUMBER, | ||
1934 | 0, O2NM_MAX_NODES, reg); | ||
1935 | if (!reg->hr_debug_regnum) { | ||
1936 | mlog_errno(ret); | ||
1937 | goto bail; | ||
1938 | } | ||
1939 | |||
1940 | reg->hr_debug_elapsed_time = | ||
1941 | o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME, | ||
1942 | reg->hr_debug_dir, | ||
1943 | &(reg->hr_db_elapsed_time), | ||
1944 | sizeof(*(reg->hr_db_elapsed_time)), | ||
1945 | O2HB_DB_TYPE_REGION_ELAPSED_TIME, | ||
1946 | 0, 0, reg); | ||
1947 | if (!reg->hr_debug_elapsed_time) { | ||
1948 | mlog_errno(ret); | ||
1949 | goto bail; | ||
1950 | } | ||
1951 | |||
1952 | ret = 0; | ||
1953 | bail: | ||
1954 | return ret; | ||
1955 | } | ||
1956 | |||
1589 | static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group, | 1957 | static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group, |
1590 | const char *name) | 1958 | const char *name) |
1591 | { | 1959 | { |
1592 | struct o2hb_region *reg = NULL; | 1960 | struct o2hb_region *reg = NULL; |
1961 | int ret; | ||
1593 | 1962 | ||
1594 | reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL); | 1963 | reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL); |
1595 | if (reg == NULL) | 1964 | if (reg == NULL) |
1596 | return ERR_PTR(-ENOMEM); | 1965 | return ERR_PTR(-ENOMEM); |
1597 | 1966 | ||
1598 | config_item_init_type_name(®->hr_item, name, &o2hb_region_type); | 1967 | if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) |
1968 | return ERR_PTR(-ENAMETOOLONG); | ||
1599 | 1969 | ||
1600 | spin_lock(&o2hb_live_lock); | 1970 | spin_lock(&o2hb_live_lock); |
1971 | reg->hr_region_num = 0; | ||
1972 | if (o2hb_global_heartbeat_active()) { | ||
1973 | reg->hr_region_num = find_first_zero_bit(o2hb_region_bitmap, | ||
1974 | O2NM_MAX_REGIONS); | ||
1975 | if (reg->hr_region_num >= O2NM_MAX_REGIONS) { | ||
1976 | spin_unlock(&o2hb_live_lock); | ||
1977 | return ERR_PTR(-EFBIG); | ||
1978 | } | ||
1979 | set_bit(reg->hr_region_num, o2hb_region_bitmap); | ||
1980 | } | ||
1601 | list_add_tail(®->hr_all_item, &o2hb_all_regions); | 1981 | list_add_tail(®->hr_all_item, &o2hb_all_regions); |
1602 | spin_unlock(&o2hb_live_lock); | 1982 | spin_unlock(&o2hb_live_lock); |
1603 | 1983 | ||
1984 | config_item_init_type_name(®->hr_item, name, &o2hb_region_type); | ||
1985 | |||
1986 | ret = o2hb_debug_region_init(reg, o2hb_debug_dir); | ||
1987 | if (ret) { | ||
1988 | config_item_put(®->hr_item); | ||
1989 | return ERR_PTR(ret); | ||
1990 | } | ||
1991 | |||
1604 | return ®->hr_item; | 1992 | return ®->hr_item; |
1605 | } | 1993 | } |
1606 | 1994 | ||
@@ -1612,6 +2000,10 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, | |||
1612 | 2000 | ||
1613 | /* stop the thread when the user removes the region dir */ | 2001 | /* stop the thread when the user removes the region dir */ |
1614 | spin_lock(&o2hb_live_lock); | 2002 | spin_lock(&o2hb_live_lock); |
2003 | if (o2hb_global_heartbeat_active()) { | ||
2004 | clear_bit(reg->hr_region_num, o2hb_region_bitmap); | ||
2005 | clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); | ||
2006 | } | ||
1615 | hb_task = reg->hr_task; | 2007 | hb_task = reg->hr_task; |
1616 | reg->hr_task = NULL; | 2008 | reg->hr_task = NULL; |
1617 | spin_unlock(&o2hb_live_lock); | 2009 | spin_unlock(&o2hb_live_lock); |
@@ -1628,6 +2020,9 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, | |||
1628 | wake_up(&o2hb_steady_queue); | 2020 | wake_up(&o2hb_steady_queue); |
1629 | } | 2021 | } |
1630 | 2022 | ||
2023 | if (o2hb_global_heartbeat_active()) | ||
2024 | printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n", | ||
2025 | config_item_name(&reg->hr_item)); | ||
1631 | config_item_put(item); | 2026 | config_item_put(item); |
1632 | } | 2027 | } |
1633 | 2028 | ||
@@ -1688,6 +2083,41 @@ static ssize_t o2hb_heartbeat_group_threshold_store(struct o2hb_heartbeat_group | |||
1688 | return count; | 2083 | return count; |
1689 | } | 2084 | } |
1690 | 2085 | ||
2086 | static | ||
2087 | ssize_t o2hb_heartbeat_group_mode_show(struct o2hb_heartbeat_group *group, | ||
2088 | char *page) | ||
2089 | { | ||
2090 | return sprintf(page, "%s\n", | ||
2091 | o2hb_heartbeat_mode_desc[o2hb_heartbeat_mode]); | ||
2092 | } | ||
2093 | |||
2094 | static | ||
2095 | ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, | ||
2096 | const char *page, size_t count) | ||
2097 | { | ||
2098 | unsigned int i; | ||
2099 | int ret; | ||
2100 | size_t len; | ||
2101 | |||
2102 | len = (page[count - 1] == '\n') ? count - 1 : count; | ||
2103 | if (!len) | ||
2104 | return -EINVAL; | ||
2105 | |||
2106 | for (i = 0; i < O2HB_HEARTBEAT_NUM_MODES; ++i) { | ||
2107 | if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len)) | ||
2108 | continue; | ||
2109 | |||
2110 | ret = o2hb_global_hearbeat_mode_set(i); | ||
2111 | if (!ret) | ||
2112 | printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n", | ||
2113 | o2hb_heartbeat_mode_desc[i]); | ||
2114 | return count; | ||
2115 | } | ||
2116 | |||
2117 | return -EINVAL; | ||
2118 | |||
2119 | } | ||
2120 | |||
1691 | static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = { | 2121 | static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = { |
1692 | .attr = { .ca_owner = THIS_MODULE, | 2122 | .attr = { .ca_owner = THIS_MODULE, |
1693 | .ca_name = "dead_threshold", | 2123 | .ca_name = "dead_threshold", |
@@ -1696,8 +2126,17 @@ static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold | |||
1696 | .store = o2hb_heartbeat_group_threshold_store, | 2126 | .store = o2hb_heartbeat_group_threshold_store, |
1697 | }; | 2127 | }; |
1698 | 2128 | ||
2129 | static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_mode = { | ||
2130 | .attr = { .ca_owner = THIS_MODULE, | ||
2131 | .ca_name = "mode", | ||
2132 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
2133 | .show = o2hb_heartbeat_group_mode_show, | ||
2134 | .store = o2hb_heartbeat_group_mode_store, | ||
2135 | }; | ||
2136 | |||
1699 | static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { | 2137 | static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { |
1700 | &o2hb_heartbeat_group_attr_threshold.attr, | 2138 | &o2hb_heartbeat_group_attr_threshold.attr, |
2139 | &o2hb_heartbeat_group_attr_mode.attr, | ||
1701 | NULL, | 2140 | NULL, |
1702 | }; | 2141 | }; |
1703 | 2142 | ||
@@ -1963,3 +2402,34 @@ void o2hb_stop_all_regions(void) | |||
1963 | spin_unlock(&o2hb_live_lock); | 2402 | spin_unlock(&o2hb_live_lock); |
1964 | } | 2403 | } |
1965 | EXPORT_SYMBOL_GPL(o2hb_stop_all_regions); | 2404 | EXPORT_SYMBOL_GPL(o2hb_stop_all_regions); |
2405 | |||
2406 | int o2hb_get_all_regions(char *region_uuids, u8 max_regions) | ||
2407 | { | ||
2408 | struct o2hb_region *reg; | ||
2409 | int numregs = 0; | ||
2410 | char *p; | ||
2411 | |||
2412 | spin_lock(&o2hb_live_lock); | ||
2413 | |||
2414 | p = region_uuids; | ||
2415 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { | ||
2416 | mlog(0, "Region: %s\n", config_item_name(®->hr_item)); | ||
2417 | if (numregs < max_regions) { | ||
2418 | memcpy(p, config_item_name(®->hr_item), | ||
2419 | O2HB_MAX_REGION_NAME_LEN); | ||
2420 | p += O2HB_MAX_REGION_NAME_LEN; | ||
2421 | } | ||
2422 | numregs++; | ||
2423 | } | ||
2424 | |||
2425 | spin_unlock(&o2hb_live_lock); | ||
2426 | |||
2427 | return numregs; | ||
2428 | } | ||
2429 | EXPORT_SYMBOL_GPL(o2hb_get_all_regions); | ||
2430 | |||
2431 | int o2hb_global_heartbeat_active(void) | ||
2432 | { | ||
2433 | return (o2hb_heartbeat_mode == O2HB_HEARTBEAT_GLOBAL); | ||
2434 | } | ||
2435 | EXPORT_SYMBOL(o2hb_global_heartbeat_active); | ||
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index 2f1649253b49..00ad8e8fea51 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h | |||
@@ -31,6 +31,8 @@ | |||
31 | 31 | ||
32 | #define O2HB_REGION_TIMEOUT_MS 2000 | 32 | #define O2HB_REGION_TIMEOUT_MS 2000 |
33 | 33 | ||
34 | #define O2HB_MAX_REGION_NAME_LEN 32 | ||
35 | |||
34 | /* number of changes to be seen as live */ | 36 | /* number of changes to be seen as live */ |
35 | #define O2HB_LIVE_THRESHOLD 2 | 37 | #define O2HB_LIVE_THRESHOLD 2 |
36 | /* number of equal samples to be seen as dead */ | 38 | /* number of equal samples to be seen as dead */ |
@@ -81,5 +83,7 @@ int o2hb_check_node_heartbeating(u8 node_num); | |||
81 | int o2hb_check_node_heartbeating_from_callback(u8 node_num); | 83 | int o2hb_check_node_heartbeating_from_callback(u8 node_num); |
82 | int o2hb_check_local_node_heartbeating(void); | 84 | int o2hb_check_local_node_heartbeating(void); |
83 | void o2hb_stop_all_regions(void); | 85 | void o2hb_stop_all_regions(void); |
86 | int o2hb_get_all_regions(char *region_uuids, u8 numregions); | ||
87 | int o2hb_global_heartbeat_active(void); | ||
84 | 88 | ||
85 | #endif /* O2CLUSTER_HEARTBEAT_H */ | 89 | #endif /* O2CLUSTER_HEARTBEAT_H */ |
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index fd96e2a2fa56..ea2ed9f56c94 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h | |||
@@ -119,7 +119,8 @@ | |||
119 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ | 119 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ |
120 | #define ML_NOTICE 0x0000000200000000ULL /* sent to KERN_NOTICE */ | 120 | #define ML_NOTICE 0x0000000200000000ULL /* sent to KERN_NOTICE */ |
121 | #define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ | 121 | #define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ |
122 | #define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */ | 122 | #define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */ |
123 | #define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */ | ||
123 | 124 | ||
124 | #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) | 125 | #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) |
125 | #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) | 126 | #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) |
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index ed0c9f367fed..bb240647ca5f 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
@@ -711,6 +711,8 @@ static struct config_item *o2nm_node_group_make_item(struct config_group *group, | |||
711 | config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); | 711 | config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); |
712 | spin_lock_init(&node->nd_lock); | 712 | spin_lock_init(&node->nd_lock); |
713 | 713 | ||
714 | mlog(ML_CLUSTER, "o2nm: Registering node %s\n", name); | ||
715 | |||
714 | return &node->nd_item; | 716 | return &node->nd_item; |
715 | } | 717 | } |
716 | 718 | ||
@@ -744,6 +746,9 @@ static void o2nm_node_group_drop_item(struct config_group *group, | |||
744 | } | 746 | } |
745 | write_unlock(&cluster->cl_nodes_lock); | 747 | write_unlock(&cluster->cl_nodes_lock); |
746 | 748 | ||
749 | mlog(ML_CLUSTER, "o2nm: Unregistered node %s\n", | ||
750 | config_item_name(&node->nd_item)); | ||
751 | |||
747 | config_item_put(item); | 752 | config_item_put(item); |
748 | } | 753 | } |
749 | 754 | ||
diff --git a/fs/ocfs2/cluster/ocfs2_nodemanager.h b/fs/ocfs2/cluster/ocfs2_nodemanager.h index 5b9854bad571..49b594325bec 100644 --- a/fs/ocfs2/cluster/ocfs2_nodemanager.h +++ b/fs/ocfs2/cluster/ocfs2_nodemanager.h | |||
@@ -36,4 +36,10 @@ | |||
36 | /* host name, group name, cluster name all 64 bytes */ | 36 | /* host name, group name, cluster name all 64 bytes */ |
37 | #define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN | 37 | #define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN |
38 | 38 | ||
39 | /* | ||
40 | * Maximum number of global heartbeat regions allowed. | ||
41 | * **CAUTION** Changing this number will break dlm compatibility. | ||
42 | */ | ||
43 | #define O2NM_MAX_REGIONS 32 | ||
44 | |||
39 | #endif /* _OCFS2_NODEMANAGER_H */ | 45 | #endif /* _OCFS2_NODEMANAGER_H */ |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index aa75ca3f78da..9aa426e42123 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -977,7 +977,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn, | |||
977 | int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | 977 | int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, |
978 | size_t caller_veclen, u8 target_node, int *status) | 978 | size_t caller_veclen, u8 target_node, int *status) |
979 | { | 979 | { |
980 | int ret; | 980 | int ret = 0; |
981 | struct o2net_msg *msg = NULL; | 981 | struct o2net_msg *msg = NULL; |
982 | size_t veclen, caller_bytes = 0; | 982 | size_t veclen, caller_bytes = 0; |
983 | struct kvec *vec = NULL; | 983 | struct kvec *vec = NULL; |
@@ -1696,6 +1696,9 @@ static void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num, | |||
1696 | { | 1696 | { |
1697 | o2quo_hb_down(node_num); | 1697 | o2quo_hb_down(node_num); |
1698 | 1698 | ||
1699 | if (!node) | ||
1700 | return; | ||
1701 | |||
1699 | if (node_num != o2nm_this_node()) | 1702 | if (node_num != o2nm_this_node()) |
1700 | o2net_disconnect_node(node); | 1703 | o2net_disconnect_node(node); |
1701 | 1704 | ||
@@ -1709,6 +1712,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, | |||
1709 | 1712 | ||
1710 | o2quo_hb_up(node_num); | 1713 | o2quo_hb_up(node_num); |
1711 | 1714 | ||
1715 | BUG_ON(!node); | ||
1716 | |||
1712 | /* ensure an immediate connect attempt */ | 1717 | /* ensure an immediate connect attempt */ |
1713 | nn->nn_last_connect_attempt = jiffies - | 1718 | nn->nn_last_connect_attempt = jiffies - |
1714 | (msecs_to_jiffies(o2net_reconnect_delay()) + 1); | 1719 | (msecs_to_jiffies(o2net_reconnect_delay()) + 1); |
@@ -1759,6 +1764,7 @@ static int o2net_accept_one(struct socket *sock) | |||
1759 | struct sockaddr_in sin; | 1764 | struct sockaddr_in sin; |
1760 | struct socket *new_sock = NULL; | 1765 | struct socket *new_sock = NULL; |
1761 | struct o2nm_node *node = NULL; | 1766 | struct o2nm_node *node = NULL; |
1767 | struct o2nm_node *local_node = NULL; | ||
1762 | struct o2net_sock_container *sc = NULL; | 1768 | struct o2net_sock_container *sc = NULL; |
1763 | struct o2net_node *nn; | 1769 | struct o2net_node *nn; |
1764 | 1770 | ||
@@ -1796,11 +1802,15 @@ static int o2net_accept_one(struct socket *sock) | |||
1796 | goto out; | 1802 | goto out; |
1797 | } | 1803 | } |
1798 | 1804 | ||
1799 | if (o2nm_this_node() > node->nd_num) { | 1805 | if (o2nm_this_node() >= node->nd_num) { |
1800 | mlog(ML_NOTICE, "unexpected connect attempted from a lower " | 1806 | local_node = o2nm_get_node_by_num(o2nm_this_node()); |
1801 | "numbered node '%s' at " "%pI4:%d with num %u\n", | 1807 | mlog(ML_NOTICE, "unexpected connect attempt seen at node '%s' (" |
1802 | node->nd_name, &sin.sin_addr.s_addr, | 1808 | "%u, %pI4:%d) from node '%s' (%u, %pI4:%d)\n", |
1803 | ntohs(sin.sin_port), node->nd_num); | 1809 | local_node->nd_name, local_node->nd_num, |
1810 | &(local_node->nd_ipv4_address), | ||
1811 | ntohs(local_node->nd_ipv4_port), | ||
1812 | node->nd_name, node->nd_num, &sin.sin_addr.s_addr, | ||
1813 | ntohs(sin.sin_port)); | ||
1804 | ret = -EINVAL; | 1814 | ret = -EINVAL; |
1805 | goto out; | 1815 | goto out; |
1806 | } | 1816 | } |
@@ -1857,6 +1867,8 @@ out: | |||
1857 | sock_release(new_sock); | 1867 | sock_release(new_sock); |
1858 | if (node) | 1868 | if (node) |
1859 | o2nm_node_put(node); | 1869 | o2nm_node_put(node); |
1870 | if (local_node) | ||
1871 | o2nm_node_put(local_node); | ||
1860 | if (sc) | 1872 | if (sc) |
1861 | sc_put(sc); | 1873 | sc_put(sc); |
1862 | return ret; | 1874 | return ret; |
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index b4957c7d9fe2..edaded48e7e9 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c | |||
@@ -40,6 +40,14 @@ | |||
40 | #include "inode.h" | 40 | #include "inode.h" |
41 | #include "super.h" | 41 | #include "super.h" |
42 | 42 | ||
43 | void ocfs2_dentry_attach_gen(struct dentry *dentry) | ||
44 | { | ||
45 | unsigned long gen = | ||
46 | OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen; | ||
47 | BUG_ON(dentry->d_inode); | ||
48 | dentry->d_fsdata = (void *)gen; | ||
49 | } | ||
50 | |||
43 | 51 | ||
44 | static int ocfs2_dentry_revalidate(struct dentry *dentry, | 52 | static int ocfs2_dentry_revalidate(struct dentry *dentry, |
45 | struct nameidata *nd) | 53 | struct nameidata *nd) |
@@ -51,11 +59,20 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, | |||
51 | mlog_entry("(0x%p, '%.*s')\n", dentry, | 59 | mlog_entry("(0x%p, '%.*s')\n", dentry, |
52 | dentry->d_name.len, dentry->d_name.name); | 60 | dentry->d_name.len, dentry->d_name.name); |
53 | 61 | ||
54 | /* Never trust a negative dentry - force a new lookup. */ | 62 | /* For a negative dentry - |
63 | * check the generation number of the parent and compare with the | ||
64 | * one stored in the inode. | ||
65 | */ | ||
55 | if (inode == NULL) { | 66 | if (inode == NULL) { |
56 | mlog(0, "negative dentry: %.*s\n", dentry->d_name.len, | 67 | unsigned long gen = (unsigned long) dentry->d_fsdata; |
57 | dentry->d_name.name); | 68 | unsigned long pgen = |
58 | goto bail; | 69 | OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen; |
70 | mlog(0, "negative dentry: %.*s parent gen: %lu " | ||
71 | "dentry gen: %lu\n", | ||
72 | dentry->d_name.len, dentry->d_name.name, pgen, gen); | ||
73 | if (gen != pgen) | ||
74 | goto bail; | ||
75 | goto valid; | ||
59 | } | 76 | } |
60 | 77 | ||
61 | BUG_ON(!osb); | 78 | BUG_ON(!osb); |
@@ -96,6 +113,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, | |||
96 | goto bail; | 113 | goto bail; |
97 | } | 114 | } |
98 | 115 | ||
116 | valid: | ||
99 | ret = 1; | 117 | ret = 1; |
100 | 118 | ||
101 | bail: | 119 | bail: |
@@ -227,6 +245,12 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, | |||
227 | if (!inode) | 245 | if (!inode) |
228 | return 0; | 246 | return 0; |
229 | 247 | ||
248 | if (!dentry->d_inode && dentry->d_fsdata) { | ||
249 | /* Converting a negative dentry to positive | ||
250 | Clear dentry->d_fsdata */ | ||
251 | dentry->d_fsdata = dl = NULL; | ||
252 | } | ||
253 | |||
230 | if (dl) { | 254 | if (dl) { |
231 | mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, | 255 | mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, |
232 | " \"%.*s\": old parent: %llu, new: %llu\n", | 256 | " \"%.*s\": old parent: %llu, new: %llu\n", |
@@ -452,6 +476,7 @@ static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode) | |||
452 | 476 | ||
453 | out: | 477 | out: |
454 | iput(inode); | 478 | iput(inode); |
479 | ocfs2_dentry_attach_gen(dentry); | ||
455 | } | 480 | } |
456 | 481 | ||
457 | /* | 482 | /* |
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h index f5dd1789acf1..b79eff709958 100644 --- a/fs/ocfs2/dcache.h +++ b/fs/ocfs2/dcache.h | |||
@@ -64,5 +64,6 @@ void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target, | |||
64 | struct inode *old_dir, struct inode *new_dir); | 64 | struct inode *old_dir, struct inode *new_dir); |
65 | 65 | ||
66 | extern spinlock_t dentry_attach_lock; | 66 | extern spinlock_t dentry_attach_lock; |
67 | void ocfs2_dentry_attach_gen(struct dentry *dentry); | ||
67 | 68 | ||
68 | #endif /* OCFS2_DCACHE_H */ | 69 | #endif /* OCFS2_DCACHE_H */ |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index f04ebcfffc4a..c49f6de0e7ab 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -3931,6 +3931,15 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, | |||
3931 | goto out_commit; | 3931 | goto out_commit; |
3932 | } | 3932 | } |
3933 | 3933 | ||
3934 | cpos = split_hash; | ||
3935 | ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle, | ||
3936 | data_ac, meta_ac, new_dx_leaves, | ||
3937 | num_dx_leaves); | ||
3938 | if (ret) { | ||
3939 | mlog_errno(ret); | ||
3940 | goto out_commit; | ||
3941 | } | ||
3942 | |||
3934 | for (i = 0; i < num_dx_leaves; i++) { | 3943 | for (i = 0; i < num_dx_leaves; i++) { |
3935 | ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), | 3944 | ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), |
3936 | orig_dx_leaves[i], | 3945 | orig_dx_leaves[i], |
@@ -3939,15 +3948,14 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, | |||
3939 | mlog_errno(ret); | 3948 | mlog_errno(ret); |
3940 | goto out_commit; | 3949 | goto out_commit; |
3941 | } | 3950 | } |
3942 | } | ||
3943 | 3951 | ||
3944 | cpos = split_hash; | 3952 | ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), |
3945 | ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle, | 3953 | new_dx_leaves[i], |
3946 | data_ac, meta_ac, new_dx_leaves, | 3954 | OCFS2_JOURNAL_ACCESS_WRITE); |
3947 | num_dx_leaves); | 3955 | if (ret) { |
3948 | if (ret) { | 3956 | mlog_errno(ret); |
3949 | mlog_errno(ret); | 3957 | goto out_commit; |
3950 | goto out_commit; | 3958 | } |
3951 | } | 3959 | } |
3952 | 3960 | ||
3953 | ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf, | 3961 | ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf, |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 4b6ae2c13b47..b36d0bf77a5a 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
@@ -445,7 +445,9 @@ enum { | |||
445 | DLM_LOCK_REQUEST_MSG, /* 515 */ | 445 | DLM_LOCK_REQUEST_MSG, /* 515 */ |
446 | DLM_RECO_DATA_DONE_MSG, /* 516 */ | 446 | DLM_RECO_DATA_DONE_MSG, /* 516 */ |
447 | DLM_BEGIN_RECO_MSG, /* 517 */ | 447 | DLM_BEGIN_RECO_MSG, /* 517 */ |
448 | DLM_FINALIZE_RECO_MSG /* 518 */ | 448 | DLM_FINALIZE_RECO_MSG, /* 518 */ |
449 | DLM_QUERY_REGION, /* 519 */ | ||
450 | DLM_QUERY_NODEINFO, /* 520 */ | ||
449 | }; | 451 | }; |
450 | 452 | ||
451 | struct dlm_reco_node_data | 453 | struct dlm_reco_node_data |
@@ -727,6 +729,31 @@ struct dlm_cancel_join | |||
727 | u8 domain[O2NM_MAX_NAME_LEN]; | 729 | u8 domain[O2NM_MAX_NAME_LEN]; |
728 | }; | 730 | }; |
729 | 731 | ||
732 | struct dlm_query_region { | ||
733 | u8 qr_node; | ||
734 | u8 qr_numregions; | ||
735 | u8 qr_namelen; | ||
736 | u8 pad1; | ||
737 | u8 qr_domain[O2NM_MAX_NAME_LEN]; | ||
738 | u8 qr_regions[O2HB_MAX_REGION_NAME_LEN * O2NM_MAX_REGIONS]; | ||
739 | }; | ||
740 | |||
741 | struct dlm_node_info { | ||
742 | u8 ni_nodenum; | ||
743 | u8 pad1; | ||
744 | u16 ni_ipv4_port; | ||
745 | u32 ni_ipv4_address; | ||
746 | }; | ||
747 | |||
748 | struct dlm_query_nodeinfo { | ||
749 | u8 qn_nodenum; | ||
750 | u8 qn_numnodes; | ||
751 | u8 qn_namelen; | ||
752 | u8 pad1; | ||
753 | u8 qn_domain[O2NM_MAX_NAME_LEN]; | ||
754 | struct dlm_node_info qn_nodes[O2NM_MAX_NODES]; | ||
755 | }; | ||
756 | |||
730 | struct dlm_exit_domain | 757 | struct dlm_exit_domain |
731 | { | 758 | { |
732 | u8 node_idx; | 759 | u8 node_idx; |
@@ -1030,6 +1057,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, | |||
1030 | struct dlm_lock_resource *res); | 1057 | struct dlm_lock_resource *res); |
1031 | void dlm_clean_master_list(struct dlm_ctxt *dlm, | 1058 | void dlm_clean_master_list(struct dlm_ctxt *dlm, |
1032 | u8 dead_node); | 1059 | u8 dead_node); |
1060 | void dlm_force_free_mles(struct dlm_ctxt *dlm); | ||
1033 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 1061 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
1034 | int __dlm_lockres_has_locks(struct dlm_lock_resource *res); | 1062 | int __dlm_lockres_has_locks(struct dlm_lock_resource *res); |
1035 | int __dlm_lockres_unused(struct dlm_lock_resource *res); | 1063 | int __dlm_lockres_unused(struct dlm_lock_resource *res); |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 5efdd37dfe48..272ec8631a51 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
@@ -493,7 +493,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | |||
493 | struct hlist_head *bucket; | 493 | struct hlist_head *bucket; |
494 | struct hlist_node *list; | 494 | struct hlist_node *list; |
495 | int i, out = 0; | 495 | int i, out = 0; |
496 | unsigned long total = 0, longest = 0, bktcnt; | 496 | unsigned long total = 0, longest = 0, bucket_count = 0; |
497 | 497 | ||
498 | out += snprintf(db->buf + out, db->len - out, | 498 | out += snprintf(db->buf + out, db->len - out, |
499 | "Dumping MLEs for Domain: %s\n", dlm->name); | 499 | "Dumping MLEs for Domain: %s\n", dlm->name); |
@@ -505,13 +505,13 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | |||
505 | mle = hlist_entry(list, struct dlm_master_list_entry, | 505 | mle = hlist_entry(list, struct dlm_master_list_entry, |
506 | master_hash_node); | 506 | master_hash_node); |
507 | ++total; | 507 | ++total; |
508 | ++bktcnt; | 508 | ++bucket_count; |
509 | if (db->len - out < 200) | 509 | if (db->len - out < 200) |
510 | continue; | 510 | continue; |
511 | out += dump_mle(mle, db->buf + out, db->len - out); | 511 | out += dump_mle(mle, db->buf + out, db->len - out); |
512 | } | 512 | } |
513 | longest = max(longest, bktcnt); | 513 | longest = max(longest, bucket_count); |
514 | bktcnt = 0; | 514 | bucket_count = 0; |
515 | } | 515 | } |
516 | spin_unlock(&dlm->master_lock); | 516 | spin_unlock(&dlm->master_lock); |
517 | 517 | ||
@@ -636,8 +636,14 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos) | |||
636 | spin_lock(&dlm->track_lock); | 636 | spin_lock(&dlm->track_lock); |
637 | if (oldres) | 637 | if (oldres) |
638 | track_list = &oldres->tracking; | 638 | track_list = &oldres->tracking; |
639 | else | 639 | else { |
640 | track_list = &dlm->tracking_list; | 640 | track_list = &dlm->tracking_list; |
641 | if (list_empty(track_list)) { | ||
642 | dl = NULL; | ||
643 | spin_unlock(&dlm->track_lock); | ||
644 | goto bail; | ||
645 | } | ||
646 | } | ||
641 | 647 | ||
642 | list_for_each_entry(res, track_list, tracking) { | 648 | list_for_each_entry(res, track_list, tracking) { |
643 | if (&res->tracking == &dlm->tracking_list) | 649 | if (&res->tracking == &dlm->tracking_list) |
@@ -660,6 +666,7 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos) | |||
660 | } else | 666 | } else |
661 | dl = NULL; | 667 | dl = NULL; |
662 | 668 | ||
669 | bail: | ||
663 | /* passed to seq_show */ | 670 | /* passed to seq_show */ |
664 | return dl; | 671 | return dl; |
665 | } | 672 | } |
@@ -775,7 +782,9 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | |||
775 | 782 | ||
776 | /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ | 783 | /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ |
777 | out += snprintf(db->buf + out, db->len - out, | 784 | out += snprintf(db->buf + out, db->len - out, |
778 | "Domain: %s Key: 0x%08x\n", dlm->name, dlm->key); | 785 | "Domain: %s Key: 0x%08x Protocol: %d.%d\n", |
786 | dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major, | ||
787 | dlm->dlm_locking_proto.pv_minor); | ||
779 | 788 | ||
780 | /* Thread Pid: xxx Node: xxx State: xxxxx */ | 789 | /* Thread Pid: xxx Node: xxx State: xxxxx */ |
781 | out += snprintf(db->buf + out, db->len - out, | 790 | out += snprintf(db->buf + out, db->len - out, |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 153abb5abef0..58a93b953735 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -128,10 +128,14 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); | |||
128 | * will have a negotiated version with the same major number and a minor | 128 | * will have a negotiated version with the same major number and a minor |
129 | * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should | 129 | * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should |
130 | * be used to determine what a running domain is actually using. | 130 | * be used to determine what a running domain is actually using. |
131 | * | ||
132 | * New in version 1.1: | ||
133 | * - Message DLM_QUERY_REGION added to support global heartbeat | ||
134 | * - Message DLM_QUERY_NODEINFO added to allow online node removes | ||
131 | */ | 135 | */ |
132 | static const struct dlm_protocol_version dlm_protocol = { | 136 | static const struct dlm_protocol_version dlm_protocol = { |
133 | .pv_major = 1, | 137 | .pv_major = 1, |
134 | .pv_minor = 0, | 138 | .pv_minor = 1, |
135 | }; | 139 | }; |
136 | 140 | ||
137 | #define DLM_DOMAIN_BACKOFF_MS 200 | 141 | #define DLM_DOMAIN_BACKOFF_MS 200 |
@@ -142,6 +146,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, | |||
142 | void **ret_data); | 146 | void **ret_data); |
143 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, | 147 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, |
144 | void **ret_data); | 148 | void **ret_data); |
149 | static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | ||
150 | void *data, void **ret_data); | ||
145 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, | 151 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, |
146 | void **ret_data); | 152 | void **ret_data); |
147 | static int dlm_protocol_compare(struct dlm_protocol_version *existing, | 153 | static int dlm_protocol_compare(struct dlm_protocol_version *existing, |
@@ -693,6 +699,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) | |||
693 | 699 | ||
694 | dlm_mark_domain_leaving(dlm); | 700 | dlm_mark_domain_leaving(dlm); |
695 | dlm_leave_domain(dlm); | 701 | dlm_leave_domain(dlm); |
702 | dlm_force_free_mles(dlm); | ||
696 | dlm_complete_dlm_shutdown(dlm); | 703 | dlm_complete_dlm_shutdown(dlm); |
697 | } | 704 | } |
698 | dlm_put(dlm); | 705 | dlm_put(dlm); |
@@ -920,6 +927,370 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, | |||
920 | return 0; | 927 | return 0; |
921 | } | 928 | } |
922 | 929 | ||
930 | static int dlm_match_regions(struct dlm_ctxt *dlm, | ||
931 | struct dlm_query_region *qr) | ||
932 | { | ||
933 | char *local = NULL, *remote = qr->qr_regions; | ||
934 | char *l, *r; | ||
935 | int localnr, i, j, foundit; | ||
936 | int status = 0; | ||
937 | |||
938 | if (!o2hb_global_heartbeat_active()) { | ||
939 | if (qr->qr_numregions) { | ||
940 | mlog(ML_ERROR, "Domain %s: Joining node %d has global " | ||
941 | "heartbeat enabled but local node %d does not\n", | ||
942 | qr->qr_domain, qr->qr_node, dlm->node_num); | ||
943 | status = -EINVAL; | ||
944 | } | ||
945 | goto bail; | ||
946 | } | ||
947 | |||
948 | if (o2hb_global_heartbeat_active() && !qr->qr_numregions) { | ||
949 | mlog(ML_ERROR, "Domain %s: Local node %d has global " | ||
950 | "heartbeat enabled but joining node %d does not\n", | ||
951 | qr->qr_domain, dlm->node_num, qr->qr_node); | ||
952 | status = -EINVAL; | ||
953 | goto bail; | ||
954 | } | ||
955 | |||
956 | r = remote; | ||
957 | for (i = 0; i < qr->qr_numregions; ++i) { | ||
958 | mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, r); | ||
959 | r += O2HB_MAX_REGION_NAME_LEN; | ||
960 | } | ||
961 | |||
962 | local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); | ||
963 | if (!local) { | ||
964 | status = -ENOMEM; | ||
965 | goto bail; | ||
966 | } | ||
967 | |||
968 | localnr = o2hb_get_all_regions(local, O2NM_MAX_REGIONS); | ||
969 | |||
970 | /* compare local regions with remote */ | ||
971 | l = local; | ||
972 | for (i = 0; i < localnr; ++i) { | ||
973 | foundit = 0; | ||
974 | r = remote; | ||
975 | for (j = 0; j <= qr->qr_numregions; ++j) { | ||
976 | if (!memcmp(l, r, O2HB_MAX_REGION_NAME_LEN)) { | ||
977 | foundit = 1; | ||
978 | break; | ||
979 | } | ||
980 | r += O2HB_MAX_REGION_NAME_LEN; | ||
981 | } | ||
982 | if (!foundit) { | ||
983 | status = -EINVAL; | ||
984 | mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " | ||
985 | "in local node %d but not in joining node %d\n", | ||
986 | qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, l, | ||
987 | dlm->node_num, qr->qr_node); | ||
988 | goto bail; | ||
989 | } | ||
990 | l += O2HB_MAX_REGION_NAME_LEN; | ||
991 | } | ||
992 | |||
993 | /* compare remote with local regions */ | ||
994 | r = remote; | ||
995 | for (i = 0; i < qr->qr_numregions; ++i) { | ||
996 | foundit = 0; | ||
997 | l = local; | ||
998 | for (j = 0; j < localnr; ++j) { | ||
999 | if (!memcmp(r, l, O2HB_MAX_REGION_NAME_LEN)) { | ||
1000 | foundit = 1; | ||
1001 | break; | ||
1002 | } | ||
1003 | l += O2HB_MAX_REGION_NAME_LEN; | ||
1004 | } | ||
1005 | if (!foundit) { | ||
1006 | status = -EINVAL; | ||
1007 | mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " | ||
1008 | "in joining node %d but not in local node %d\n", | ||
1009 | qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, r, | ||
1010 | qr->qr_node, dlm->node_num); | ||
1011 | goto bail; | ||
1012 | } | ||
1013 | r += O2HB_MAX_REGION_NAME_LEN; | ||
1014 | } | ||
1015 | |||
1016 | bail: | ||
1017 | kfree(local); | ||
1018 | |||
1019 | return status; | ||
1020 | } | ||
1021 | |||
1022 | static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map) | ||
1023 | { | ||
1024 | struct dlm_query_region *qr = NULL; | ||
1025 | int status, ret = 0, i; | ||
1026 | char *p; | ||
1027 | |||
1028 | if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) | ||
1029 | goto bail; | ||
1030 | |||
1031 | qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL); | ||
1032 | if (!qr) { | ||
1033 | ret = -ENOMEM; | ||
1034 | mlog_errno(ret); | ||
1035 | goto bail; | ||
1036 | } | ||
1037 | |||
1038 | qr->qr_node = dlm->node_num; | ||
1039 | qr->qr_namelen = strlen(dlm->name); | ||
1040 | memcpy(qr->qr_domain, dlm->name, qr->qr_namelen); | ||
1041 | /* if local hb, the numregions will be zero */ | ||
1042 | if (o2hb_global_heartbeat_active()) | ||
1043 | qr->qr_numregions = o2hb_get_all_regions(qr->qr_regions, | ||
1044 | O2NM_MAX_REGIONS); | ||
1045 | |||
1046 | p = qr->qr_regions; | ||
1047 | for (i = 0; i < qr->qr_numregions; ++i, p += O2HB_MAX_REGION_NAME_LEN) | ||
1048 | mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, p); | ||
1049 | |||
1050 | i = -1; | ||
1051 | while ((i = find_next_bit(node_map, O2NM_MAX_NODES, | ||
1052 | i + 1)) < O2NM_MAX_NODES) { | ||
1053 | if (i == dlm->node_num) | ||
1054 | continue; | ||
1055 | |||
1056 | mlog(0, "Sending regions to node %d\n", i); | ||
1057 | |||
1058 | ret = o2net_send_message(DLM_QUERY_REGION, DLM_MOD_KEY, qr, | ||
1059 | sizeof(struct dlm_query_region), | ||
1060 | i, &status); | ||
1061 | if (ret >= 0) | ||
1062 | ret = status; | ||
1063 | if (ret) { | ||
1064 | mlog(ML_ERROR, "Region mismatch %d, node %d\n", | ||
1065 | ret, i); | ||
1066 | break; | ||
1067 | } | ||
1068 | } | ||
1069 | |||
1070 | bail: | ||
1071 | kfree(qr); | ||
1072 | return ret; | ||
1073 | } | ||
1074 | |||
1075 | static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | ||
1076 | void *data, void **ret_data) | ||
1077 | { | ||
1078 | struct dlm_query_region *qr; | ||
1079 | struct dlm_ctxt *dlm = NULL; | ||
1080 | int status = 0; | ||
1081 | int locked = 0; | ||
1082 | |||
1083 | qr = (struct dlm_query_region *) msg->buf; | ||
1084 | |||
1085 | mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node, | ||
1086 | qr->qr_domain); | ||
1087 | |||
1088 | status = -EINVAL; | ||
1089 | |||
1090 | spin_lock(&dlm_domain_lock); | ||
1091 | dlm = __dlm_lookup_domain_full(qr->qr_domain, qr->qr_namelen); | ||
1092 | if (!dlm) { | ||
1093 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | ||
1094 | "before join domain\n", qr->qr_node, qr->qr_domain); | ||
1095 | goto bail; | ||
1096 | } | ||
1097 | |||
1098 | spin_lock(&dlm->spinlock); | ||
1099 | locked = 1; | ||
1100 | if (dlm->joining_node != qr->qr_node) { | ||
1101 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | ||
1102 | "but joining node is %d\n", qr->qr_node, qr->qr_domain, | ||
1103 | dlm->joining_node); | ||
1104 | goto bail; | ||
1105 | } | ||
1106 | |||
1107 | /* Support for global heartbeat was added in 1.1 */ | ||
1108 | if (dlm->dlm_locking_proto.pv_major == 1 && | ||
1109 | dlm->dlm_locking_proto.pv_minor == 0) { | ||
1110 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | ||
1111 | "but active dlm protocol is %d.%d\n", qr->qr_node, | ||
1112 | qr->qr_domain, dlm->dlm_locking_proto.pv_major, | ||
1113 | dlm->dlm_locking_proto.pv_minor); | ||
1114 | goto bail; | ||
1115 | } | ||
1116 | |||
1117 | status = dlm_match_regions(dlm, qr); | ||
1118 | |||
1119 | bail: | ||
1120 | if (locked) | ||
1121 | spin_unlock(&dlm->spinlock); | ||
1122 | spin_unlock(&dlm_domain_lock); | ||
1123 | |||
1124 | return status; | ||
1125 | } | ||
1126 | |||
1127 | static int dlm_match_nodes(struct dlm_ctxt *dlm, struct dlm_query_nodeinfo *qn) | ||
1128 | { | ||
1129 | struct o2nm_node *local; | ||
1130 | struct dlm_node_info *remote; | ||
1131 | int i, j; | ||
1132 | int status = 0; | ||
1133 | |||
1134 | for (j = 0; j < qn->qn_numnodes; ++j) | ||
1135 | mlog(0, "Node %3d, %pI4:%u\n", qn->qn_nodes[j].ni_nodenum, | ||
1136 | &(qn->qn_nodes[j].ni_ipv4_address), | ||
1137 | ntohs(qn->qn_nodes[j].ni_ipv4_port)); | ||
1138 | |||
1139 | for (i = 0; i < O2NM_MAX_NODES && !status; ++i) { | ||
1140 | local = o2nm_get_node_by_num(i); | ||
1141 | remote = NULL; | ||
1142 | for (j = 0; j < qn->qn_numnodes; ++j) { | ||
1143 | if (qn->qn_nodes[j].ni_nodenum == i) { | ||
1144 | remote = &(qn->qn_nodes[j]); | ||
1145 | break; | ||
1146 | } | ||
1147 | } | ||
1148 | |||
1149 | if (!local && !remote) | ||
1150 | continue; | ||
1151 | |||
1152 | if ((local && !remote) || (!local && remote)) | ||
1153 | status = -EINVAL; | ||
1154 | |||
1155 | if (!status && | ||
1156 | ((remote->ni_nodenum != local->nd_num) || | ||
1157 | (remote->ni_ipv4_port != local->nd_ipv4_port) || | ||
1158 | (remote->ni_ipv4_address != local->nd_ipv4_address))) | ||
1159 | status = -EINVAL; | ||
1160 | |||
1161 | if (status) { | ||
1162 | if (remote && !local) | ||
1163 | mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " | ||
1164 | "registered in joining node %d but not in " | ||
1165 | "local node %d\n", qn->qn_domain, | ||
1166 | remote->ni_nodenum, | ||
1167 | &(remote->ni_ipv4_address), | ||
1168 | ntohs(remote->ni_ipv4_port), | ||
1169 | qn->qn_nodenum, dlm->node_num); | ||
1170 | if (local && !remote) | ||
1171 | mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " | ||
1172 | "registered in local node %d but not in " | ||
1173 | "joining node %d\n", qn->qn_domain, | ||
1174 | local->nd_num, &(local->nd_ipv4_address), | ||
1175 | ntohs(local->nd_ipv4_port), | ||
1176 | dlm->node_num, qn->qn_nodenum); | ||
1177 | BUG_ON((!local && !remote)); | ||
1178 | } | ||
1179 | |||
1180 | if (local) | ||
1181 | o2nm_node_put(local); | ||
1182 | } | ||
1183 | |||
1184 | return status; | ||
1185 | } | ||
1186 | |||
1187 | static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map) | ||
1188 | { | ||
1189 | struct dlm_query_nodeinfo *qn = NULL; | ||
1190 | struct o2nm_node *node; | ||
1191 | int ret = 0, status, count, i; | ||
1192 | |||
1193 | if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) | ||
1194 | goto bail; | ||
1195 | |||
1196 | qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL); | ||
1197 | if (!qn) { | ||
1198 | ret = -ENOMEM; | ||
1199 | mlog_errno(ret); | ||
1200 | goto bail; | ||
1201 | } | ||
1202 | |||
1203 | for (i = 0, count = 0; i < O2NM_MAX_NODES; ++i) { | ||
1204 | node = o2nm_get_node_by_num(i); | ||
1205 | if (!node) | ||
1206 | continue; | ||
1207 | qn->qn_nodes[count].ni_nodenum = node->nd_num; | ||
1208 | qn->qn_nodes[count].ni_ipv4_port = node->nd_ipv4_port; | ||
1209 | qn->qn_nodes[count].ni_ipv4_address = node->nd_ipv4_address; | ||
1210 | mlog(0, "Node %3d, %pI4:%u\n", node->nd_num, | ||
1211 | &(node->nd_ipv4_address), ntohs(node->nd_ipv4_port)); | ||
1212 | ++count; | ||
1213 | o2nm_node_put(node); | ||
1214 | } | ||
1215 | |||
1216 | qn->qn_nodenum = dlm->node_num; | ||
1217 | qn->qn_numnodes = count; | ||
1218 | qn->qn_namelen = strlen(dlm->name); | ||
1219 | memcpy(qn->qn_domain, dlm->name, qn->qn_namelen); | ||
1220 | |||
1221 | i = -1; | ||
1222 | while ((i = find_next_bit(node_map, O2NM_MAX_NODES, | ||
1223 | i + 1)) < O2NM_MAX_NODES) { | ||
1224 | if (i == dlm->node_num) | ||
1225 | continue; | ||
1226 | |||
1227 | mlog(0, "Sending nodeinfo to node %d\n", i); | ||
1228 | |||
1229 | ret = o2net_send_message(DLM_QUERY_NODEINFO, DLM_MOD_KEY, | ||
1230 | qn, sizeof(struct dlm_query_nodeinfo), | ||
1231 | i, &status); | ||
1232 | if (ret >= 0) | ||
1233 | ret = status; | ||
1234 | if (ret) { | ||
1235 | mlog(ML_ERROR, "node mismatch %d, node %d\n", ret, i); | ||
1236 | break; | ||
1237 | } | ||
1238 | } | ||
1239 | |||
1240 | bail: | ||
1241 | kfree(qn); | ||
1242 | return ret; | ||
1243 | } | ||
1244 | |||
1245 | static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len, | ||
1246 | void *data, void **ret_data) | ||
1247 | { | ||
1248 | struct dlm_query_nodeinfo *qn; | ||
1249 | struct dlm_ctxt *dlm = NULL; | ||
1250 | int locked = 0, status = -EINVAL; | ||
1251 | |||
1252 | qn = (struct dlm_query_nodeinfo *) msg->buf; | ||
1253 | |||
1254 | mlog(0, "Node %u queries nodes on domain %s\n", qn->qn_nodenum, | ||
1255 | qn->qn_domain); | ||
1256 | |||
1257 | spin_lock(&dlm_domain_lock); | ||
1258 | dlm = __dlm_lookup_domain_full(qn->qn_domain, qn->qn_namelen); | ||
1259 | if (!dlm) { | ||
1260 | mlog(ML_ERROR, "Node %d queried nodes on domain %s before " | ||
1261 | "join domain\n", qn->qn_nodenum, qn->qn_domain); | ||
1262 | goto bail; | ||
1263 | } | ||
1264 | |||
1265 | spin_lock(&dlm->spinlock); | ||
1266 | locked = 1; | ||
1267 | if (dlm->joining_node != qn->qn_nodenum) { | ||
1268 | mlog(ML_ERROR, "Node %d queried nodes on domain %s but " | ||
1269 | "joining node is %d\n", qn->qn_nodenum, qn->qn_domain, | ||
1270 | dlm->joining_node); | ||
1271 | goto bail; | ||
1272 | } | ||
1273 | |||
1274 | /* Support for node query was added in 1.1 */ | ||
1275 | if (dlm->dlm_locking_proto.pv_major == 1 && | ||
1276 | dlm->dlm_locking_proto.pv_minor == 0) { | ||
1277 | mlog(ML_ERROR, "Node %d queried nodes on domain %s " | ||
1278 | "but active dlm protocol is %d.%d\n", qn->qn_nodenum, | ||
1279 | qn->qn_domain, dlm->dlm_locking_proto.pv_major, | ||
1280 | dlm->dlm_locking_proto.pv_minor); | ||
1281 | goto bail; | ||
1282 | } | ||
1283 | |||
1284 | status = dlm_match_nodes(dlm, qn); | ||
1285 | |||
1286 | bail: | ||
1287 | if (locked) | ||
1288 | spin_unlock(&dlm->spinlock); | ||
1289 | spin_unlock(&dlm_domain_lock); | ||
1290 | |||
1291 | return status; | ||
1292 | } | ||
1293 | |||
923 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, | 1294 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, |
924 | void **ret_data) | 1295 | void **ret_data) |
925 | { | 1296 | { |
@@ -1240,6 +1611,20 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) | |||
1240 | set_bit(dlm->node_num, dlm->domain_map); | 1611 | set_bit(dlm->node_num, dlm->domain_map); |
1241 | spin_unlock(&dlm->spinlock); | 1612 | spin_unlock(&dlm->spinlock); |
1242 | 1613 | ||
1614 | /* Support for global heartbeat and node info was added in 1.1 */ | ||
1615 | if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) { | ||
1616 | status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map); | ||
1617 | if (status) { | ||
1618 | mlog_errno(status); | ||
1619 | goto bail; | ||
1620 | } | ||
1621 | status = dlm_send_regions(dlm, ctxt->yes_resp_map); | ||
1622 | if (status) { | ||
1623 | mlog_errno(status); | ||
1624 | goto bail; | ||
1625 | } | ||
1626 | } | ||
1627 | |||
1243 | dlm_send_join_asserts(dlm, ctxt->yes_resp_map); | 1628 | dlm_send_join_asserts(dlm, ctxt->yes_resp_map); |
1244 | 1629 | ||
1245 | /* Joined state *must* be set before the joining node | 1630 | /* Joined state *must* be set before the joining node |
@@ -1806,7 +2191,21 @@ static int dlm_register_net_handlers(void) | |||
1806 | sizeof(struct dlm_cancel_join), | 2191 | sizeof(struct dlm_cancel_join), |
1807 | dlm_cancel_join_handler, | 2192 | dlm_cancel_join_handler, |
1808 | NULL, NULL, &dlm_join_handlers); | 2193 | NULL, NULL, &dlm_join_handlers); |
2194 | if (status) | ||
2195 | goto bail; | ||
2196 | |||
2197 | status = o2net_register_handler(DLM_QUERY_REGION, DLM_MOD_KEY, | ||
2198 | sizeof(struct dlm_query_region), | ||
2199 | dlm_query_region_handler, | ||
2200 | NULL, NULL, &dlm_join_handlers); | ||
1809 | 2201 | ||
2202 | if (status) | ||
2203 | goto bail; | ||
2204 | |||
2205 | status = o2net_register_handler(DLM_QUERY_NODEINFO, DLM_MOD_KEY, | ||
2206 | sizeof(struct dlm_query_nodeinfo), | ||
2207 | dlm_query_nodeinfo_handler, | ||
2208 | NULL, NULL, &dlm_join_handlers); | ||
1810 | bail: | 2209 | bail: |
1811 | if (status < 0) | 2210 | if (status < 0) |
1812 | dlm_unregister_net_handlers(); | 2211 | dlm_unregister_net_handlers(); |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 94b97fc6a88e..f564b0e5f80d 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -511,8 +511,6 @@ static void dlm_lockres_release(struct kref *kref) | |||
511 | 511 | ||
512 | atomic_dec(&dlm->res_cur_count); | 512 | atomic_dec(&dlm->res_cur_count); |
513 | 513 | ||
514 | dlm_put(dlm); | ||
515 | |||
516 | if (!hlist_unhashed(&res->hash_node) || | 514 | if (!hlist_unhashed(&res->hash_node) || |
517 | !list_empty(&res->granted) || | 515 | !list_empty(&res->granted) || |
518 | !list_empty(&res->converting) || | 516 | !list_empty(&res->converting) || |
@@ -585,8 +583,6 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, | |||
585 | res->migration_pending = 0; | 583 | res->migration_pending = 0; |
586 | res->inflight_locks = 0; | 584 | res->inflight_locks = 0; |
587 | 585 | ||
588 | /* put in dlm_lockres_release */ | ||
589 | dlm_grab(dlm); | ||
590 | res->dlm = dlm; | 586 | res->dlm = dlm; |
591 | 587 | ||
592 | kref_init(&res->refs); | 588 | kref_init(&res->refs); |
@@ -3050,8 +3046,6 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, | |||
3050 | /* check for pre-existing lock */ | 3046 | /* check for pre-existing lock */ |
3051 | spin_lock(&dlm->spinlock); | 3047 | spin_lock(&dlm->spinlock); |
3052 | res = __dlm_lookup_lockres(dlm, name, namelen, hash); | 3048 | res = __dlm_lookup_lockres(dlm, name, namelen, hash); |
3053 | spin_lock(&dlm->master_lock); | ||
3054 | |||
3055 | if (res) { | 3049 | if (res) { |
3056 | spin_lock(&res->spinlock); | 3050 | spin_lock(&res->spinlock); |
3057 | if (res->state & DLM_LOCK_RES_RECOVERING) { | 3051 | if (res->state & DLM_LOCK_RES_RECOVERING) { |
@@ -3069,14 +3063,15 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, | |||
3069 | spin_unlock(&res->spinlock); | 3063 | spin_unlock(&res->spinlock); |
3070 | } | 3064 | } |
3071 | 3065 | ||
3066 | spin_lock(&dlm->master_lock); | ||
3072 | /* ignore status. only nonzero status would BUG. */ | 3067 | /* ignore status. only nonzero status would BUG. */ |
3073 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, | 3068 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, |
3074 | name, namelen, | 3069 | name, namelen, |
3075 | migrate->new_master, | 3070 | migrate->new_master, |
3076 | migrate->master); | 3071 | migrate->master); |
3077 | 3072 | ||
3078 | unlock: | ||
3079 | spin_unlock(&dlm->master_lock); | 3073 | spin_unlock(&dlm->master_lock); |
3074 | unlock: | ||
3080 | spin_unlock(&dlm->spinlock); | 3075 | spin_unlock(&dlm->spinlock); |
3081 | 3076 | ||
3082 | if (oldmle) { | 3077 | if (oldmle) { |
@@ -3438,3 +3433,43 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm, | |||
3438 | wake_up(&res->wq); | 3433 | wake_up(&res->wq); |
3439 | wake_up(&dlm->migration_wq); | 3434 | wake_up(&dlm->migration_wq); |
3440 | } | 3435 | } |
3436 | |||
3437 | void dlm_force_free_mles(struct dlm_ctxt *dlm) | ||
3438 | { | ||
3439 | int i; | ||
3440 | struct hlist_head *bucket; | ||
3441 | struct dlm_master_list_entry *mle; | ||
3442 | struct hlist_node *tmp, *list; | ||
3443 | |||
3444 | /* | ||
3445 | * We notified all other nodes that we are exiting the domain and | ||
3446 | * marked the dlm state to DLM_CTXT_LEAVING. If any mles are still | ||
3447 | * around we force free them and wake any processes that are waiting | ||
3448 | * on the mles | ||
3449 | */ | ||
3450 | spin_lock(&dlm->spinlock); | ||
3451 | spin_lock(&dlm->master_lock); | ||
3452 | |||
3453 | BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING); | ||
3454 | BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES)); | ||
3455 | |||
3456 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { | ||
3457 | bucket = dlm_master_hash(dlm, i); | ||
3458 | hlist_for_each_safe(list, tmp, bucket) { | ||
3459 | mle = hlist_entry(list, struct dlm_master_list_entry, | ||
3460 | master_hash_node); | ||
3461 | if (mle->type != DLM_MLE_BLOCK) { | ||
3462 | mlog(ML_ERROR, "bad mle: %p\n", mle); | ||
3463 | dlm_print_one_mle(mle); | ||
3464 | } | ||
3465 | atomic_set(&mle->woken, 1); | ||
3466 | wake_up(&mle->wq); | ||
3467 | |||
3468 | __dlm_unlink_mle(dlm, mle); | ||
3469 | __dlm_mle_detach_hb_events(dlm, mle); | ||
3470 | __dlm_put_mle(mle); | ||
3471 | } | ||
3472 | } | ||
3473 | spin_unlock(&dlm->master_lock); | ||
3474 | spin_unlock(&dlm->spinlock); | ||
3475 | } | ||
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 9dfaac73b36d..aaaffbcbe916 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -1997,6 +1997,8 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm, | |||
1997 | struct list_head *queue; | 1997 | struct list_head *queue; |
1998 | struct dlm_lock *lock, *next; | 1998 | struct dlm_lock *lock, *next; |
1999 | 1999 | ||
2000 | assert_spin_locked(&dlm->spinlock); | ||
2001 | assert_spin_locked(&res->spinlock); | ||
2000 | res->state |= DLM_LOCK_RES_RECOVERING; | 2002 | res->state |= DLM_LOCK_RES_RECOVERING; |
2001 | if (!list_empty(&res->recovering)) { | 2003 | if (!list_empty(&res->recovering)) { |
2002 | mlog(0, | 2004 | mlog(0, |
@@ -2326,19 +2328,15 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | |||
2326 | /* zero the lvb if necessary */ | 2328 | /* zero the lvb if necessary */ |
2327 | dlm_revalidate_lvb(dlm, res, dead_node); | 2329 | dlm_revalidate_lvb(dlm, res, dead_node); |
2328 | if (res->owner == dead_node) { | 2330 | if (res->owner == dead_node) { |
2329 | if (res->state & DLM_LOCK_RES_DROPPING_REF) | 2331 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { |
2330 | mlog(0, "%s:%.*s: owned by " | 2332 | mlog(ML_NOTICE, "Ignore %.*s for " |
2331 | "dead node %u, this node was " | 2333 | "recovery as it is being freed\n", |
2332 | "dropping its ref when it died. " | 2334 | res->lockname.len, |
2333 | "continue, dropping the flag.\n", | 2335 | res->lockname.name); |
2334 | dlm->name, res->lockname.len, | 2336 | } else |
2335 | res->lockname.name, dead_node); | 2337 | dlm_move_lockres_to_recovery_list(dlm, |
2336 | 2338 | res); | |
2337 | /* the wake_up for this will happen when the | ||
2338 | * RECOVERING flag is dropped later */ | ||
2339 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; | ||
2340 | 2339 | ||
2341 | dlm_move_lockres_to_recovery_list(dlm, res); | ||
2342 | } else if (res->owner == dlm->node_num) { | 2340 | } else if (res->owner == dlm->node_num) { |
2343 | dlm_free_dead_locks(dlm, res, dead_node); | 2341 | dlm_free_dead_locks(dlm, res, dead_node); |
2344 | __dlm_lockres_calc_usage(dlm, res); | 2342 | __dlm_lockres_calc_usage(dlm, res); |
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index d4f73ca68fe5..2211acf33d9b 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c | |||
@@ -92,19 +92,27 @@ int __dlm_lockres_has_locks(struct dlm_lock_resource *res) | |||
92 | * truly ready to be freed. */ | 92 | * truly ready to be freed. */ |
93 | int __dlm_lockres_unused(struct dlm_lock_resource *res) | 93 | int __dlm_lockres_unused(struct dlm_lock_resource *res) |
94 | { | 94 | { |
95 | if (!__dlm_lockres_has_locks(res) && | 95 | int bit; |
96 | (list_empty(&res->dirty) && !(res->state & DLM_LOCK_RES_DIRTY))) { | 96 | |
97 | /* try not to scan the bitmap unless the first two | 97 | if (__dlm_lockres_has_locks(res)) |
98 | * conditions are already true */ | 98 | return 0; |
99 | int bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | 99 | |
100 | if (bit >= O2NM_MAX_NODES) { | 100 | if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) |
101 | /* since the bit for dlm->node_num is not | 101 | return 0; |
102 | * set, inflight_locks better be zero */ | 102 | |
103 | BUG_ON(res->inflight_locks != 0); | 103 | if (res->state & DLM_LOCK_RES_RECOVERING) |
104 | return 1; | 104 | return 0; |
105 | } | 105 | |
106 | } | 106 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); |
107 | return 0; | 107 | if (bit < O2NM_MAX_NODES) |
108 | return 0; | ||
109 | |||
110 | /* | ||
111 | * since the bit for dlm->node_num is not set, inflight_locks better | ||
112 | * be zero | ||
113 | */ | ||
114 | BUG_ON(res->inflight_locks != 0); | ||
115 | return 1; | ||
108 | } | 116 | } |
109 | 117 | ||
110 | 118 | ||
@@ -152,45 +160,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, | |||
152 | spin_unlock(&dlm->spinlock); | 160 | spin_unlock(&dlm->spinlock); |
153 | } | 161 | } |
154 | 162 | ||
155 | static int dlm_purge_lockres(struct dlm_ctxt *dlm, | 163 | static void dlm_purge_lockres(struct dlm_ctxt *dlm, |
156 | struct dlm_lock_resource *res) | 164 | struct dlm_lock_resource *res) |
157 | { | 165 | { |
158 | int master; | 166 | int master; |
159 | int ret = 0; | 167 | int ret = 0; |
160 | 168 | ||
161 | spin_lock(&res->spinlock); | 169 | assert_spin_locked(&dlm->spinlock); |
162 | if (!__dlm_lockres_unused(res)) { | 170 | assert_spin_locked(&res->spinlock); |
163 | mlog(0, "%s:%.*s: tried to purge but not unused\n", | ||
164 | dlm->name, res->lockname.len, res->lockname.name); | ||
165 | __dlm_print_one_lock_resource(res); | ||
166 | spin_unlock(&res->spinlock); | ||
167 | BUG(); | ||
168 | } | ||
169 | |||
170 | if (res->state & DLM_LOCK_RES_MIGRATING) { | ||
171 | mlog(0, "%s:%.*s: Delay dropref as this lockres is " | ||
172 | "being remastered\n", dlm->name, res->lockname.len, | ||
173 | res->lockname.name); | ||
174 | /* Re-add the lockres to the end of the purge list */ | ||
175 | if (!list_empty(&res->purge)) { | ||
176 | list_del_init(&res->purge); | ||
177 | list_add_tail(&res->purge, &dlm->purge_list); | ||
178 | } | ||
179 | spin_unlock(&res->spinlock); | ||
180 | return 0; | ||
181 | } | ||
182 | 171 | ||
183 | master = (res->owner == dlm->node_num); | 172 | master = (res->owner == dlm->node_num); |
184 | 173 | ||
185 | if (!master) | ||
186 | res->state |= DLM_LOCK_RES_DROPPING_REF; | ||
187 | spin_unlock(&res->spinlock); | ||
188 | 174 | ||
189 | mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, | 175 | mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, |
190 | res->lockname.name, master); | 176 | res->lockname.name, master); |
191 | 177 | ||
192 | if (!master) { | 178 | if (!master) { |
179 | res->state |= DLM_LOCK_RES_DROPPING_REF; | ||
193 | /* drop spinlock... retake below */ | 180 | /* drop spinlock... retake below */ |
181 | spin_unlock(&res->spinlock); | ||
194 | spin_unlock(&dlm->spinlock); | 182 | spin_unlock(&dlm->spinlock); |
195 | 183 | ||
196 | spin_lock(&res->spinlock); | 184 | spin_lock(&res->spinlock); |
@@ -208,31 +196,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, | |||
208 | mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n", | 196 | mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n", |
209 | dlm->name, res->lockname.len, res->lockname.name, ret); | 197 | dlm->name, res->lockname.len, res->lockname.name, ret); |
210 | spin_lock(&dlm->spinlock); | 198 | spin_lock(&dlm->spinlock); |
199 | spin_lock(&res->spinlock); | ||
211 | } | 200 | } |
212 | 201 | ||
213 | spin_lock(&res->spinlock); | ||
214 | if (!list_empty(&res->purge)) { | 202 | if (!list_empty(&res->purge)) { |
215 | mlog(0, "removing lockres %.*s:%p from purgelist, " | 203 | mlog(0, "removing lockres %.*s:%p from purgelist, " |
216 | "master = %d\n", res->lockname.len, res->lockname.name, | 204 | "master = %d\n", res->lockname.len, res->lockname.name, |
217 | res, master); | 205 | res, master); |
218 | list_del_init(&res->purge); | 206 | list_del_init(&res->purge); |
219 | spin_unlock(&res->spinlock); | ||
220 | dlm_lockres_put(res); | 207 | dlm_lockres_put(res); |
221 | dlm->purge_count--; | 208 | dlm->purge_count--; |
222 | } else | 209 | } |
223 | spin_unlock(&res->spinlock); | 210 | |
211 | if (!__dlm_lockres_unused(res)) { | ||
212 | mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n", | ||
213 | dlm->name, res->lockname.len, res->lockname.name); | ||
214 | __dlm_print_one_lock_resource(res); | ||
215 | BUG(); | ||
216 | } | ||
224 | 217 | ||
225 | __dlm_unhash_lockres(res); | 218 | __dlm_unhash_lockres(res); |
226 | 219 | ||
227 | /* lockres is not in the hash now. drop the flag and wake up | 220 | /* lockres is not in the hash now. drop the flag and wake up |
228 | * any processes waiting in dlm_get_lock_resource. */ | 221 | * any processes waiting in dlm_get_lock_resource. */ |
229 | if (!master) { | 222 | if (!master) { |
230 | spin_lock(&res->spinlock); | ||
231 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; | 223 | res->state &= ~DLM_LOCK_RES_DROPPING_REF; |
232 | spin_unlock(&res->spinlock); | 224 | spin_unlock(&res->spinlock); |
233 | wake_up(&res->wq); | 225 | wake_up(&res->wq); |
234 | } | 226 | } else |
235 | return 0; | 227 | spin_unlock(&res->spinlock); |
236 | } | 228 | } |
237 | 229 | ||
238 | static void dlm_run_purge_list(struct dlm_ctxt *dlm, | 230 | static void dlm_run_purge_list(struct dlm_ctxt *dlm, |
@@ -251,17 +243,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, | |||
251 | lockres = list_entry(dlm->purge_list.next, | 243 | lockres = list_entry(dlm->purge_list.next, |
252 | struct dlm_lock_resource, purge); | 244 | struct dlm_lock_resource, purge); |
253 | 245 | ||
254 | /* Status of the lockres *might* change so double | ||
255 | * check. If the lockres is unused, holding the dlm | ||
256 | * spinlock will prevent people from getting and more | ||
257 | * refs on it -- there's no need to keep the lockres | ||
258 | * spinlock. */ | ||
259 | spin_lock(&lockres->spinlock); | 246 | spin_lock(&lockres->spinlock); |
260 | unused = __dlm_lockres_unused(lockres); | ||
261 | spin_unlock(&lockres->spinlock); | ||
262 | |||
263 | if (!unused) | ||
264 | continue; | ||
265 | 247 | ||
266 | purge_jiffies = lockres->last_used + | 248 | purge_jiffies = lockres->last_used + |
267 | msecs_to_jiffies(DLM_PURGE_INTERVAL_MS); | 249 | msecs_to_jiffies(DLM_PURGE_INTERVAL_MS); |
@@ -273,15 +255,29 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, | |||
273 | * in tail order, we can stop at the first | 255 | * in tail order, we can stop at the first |
274 | * unpurgable resource -- anyone added after | 256 | * unpurgable resource -- anyone added after |
275 | * him will have a greater last_used value */ | 257 | * him will have a greater last_used value */ |
258 | spin_unlock(&lockres->spinlock); | ||
276 | break; | 259 | break; |
277 | } | 260 | } |
278 | 261 | ||
262 | /* Status of the lockres *might* change so double | ||
263 | * check. If the lockres is unused, holding the dlm | ||
264 | * spinlock will prevent people from getting any more | ||
265 | * refs on it. */ | ||
266 | unused = __dlm_lockres_unused(lockres); | ||
267 | if (!unused || | ||
268 | (lockres->state & DLM_LOCK_RES_MIGRATING)) { | ||
269 | mlog(0, "lockres %s:%.*s: is in use or " | ||
270 | "being remastered, used %d, state %d\n", | ||
271 | dlm->name, lockres->lockname.len, | ||
272 | lockres->lockname.name, !unused, lockres->state); | ||
273 | list_move_tail(&dlm->purge_list, &lockres->purge); | ||
274 | spin_unlock(&lockres->spinlock); | ||
275 | continue; | ||
276 | } | ||
277 | |||
279 | dlm_lockres_get(lockres); | 278 | dlm_lockres_get(lockres); |
280 | 279 | ||
281 | /* This may drop and reacquire the dlm spinlock if it | 280 | dlm_purge_lockres(dlm, lockres); |
282 | * has to do migration. */ | ||
283 | if (dlm_purge_lockres(dlm, lockres)) | ||
284 | BUG(); | ||
285 | 281 | ||
286 | dlm_lockres_put(lockres); | 282 | dlm_lockres_put(lockres); |
287 | 283 | ||
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index c2903b84bb7a..a7ebd9d42dc8 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c | |||
@@ -612,6 +612,7 @@ static const struct file_operations dlmfs_file_operations = { | |||
612 | .poll = dlmfs_file_poll, | 612 | .poll = dlmfs_file_poll, |
613 | .read = dlmfs_file_read, | 613 | .read = dlmfs_file_read, |
614 | .write = dlmfs_file_write, | 614 | .write = dlmfs_file_write, |
615 | .llseek = default_llseek, | ||
615 | }; | 616 | }; |
616 | 617 | ||
617 | static const struct inode_operations dlmfs_dir_inode_operations = { | 618 | static const struct inode_operations dlmfs_dir_inode_operations = { |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 5e02a893f46e..e8d94d722ecb 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -3635,10 +3635,18 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
3635 | { | 3635 | { |
3636 | struct inode *inode; | 3636 | struct inode *inode; |
3637 | struct address_space *mapping; | 3637 | struct address_space *mapping; |
3638 | struct ocfs2_inode_info *oi; | ||
3638 | 3639 | ||
3639 | inode = ocfs2_lock_res_inode(lockres); | 3640 | inode = ocfs2_lock_res_inode(lockres); |
3640 | mapping = inode->i_mapping; | 3641 | mapping = inode->i_mapping; |
3641 | 3642 | ||
3643 | if (S_ISDIR(inode->i_mode)) { | ||
3644 | oi = OCFS2_I(inode); | ||
3645 | oi->ip_dir_lock_gen++; | ||
3646 | mlog(0, "generation: %u\n", oi->ip_dir_lock_gen); | ||
3647 | goto out; | ||
3648 | } | ||
3649 | |||
3642 | if (!S_ISREG(inode->i_mode)) | 3650 | if (!S_ISREG(inode->i_mode)) |
3643 | goto out; | 3651 | goto out; |
3644 | 3652 | ||
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index d1ce48e1b3d6..1d596d8c4a4a 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
@@ -84,6 +84,7 @@ enum { | |||
84 | OI_LS_PARENT, | 84 | OI_LS_PARENT, |
85 | OI_LS_RENAME1, | 85 | OI_LS_RENAME1, |
86 | OI_LS_RENAME2, | 86 | OI_LS_RENAME2, |
87 | OI_LS_REFLINK_TARGET, | ||
87 | }; | 88 | }; |
88 | 89 | ||
89 | int ocfs2_dlm_init(struct ocfs2_super *osb); | 90 | int ocfs2_dlm_init(struct ocfs2_super *osb); |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 81296b4e3646..1ca6867935bb 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/writeback.h> | 36 | #include <linux/writeback.h> |
37 | #include <linux/falloc.h> | 37 | #include <linux/falloc.h> |
38 | #include <linux/quotaops.h> | 38 | #include <linux/quotaops.h> |
39 | #include <linux/blkdev.h> | ||
39 | 40 | ||
40 | #define MLOG_MASK_PREFIX ML_INODE | 41 | #define MLOG_MASK_PREFIX ML_INODE |
41 | #include <cluster/masklog.h> | 42 | #include <cluster/masklog.h> |
@@ -63,12 +64,6 @@ | |||
63 | 64 | ||
64 | #include "buffer_head_io.h" | 65 | #include "buffer_head_io.h" |
65 | 66 | ||
66 | static int ocfs2_sync_inode(struct inode *inode) | ||
67 | { | ||
68 | filemap_fdatawrite(inode->i_mapping); | ||
69 | return sync_mapping_buffers(inode->i_mapping); | ||
70 | } | ||
71 | |||
72 | static int ocfs2_init_file_private(struct inode *inode, struct file *file) | 67 | static int ocfs2_init_file_private(struct inode *inode, struct file *file) |
73 | { | 68 | { |
74 | struct ocfs2_file_private *fp; | 69 | struct ocfs2_file_private *fp; |
@@ -179,19 +174,22 @@ static int ocfs2_sync_file(struct file *file, int datasync) | |||
179 | { | 174 | { |
180 | int err = 0; | 175 | int err = 0; |
181 | journal_t *journal; | 176 | journal_t *journal; |
182 | struct dentry *dentry = file->f_path.dentry; | ||
183 | struct inode *inode = file->f_mapping->host; | 177 | struct inode *inode = file->f_mapping->host; |
184 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 178 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
185 | 179 | ||
186 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, | 180 | mlog_entry("(0x%p, %d, 0x%p, '%.*s')\n", file, datasync, |
187 | dentry->d_name.len, dentry->d_name.name); | 181 | file->f_path.dentry, file->f_path.dentry->d_name.len, |
188 | 182 | file->f_path.dentry->d_name.name); | |
189 | err = ocfs2_sync_inode(dentry->d_inode); | ||
190 | if (err) | ||
191 | goto bail; | ||
192 | 183 | ||
193 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | 184 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { |
185 | /* | ||
186 | * We still have to flush drive's caches to get data to the | ||
187 | * platter | ||
188 | */ | ||
189 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) | ||
190 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); | ||
194 | goto bail; | 191 | goto bail; |
192 | } | ||
195 | 193 | ||
196 | journal = osb->journal->j_journal; | 194 | journal = osb->journal->j_journal; |
197 | err = jbd2_journal_force_commit(journal); | 195 | err = jbd2_journal_force_commit(journal); |
@@ -361,7 +359,7 @@ static int ocfs2_cow_file_pos(struct inode *inode, | |||
361 | if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) | 359 | if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) |
362 | goto out; | 360 | goto out; |
363 | 361 | ||
364 | return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1); | 362 | return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1); |
365 | 363 | ||
366 | out: | 364 | out: |
367 | return status; | 365 | return status; |
@@ -774,7 +772,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, | |||
774 | BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); | 772 | BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); |
775 | BUG_ON(abs_from & (inode->i_blkbits - 1)); | 773 | BUG_ON(abs_from & (inode->i_blkbits - 1)); |
776 | 774 | ||
777 | page = grab_cache_page(mapping, index); | 775 | page = find_or_create_page(mapping, index, GFP_NOFS); |
778 | if (!page) { | 776 | if (!page) { |
779 | ret = -ENOMEM; | 777 | ret = -ENOMEM; |
780 | mlog_errno(ret); | 778 | mlog_errno(ret); |
@@ -904,8 +902,8 @@ static int ocfs2_zero_extend_get_range(struct inode *inode, | |||
904 | zero_clusters = last_cpos - zero_cpos; | 902 | zero_clusters = last_cpos - zero_cpos; |
905 | 903 | ||
906 | if (needs_cow) { | 904 | if (needs_cow) { |
907 | rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters, | 905 | rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos, |
908 | UINT_MAX); | 906 | zero_clusters, UINT_MAX); |
909 | if (rc) { | 907 | if (rc) { |
910 | mlog_errno(rc); | 908 | mlog_errno(rc); |
911 | goto out; | 909 | goto out; |
@@ -2053,6 +2051,7 @@ out: | |||
2053 | } | 2051 | } |
2054 | 2052 | ||
2055 | static int ocfs2_prepare_inode_for_refcount(struct inode *inode, | 2053 | static int ocfs2_prepare_inode_for_refcount(struct inode *inode, |
2054 | struct file *file, | ||
2056 | loff_t pos, size_t count, | 2055 | loff_t pos, size_t count, |
2057 | int *meta_level) | 2056 | int *meta_level) |
2058 | { | 2057 | { |
@@ -2070,7 +2069,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode, | |||
2070 | 2069 | ||
2071 | *meta_level = 1; | 2070 | *meta_level = 1; |
2072 | 2071 | ||
2073 | ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX); | 2072 | ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX); |
2074 | if (ret) | 2073 | if (ret) |
2075 | mlog_errno(ret); | 2074 | mlog_errno(ret); |
2076 | out: | 2075 | out: |
@@ -2078,7 +2077,7 @@ out: | |||
2078 | return ret; | 2077 | return ret; |
2079 | } | 2078 | } |
2080 | 2079 | ||
2081 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | 2080 | static int ocfs2_prepare_inode_for_write(struct file *file, |
2082 | loff_t *ppos, | 2081 | loff_t *ppos, |
2083 | size_t count, | 2082 | size_t count, |
2084 | int appending, | 2083 | int appending, |
@@ -2086,6 +2085,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
2086 | int *has_refcount) | 2085 | int *has_refcount) |
2087 | { | 2086 | { |
2088 | int ret = 0, meta_level = 0; | 2087 | int ret = 0, meta_level = 0; |
2088 | struct dentry *dentry = file->f_path.dentry; | ||
2089 | struct inode *inode = dentry->d_inode; | 2089 | struct inode *inode = dentry->d_inode; |
2090 | loff_t saved_pos, end; | 2090 | loff_t saved_pos, end; |
2091 | 2091 | ||
@@ -2141,6 +2141,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
2141 | meta_level = -1; | 2141 | meta_level = -1; |
2142 | 2142 | ||
2143 | ret = ocfs2_prepare_inode_for_refcount(inode, | 2143 | ret = ocfs2_prepare_inode_for_refcount(inode, |
2144 | file, | ||
2144 | saved_pos, | 2145 | saved_pos, |
2145 | count, | 2146 | count, |
2146 | &meta_level); | 2147 | &meta_level); |
@@ -2223,6 +2224,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
2223 | struct file *file = iocb->ki_filp; | 2224 | struct file *file = iocb->ki_filp; |
2224 | struct inode *inode = file->f_path.dentry->d_inode; | 2225 | struct inode *inode = file->f_path.dentry->d_inode; |
2225 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2226 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2227 | int full_coherency = !(osb->s_mount_opt & | ||
2228 | OCFS2_MOUNT_COHERENCY_BUFFERED); | ||
2226 | 2229 | ||
2227 | mlog_entry("(0x%p, %u, '%.*s')\n", file, | 2230 | mlog_entry("(0x%p, %u, '%.*s')\n", file, |
2228 | (unsigned int)nr_segs, | 2231 | (unsigned int)nr_segs, |
@@ -2246,16 +2249,39 @@ relock: | |||
2246 | have_alloc_sem = 1; | 2249 | have_alloc_sem = 1; |
2247 | } | 2250 | } |
2248 | 2251 | ||
2249 | /* concurrent O_DIRECT writes are allowed */ | 2252 | /* |
2250 | rw_level = !direct_io; | 2253 | * Concurrent O_DIRECT writes are allowed with |
2254 | * mount_option "coherency=buffered". | ||
2255 | */ | ||
2256 | rw_level = (!direct_io || full_coherency); | ||
2257 | |||
2251 | ret = ocfs2_rw_lock(inode, rw_level); | 2258 | ret = ocfs2_rw_lock(inode, rw_level); |
2252 | if (ret < 0) { | 2259 | if (ret < 0) { |
2253 | mlog_errno(ret); | 2260 | mlog_errno(ret); |
2254 | goto out_sems; | 2261 | goto out_sems; |
2255 | } | 2262 | } |
2256 | 2263 | ||
2264 | /* | ||
2265 | * O_DIRECT writes with "coherency=full" need to take EX cluster | ||
2266 | * inode_lock to guarantee coherency. | ||
2267 | */ | ||
2268 | if (direct_io && full_coherency) { | ||
2269 | /* | ||
2270 | * We need to take and drop the inode lock to force | ||
2271 | * other nodes to drop their caches. Buffered I/O | ||
2272 | * already does this in write_begin(). | ||
2273 | */ | ||
2274 | ret = ocfs2_inode_lock(inode, NULL, 1); | ||
2275 | if (ret < 0) { | ||
2276 | mlog_errno(ret); | ||
2277 | goto out_sems; | ||
2278 | } | ||
2279 | |||
2280 | ocfs2_inode_unlock(inode, 1); | ||
2281 | } | ||
2282 | |||
2257 | can_do_direct = direct_io; | 2283 | can_do_direct = direct_io; |
2258 | ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos, | 2284 | ret = ocfs2_prepare_inode_for_write(file, ppos, |
2259 | iocb->ki_left, appending, | 2285 | iocb->ki_left, appending, |
2260 | &can_do_direct, &has_refcount); | 2286 | &can_do_direct, &has_refcount); |
2261 | if (ret < 0) { | 2287 | if (ret < 0) { |
@@ -2303,17 +2329,6 @@ relock: | |||
2303 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, | 2329 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, |
2304 | ppos, count, ocount); | 2330 | ppos, count, ocount); |
2305 | if (written < 0) { | 2331 | if (written < 0) { |
2306 | /* | ||
2307 | * direct write may have instantiated a few | ||
2308 | * blocks outside i_size. Trim these off again. | ||
2309 | * Don't need i_size_read because we hold i_mutex. | ||
2310 | * | ||
2311 | * XXX(truncate): this looks buggy because ocfs2 did not | ||
2312 | * actually implement ->truncate. Take a look at | ||
2313 | * the new truncate sequence and update this accordingly | ||
2314 | */ | ||
2315 | if (*ppos + count > inode->i_size) | ||
2316 | truncate_setsize(inode, inode->i_size); | ||
2317 | ret = written; | 2332 | ret = written; |
2318 | goto out_dio; | 2333 | goto out_dio; |
2319 | } | 2334 | } |
@@ -2329,7 +2344,7 @@ out_dio: | |||
2329 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); | 2344 | BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); |
2330 | 2345 | ||
2331 | if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || | 2346 | if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || |
2332 | ((file->f_flags & O_DIRECT) && has_refcount)) { | 2347 | ((file->f_flags & O_DIRECT) && !direct_io)) { |
2333 | ret = filemap_fdatawrite_range(file->f_mapping, pos, | 2348 | ret = filemap_fdatawrite_range(file->f_mapping, pos, |
2334 | pos + count - 1); | 2349 | pos + count - 1); |
2335 | if (ret < 0) | 2350 | if (ret < 0) |
@@ -2385,7 +2400,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, | |||
2385 | { | 2400 | { |
2386 | int ret; | 2401 | int ret; |
2387 | 2402 | ||
2388 | ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, | 2403 | ret = ocfs2_prepare_inode_for_write(out, &sd->pos, |
2389 | sd->total_len, 0, NULL, NULL); | 2404 | sd->total_len, 0, NULL, NULL); |
2390 | if (ret < 0) { | 2405 | if (ret < 0) { |
2391 | mlog_errno(ret); | 2406 | mlog_errno(ret); |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 0492464916b1..f935fd6600dd 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -335,6 +335,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
335 | else | 335 | else |
336 | inode->i_fop = &ocfs2_dops_no_plocks; | 336 | inode->i_fop = &ocfs2_dops_no_plocks; |
337 | i_size_write(inode, le64_to_cpu(fe->i_size)); | 337 | i_size_write(inode, le64_to_cpu(fe->i_size)); |
338 | OCFS2_I(inode)->ip_dir_lock_gen = 1; | ||
338 | break; | 339 | break; |
339 | case S_IFLNK: | 340 | case S_IFLNK: |
340 | if (ocfs2_inode_is_fast_symlink(inode)) | 341 | if (ocfs2_inode_is_fast_symlink(inode)) |
@@ -488,7 +489,11 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
488 | OCFS2_BH_IGNORE_CACHE); | 489 | OCFS2_BH_IGNORE_CACHE); |
489 | } else { | 490 | } else { |
490 | status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); | 491 | status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); |
491 | if (!status) | 492 | /* |
493 | * If buffer is in jbd, then its checksum may not have been | ||
494 | * computed as yet. | ||
495 | */ | ||
496 | if (!status && !buffer_jbd(bh)) | ||
492 | status = ocfs2_validate_inode_block(osb->sb, bh); | 497 | status = ocfs2_validate_inode_block(osb->sb, bh); |
493 | } | 498 | } |
494 | if (status < 0) { | 499 | if (status < 0) { |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 6de5a869db30..1c508b149b3a 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -46,30 +46,28 @@ struct ocfs2_inode_info | |||
46 | /* These fields are protected by ip_lock */ | 46 | /* These fields are protected by ip_lock */ |
47 | spinlock_t ip_lock; | 47 | spinlock_t ip_lock; |
48 | u32 ip_open_count; | 48 | u32 ip_open_count; |
49 | u32 ip_clusters; | ||
50 | struct list_head ip_io_markers; | 49 | struct list_head ip_io_markers; |
50 | u32 ip_clusters; | ||
51 | 51 | ||
52 | u16 ip_dyn_features; | ||
52 | struct mutex ip_io_mutex; | 53 | struct mutex ip_io_mutex; |
53 | |||
54 | u32 ip_flags; /* see below */ | 54 | u32 ip_flags; /* see below */ |
55 | u32 ip_attr; /* inode attributes */ | 55 | u32 ip_attr; /* inode attributes */ |
56 | u16 ip_dyn_features; | ||
57 | 56 | ||
58 | /* protected by recovery_lock. */ | 57 | /* protected by recovery_lock. */ |
59 | struct inode *ip_next_orphan; | 58 | struct inode *ip_next_orphan; |
60 | 59 | ||
61 | u32 ip_dir_start_lookup; | ||
62 | |||
63 | struct ocfs2_caching_info ip_metadata_cache; | 60 | struct ocfs2_caching_info ip_metadata_cache; |
64 | |||
65 | struct ocfs2_extent_map ip_extent_map; | 61 | struct ocfs2_extent_map ip_extent_map; |
66 | |||
67 | struct inode vfs_inode; | 62 | struct inode vfs_inode; |
68 | struct jbd2_inode ip_jinode; | 63 | struct jbd2_inode ip_jinode; |
69 | 64 | ||
65 | u32 ip_dir_start_lookup; | ||
66 | |||
70 | /* Only valid if the inode is the dir. */ | 67 | /* Only valid if the inode is the dir. */ |
71 | u32 ip_last_used_slot; | 68 | u32 ip_last_used_slot; |
72 | u64 ip_last_used_group; | 69 | u64 ip_last_used_group; |
70 | u32 ip_dir_lock_gen; | ||
73 | 71 | ||
74 | struct ocfs2_alloc_reservation ip_la_data_resv; | 72 | struct ocfs2_alloc_reservation ip_la_data_resv; |
75 | }; | 73 | }; |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 7d9d9c132cef..7a4868196152 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -26,6 +26,26 @@ | |||
26 | 26 | ||
27 | #include <linux/ext2_fs.h> | 27 | #include <linux/ext2_fs.h> |
28 | 28 | ||
29 | #define o2info_from_user(a, b) \ | ||
30 | copy_from_user(&(a), (b), sizeof(a)) | ||
31 | #define o2info_to_user(a, b) \ | ||
32 | copy_to_user((typeof(a) __user *)b, &(a), sizeof(a)) | ||
33 | |||
34 | /* | ||
35 | * This call is void because we are already reporting an error that may | ||
36 | * be -EFAULT. The error will be returned from the ioctl(2) call. It's | ||
37 | * just a best-effort to tell userspace that this request caused the error. | ||
38 | */ | ||
39 | static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq, | ||
40 | struct ocfs2_info_request __user *req) | ||
41 | { | ||
42 | kreq->ir_flags |= OCFS2_INFO_FL_ERROR; | ||
43 | (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags)); | ||
44 | } | ||
45 | |||
46 | #define o2info_set_request_error(a, b) \ | ||
47 | __o2info_set_request_error((struct ocfs2_info_request *)&(a), b) | ||
48 | |||
29 | static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) | 49 | static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) |
30 | { | 50 | { |
31 | int status; | 51 | int status; |
@@ -109,6 +129,328 @@ bail: | |||
109 | return status; | 129 | return status; |
110 | } | 130 | } |
111 | 131 | ||
132 | int ocfs2_info_handle_blocksize(struct inode *inode, | ||
133 | struct ocfs2_info_request __user *req) | ||
134 | { | ||
135 | int status = -EFAULT; | ||
136 | struct ocfs2_info_blocksize oib; | ||
137 | |||
138 | if (o2info_from_user(oib, req)) | ||
139 | goto bail; | ||
140 | |||
141 | oib.ib_blocksize = inode->i_sb->s_blocksize; | ||
142 | oib.ib_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
143 | |||
144 | if (o2info_to_user(oib, req)) | ||
145 | goto bail; | ||
146 | |||
147 | status = 0; | ||
148 | bail: | ||
149 | if (status) | ||
150 | o2info_set_request_error(oib, req); | ||
151 | |||
152 | return status; | ||
153 | } | ||
154 | |||
155 | int ocfs2_info_handle_clustersize(struct inode *inode, | ||
156 | struct ocfs2_info_request __user *req) | ||
157 | { | ||
158 | int status = -EFAULT; | ||
159 | struct ocfs2_info_clustersize oic; | ||
160 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
161 | |||
162 | if (o2info_from_user(oic, req)) | ||
163 | goto bail; | ||
164 | |||
165 | oic.ic_clustersize = osb->s_clustersize; | ||
166 | oic.ic_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
167 | |||
168 | if (o2info_to_user(oic, req)) | ||
169 | goto bail; | ||
170 | |||
171 | status = 0; | ||
172 | bail: | ||
173 | if (status) | ||
174 | o2info_set_request_error(oic, req); | ||
175 | |||
176 | return status; | ||
177 | } | ||
178 | |||
179 | int ocfs2_info_handle_maxslots(struct inode *inode, | ||
180 | struct ocfs2_info_request __user *req) | ||
181 | { | ||
182 | int status = -EFAULT; | ||
183 | struct ocfs2_info_maxslots oim; | ||
184 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
185 | |||
186 | if (o2info_from_user(oim, req)) | ||
187 | goto bail; | ||
188 | |||
189 | oim.im_max_slots = osb->max_slots; | ||
190 | oim.im_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
191 | |||
192 | if (o2info_to_user(oim, req)) | ||
193 | goto bail; | ||
194 | |||
195 | status = 0; | ||
196 | bail: | ||
197 | if (status) | ||
198 | o2info_set_request_error(oim, req); | ||
199 | |||
200 | return status; | ||
201 | } | ||
202 | |||
203 | int ocfs2_info_handle_label(struct inode *inode, | ||
204 | struct ocfs2_info_request __user *req) | ||
205 | { | ||
206 | int status = -EFAULT; | ||
207 | struct ocfs2_info_label oil; | ||
208 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
209 | |||
210 | if (o2info_from_user(oil, req)) | ||
211 | goto bail; | ||
212 | |||
213 | memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); | ||
214 | oil.il_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
215 | |||
216 | if (o2info_to_user(oil, req)) | ||
217 | goto bail; | ||
218 | |||
219 | status = 0; | ||
220 | bail: | ||
221 | if (status) | ||
222 | o2info_set_request_error(oil, req); | ||
223 | |||
224 | return status; | ||
225 | } | ||
226 | |||
227 | int ocfs2_info_handle_uuid(struct inode *inode, | ||
228 | struct ocfs2_info_request __user *req) | ||
229 | { | ||
230 | int status = -EFAULT; | ||
231 | struct ocfs2_info_uuid oiu; | ||
232 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
233 | |||
234 | if (o2info_from_user(oiu, req)) | ||
235 | goto bail; | ||
236 | |||
237 | memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); | ||
238 | oiu.iu_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
239 | |||
240 | if (o2info_to_user(oiu, req)) | ||
241 | goto bail; | ||
242 | |||
243 | status = 0; | ||
244 | bail: | ||
245 | if (status) | ||
246 | o2info_set_request_error(oiu, req); | ||
247 | |||
248 | return status; | ||
249 | } | ||
250 | |||
251 | int ocfs2_info_handle_fs_features(struct inode *inode, | ||
252 | struct ocfs2_info_request __user *req) | ||
253 | { | ||
254 | int status = -EFAULT; | ||
255 | struct ocfs2_info_fs_features oif; | ||
256 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
257 | |||
258 | if (o2info_from_user(oif, req)) | ||
259 | goto bail; | ||
260 | |||
261 | oif.if_compat_features = osb->s_feature_compat; | ||
262 | oif.if_incompat_features = osb->s_feature_incompat; | ||
263 | oif.if_ro_compat_features = osb->s_feature_ro_compat; | ||
264 | oif.if_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
265 | |||
266 | if (o2info_to_user(oif, req)) | ||
267 | goto bail; | ||
268 | |||
269 | status = 0; | ||
270 | bail: | ||
271 | if (status) | ||
272 | o2info_set_request_error(oif, req); | ||
273 | |||
274 | return status; | ||
275 | } | ||
276 | |||
277 | int ocfs2_info_handle_journal_size(struct inode *inode, | ||
278 | struct ocfs2_info_request __user *req) | ||
279 | { | ||
280 | int status = -EFAULT; | ||
281 | struct ocfs2_info_journal_size oij; | ||
282 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
283 | |||
284 | if (o2info_from_user(oij, req)) | ||
285 | goto bail; | ||
286 | |||
287 | oij.ij_journal_size = osb->journal->j_inode->i_size; | ||
288 | |||
289 | oij.ij_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
290 | |||
291 | if (o2info_to_user(oij, req)) | ||
292 | goto bail; | ||
293 | |||
294 | status = 0; | ||
295 | bail: | ||
296 | if (status) | ||
297 | o2info_set_request_error(oij, req); | ||
298 | |||
299 | return status; | ||
300 | } | ||
301 | |||
302 | int ocfs2_info_handle_unknown(struct inode *inode, | ||
303 | struct ocfs2_info_request __user *req) | ||
304 | { | ||
305 | int status = -EFAULT; | ||
306 | struct ocfs2_info_request oir; | ||
307 | |||
308 | if (o2info_from_user(oir, req)) | ||
309 | goto bail; | ||
310 | |||
311 | oir.ir_flags &= ~OCFS2_INFO_FL_FILLED; | ||
312 | |||
313 | if (o2info_to_user(oir, req)) | ||
314 | goto bail; | ||
315 | |||
316 | status = 0; | ||
317 | bail: | ||
318 | if (status) | ||
319 | o2info_set_request_error(oir, req); | ||
320 | |||
321 | return status; | ||
322 | } | ||
323 | |||
324 | /* | ||
325 | * Validate and distinguish OCFS2_IOC_INFO requests. | ||
326 | * | ||
327 | * - validate the magic number. | ||
328 | * - distinguish different requests. | ||
329 | * - validate size of different requests. | ||
330 | */ | ||
331 | int ocfs2_info_handle_request(struct inode *inode, | ||
332 | struct ocfs2_info_request __user *req) | ||
333 | { | ||
334 | int status = -EFAULT; | ||
335 | struct ocfs2_info_request oir; | ||
336 | |||
337 | if (o2info_from_user(oir, req)) | ||
338 | goto bail; | ||
339 | |||
340 | status = -EINVAL; | ||
341 | if (oir.ir_magic != OCFS2_INFO_MAGIC) | ||
342 | goto bail; | ||
343 | |||
344 | switch (oir.ir_code) { | ||
345 | case OCFS2_INFO_BLOCKSIZE: | ||
346 | if (oir.ir_size == sizeof(struct ocfs2_info_blocksize)) | ||
347 | status = ocfs2_info_handle_blocksize(inode, req); | ||
348 | break; | ||
349 | case OCFS2_INFO_CLUSTERSIZE: | ||
350 | if (oir.ir_size == sizeof(struct ocfs2_info_clustersize)) | ||
351 | status = ocfs2_info_handle_clustersize(inode, req); | ||
352 | break; | ||
353 | case OCFS2_INFO_MAXSLOTS: | ||
354 | if (oir.ir_size == sizeof(struct ocfs2_info_maxslots)) | ||
355 | status = ocfs2_info_handle_maxslots(inode, req); | ||
356 | break; | ||
357 | case OCFS2_INFO_LABEL: | ||
358 | if (oir.ir_size == sizeof(struct ocfs2_info_label)) | ||
359 | status = ocfs2_info_handle_label(inode, req); | ||
360 | break; | ||
361 | case OCFS2_INFO_UUID: | ||
362 | if (oir.ir_size == sizeof(struct ocfs2_info_uuid)) | ||
363 | status = ocfs2_info_handle_uuid(inode, req); | ||
364 | break; | ||
365 | case OCFS2_INFO_FS_FEATURES: | ||
366 | if (oir.ir_size == sizeof(struct ocfs2_info_fs_features)) | ||
367 | status = ocfs2_info_handle_fs_features(inode, req); | ||
368 | break; | ||
369 | case OCFS2_INFO_JOURNAL_SIZE: | ||
370 | if (oir.ir_size == sizeof(struct ocfs2_info_journal_size)) | ||
371 | status = ocfs2_info_handle_journal_size(inode, req); | ||
372 | break; | ||
373 | default: | ||
374 | status = ocfs2_info_handle_unknown(inode, req); | ||
375 | break; | ||
376 | } | ||
377 | |||
378 | bail: | ||
379 | return status; | ||
380 | } | ||
381 | |||
382 | int ocfs2_get_request_ptr(struct ocfs2_info *info, int idx, | ||
383 | u64 *req_addr, int compat_flag) | ||
384 | { | ||
385 | int status = -EFAULT; | ||
386 | u64 __user *bp = NULL; | ||
387 | |||
388 | if (compat_flag) { | ||
389 | #ifdef CONFIG_COMPAT | ||
390 | /* | ||
391 | * pointer bp stores the base address of a pointer array, | ||
392 | * which collects the addresses of all the separate requests. | ||
393 | */ | ||
394 | bp = (u64 __user *)(unsigned long)compat_ptr(info->oi_requests); | ||
395 | #else | ||
396 | BUG(); | ||
397 | #endif | ||
398 | } else | ||
399 | bp = (u64 __user *)(unsigned long)(info->oi_requests); | ||
400 | |||
401 | if (o2info_from_user(*req_addr, bp + idx)) | ||
402 | goto bail; | ||
403 | |||
404 | status = 0; | ||
405 | bail: | ||
406 | return status; | ||
407 | } | ||
408 | |||
409 | /* | ||
410 | * OCFS2_IOC_INFO handles an array of requests passed from userspace. | ||
411 | * | ||
412 | * ocfs2_info_handle() receives a large info aggregation, grab and | ||
413 | * validate the request count from header, then break it into small | ||
414 | * pieces, later specific handlers can handle them one by one. | ||
415 | * | ||
416 | * Idea here is to make each separate request small enough to ensure | ||
417 | * a better backward&forward compatibility, since a small piece of | ||
418 | * request will be less likely to be broken if the disk layout gets changed. | ||
419 | */ | ||
420 | int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info, | ||
421 | int compat_flag) | ||
422 | { | ||
423 | int i, status = 0; | ||
424 | u64 req_addr; | ||
425 | struct ocfs2_info_request __user *reqp; | ||
426 | |||
427 | if ((info->oi_count > OCFS2_INFO_MAX_REQUEST) || | ||
428 | (!info->oi_requests)) { | ||
429 | status = -EINVAL; | ||
430 | goto bail; | ||
431 | } | ||
432 | |||
433 | for (i = 0; i < info->oi_count; i++) { | ||
434 | |||
435 | status = ocfs2_get_request_ptr(info, i, &req_addr, compat_flag); | ||
436 | if (status) | ||
437 | break; | ||
438 | |||
439 | reqp = (struct ocfs2_info_request *)(unsigned long)req_addr; | ||
440 | if (!reqp) { | ||
441 | status = -EINVAL; | ||
442 | goto bail; | ||
443 | } | ||
444 | |||
445 | status = ocfs2_info_handle_request(inode, reqp); | ||
446 | if (status) | ||
447 | break; | ||
448 | } | ||
449 | |||
450 | bail: | ||
451 | return status; | ||
452 | } | ||
453 | |||
112 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 454 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
113 | { | 455 | { |
114 | struct inode *inode = filp->f_path.dentry->d_inode; | 456 | struct inode *inode = filp->f_path.dentry->d_inode; |
@@ -120,6 +462,7 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
120 | struct reflink_arguments args; | 462 | struct reflink_arguments args; |
121 | const char *old_path, *new_path; | 463 | const char *old_path, *new_path; |
122 | bool preserve; | 464 | bool preserve; |
465 | struct ocfs2_info info; | ||
123 | 466 | ||
124 | switch (cmd) { | 467 | switch (cmd) { |
125 | case OCFS2_IOC_GETFLAGS: | 468 | case OCFS2_IOC_GETFLAGS: |
@@ -174,6 +517,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
174 | preserve = (args.preserve != 0); | 517 | preserve = (args.preserve != 0); |
175 | 518 | ||
176 | return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); | 519 | return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); |
520 | case OCFS2_IOC_INFO: | ||
521 | if (copy_from_user(&info, (struct ocfs2_info __user *)arg, | ||
522 | sizeof(struct ocfs2_info))) | ||
523 | return -EFAULT; | ||
524 | |||
525 | return ocfs2_info_handle(inode, &info, 0); | ||
177 | default: | 526 | default: |
178 | return -ENOTTY; | 527 | return -ENOTTY; |
179 | } | 528 | } |
@@ -185,6 +534,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
185 | bool preserve; | 534 | bool preserve; |
186 | struct reflink_arguments args; | 535 | struct reflink_arguments args; |
187 | struct inode *inode = file->f_path.dentry->d_inode; | 536 | struct inode *inode = file->f_path.dentry->d_inode; |
537 | struct ocfs2_info info; | ||
188 | 538 | ||
189 | switch (cmd) { | 539 | switch (cmd) { |
190 | case OCFS2_IOC32_GETFLAGS: | 540 | case OCFS2_IOC32_GETFLAGS: |
@@ -209,6 +559,12 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
209 | 559 | ||
210 | return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), | 560 | return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), |
211 | compat_ptr(args.new_path), preserve); | 561 | compat_ptr(args.new_path), preserve); |
562 | case OCFS2_IOC_INFO: | ||
563 | if (copy_from_user(&info, (struct ocfs2_info __user *)arg, | ||
564 | sizeof(struct ocfs2_info))) | ||
565 | return -EFAULT; | ||
566 | |||
567 | return ocfs2_info_handle(inode, &info, 1); | ||
212 | default: | 568 | default: |
213 | return -ENOIOCTLCMD; | 569 | return -ENOIOCTLCMD; |
214 | } | 570 | } |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 9b57c0350ff9..faa2303dbf0a 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -301,7 +301,6 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
301 | { | 301 | { |
302 | int status = 0; | 302 | int status = 0; |
303 | unsigned int flushed; | 303 | unsigned int flushed; |
304 | unsigned long old_id; | ||
305 | struct ocfs2_journal *journal = NULL; | 304 | struct ocfs2_journal *journal = NULL; |
306 | 305 | ||
307 | mlog_entry_void(); | 306 | mlog_entry_void(); |
@@ -326,7 +325,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
326 | goto finally; | 325 | goto finally; |
327 | } | 326 | } |
328 | 327 | ||
329 | old_id = ocfs2_inc_trans_id(journal); | 328 | ocfs2_inc_trans_id(journal); |
330 | 329 | ||
331 | flushed = atomic_read(&journal->j_num_trans); | 330 | flushed = atomic_read(&journal->j_num_trans); |
332 | atomic_set(&journal->j_num_trans, 0); | 331 | atomic_set(&journal->j_num_trans, 0); |
@@ -342,9 +341,6 @@ finally: | |||
342 | return status; | 341 | return status; |
343 | } | 342 | } |
344 | 343 | ||
345 | /* pass it NULL and it will allocate a new handle object for you. If | ||
346 | * you pass it a handle however, it may still return error, in which | ||
347 | * case it has free'd the passed handle for you. */ | ||
348 | handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) | 344 | handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) |
349 | { | 345 | { |
350 | journal_t *journal = osb->journal->j_journal; | 346 | journal_t *journal = osb->journal->j_journal; |
@@ -1888,6 +1884,8 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) | |||
1888 | 1884 | ||
1889 | os = &osb->osb_orphan_scan; | 1885 | os = &osb->osb_orphan_scan; |
1890 | 1886 | ||
1887 | mlog(0, "Begin orphan scan\n"); | ||
1888 | |||
1891 | if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) | 1889 | if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) |
1892 | goto out; | 1890 | goto out; |
1893 | 1891 | ||
@@ -1920,6 +1918,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) | |||
1920 | unlock: | 1918 | unlock: |
1921 | ocfs2_orphan_scan_unlock(osb, seqno); | 1919 | ocfs2_orphan_scan_unlock(osb, seqno); |
1922 | out: | 1920 | out: |
1921 | mlog(0, "Orphan scan completed\n"); | ||
1923 | return; | 1922 | return; |
1924 | } | 1923 | } |
1925 | 1924 | ||
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index b5baaa8e710f..43e56b97f9c0 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -67,11 +67,12 @@ struct ocfs2_journal { | |||
67 | struct buffer_head *j_bh; /* Journal disk inode block */ | 67 | struct buffer_head *j_bh; /* Journal disk inode block */ |
68 | atomic_t j_num_trans; /* Number of transactions | 68 | atomic_t j_num_trans; /* Number of transactions |
69 | * currently in the system. */ | 69 | * currently in the system. */ |
70 | spinlock_t j_lock; | ||
70 | unsigned long j_trans_id; | 71 | unsigned long j_trans_id; |
71 | struct rw_semaphore j_trans_barrier; | 72 | struct rw_semaphore j_trans_barrier; |
72 | wait_queue_head_t j_checkpointed; | 73 | wait_queue_head_t j_checkpointed; |
73 | 74 | ||
74 | spinlock_t j_lock; | 75 | /* both fields protected by j_lock*/ |
75 | struct list_head j_la_cleanups; | 76 | struct list_head j_la_cleanups; |
76 | struct work_struct j_recovery_work; | 77 | struct work_struct j_recovery_work; |
77 | }; | 78 | }; |
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index af2b8fe1f139..7e32db9c2c99 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
@@ -59,10 +59,11 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) | |||
59 | return ret; | 59 | return ret; |
60 | } | 60 | } |
61 | 61 | ||
62 | static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, | 62 | static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, |
63 | struct page *page) | 63 | struct page *page) |
64 | { | 64 | { |
65 | int ret; | 65 | int ret; |
66 | struct inode *inode = file->f_path.dentry->d_inode; | ||
66 | struct address_space *mapping = inode->i_mapping; | 67 | struct address_space *mapping = inode->i_mapping; |
67 | loff_t pos = page_offset(page); | 68 | loff_t pos = page_offset(page); |
68 | unsigned int len = PAGE_CACHE_SIZE; | 69 | unsigned int len = PAGE_CACHE_SIZE; |
@@ -74,9 +75,11 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, | |||
74 | /* | 75 | /* |
75 | * Another node might have truncated while we were waiting on | 76 | * Another node might have truncated while we were waiting on |
76 | * cluster locks. | 77 | * cluster locks. |
78 | * We don't check size == 0 before the shift. This is borrowed | ||
79 | * from do_generic_file_read. | ||
77 | */ | 80 | */ |
78 | last_index = size >> PAGE_CACHE_SHIFT; | 81 | last_index = (size - 1) >> PAGE_CACHE_SHIFT; |
79 | if (page->index > last_index) { | 82 | if (unlikely(!size || page->index > last_index)) { |
80 | ret = -EINVAL; | 83 | ret = -EINVAL; |
81 | goto out; | 84 | goto out; |
82 | } | 85 | } |
@@ -107,9 +110,9 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, | |||
107 | * because the "write" would invalidate their data. | 110 | * because the "write" would invalidate their data. |
108 | */ | 111 | */ |
109 | if (page->index == last_index) | 112 | if (page->index == last_index) |
110 | len = size & ~PAGE_CACHE_MASK; | 113 | len = ((size - 1) & ~PAGE_CACHE_MASK) + 1; |
111 | 114 | ||
112 | ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, | 115 | ret = ocfs2_write_begin_nolock(file, mapping, pos, len, 0, &locked_page, |
113 | &fsdata, di_bh, page); | 116 | &fsdata, di_bh, page); |
114 | if (ret) { | 117 | if (ret) { |
115 | if (ret != -ENOSPC) | 118 | if (ret != -ENOSPC) |
@@ -157,7 +160,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
157 | */ | 160 | */ |
158 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 161 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
159 | 162 | ||
160 | ret = __ocfs2_page_mkwrite(inode, di_bh, page); | 163 | ret = __ocfs2_page_mkwrite(vma->vm_file, di_bh, page); |
161 | 164 | ||
162 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 165 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
163 | 166 | ||
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index f171b51a74f7..e7bde21149ae 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -171,7 +171,8 @@ bail_add: | |||
171 | ret = ERR_PTR(status); | 171 | ret = ERR_PTR(status); |
172 | goto bail_unlock; | 172 | goto bail_unlock; |
173 | } | 173 | } |
174 | } | 174 | } else |
175 | ocfs2_dentry_attach_gen(dentry); | ||
175 | 176 | ||
176 | bail_unlock: | 177 | bail_unlock: |
177 | /* Don't drop the cluster lock until *after* the d_add -- | 178 | /* Don't drop the cluster lock until *after* the d_add -- |
@@ -472,32 +473,23 @@ leave: | |||
472 | return status; | 473 | return status; |
473 | } | 474 | } |
474 | 475 | ||
475 | static int ocfs2_mknod_locked(struct ocfs2_super *osb, | 476 | static int __ocfs2_mknod_locked(struct inode *dir, |
476 | struct inode *dir, | 477 | struct inode *inode, |
477 | struct inode *inode, | 478 | dev_t dev, |
478 | dev_t dev, | 479 | struct buffer_head **new_fe_bh, |
479 | struct buffer_head **new_fe_bh, | 480 | struct buffer_head *parent_fe_bh, |
480 | struct buffer_head *parent_fe_bh, | 481 | handle_t *handle, |
481 | handle_t *handle, | 482 | struct ocfs2_alloc_context *inode_ac, |
482 | struct ocfs2_alloc_context *inode_ac) | 483 | u64 fe_blkno, u64 suballoc_loc, u16 suballoc_bit) |
483 | { | 484 | { |
484 | int status = 0; | 485 | int status = 0; |
486 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
485 | struct ocfs2_dinode *fe = NULL; | 487 | struct ocfs2_dinode *fe = NULL; |
486 | struct ocfs2_extent_list *fel; | 488 | struct ocfs2_extent_list *fel; |
487 | u64 suballoc_loc, fe_blkno = 0; | ||
488 | u16 suballoc_bit; | ||
489 | u16 feat; | 489 | u16 feat; |
490 | 490 | ||
491 | *new_fe_bh = NULL; | 491 | *new_fe_bh = NULL; |
492 | 492 | ||
493 | status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh, | ||
494 | inode_ac, &suballoc_loc, | ||
495 | &suballoc_bit, &fe_blkno); | ||
496 | if (status < 0) { | ||
497 | mlog_errno(status); | ||
498 | goto leave; | ||
499 | } | ||
500 | |||
501 | /* populate as many fields early on as possible - many of | 493 | /* populate as many fields early on as possible - many of |
502 | * these are used by the support functions here and in | 494 | * these are used by the support functions here and in |
503 | * callers. */ | 495 | * callers. */ |
@@ -591,6 +583,34 @@ leave: | |||
591 | return status; | 583 | return status; |
592 | } | 584 | } |
593 | 585 | ||
586 | static int ocfs2_mknod_locked(struct ocfs2_super *osb, | ||
587 | struct inode *dir, | ||
588 | struct inode *inode, | ||
589 | dev_t dev, | ||
590 | struct buffer_head **new_fe_bh, | ||
591 | struct buffer_head *parent_fe_bh, | ||
592 | handle_t *handle, | ||
593 | struct ocfs2_alloc_context *inode_ac) | ||
594 | { | ||
595 | int status = 0; | ||
596 | u64 suballoc_loc, fe_blkno = 0; | ||
597 | u16 suballoc_bit; | ||
598 | |||
599 | *new_fe_bh = NULL; | ||
600 | |||
601 | status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh, | ||
602 | inode_ac, &suballoc_loc, | ||
603 | &suballoc_bit, &fe_blkno); | ||
604 | if (status < 0) { | ||
605 | mlog_errno(status); | ||
606 | return status; | ||
607 | } | ||
608 | |||
609 | return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, | ||
610 | parent_fe_bh, handle, inode_ac, | ||
611 | fe_blkno, suballoc_loc, suballoc_bit); | ||
612 | } | ||
613 | |||
594 | static int ocfs2_mkdir(struct inode *dir, | 614 | static int ocfs2_mkdir(struct inode *dir, |
595 | struct dentry *dentry, | 615 | struct dentry *dentry, |
596 | int mode) | 616 | int mode) |
@@ -1852,61 +1872,117 @@ bail: | |||
1852 | return status; | 1872 | return status; |
1853 | } | 1873 | } |
1854 | 1874 | ||
1855 | static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | 1875 | static int ocfs2_lookup_lock_orphan_dir(struct ocfs2_super *osb, |
1856 | struct inode **ret_orphan_dir, | 1876 | struct inode **ret_orphan_dir, |
1857 | u64 blkno, | 1877 | struct buffer_head **ret_orphan_dir_bh) |
1858 | char *name, | ||
1859 | struct ocfs2_dir_lookup_result *lookup) | ||
1860 | { | 1878 | { |
1861 | struct inode *orphan_dir_inode; | 1879 | struct inode *orphan_dir_inode; |
1862 | struct buffer_head *orphan_dir_bh = NULL; | 1880 | struct buffer_head *orphan_dir_bh = NULL; |
1863 | int status = 0; | 1881 | int ret = 0; |
1864 | |||
1865 | status = ocfs2_blkno_stringify(blkno, name); | ||
1866 | if (status < 0) { | ||
1867 | mlog_errno(status); | ||
1868 | return status; | ||
1869 | } | ||
1870 | 1882 | ||
1871 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, | 1883 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, |
1872 | ORPHAN_DIR_SYSTEM_INODE, | 1884 | ORPHAN_DIR_SYSTEM_INODE, |
1873 | osb->slot_num); | 1885 | osb->slot_num); |
1874 | if (!orphan_dir_inode) { | 1886 | if (!orphan_dir_inode) { |
1875 | status = -ENOENT; | 1887 | ret = -ENOENT; |
1876 | mlog_errno(status); | 1888 | mlog_errno(ret); |
1877 | return status; | 1889 | return ret; |
1878 | } | 1890 | } |
1879 | 1891 | ||
1880 | mutex_lock(&orphan_dir_inode->i_mutex); | 1892 | mutex_lock(&orphan_dir_inode->i_mutex); |
1881 | 1893 | ||
1882 | status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); | 1894 | ret = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); |
1883 | if (status < 0) { | 1895 | if (ret < 0) { |
1884 | mlog_errno(status); | 1896 | mutex_unlock(&orphan_dir_inode->i_mutex); |
1885 | goto leave; | 1897 | iput(orphan_dir_inode); |
1898 | |||
1899 | mlog_errno(ret); | ||
1900 | return ret; | ||
1886 | } | 1901 | } |
1887 | 1902 | ||
1888 | status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, | 1903 | *ret_orphan_dir = orphan_dir_inode; |
1889 | orphan_dir_bh, name, | 1904 | *ret_orphan_dir_bh = orphan_dir_bh; |
1890 | OCFS2_ORPHAN_NAMELEN, lookup); | ||
1891 | if (status < 0) { | ||
1892 | ocfs2_inode_unlock(orphan_dir_inode, 1); | ||
1893 | 1905 | ||
1894 | mlog_errno(status); | 1906 | return 0; |
1895 | goto leave; | 1907 | } |
1908 | |||
1909 | static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode, | ||
1910 | struct buffer_head *orphan_dir_bh, | ||
1911 | u64 blkno, | ||
1912 | char *name, | ||
1913 | struct ocfs2_dir_lookup_result *lookup) | ||
1914 | { | ||
1915 | int ret; | ||
1916 | struct ocfs2_super *osb = OCFS2_SB(orphan_dir_inode->i_sb); | ||
1917 | |||
1918 | ret = ocfs2_blkno_stringify(blkno, name); | ||
1919 | if (ret < 0) { | ||
1920 | mlog_errno(ret); | ||
1921 | return ret; | ||
1922 | } | ||
1923 | |||
1924 | ret = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, | ||
1925 | orphan_dir_bh, name, | ||
1926 | OCFS2_ORPHAN_NAMELEN, lookup); | ||
1927 | if (ret < 0) { | ||
1928 | mlog_errno(ret); | ||
1929 | return ret; | ||
1930 | } | ||
1931 | |||
1932 | return 0; | ||
1933 | } | ||
1934 | |||
1935 | /** | ||
1936 | * ocfs2_prepare_orphan_dir() - Prepare an orphan directory for | ||
1937 | * insertion of an orphan. | ||
1938 | * @osb: ocfs2 file system | ||
1939 | * @ret_orphan_dir: Orphan dir inode - returned locked! | ||
1940 | * @blkno: Actual block number of the inode to be inserted into orphan dir. | ||
1941 | * @lookup: dir lookup result, to be passed back into functions like | ||
1942 | * ocfs2_orphan_add | ||
1943 | * | ||
1944 | * Returns zero on success and the ret_orphan_dir, name and lookup | ||
1945 | * fields will be populated. | ||
1946 | * | ||
1947 | * Returns non-zero on failure. | ||
1948 | */ | ||
1949 | static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, | ||
1950 | struct inode **ret_orphan_dir, | ||
1951 | u64 blkno, | ||
1952 | char *name, | ||
1953 | struct ocfs2_dir_lookup_result *lookup) | ||
1954 | { | ||
1955 | struct inode *orphan_dir_inode = NULL; | ||
1956 | struct buffer_head *orphan_dir_bh = NULL; | ||
1957 | int ret = 0; | ||
1958 | |||
1959 | ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir_inode, | ||
1960 | &orphan_dir_bh); | ||
1961 | if (ret < 0) { | ||
1962 | mlog_errno(ret); | ||
1963 | return ret; | ||
1964 | } | ||
1965 | |||
1966 | ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh, | ||
1967 | blkno, name, lookup); | ||
1968 | if (ret < 0) { | ||
1969 | mlog_errno(ret); | ||
1970 | goto out; | ||
1896 | } | 1971 | } |
1897 | 1972 | ||
1898 | *ret_orphan_dir = orphan_dir_inode; | 1973 | *ret_orphan_dir = orphan_dir_inode; |
1899 | 1974 | ||
1900 | leave: | 1975 | out: |
1901 | if (status) { | 1976 | brelse(orphan_dir_bh); |
1977 | |||
1978 | if (ret) { | ||
1979 | ocfs2_inode_unlock(orphan_dir_inode, 1); | ||
1902 | mutex_unlock(&orphan_dir_inode->i_mutex); | 1980 | mutex_unlock(&orphan_dir_inode->i_mutex); |
1903 | iput(orphan_dir_inode); | 1981 | iput(orphan_dir_inode); |
1904 | } | 1982 | } |
1905 | 1983 | ||
1906 | brelse(orphan_dir_bh); | 1984 | mlog_exit(ret); |
1907 | 1985 | return ret; | |
1908 | mlog_exit(status); | ||
1909 | return status; | ||
1910 | } | 1986 | } |
1911 | 1987 | ||
1912 | static int ocfs2_orphan_add(struct ocfs2_super *osb, | 1988 | static int ocfs2_orphan_add(struct ocfs2_super *osb, |
@@ -2053,6 +2129,99 @@ leave: | |||
2053 | return status; | 2129 | return status; |
2054 | } | 2130 | } |
2055 | 2131 | ||
2132 | /** | ||
2133 | * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to receive a newly | ||
2134 | * allocated file. This is different from the typical 'add to orphan dir' | ||
2135 | * operation in that the inode does not yet exist. This is a problem because | ||
2136 | * the orphan dir stringifies the inode block number to come up with its | ||
2137 | * dirent. Obviously if the inode does not yet exist we have a chicken and egg | ||
2138 | * problem. This function works around it by calling deeper into the orphan | ||
2139 | * and suballoc code than other callers. Use this only by necessity. | ||
2140 | * @dir: The directory which this inode will ultimately wind up under - not the | ||
2141 | * orphan dir! | ||
2142 | * @dir_bh: buffer_head of the @dir inode block | ||
2143 | * @orphan_name: string of length (OCFS2_ORPHAN_NAMELEN + 1). Will be filled | ||
2144 | * with the string to be used for orphan dirent. Pass back to the orphan dir | ||
2145 | * code. | ||
2146 | * @ret_orphan_dir: orphan dir inode - returned locked! Pass back into orphan | ||
2147 | * dir code. | ||
2148 | * @ret_di_blkno: block number where the new inode will be allocated. | ||
2149 | * @orphan_insert: Dir insert context to be passed back into orphan dir code. | ||
2150 | * @ret_inode_ac: Inode alloc context to be passed back to the allocator. | ||
2151 | * | ||
2152 | * Returns zero on success, with @orphan_name, @ret_orphan_dir, @ret_di_blkno, | ||
2153 | * @orphan_insert and @ret_inode_ac populated. | ||
2154 | * | ||
2155 | * Returns non-zero on failure; orphan dir locks and the inode reservation are unrolled. | ||
2156 | */ | ||
2157 | static int ocfs2_prep_new_orphaned_file(struct inode *dir, | ||
2158 | struct buffer_head *dir_bh, | ||
2159 | char *orphan_name, | ||
2160 | struct inode **ret_orphan_dir, | ||
2161 | u64 *ret_di_blkno, | ||
2162 | struct ocfs2_dir_lookup_result *orphan_insert, | ||
2163 | struct ocfs2_alloc_context **ret_inode_ac) | ||
2164 | { | ||
2165 | int ret; | ||
2166 | u64 di_blkno; | ||
2167 | struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); | ||
2168 | struct inode *orphan_dir = NULL; | ||
2169 | struct buffer_head *orphan_dir_bh = NULL; | ||
2170 | struct ocfs2_alloc_context *inode_ac = NULL; | ||
2171 | |||
2172 | ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir, &orphan_dir_bh); | ||
2173 | if (ret < 0) { | ||
2174 | mlog_errno(ret); | ||
2175 | return ret; | ||
2176 | } | ||
2177 | |||
2178 | /* reserve an inode spot */ | ||
2179 | ret = ocfs2_reserve_new_inode(osb, &inode_ac); | ||
2180 | if (ret < 0) { | ||
2181 | if (ret != -ENOSPC) | ||
2182 | mlog_errno(ret); | ||
2183 | goto out; | ||
2184 | } | ||
2185 | |||
2186 | ret = ocfs2_find_new_inode_loc(dir, dir_bh, inode_ac, | ||
2187 | &di_blkno); | ||
2188 | if (ret) { | ||
2189 | mlog_errno(ret); | ||
2190 | goto out; | ||
2191 | } | ||
2192 | |||
2193 | ret = __ocfs2_prepare_orphan_dir(orphan_dir, orphan_dir_bh, | ||
2194 | di_blkno, orphan_name, orphan_insert); | ||
2195 | if (ret < 0) { | ||
2196 | mlog_errno(ret); | ||
2197 | goto out; | ||
2198 | } | ||
2199 | |||
2200 | out: | ||
2201 | if (ret == 0) { | ||
2202 | *ret_orphan_dir = orphan_dir; | ||
2203 | *ret_di_blkno = di_blkno; | ||
2204 | *ret_inode_ac = inode_ac; | ||
2205 | /* | ||
2206 | * orphan_name and orphan_insert are already up to | ||
2207 | * date via prepare_orphan_dir | ||
2208 | */ | ||
2209 | } else { | ||
2210 | /* Unroll reserve_new_inode* */ | ||
2211 | if (inode_ac) | ||
2212 | ocfs2_free_alloc_context(inode_ac); | ||
2213 | |||
2214 | /* Unroll orphan dir locking */ | ||
2215 | mutex_unlock(&orphan_dir->i_mutex); | ||
2216 | ocfs2_inode_unlock(orphan_dir, 1); | ||
2217 | iput(orphan_dir); | ||
2218 | } | ||
2219 | |||
2220 | brelse(orphan_dir_bh); | ||
2221 | |||
2222 | return ret; | ||
2223 | } | ||
2224 | |||
2056 | int ocfs2_create_inode_in_orphan(struct inode *dir, | 2225 | int ocfs2_create_inode_in_orphan(struct inode *dir, |
2057 | int mode, | 2226 | int mode, |
2058 | struct inode **new_inode) | 2227 | struct inode **new_inode) |
@@ -2068,6 +2237,8 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, | |||
2068 | struct buffer_head *new_di_bh = NULL; | 2237 | struct buffer_head *new_di_bh = NULL; |
2069 | struct ocfs2_alloc_context *inode_ac = NULL; | 2238 | struct ocfs2_alloc_context *inode_ac = NULL; |
2070 | struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; | 2239 | struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; |
2240 | u64 uninitialized_var(di_blkno), suballoc_loc; | ||
2241 | u16 suballoc_bit; | ||
2071 | 2242 | ||
2072 | status = ocfs2_inode_lock(dir, &parent_di_bh, 1); | 2243 | status = ocfs2_inode_lock(dir, &parent_di_bh, 1); |
2073 | if (status < 0) { | 2244 | if (status < 0) { |
@@ -2076,20 +2247,9 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, | |||
2076 | return status; | 2247 | return status; |
2077 | } | 2248 | } |
2078 | 2249 | ||
2079 | /* | 2250 | status = ocfs2_prep_new_orphaned_file(dir, parent_di_bh, |
2080 | * We give the orphan dir the root blkno to fake an orphan name, | 2251 | orphan_name, &orphan_dir, |
2081 | * and allocate enough space for our insertion. | 2252 | &di_blkno, &orphan_insert, &inode_ac); |
2082 | */ | ||
2083 | status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, | ||
2084 | osb->root_blkno, | ||
2085 | orphan_name, &orphan_insert); | ||
2086 | if (status < 0) { | ||
2087 | mlog_errno(status); | ||
2088 | goto leave; | ||
2089 | } | ||
2090 | |||
2091 | /* reserve an inode spot */ | ||
2092 | status = ocfs2_reserve_new_inode(osb, &inode_ac); | ||
2093 | if (status < 0) { | 2253 | if (status < 0) { |
2094 | if (status != -ENOSPC) | 2254 | if (status != -ENOSPC) |
2095 | mlog_errno(status); | 2255 | mlog_errno(status); |
@@ -2116,17 +2276,20 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, | |||
2116 | goto leave; | 2276 | goto leave; |
2117 | did_quota_inode = 1; | 2277 | did_quota_inode = 1; |
2118 | 2278 | ||
2119 | inode->i_nlink = 0; | 2279 | status = ocfs2_claim_new_inode_at_loc(handle, dir, inode_ac, |
2120 | /* do the real work now. */ | 2280 | &suballoc_loc, |
2121 | status = ocfs2_mknod_locked(osb, dir, inode, | 2281 | &suballoc_bit, di_blkno); |
2122 | 0, &new_di_bh, parent_di_bh, handle, | ||
2123 | inode_ac); | ||
2124 | if (status < 0) { | 2282 | if (status < 0) { |
2125 | mlog_errno(status); | 2283 | mlog_errno(status); |
2126 | goto leave; | 2284 | goto leave; |
2127 | } | 2285 | } |
2128 | 2286 | ||
2129 | status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, orphan_name); | 2287 | inode->i_nlink = 0; |
2288 | /* do the real work now. */ | ||
2289 | status = __ocfs2_mknod_locked(dir, inode, | ||
2290 | 0, &new_di_bh, parent_di_bh, handle, | ||
2291 | inode_ac, di_blkno, suballoc_loc, | ||
2292 | suballoc_bit); | ||
2130 | if (status < 0) { | 2293 | if (status < 0) { |
2131 | mlog_errno(status); | 2294 | mlog_errno(status); |
2132 | goto leave; | 2295 | goto leave; |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index c67003b6b5a2..d8408217e3bd 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -150,26 +150,33 @@ typedef void (*ocfs2_lock_callback)(int status, unsigned long data); | |||
150 | struct ocfs2_lock_res { | 150 | struct ocfs2_lock_res { |
151 | void *l_priv; | 151 | void *l_priv; |
152 | struct ocfs2_lock_res_ops *l_ops; | 152 | struct ocfs2_lock_res_ops *l_ops; |
153 | spinlock_t l_lock; | 153 | |
154 | 154 | ||
155 | struct list_head l_blocked_list; | 155 | struct list_head l_blocked_list; |
156 | struct list_head l_mask_waiters; | 156 | struct list_head l_mask_waiters; |
157 | 157 | ||
158 | enum ocfs2_lock_type l_type; | ||
159 | unsigned long l_flags; | 158 | unsigned long l_flags; |
160 | char l_name[OCFS2_LOCK_ID_MAX_LEN]; | 159 | char l_name[OCFS2_LOCK_ID_MAX_LEN]; |
161 | int l_level; | ||
162 | unsigned int l_ro_holders; | 160 | unsigned int l_ro_holders; |
163 | unsigned int l_ex_holders; | 161 | unsigned int l_ex_holders; |
164 | struct ocfs2_dlm_lksb l_lksb; | 162 | unsigned char l_level; |
163 | |||
164 | /* Data packed - type enum ocfs2_lock_type */ | ||
165 | unsigned char l_type; | ||
165 | 166 | ||
166 | /* used from AST/BAST funcs. */ | 167 | /* used from AST/BAST funcs. */ |
167 | enum ocfs2_ast_action l_action; | 168 | /* Data packed - enum type ocfs2_ast_action */ |
168 | enum ocfs2_unlock_action l_unlock_action; | 169 | unsigned char l_action; |
169 | int l_requested; | 170 | /* Data packed - enum type ocfs2_unlock_action */ |
170 | int l_blocking; | 171 | unsigned char l_unlock_action; |
172 | unsigned char l_requested; | ||
173 | unsigned char l_blocking; | ||
171 | unsigned int l_pending_gen; | 174 | unsigned int l_pending_gen; |
172 | 175 | ||
176 | spinlock_t l_lock; | ||
177 | |||
178 | struct ocfs2_dlm_lksb l_lksb; | ||
179 | |||
173 | wait_queue_head_t l_event; | 180 | wait_queue_head_t l_event; |
174 | 181 | ||
175 | struct list_head l_debug_list; | 182 | struct list_head l_debug_list; |
@@ -243,7 +250,7 @@ enum ocfs2_local_alloc_state | |||
243 | 250 | ||
244 | enum ocfs2_mount_options | 251 | enum ocfs2_mount_options |
245 | { | 252 | { |
246 | OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Heartbeat started in local mode */ | 253 | OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Local heartbeat */ |
247 | OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */ | 254 | OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */ |
248 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ | 255 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ |
249 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ | 256 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ |
@@ -256,6 +263,10 @@ enum ocfs2_mount_options | |||
256 | control lists */ | 263 | control lists */ |
257 | OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ | 264 | OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ |
258 | OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ | 265 | OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ |
266 | OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12, /* Allow concurrent O_DIRECT | ||
267 | writes */ | ||
268 | OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */ | ||
269 | OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ | ||
259 | }; | 270 | }; |
260 | 271 | ||
261 | #define OCFS2_OSB_SOFT_RO 0x0001 | 272 | #define OCFS2_OSB_SOFT_RO 0x0001 |
@@ -277,7 +288,8 @@ struct ocfs2_super | |||
277 | struct super_block *sb; | 288 | struct super_block *sb; |
278 | struct inode *root_inode; | 289 | struct inode *root_inode; |
279 | struct inode *sys_root_inode; | 290 | struct inode *sys_root_inode; |
280 | struct inode *system_inodes[NUM_SYSTEM_INODES]; | 291 | struct inode *global_system_inodes[NUM_GLOBAL_SYSTEM_INODES]; |
292 | struct inode **local_system_inodes; | ||
281 | 293 | ||
282 | struct ocfs2_slot_info *slot_info; | 294 | struct ocfs2_slot_info *slot_info; |
283 | 295 | ||
@@ -368,6 +380,8 @@ struct ocfs2_super | |||
368 | struct ocfs2_alloc_stats alloc_stats; | 380 | struct ocfs2_alloc_stats alloc_stats; |
369 | char dev_str[20]; /* "major,minor" of the device */ | 381 | char dev_str[20]; /* "major,minor" of the device */ |
370 | 382 | ||
383 | u8 osb_stackflags; | ||
384 | |||
371 | char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; | 385 | char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; |
372 | struct ocfs2_cluster_connection *cconn; | 386 | struct ocfs2_cluster_connection *cconn; |
373 | struct ocfs2_lock_res osb_super_lockres; | 387 | struct ocfs2_lock_res osb_super_lockres; |
@@ -601,10 +615,35 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) | |||
601 | return ret; | 615 | return ret; |
602 | } | 616 | } |
603 | 617 | ||
604 | static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) | 618 | static inline int ocfs2_clusterinfo_valid(struct ocfs2_super *osb) |
605 | { | 619 | { |
606 | return (osb->s_feature_incompat & | 620 | return (osb->s_feature_incompat & |
607 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK); | 621 | (OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK | |
622 | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)); | ||
623 | } | ||
624 | |||
625 | static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) | ||
626 | { | ||
627 | if (ocfs2_clusterinfo_valid(osb) && | ||
628 | memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, | ||
629 | OCFS2_STACK_LABEL_LEN)) | ||
630 | return 1; | ||
631 | return 0; | ||
632 | } | ||
633 | |||
634 | static inline int ocfs2_o2cb_stack(struct ocfs2_super *osb) | ||
635 | { | ||
636 | if (ocfs2_clusterinfo_valid(osb) && | ||
637 | !memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, | ||
638 | OCFS2_STACK_LABEL_LEN)) | ||
639 | return 1; | ||
640 | return 0; | ||
641 | } | ||
642 | |||
643 | static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) | ||
644 | { | ||
645 | return ocfs2_o2cb_stack(osb) && | ||
646 | (osb->osb_stackflags & OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT); | ||
608 | } | 647 | } |
609 | 648 | ||
610 | static inline int ocfs2_mount_local(struct ocfs2_super *osb) | 649 | static inline int ocfs2_mount_local(struct ocfs2_super *osb) |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 33f1c9a8258d..c2e4f8222e2f 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -101,7 +101,8 @@ | |||
101 | | OCFS2_FEATURE_INCOMPAT_META_ECC \ | 101 | | OCFS2_FEATURE_INCOMPAT_META_ECC \ |
102 | | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ | 102 | | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ |
103 | | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ | 103 | | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ |
104 | | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG) | 104 | | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \ |
105 | | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO) | ||
105 | #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ | 106 | #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ |
106 | | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ | 107 | | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ |
107 | | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) | 108 | | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) |
@@ -170,6 +171,13 @@ | |||
170 | #define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000 | 171 | #define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000 |
171 | 172 | ||
172 | /* | 173 | /* |
174 | * Incompat bit to indicate useable clusterinfo with stackflags for all | ||
175 | * cluster stacks (userspace and o2cb). If this bit is set, | ||
176 | * INCOMPAT_USERSPACE_STACK becomes superfluous and thus should not be set. | ||
177 | */ | ||
178 | #define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x4000 | ||
179 | |||
180 | /* | ||
173 | * backup superblock flag is used to indicate that this volume | 181 | * backup superblock flag is used to indicate that this volume |
174 | * has backup superblocks. | 182 | * has backup superblocks. |
175 | */ | 183 | */ |
@@ -235,18 +243,31 @@ | |||
235 | #define OCFS2_HAS_REFCOUNT_FL (0x0010) | 243 | #define OCFS2_HAS_REFCOUNT_FL (0x0010) |
236 | 244 | ||
237 | /* Inode attributes, keep in sync with EXT2 */ | 245 | /* Inode attributes, keep in sync with EXT2 */ |
238 | #define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ | 246 | #define OCFS2_SECRM_FL FS_SECRM_FL /* Secure deletion */ |
239 | #define OCFS2_UNRM_FL (0x00000002) /* Undelete */ | 247 | #define OCFS2_UNRM_FL FS_UNRM_FL /* Undelete */ |
240 | #define OCFS2_COMPR_FL (0x00000004) /* Compress file */ | 248 | #define OCFS2_COMPR_FL FS_COMPR_FL /* Compress file */ |
241 | #define OCFS2_SYNC_FL (0x00000008) /* Synchronous updates */ | 249 | #define OCFS2_SYNC_FL FS_SYNC_FL /* Synchronous updates */ |
242 | #define OCFS2_IMMUTABLE_FL (0x00000010) /* Immutable file */ | 250 | #define OCFS2_IMMUTABLE_FL FS_IMMUTABLE_FL /* Immutable file */ |
243 | #define OCFS2_APPEND_FL (0x00000020) /* writes to file may only append */ | 251 | #define OCFS2_APPEND_FL FS_APPEND_FL /* writes to file may only append */ |
244 | #define OCFS2_NODUMP_FL (0x00000040) /* do not dump file */ | 252 | #define OCFS2_NODUMP_FL FS_NODUMP_FL /* do not dump file */ |
245 | #define OCFS2_NOATIME_FL (0x00000080) /* do not update atime */ | 253 | #define OCFS2_NOATIME_FL FS_NOATIME_FL /* do not update atime */ |
246 | #define OCFS2_DIRSYNC_FL (0x00010000) /* dirsync behaviour (directories only) */ | 254 | /* Reserved for compression usage... */ |
247 | 255 | #define OCFS2_DIRTY_FL FS_DIRTY_FL | |
248 | #define OCFS2_FL_VISIBLE (0x000100FF) /* User visible flags */ | 256 | #define OCFS2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */ |
249 | #define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */ | 257 | #define OCFS2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */ |
258 | #define OCFS2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */ | ||
259 | /* End compression flags --- maybe not all used */ | ||
260 | #define OCFS2_BTREE_FL FS_BTREE_FL /* btree format dir */ | ||
261 | #define OCFS2_INDEX_FL FS_INDEX_FL /* hash-indexed directory */ | ||
262 | #define OCFS2_IMAGIC_FL FS_IMAGIC_FL /* AFS directory */ | ||
263 | #define OCFS2_JOURNAL_DATA_FL FS_JOURNAL_DATA_FL /* Reserved for ext3 */ | ||
264 | #define OCFS2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */ | ||
265 | #define OCFS2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */ | ||
266 | #define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/ | ||
267 | #define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */ | ||
268 | |||
269 | #define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ | ||
270 | #define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ | ||
250 | 271 | ||
251 | /* | 272 | /* |
252 | * Extent record flags (e_node.leaf.flags) | 273 | * Extent record flags (e_node.leaf.flags) |
@@ -279,10 +300,13 @@ | |||
279 | #define OCFS2_VOL_UUID_LEN 16 | 300 | #define OCFS2_VOL_UUID_LEN 16 |
280 | #define OCFS2_MAX_VOL_LABEL_LEN 64 | 301 | #define OCFS2_MAX_VOL_LABEL_LEN 64 |
281 | 302 | ||
282 | /* The alternate, userspace stack fields */ | 303 | /* The cluster stack fields */ |
283 | #define OCFS2_STACK_LABEL_LEN 4 | 304 | #define OCFS2_STACK_LABEL_LEN 4 |
284 | #define OCFS2_CLUSTER_NAME_LEN 16 | 305 | #define OCFS2_CLUSTER_NAME_LEN 16 |
285 | 306 | ||
307 | /* Classic (historically speaking) cluster stack */ | ||
308 | #define OCFS2_CLASSIC_CLUSTER_STACK "o2cb" | ||
309 | |||
286 | /* Journal limits (in bytes) */ | 310 | /* Journal limits (in bytes) */ |
287 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) | 311 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) |
288 | 312 | ||
@@ -292,6 +316,11 @@ | |||
292 | */ | 316 | */ |
293 | #define OCFS2_MIN_XATTR_INLINE_SIZE 256 | 317 | #define OCFS2_MIN_XATTR_INLINE_SIZE 256 |
294 | 318 | ||
319 | /* | ||
320 | * Cluster info flags (ocfs2_cluster_info.ci_stackflags) | ||
321 | */ | ||
322 | #define OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT (0x01) | ||
323 | |||
295 | struct ocfs2_system_inode_info { | 324 | struct ocfs2_system_inode_info { |
296 | char *si_name; | 325 | char *si_name; |
297 | int si_iflags; | 326 | int si_iflags; |
@@ -309,6 +338,7 @@ enum { | |||
309 | USER_QUOTA_SYSTEM_INODE, | 338 | USER_QUOTA_SYSTEM_INODE, |
310 | GROUP_QUOTA_SYSTEM_INODE, | 339 | GROUP_QUOTA_SYSTEM_INODE, |
311 | #define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE | 340 | #define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE |
341 | #define OCFS2_FIRST_LOCAL_SYSTEM_INODE ORPHAN_DIR_SYSTEM_INODE | ||
312 | ORPHAN_DIR_SYSTEM_INODE, | 342 | ORPHAN_DIR_SYSTEM_INODE, |
313 | EXTENT_ALLOC_SYSTEM_INODE, | 343 | EXTENT_ALLOC_SYSTEM_INODE, |
314 | INODE_ALLOC_SYSTEM_INODE, | 344 | INODE_ALLOC_SYSTEM_INODE, |
@@ -317,8 +347,12 @@ enum { | |||
317 | TRUNCATE_LOG_SYSTEM_INODE, | 347 | TRUNCATE_LOG_SYSTEM_INODE, |
318 | LOCAL_USER_QUOTA_SYSTEM_INODE, | 348 | LOCAL_USER_QUOTA_SYSTEM_INODE, |
319 | LOCAL_GROUP_QUOTA_SYSTEM_INODE, | 349 | LOCAL_GROUP_QUOTA_SYSTEM_INODE, |
350 | #define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE | ||
320 | NUM_SYSTEM_INODES | 351 | NUM_SYSTEM_INODES |
321 | }; | 352 | }; |
353 | #define NUM_GLOBAL_SYSTEM_INODES OCFS2_LAST_GLOBAL_SYSTEM_INODE | ||
354 | #define NUM_LOCAL_SYSTEM_INODES \ | ||
355 | (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE) | ||
322 | 356 | ||
323 | static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { | 357 | static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { |
324 | /* Global system inodes (single copy) */ | 358 | /* Global system inodes (single copy) */ |
@@ -347,6 +381,7 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { | |||
347 | /* Parameter passed from mount.ocfs2 to module */ | 381 | /* Parameter passed from mount.ocfs2 to module */ |
348 | #define OCFS2_HB_NONE "heartbeat=none" | 382 | #define OCFS2_HB_NONE "heartbeat=none" |
349 | #define OCFS2_HB_LOCAL "heartbeat=local" | 383 | #define OCFS2_HB_LOCAL "heartbeat=local" |
384 | #define OCFS2_HB_GLOBAL "heartbeat=global" | ||
350 | 385 | ||
351 | /* | 386 | /* |
352 | * OCFS2 directory file types. Only the low 3 bits are used. The | 387 | * OCFS2 directory file types. Only the low 3 bits are used. The |
@@ -553,9 +588,21 @@ struct ocfs2_slot_map_extended { | |||
553 | */ | 588 | */ |
554 | }; | 589 | }; |
555 | 590 | ||
591 | /* | ||
592 | * ci_stackflags is only valid if the incompat bit | ||
593 | * OCFS2_FEATURE_INCOMPAT_CLUSTERINFO is set. | ||
594 | */ | ||
556 | struct ocfs2_cluster_info { | 595 | struct ocfs2_cluster_info { |
557 | /*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN]; | 596 | /*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN]; |
558 | __le32 ci_reserved; | 597 | union { |
598 | __le32 ci_reserved; | ||
599 | struct { | ||
600 | __u8 ci_stackflags; | ||
601 | __u8 ci_reserved1; | ||
602 | __u8 ci_reserved2; | ||
603 | __u8 ci_reserved3; | ||
604 | }; | ||
605 | }; | ||
559 | /*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN]; | 606 | /*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN]; |
560 | /*18*/ | 607 | /*18*/ |
561 | }; | 608 | }; |
@@ -592,9 +639,9 @@ struct ocfs2_super_block { | |||
592 | * group header */ | 639 | * group header */ |
593 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ | 640 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ |
594 | /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ | 641 | /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ |
595 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace | 642 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Only valid if either |
596 | stack. Only valid | 643 | userspace or clusterinfo |
597 | with INCOMPAT flag. */ | 644 | INCOMPAT flag set. */ |
598 | /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size | 645 | /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size |
599 | for this fs*/ | 646 | for this fs*/ |
600 | __le16 s_reserved0; | 647 | __le16 s_reserved0; |
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index 2d3420af1a83..b46f39bf7438 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h | |||
@@ -23,10 +23,10 @@ | |||
23 | /* | 23 | /* |
24 | * ioctl commands | 24 | * ioctl commands |
25 | */ | 25 | */ |
26 | #define OCFS2_IOC_GETFLAGS _IOR('f', 1, long) | 26 | #define OCFS2_IOC_GETFLAGS FS_IOC_GETFLAGS |
27 | #define OCFS2_IOC_SETFLAGS _IOW('f', 2, long) | 27 | #define OCFS2_IOC_SETFLAGS FS_IOC_SETFLAGS |
28 | #define OCFS2_IOC32_GETFLAGS _IOR('f', 1, int) | 28 | #define OCFS2_IOC32_GETFLAGS FS_IOC32_GETFLAGS |
29 | #define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) | 29 | #define OCFS2_IOC32_SETFLAGS FS_IOC32_SETFLAGS |
30 | 30 | ||
31 | /* | 31 | /* |
32 | * Space reservation / allocation / free ioctls and argument structure | 32 | * Space reservation / allocation / free ioctls and argument structure |
@@ -76,4 +76,99 @@ struct reflink_arguments { | |||
76 | }; | 76 | }; |
77 | #define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) | 77 | #define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) |
78 | 78 | ||
79 | /* Following definitions dedicated for ocfs2_info_request ioctls. */ | ||
80 | #define OCFS2_INFO_MAX_REQUEST (50) | ||
81 | #define OCFS2_TEXT_UUID_LEN (OCFS2_VOL_UUID_LEN * 2) | ||
82 | |||
83 | /* Magic number of all requests */ | ||
84 | #define OCFS2_INFO_MAGIC (0x4F32494E) | ||
85 | |||
86 | /* | ||
87 | * Always try to separate info request into small pieces to | ||
88 | * guarantee the backward&forward compatibility. | ||
89 | */ | ||
90 | struct ocfs2_info { | ||
91 | __u64 oi_requests; /* Array of __u64 pointers to requests */ | ||
92 | __u32 oi_count; /* Number of requests in info_requests */ | ||
93 | __u32 oi_pad; | ||
94 | }; | ||
95 | |||
96 | struct ocfs2_info_request { | ||
97 | /*00*/ __u32 ir_magic; /* Magic number */ | ||
98 | __u32 ir_code; /* Info request code */ | ||
99 | __u32 ir_size; /* Size of request */ | ||
100 | __u32 ir_flags; /* Request flags */ | ||
101 | /*10*/ /* Request specific fields */ | ||
102 | }; | ||
103 | |||
104 | struct ocfs2_info_clustersize { | ||
105 | struct ocfs2_info_request ic_req; | ||
106 | __u32 ic_clustersize; | ||
107 | __u32 ic_pad; | ||
108 | }; | ||
109 | |||
110 | struct ocfs2_info_blocksize { | ||
111 | struct ocfs2_info_request ib_req; | ||
112 | __u32 ib_blocksize; | ||
113 | __u32 ib_pad; | ||
114 | }; | ||
115 | |||
116 | struct ocfs2_info_maxslots { | ||
117 | struct ocfs2_info_request im_req; | ||
118 | __u32 im_max_slots; | ||
119 | __u32 im_pad; | ||
120 | }; | ||
121 | |||
122 | struct ocfs2_info_label { | ||
123 | struct ocfs2_info_request il_req; | ||
124 | __u8 il_label[OCFS2_MAX_VOL_LABEL_LEN]; | ||
125 | } __attribute__ ((packed)); | ||
126 | |||
127 | struct ocfs2_info_uuid { | ||
128 | struct ocfs2_info_request iu_req; | ||
129 | __u8 iu_uuid_str[OCFS2_TEXT_UUID_LEN + 1]; | ||
130 | } __attribute__ ((packed)); | ||
131 | |||
132 | struct ocfs2_info_fs_features { | ||
133 | struct ocfs2_info_request if_req; | ||
134 | __u32 if_compat_features; | ||
135 | __u32 if_incompat_features; | ||
136 | __u32 if_ro_compat_features; | ||
137 | __u32 if_pad; | ||
138 | }; | ||
139 | |||
140 | struct ocfs2_info_journal_size { | ||
141 | struct ocfs2_info_request ij_req; | ||
142 | __u64 ij_journal_size; | ||
143 | }; | ||
144 | |||
145 | /* Codes for ocfs2_info_request */ | ||
146 | enum ocfs2_info_type { | ||
147 | OCFS2_INFO_CLUSTERSIZE = 1, | ||
148 | OCFS2_INFO_BLOCKSIZE, | ||
149 | OCFS2_INFO_MAXSLOTS, | ||
150 | OCFS2_INFO_LABEL, | ||
151 | OCFS2_INFO_UUID, | ||
152 | OCFS2_INFO_FS_FEATURES, | ||
153 | OCFS2_INFO_JOURNAL_SIZE, | ||
154 | OCFS2_INFO_NUM_TYPES | ||
155 | }; | ||
156 | |||
157 | /* Flags for struct ocfs2_info_request */ | ||
158 | /* Filled by the caller */ | ||
159 | #define OCFS2_INFO_FL_NON_COHERENT (0x00000001) /* Cluster coherency not | ||
160 | required. This is a hint. | ||
161 | It is up to ocfs2 whether | ||
162 | the request can be fulfilled | ||
163 | without locking. */ | ||
164 | /* Filled by ocfs2 */ | ||
165 | #define OCFS2_INFO_FL_FILLED (0x40000000) /* Filesystem understood | ||
166 | this request and | ||
167 | filled in the answer */ | ||
168 | |||
169 | #define OCFS2_INFO_FL_ERROR (0x80000000) /* Error happened during | ||
170 | request handling. */ | ||
171 | |||
172 | #define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info) | ||
173 | |||
79 | #endif /* OCFS2_IOCTL_H */ | 174 | #endif /* OCFS2_IOCTL_H */ |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 3ac5aa733e9c..b5f9160e93e9 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -49,6 +49,7 @@ | |||
49 | 49 | ||
50 | struct ocfs2_cow_context { | 50 | struct ocfs2_cow_context { |
51 | struct inode *inode; | 51 | struct inode *inode; |
52 | struct file *file; | ||
52 | u32 cow_start; | 53 | u32 cow_start; |
53 | u32 cow_len; | 54 | u32 cow_len; |
54 | struct ocfs2_extent_tree data_et; | 55 | struct ocfs2_extent_tree data_et; |
@@ -2436,16 +2437,26 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, | |||
2436 | len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + | 2437 | len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) + |
2437 | le32_to_cpu(rec.r_clusters)) - cpos; | 2438 | le32_to_cpu(rec.r_clusters)) - cpos; |
2438 | /* | 2439 | /* |
2439 | * If the refcount rec already exist, cool. We just need | ||
2440 | * to check whether there is a split. Otherwise we just need | ||
2441 | * to increase the refcount. | ||
2442 | * If we will insert one, increases recs_add. | ||
2443 | * | ||
2444 | * We record all the records which will be inserted to the | 2440 | * We record all the records which will be inserted to the |
2445 | * same refcount block, so that we can tell exactly whether | 2441 | * same refcount block, so that we can tell exactly whether |
2446 | * we need a new refcount block or not. | 2442 | * we need a new refcount block or not. |
2443 | * | ||
2444 | * If we will insert a new one, this is easy and only happens | ||
2445 | * during adding refcounted flag to the extent, so we don't | ||
2446 | * have a chance of splitting. We just need one record. | ||
2447 | * | ||
2448 | * If the refcount rec already exists, that would be a little | ||
2449 | * complicated. We may have to: | ||
2450 | * 1) split at the beginning if the start pos isn't aligned. | ||
2451 | * we need 1 more record in this case. | ||
2452 | * 2) split at the end if the end pos isn't aligned. | ||
2453 | * we need 1 more record in this case. | ||
2454 | * 3) split in the middle because of file system fragmentation. | ||
2455 | * we need 2 more records in this case (we can't detect this | ||
2456 | * beforehand, so always think of the worst case). | ||
2447 | */ | 2457 | */ |
2448 | if (rec.r_refcount) { | 2458 | if (rec.r_refcount) { |
2459 | recs_add += 2; | ||
2449 | /* Check whether we need a split at the beginning. */ | 2460 | /* Check whether we need a split at the beginning. */ |
2450 | if (cpos == start_cpos && | 2461 | if (cpos == start_cpos && |
2451 | cpos != le64_to_cpu(rec.r_cpos)) | 2462 | cpos != le64_to_cpu(rec.r_cpos)) |
@@ -2922,13 +2933,16 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2922 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); | 2933 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); |
2923 | struct page *page; | 2934 | struct page *page; |
2924 | pgoff_t page_index; | 2935 | pgoff_t page_index; |
2925 | unsigned int from, to; | 2936 | unsigned int from, to, readahead_pages; |
2926 | loff_t offset, end, map_end; | 2937 | loff_t offset, end, map_end; |
2927 | struct address_space *mapping = context->inode->i_mapping; | 2938 | struct address_space *mapping = context->inode->i_mapping; |
2928 | 2939 | ||
2929 | mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster, | 2940 | mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster, |
2930 | new_cluster, new_len, cpos); | 2941 | new_cluster, new_len, cpos); |
2931 | 2942 | ||
2943 | readahead_pages = | ||
2944 | (ocfs2_cow_contig_clusters(sb) << | ||
2945 | OCFS2_SB(sb)->s_clustersize_bits) >> PAGE_CACHE_SHIFT; | ||
2932 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; | 2946 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; |
2933 | end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); | 2947 | end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); |
2934 | /* | 2948 | /* |
@@ -2950,7 +2964,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2950 | if (map_end & (PAGE_CACHE_SIZE - 1)) | 2964 | if (map_end & (PAGE_CACHE_SIZE - 1)) |
2951 | to = map_end & (PAGE_CACHE_SIZE - 1); | 2965 | to = map_end & (PAGE_CACHE_SIZE - 1); |
2952 | 2966 | ||
2953 | page = grab_cache_page(mapping, page_index); | 2967 | page = find_or_create_page(mapping, page_index, GFP_NOFS); |
2954 | 2968 | ||
2955 | /* | 2969 | /* |
2956 | * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page | 2970 | * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page |
@@ -2959,6 +2973,14 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2959 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) | 2973 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) |
2960 | BUG_ON(PageDirty(page)); | 2974 | BUG_ON(PageDirty(page)); |
2961 | 2975 | ||
2976 | if (PageReadahead(page) && context->file) { | ||
2977 | page_cache_async_readahead(mapping, | ||
2978 | &context->file->f_ra, | ||
2979 | context->file, | ||
2980 | page, page_index, | ||
2981 | readahead_pages); | ||
2982 | } | ||
2983 | |||
2962 | if (!PageUptodate(page)) { | 2984 | if (!PageUptodate(page)) { |
2963 | ret = block_read_full_page(page, ocfs2_get_block); | 2985 | ret = block_read_full_page(page, ocfs2_get_block); |
2964 | if (ret) { | 2986 | if (ret) { |
@@ -3169,7 +3191,8 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb, | |||
3169 | if (map_end > end) | 3191 | if (map_end > end) |
3170 | map_end = end; | 3192 | map_end = end; |
3171 | 3193 | ||
3172 | page = grab_cache_page(context->inode->i_mapping, page_index); | 3194 | page = find_or_create_page(context->inode->i_mapping, |
3195 | page_index, GFP_NOFS); | ||
3173 | BUG_ON(!page); | 3196 | BUG_ON(!page); |
3174 | 3197 | ||
3175 | wait_on_page_writeback(page); | 3198 | wait_on_page_writeback(page); |
@@ -3398,12 +3421,35 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context) | |||
3398 | return ret; | 3421 | return ret; |
3399 | } | 3422 | } |
3400 | 3423 | ||
3424 | static void ocfs2_readahead_for_cow(struct inode *inode, | ||
3425 | struct file *file, | ||
3426 | u32 start, u32 len) | ||
3427 | { | ||
3428 | struct address_space *mapping; | ||
3429 | pgoff_t index; | ||
3430 | unsigned long num_pages; | ||
3431 | int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; | ||
3432 | |||
3433 | if (!file) | ||
3434 | return; | ||
3435 | |||
3436 | mapping = file->f_mapping; | ||
3437 | num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT; | ||
3438 | if (!num_pages) | ||
3439 | num_pages = 1; | ||
3440 | |||
3441 | index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT; | ||
3442 | page_cache_sync_readahead(mapping, &file->f_ra, file, | ||
3443 | index, num_pages); | ||
3444 | } | ||
3445 | |||
3401 | /* | 3446 | /* |
3402 | * Starting at cpos, try to CoW write_len clusters. Don't CoW | 3447 | * Starting at cpos, try to CoW write_len clusters. Don't CoW |
3403 | * past max_cpos. This will stop when it runs into a hole or an | 3448 | * past max_cpos. This will stop when it runs into a hole or an |
3404 | * unrefcounted extent. | 3449 | * unrefcounted extent. |
3405 | */ | 3450 | */ |
3406 | static int ocfs2_refcount_cow_hunk(struct inode *inode, | 3451 | static int ocfs2_refcount_cow_hunk(struct inode *inode, |
3452 | struct file *file, | ||
3407 | struct buffer_head *di_bh, | 3453 | struct buffer_head *di_bh, |
3408 | u32 cpos, u32 write_len, u32 max_cpos) | 3454 | u32 cpos, u32 write_len, u32 max_cpos) |
3409 | { | 3455 | { |
@@ -3432,6 +3478,8 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, | |||
3432 | 3478 | ||
3433 | BUG_ON(cow_len == 0); | 3479 | BUG_ON(cow_len == 0); |
3434 | 3480 | ||
3481 | ocfs2_readahead_for_cow(inode, file, cow_start, cow_len); | ||
3482 | |||
3435 | context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); | 3483 | context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); |
3436 | if (!context) { | 3484 | if (!context) { |
3437 | ret = -ENOMEM; | 3485 | ret = -ENOMEM; |
@@ -3453,6 +3501,7 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, | |||
3453 | context->ref_root_bh = ref_root_bh; | 3501 | context->ref_root_bh = ref_root_bh; |
3454 | context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; | 3502 | context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; |
3455 | context->get_clusters = ocfs2_di_get_clusters; | 3503 | context->get_clusters = ocfs2_di_get_clusters; |
3504 | context->file = file; | ||
3456 | 3505 | ||
3457 | ocfs2_init_dinode_extent_tree(&context->data_et, | 3506 | ocfs2_init_dinode_extent_tree(&context->data_et, |
3458 | INODE_CACHE(inode), di_bh); | 3507 | INODE_CACHE(inode), di_bh); |
@@ -3481,6 +3530,7 @@ out: | |||
3481 | * clusters between cpos and cpos+write_len are safe to modify. | 3530 | * clusters between cpos and cpos+write_len are safe to modify. |
3482 | */ | 3531 | */ |
3483 | int ocfs2_refcount_cow(struct inode *inode, | 3532 | int ocfs2_refcount_cow(struct inode *inode, |
3533 | struct file *file, | ||
3484 | struct buffer_head *di_bh, | 3534 | struct buffer_head *di_bh, |
3485 | u32 cpos, u32 write_len, u32 max_cpos) | 3535 | u32 cpos, u32 write_len, u32 max_cpos) |
3486 | { | 3536 | { |
@@ -3500,7 +3550,7 @@ int ocfs2_refcount_cow(struct inode *inode, | |||
3500 | num_clusters = write_len; | 3550 | num_clusters = write_len; |
3501 | 3551 | ||
3502 | if (ext_flags & OCFS2_EXT_REFCOUNTED) { | 3552 | if (ext_flags & OCFS2_EXT_REFCOUNTED) { |
3503 | ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos, | 3553 | ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos, |
3504 | num_clusters, max_cpos); | 3554 | num_clusters, max_cpos); |
3505 | if (ret) { | 3555 | if (ret) { |
3506 | mlog_errno(ret); | 3556 | mlog_errno(ret); |
@@ -4190,8 +4240,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry, | |||
4190 | goto out; | 4240 | goto out; |
4191 | } | 4241 | } |
4192 | 4242 | ||
4193 | mutex_lock(&new_inode->i_mutex); | 4243 | mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD); |
4194 | ret = ocfs2_inode_lock(new_inode, &new_bh, 1); | 4244 | ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1, |
4245 | OI_LS_REFLINK_TARGET); | ||
4195 | if (ret) { | 4246 | if (ret) { |
4196 | mlog_errno(ret); | 4247 | mlog_errno(ret); |
4197 | goto out_unlock; | 4248 | goto out_unlock; |
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index 9983ba1570e2..c8ce46f7d8e3 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h | |||
@@ -21,14 +21,14 @@ struct ocfs2_refcount_tree { | |||
21 | struct rb_node rf_node; | 21 | struct rb_node rf_node; |
22 | u64 rf_blkno; | 22 | u64 rf_blkno; |
23 | u32 rf_generation; | 23 | u32 rf_generation; |
24 | struct kref rf_getcnt; | ||
24 | struct rw_semaphore rf_sem; | 25 | struct rw_semaphore rf_sem; |
25 | struct ocfs2_lock_res rf_lockres; | 26 | struct ocfs2_lock_res rf_lockres; |
26 | struct kref rf_getcnt; | ||
27 | int rf_removed; | 27 | int rf_removed; |
28 | 28 | ||
29 | /* the following 4 fields are used by caching_info. */ | 29 | /* the following 4 fields are used by caching_info. */ |
30 | struct ocfs2_caching_info rf_ci; | ||
31 | spinlock_t rf_lock; | 30 | spinlock_t rf_lock; |
31 | struct ocfs2_caching_info rf_ci; | ||
32 | struct mutex rf_io_mutex; | 32 | struct mutex rf_io_mutex; |
33 | struct super_block *rf_sb; | 33 | struct super_block *rf_sb; |
34 | }; | 34 | }; |
@@ -52,7 +52,8 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, | |||
52 | u32 clusters, | 52 | u32 clusters, |
53 | int *credits, | 53 | int *credits, |
54 | int *ref_blocks); | 54 | int *ref_blocks); |
55 | int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh, | 55 | int ocfs2_refcount_cow(struct inode *inode, |
56 | struct file *filep, struct buffer_head *di_bh, | ||
56 | u32 cpos, u32 write_len, u32 max_cpos); | 57 | u32 cpos, u32 write_len, u32 max_cpos); |
57 | 58 | ||
58 | typedef int (ocfs2_post_refcount_func)(struct inode *inode, | 59 | typedef int (ocfs2_post_refcount_func)(struct inode *inode, |
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c index d8b6e4259b80..3e78db361bc7 100644 --- a/fs/ocfs2/reservations.c +++ b/fs/ocfs2/reservations.c | |||
@@ -732,25 +732,23 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap, | |||
732 | struct ocfs2_alloc_reservation *resv, | 732 | struct ocfs2_alloc_reservation *resv, |
733 | int *cstart, int *clen) | 733 | int *cstart, int *clen) |
734 | { | 734 | { |
735 | unsigned int wanted = *clen; | ||
736 | |||
737 | if (resv == NULL || ocfs2_resmap_disabled(resmap)) | 735 | if (resv == NULL || ocfs2_resmap_disabled(resmap)) |
738 | return -ENOSPC; | 736 | return -ENOSPC; |
739 | 737 | ||
740 | spin_lock(&resv_lock); | 738 | spin_lock(&resv_lock); |
741 | 739 | ||
742 | /* | ||
743 | * We don't want to over-allocate for temporary | ||
744 | * windows. Otherwise, we run the risk of fragmenting the | ||
745 | * allocation space. | ||
746 | */ | ||
747 | wanted = ocfs2_resv_window_bits(resmap, resv); | ||
748 | if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen) | ||
749 | wanted = *clen; | ||
750 | |||
751 | if (ocfs2_resv_empty(resv)) { | 740 | if (ocfs2_resv_empty(resv)) { |
752 | mlog(0, "empty reservation, find new window\n"); | 741 | /* |
742 | * We don't want to over-allocate for temporary | ||
743 | * windows. Otherwise, we run the risk of fragmenting the | ||
744 | * allocation space. | ||
745 | */ | ||
746 | unsigned int wanted = ocfs2_resv_window_bits(resmap, resv); | ||
753 | 747 | ||
748 | if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen) | ||
749 | wanted = *clen; | ||
750 | |||
751 | mlog(0, "empty reservation, find new window\n"); | ||
754 | /* | 752 | /* |
755 | * Try to get a window here. If it works, we must fall | 753 | * Try to get a window here. If it works, we must fall |
756 | * through and test the bitmap . This avoids some | 754 | * through and test the bitmap . This avoids some |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index bfbd7e9e949f..ab4e0172cc1d 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
@@ -357,7 +357,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, | |||
357 | { | 357 | { |
358 | int status = 0; | 358 | int status = 0; |
359 | u64 blkno; | 359 | u64 blkno; |
360 | unsigned long long blocks, bytes; | 360 | unsigned long long blocks, bytes = 0; |
361 | unsigned int i; | 361 | unsigned int i; |
362 | struct buffer_head *bh; | 362 | struct buffer_head *bh; |
363 | 363 | ||
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 0d3049f696c5..19965b00c43c 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c | |||
@@ -283,6 +283,8 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) | |||
283 | /* for now we only have one cluster/node, make sure we see it | 283 | /* for now we only have one cluster/node, make sure we see it |
284 | * in the heartbeat universe */ | 284 | * in the heartbeat universe */ |
285 | if (!o2hb_check_local_node_heartbeating()) { | 285 | if (!o2hb_check_local_node_heartbeating()) { |
286 | if (o2hb_global_heartbeat_active()) | ||
287 | mlog(ML_ERROR, "Global heartbeat not started\n"); | ||
286 | rc = -EINVAL; | 288 | rc = -EINVAL; |
287 | goto out; | 289 | goto out; |
288 | } | 290 | } |
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 2dc57bca0688..252e7c82f929 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/miscdevice.h> | 22 | #include <linux/miscdevice.h> |
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/smp_lock.h> | ||
26 | #include <linux/reboot.h> | 25 | #include <linux/reboot.h> |
27 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
28 | 27 | ||
@@ -612,12 +611,10 @@ static int ocfs2_control_open(struct inode *inode, struct file *file) | |||
612 | return -ENOMEM; | 611 | return -ENOMEM; |
613 | p->op_this_node = -1; | 612 | p->op_this_node = -1; |
614 | 613 | ||
615 | lock_kernel(); | ||
616 | mutex_lock(&ocfs2_control_lock); | 614 | mutex_lock(&ocfs2_control_lock); |
617 | file->private_data = p; | 615 | file->private_data = p; |
618 | list_add(&p->op_list, &ocfs2_control_private_list); | 616 | list_add(&p->op_list, &ocfs2_control_private_list); |
619 | mutex_unlock(&ocfs2_control_lock); | 617 | mutex_unlock(&ocfs2_control_lock); |
620 | unlock_kernel(); | ||
621 | 618 | ||
622 | return 0; | 619 | return 0; |
623 | } | 620 | } |
@@ -628,6 +625,7 @@ static const struct file_operations ocfs2_control_fops = { | |||
628 | .read = ocfs2_control_read, | 625 | .read = ocfs2_control_read, |
629 | .write = ocfs2_control_write, | 626 | .write = ocfs2_control_write, |
630 | .owner = THIS_MODULE, | 627 | .owner = THIS_MODULE, |
628 | .llseek = default_llseek, | ||
631 | }; | 629 | }; |
632 | 630 | ||
633 | static struct miscdevice ocfs2_control_device = { | 631 | static struct miscdevice ocfs2_control_device = { |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index a8e6a95a353f..5fed60de7630 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -57,11 +57,28 @@ struct ocfs2_suballoc_result { | |||
57 | u64 sr_bg_blkno; /* The bg we allocated from. Set | 57 | u64 sr_bg_blkno; /* The bg we allocated from. Set |
58 | to 0 when a block group is | 58 | to 0 when a block group is |
59 | contiguous. */ | 59 | contiguous. */ |
60 | u64 sr_bg_stable_blkno; /* | ||
61 | * Doesn't change, always | ||
62 | * set to target block | ||
63 | * group descriptor | ||
64 | * block. | ||
65 | */ | ||
60 | u64 sr_blkno; /* The first allocated block */ | 66 | u64 sr_blkno; /* The first allocated block */ |
61 | unsigned int sr_bit_offset; /* The bit in the bg */ | 67 | unsigned int sr_bit_offset; /* The bit in the bg */ |
62 | unsigned int sr_bits; /* How many bits we claimed */ | 68 | unsigned int sr_bits; /* How many bits we claimed */ |
63 | }; | 69 | }; |
64 | 70 | ||
71 | static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res) | ||
72 | { | ||
73 | if (res->sr_blkno == 0) | ||
74 | return 0; | ||
75 | |||
76 | if (res->sr_bg_blkno) | ||
77 | return res->sr_bg_blkno; | ||
78 | |||
79 | return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset); | ||
80 | } | ||
81 | |||
65 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); | 82 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); |
66 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); | 83 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); |
67 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); | 84 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); |
@@ -138,6 +155,10 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) | |||
138 | brelse(ac->ac_bh); | 155 | brelse(ac->ac_bh); |
139 | ac->ac_bh = NULL; | 156 | ac->ac_bh = NULL; |
140 | ac->ac_resv = NULL; | 157 | ac->ac_resv = NULL; |
158 | if (ac->ac_find_loc_priv) { | ||
159 | kfree(ac->ac_find_loc_priv); | ||
160 | ac->ac_find_loc_priv = NULL; | ||
161 | } | ||
141 | } | 162 | } |
142 | 163 | ||
143 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | 164 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) |
@@ -336,7 +357,7 @@ out: | |||
336 | static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, | 357 | static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, |
337 | struct ocfs2_group_desc *bg, | 358 | struct ocfs2_group_desc *bg, |
338 | struct ocfs2_chain_list *cl, | 359 | struct ocfs2_chain_list *cl, |
339 | u64 p_blkno, u32 clusters) | 360 | u64 p_blkno, unsigned int clusters) |
340 | { | 361 | { |
341 | struct ocfs2_extent_list *el = &bg->bg_list; | 362 | struct ocfs2_extent_list *el = &bg->bg_list; |
342 | struct ocfs2_extent_rec *rec; | 363 | struct ocfs2_extent_rec *rec; |
@@ -348,7 +369,7 @@ static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, | |||
348 | rec->e_blkno = cpu_to_le64(p_blkno); | 369 | rec->e_blkno = cpu_to_le64(p_blkno); |
349 | rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / | 370 | rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / |
350 | le16_to_cpu(cl->cl_bpc)); | 371 | le16_to_cpu(cl->cl_bpc)); |
351 | rec->e_leaf_clusters = cpu_to_le32(clusters); | 372 | rec->e_leaf_clusters = cpu_to_le16(clusters); |
352 | le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc)); | 373 | le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc)); |
353 | le16_add_cpu(&bg->bg_free_bits_count, | 374 | le16_add_cpu(&bg->bg_free_bits_count, |
354 | clusters * le16_to_cpu(cl->cl_bpc)); | 375 | clusters * le16_to_cpu(cl->cl_bpc)); |
@@ -1359,6 +1380,14 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, | |||
1359 | } | 1380 | } |
1360 | 1381 | ||
1361 | le16_add_cpu(&bg->bg_free_bits_count, -num_bits); | 1382 | le16_add_cpu(&bg->bg_free_bits_count, -num_bits); |
1383 | if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { | ||
1384 | ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" | ||
1385 | " count %u but claims %u are freed. num_bits %d", | ||
1386 | (unsigned long long)le64_to_cpu(bg->bg_blkno), | ||
1387 | le16_to_cpu(bg->bg_bits), | ||
1388 | le16_to_cpu(bg->bg_free_bits_count), num_bits); | ||
1389 | return -EROFS; | ||
1390 | } | ||
1362 | while(num_bits--) | 1391 | while(num_bits--) |
1363 | ocfs2_set_bit(bit_off++, bitmap); | 1392 | ocfs2_set_bit(bit_off++, bitmap); |
1364 | 1393 | ||
@@ -1678,6 +1707,15 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, | |||
1678 | if (!ret) | 1707 | if (!ret) |
1679 | ocfs2_bg_discontig_fix_result(ac, gd, res); | 1708 | ocfs2_bg_discontig_fix_result(ac, gd, res); |
1680 | 1709 | ||
1710 | /* | ||
1711 | * sr_bg_blkno might have been changed by | ||
1712 | * ocfs2_bg_discontig_fix_result | ||
1713 | */ | ||
1714 | res->sr_bg_stable_blkno = group_bh->b_blocknr; | ||
1715 | |||
1716 | if (ac->ac_find_loc_only) | ||
1717 | goto out_loc_only; | ||
1718 | |||
1681 | ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, | 1719 | ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, |
1682 | res->sr_bits, | 1720 | res->sr_bits, |
1683 | le16_to_cpu(gd->bg_chain)); | 1721 | le16_to_cpu(gd->bg_chain)); |
@@ -1691,6 +1729,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, | |||
1691 | if (ret < 0) | 1729 | if (ret < 0) |
1692 | mlog_errno(ret); | 1730 | mlog_errno(ret); |
1693 | 1731 | ||
1732 | out_loc_only: | ||
1694 | *bits_left = le16_to_cpu(gd->bg_free_bits_count); | 1733 | *bits_left = le16_to_cpu(gd->bg_free_bits_count); |
1695 | 1734 | ||
1696 | out: | 1735 | out: |
@@ -1708,7 +1747,6 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1708 | { | 1747 | { |
1709 | int status; | 1748 | int status; |
1710 | u16 chain; | 1749 | u16 chain; |
1711 | u32 tmp_used; | ||
1712 | u64 next_group; | 1750 | u64 next_group; |
1713 | struct inode *alloc_inode = ac->ac_inode; | 1751 | struct inode *alloc_inode = ac->ac_inode; |
1714 | struct buffer_head *group_bh = NULL; | 1752 | struct buffer_head *group_bh = NULL; |
@@ -1770,6 +1808,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1770 | if (!status) | 1808 | if (!status) |
1771 | ocfs2_bg_discontig_fix_result(ac, bg, res); | 1809 | ocfs2_bg_discontig_fix_result(ac, bg, res); |
1772 | 1810 | ||
1811 | /* | ||
1812 | * sr_bg_blkno might have been changed by | ||
1813 | * ocfs2_bg_discontig_fix_result | ||
1814 | */ | ||
1815 | res->sr_bg_stable_blkno = group_bh->b_blocknr; | ||
1773 | 1816 | ||
1774 | /* | 1817 | /* |
1775 | * Keep track of previous block descriptor read. When | 1818 | * Keep track of previous block descriptor read. When |
@@ -1796,22 +1839,17 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1796 | } | 1839 | } |
1797 | } | 1840 | } |
1798 | 1841 | ||
1799 | /* Ok, claim our bits now: set the info on dinode, chainlist | 1842 | if (ac->ac_find_loc_only) |
1800 | * and then the group */ | 1843 | goto out_loc_only; |
1801 | status = ocfs2_journal_access_di(handle, | 1844 | |
1802 | INODE_CACHE(alloc_inode), | 1845 | status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, |
1803 | ac->ac_bh, | 1846 | ac->ac_bh, res->sr_bits, |
1804 | OCFS2_JOURNAL_ACCESS_WRITE); | 1847 | chain); |
1805 | if (status < 0) { | 1848 | if (status) { |
1806 | mlog_errno(status); | 1849 | mlog_errno(status); |
1807 | goto bail; | 1850 | goto bail; |
1808 | } | 1851 | } |
1809 | 1852 | ||
1810 | tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); | ||
1811 | fe->id1.bitmap1.i_used = cpu_to_le32(res->sr_bits + tmp_used); | ||
1812 | le32_add_cpu(&cl->cl_recs[chain].c_free, -res->sr_bits); | ||
1813 | ocfs2_journal_dirty(handle, ac->ac_bh); | ||
1814 | |||
1815 | status = ocfs2_block_group_set_bits(handle, | 1853 | status = ocfs2_block_group_set_bits(handle, |
1816 | alloc_inode, | 1854 | alloc_inode, |
1817 | bg, | 1855 | bg, |
@@ -1826,6 +1864,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1826 | mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, | 1864 | mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, |
1827 | (unsigned long long)le64_to_cpu(fe->i_blkno)); | 1865 | (unsigned long long)le64_to_cpu(fe->i_blkno)); |
1828 | 1866 | ||
1867 | out_loc_only: | ||
1829 | *bits_left = le16_to_cpu(bg->bg_free_bits_count); | 1868 | *bits_left = le16_to_cpu(bg->bg_free_bits_count); |
1830 | bail: | 1869 | bail: |
1831 | brelse(group_bh); | 1870 | brelse(group_bh); |
@@ -1845,6 +1884,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, | |||
1845 | int status; | 1884 | int status; |
1846 | u16 victim, i; | 1885 | u16 victim, i; |
1847 | u16 bits_left = 0; | 1886 | u16 bits_left = 0; |
1887 | u64 hint = ac->ac_last_group; | ||
1848 | struct ocfs2_chain_list *cl; | 1888 | struct ocfs2_chain_list *cl; |
1849 | struct ocfs2_dinode *fe; | 1889 | struct ocfs2_dinode *fe; |
1850 | 1890 | ||
@@ -1872,7 +1912,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, | |||
1872 | goto bail; | 1912 | goto bail; |
1873 | } | 1913 | } |
1874 | 1914 | ||
1875 | res->sr_bg_blkno = ac->ac_last_group; | 1915 | res->sr_bg_blkno = hint; |
1876 | if (res->sr_bg_blkno) { | 1916 | if (res->sr_bg_blkno) { |
1877 | /* Attempt to short-circuit the usual search mechanism | 1917 | /* Attempt to short-circuit the usual search mechanism |
1878 | * by jumping straight to the most recently used | 1918 | * by jumping straight to the most recently used |
@@ -1896,8 +1936,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, | |||
1896 | 1936 | ||
1897 | status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, | 1937 | status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, |
1898 | res, &bits_left); | 1938 | res, &bits_left); |
1899 | if (!status) | 1939 | if (!status) { |
1940 | hint = ocfs2_group_from_res(res); | ||
1900 | goto set_hint; | 1941 | goto set_hint; |
1942 | } | ||
1901 | if (status < 0 && status != -ENOSPC) { | 1943 | if (status < 0 && status != -ENOSPC) { |
1902 | mlog_errno(status); | 1944 | mlog_errno(status); |
1903 | goto bail; | 1945 | goto bail; |
@@ -1920,8 +1962,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, | |||
1920 | ac->ac_chain = i; | 1962 | ac->ac_chain = i; |
1921 | status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, | 1963 | status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, |
1922 | res, &bits_left); | 1964 | res, &bits_left); |
1923 | if (!status) | 1965 | if (!status) { |
1966 | hint = ocfs2_group_from_res(res); | ||
1924 | break; | 1967 | break; |
1968 | } | ||
1925 | if (status < 0 && status != -ENOSPC) { | 1969 | if (status < 0 && status != -ENOSPC) { |
1926 | mlog_errno(status); | 1970 | mlog_errno(status); |
1927 | goto bail; | 1971 | goto bail; |
@@ -1936,7 +1980,7 @@ set_hint: | |||
1936 | if (bits_left < min_bits) | 1980 | if (bits_left < min_bits) |
1937 | ac->ac_last_group = 0; | 1981 | ac->ac_last_group = 0; |
1938 | else | 1982 | else |
1939 | ac->ac_last_group = res->sr_bg_blkno; | 1983 | ac->ac_last_group = hint; |
1940 | } | 1984 | } |
1941 | 1985 | ||
1942 | bail: | 1986 | bail: |
@@ -2016,6 +2060,136 @@ static inline void ocfs2_save_inode_ac_group(struct inode *dir, | |||
2016 | OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; | 2060 | OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; |
2017 | } | 2061 | } |
2018 | 2062 | ||
2063 | int ocfs2_find_new_inode_loc(struct inode *dir, | ||
2064 | struct buffer_head *parent_fe_bh, | ||
2065 | struct ocfs2_alloc_context *ac, | ||
2066 | u64 *fe_blkno) | ||
2067 | { | ||
2068 | int ret; | ||
2069 | handle_t *handle = NULL; | ||
2070 | struct ocfs2_suballoc_result *res; | ||
2071 | |||
2072 | BUG_ON(!ac); | ||
2073 | BUG_ON(ac->ac_bits_given != 0); | ||
2074 | BUG_ON(ac->ac_bits_wanted != 1); | ||
2075 | BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); | ||
2076 | |||
2077 | res = kzalloc(sizeof(*res), GFP_NOFS); | ||
2078 | if (res == NULL) { | ||
2079 | ret = -ENOMEM; | ||
2080 | mlog_errno(ret); | ||
2081 | goto out; | ||
2082 | } | ||
2083 | |||
2084 | ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac); | ||
2085 | |||
2086 | /* | ||
2087 | * The handle started here is for chain relink. Alternatively, | ||
2088 | * we could just disable relink for these calls. | ||
2089 | */ | ||
2090 | handle = ocfs2_start_trans(OCFS2_SB(dir->i_sb), OCFS2_SUBALLOC_ALLOC); | ||
2091 | if (IS_ERR(handle)) { | ||
2092 | ret = PTR_ERR(handle); | ||
2093 | handle = NULL; | ||
2094 | mlog_errno(ret); | ||
2095 | goto out; | ||
2096 | } | ||
2097 | |||
2098 | /* | ||
2099 | * This will instruct ocfs2_claim_suballoc_bits and | ||
2100 | * ocfs2_search_one_group to search but save actual allocation | ||
2101 | * for later. | ||
2102 | */ | ||
2103 | ac->ac_find_loc_only = 1; | ||
2104 | |||
2105 | ret = ocfs2_claim_suballoc_bits(ac, handle, 1, 1, res); | ||
2106 | if (ret < 0) { | ||
2107 | mlog_errno(ret); | ||
2108 | goto out; | ||
2109 | } | ||
2110 | |||
2111 | ac->ac_find_loc_priv = res; | ||
2112 | *fe_blkno = res->sr_blkno; | ||
2113 | |||
2114 | out: | ||
2115 | if (handle) | ||
2116 | ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle); | ||
2117 | |||
2118 | if (ret) | ||
2119 | kfree(res); | ||
2120 | |||
2121 | return ret; | ||
2122 | } | ||
2123 | |||
2124 | int ocfs2_claim_new_inode_at_loc(handle_t *handle, | ||
2125 | struct inode *dir, | ||
2126 | struct ocfs2_alloc_context *ac, | ||
2127 | u64 *suballoc_loc, | ||
2128 | u16 *suballoc_bit, | ||
2129 | u64 di_blkno) | ||
2130 | { | ||
2131 | int ret; | ||
2132 | u16 chain; | ||
2133 | struct ocfs2_suballoc_result *res = ac->ac_find_loc_priv; | ||
2134 | struct buffer_head *bg_bh = NULL; | ||
2135 | struct ocfs2_group_desc *bg; | ||
2136 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) ac->ac_bh->b_data; | ||
2137 | |||
2138 | /* | ||
2139 | * Since di_blkno is being passed back in, we check for any | ||
2140 | * inconsistencies which may have happened between | ||
2141 | * calls. These are code bugs as di_blkno is not expected to | ||
2142 | * change once returned from ocfs2_find_new_inode_loc() | ||
2143 | */ | ||
2144 | BUG_ON(res->sr_blkno != di_blkno); | ||
2145 | |||
2146 | ret = ocfs2_read_group_descriptor(ac->ac_inode, di, | ||
2147 | res->sr_bg_stable_blkno, &bg_bh); | ||
2148 | if (ret) { | ||
2149 | mlog_errno(ret); | ||
2150 | goto out; | ||
2151 | } | ||
2152 | |||
2153 | bg = (struct ocfs2_group_desc *) bg_bh->b_data; | ||
2154 | chain = le16_to_cpu(bg->bg_chain); | ||
2155 | |||
2156 | ret = ocfs2_alloc_dinode_update_counts(ac->ac_inode, handle, | ||
2157 | ac->ac_bh, res->sr_bits, | ||
2158 | chain); | ||
2159 | if (ret) { | ||
2160 | mlog_errno(ret); | ||
2161 | goto out; | ||
2162 | } | ||
2163 | |||
2164 | ret = ocfs2_block_group_set_bits(handle, | ||
2165 | ac->ac_inode, | ||
2166 | bg, | ||
2167 | bg_bh, | ||
2168 | res->sr_bit_offset, | ||
2169 | res->sr_bits); | ||
2170 | if (ret < 0) { | ||
2171 | mlog_errno(ret); | ||
2172 | goto out; | ||
2173 | } | ||
2174 | |||
2175 | mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, | ||
2176 | (unsigned long long)di_blkno); | ||
2177 | |||
2178 | atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs); | ||
2179 | |||
2180 | BUG_ON(res->sr_bits != 1); | ||
2181 | |||
2182 | *suballoc_loc = res->sr_bg_blkno; | ||
2183 | *suballoc_bit = res->sr_bit_offset; | ||
2184 | ac->ac_bits_given++; | ||
2185 | ocfs2_save_inode_ac_group(dir, ac); | ||
2186 | |||
2187 | out: | ||
2188 | brelse(bg_bh); | ||
2189 | |||
2190 | return ret; | ||
2191 | } | ||
2192 | |||
2019 | int ocfs2_claim_new_inode(handle_t *handle, | 2193 | int ocfs2_claim_new_inode(handle_t *handle, |
2020 | struct inode *dir, | 2194 | struct inode *dir, |
2021 | struct buffer_head *parent_fe_bh, | 2195 | struct buffer_head *parent_fe_bh, |
@@ -2253,6 +2427,14 @@ static int ocfs2_block_group_clear_bits(handle_t *handle, | |||
2253 | (unsigned long *) undo_bg->bg_bitmap); | 2427 | (unsigned long *) undo_bg->bg_bitmap); |
2254 | } | 2428 | } |
2255 | le16_add_cpu(&bg->bg_free_bits_count, num_bits); | 2429 | le16_add_cpu(&bg->bg_free_bits_count, num_bits); |
2430 | if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { | ||
2431 | ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" | ||
2432 | " count %u but claims %u are freed. num_bits %d", | ||
2433 | (unsigned long long)le64_to_cpu(bg->bg_blkno), | ||
2434 | le16_to_cpu(bg->bg_bits), | ||
2435 | le16_to_cpu(bg->bg_free_bits_count), num_bits); | ||
2436 | return -EROFS; | ||
2437 | } | ||
2256 | 2438 | ||
2257 | if (undo_fn) | 2439 | if (undo_fn) |
2258 | jbd_unlock_bh_state(group_bh); | 2440 | jbd_unlock_bh_state(group_bh); |
@@ -2567,7 +2749,8 @@ out: | |||
2567 | * suballoc_bit. | 2749 | * suballoc_bit. |
2568 | */ | 2750 | */ |
2569 | static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, | 2751 | static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, |
2570 | u16 *suballoc_slot, u16 *suballoc_bit) | 2752 | u16 *suballoc_slot, u64 *group_blkno, |
2753 | u16 *suballoc_bit) | ||
2571 | { | 2754 | { |
2572 | int status; | 2755 | int status; |
2573 | struct buffer_head *inode_bh = NULL; | 2756 | struct buffer_head *inode_bh = NULL; |
@@ -2604,6 +2787,8 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, | |||
2604 | *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot); | 2787 | *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot); |
2605 | if (suballoc_bit) | 2788 | if (suballoc_bit) |
2606 | *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit); | 2789 | *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit); |
2790 | if (group_blkno) | ||
2791 | *group_blkno = le64_to_cpu(inode_fe->i_suballoc_loc); | ||
2607 | 2792 | ||
2608 | bail: | 2793 | bail: |
2609 | brelse(inode_bh); | 2794 | brelse(inode_bh); |
@@ -2621,7 +2806,8 @@ bail: | |||
2621 | */ | 2806 | */ |
2622 | static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, | 2807 | static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, |
2623 | struct inode *suballoc, | 2808 | struct inode *suballoc, |
2624 | struct buffer_head *alloc_bh, u64 blkno, | 2809 | struct buffer_head *alloc_bh, |
2810 | u64 group_blkno, u64 blkno, | ||
2625 | u16 bit, int *res) | 2811 | u16 bit, int *res) |
2626 | { | 2812 | { |
2627 | struct ocfs2_dinode *alloc_di; | 2813 | struct ocfs2_dinode *alloc_di; |
@@ -2642,10 +2828,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, | |||
2642 | goto bail; | 2828 | goto bail; |
2643 | } | 2829 | } |
2644 | 2830 | ||
2645 | if (alloc_di->i_suballoc_loc) | 2831 | bg_blkno = group_blkno ? group_blkno : |
2646 | bg_blkno = le64_to_cpu(alloc_di->i_suballoc_loc); | 2832 | ocfs2_which_suballoc_group(blkno, bit); |
2647 | else | ||
2648 | bg_blkno = ocfs2_which_suballoc_group(blkno, bit); | ||
2649 | status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno, | 2833 | status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno, |
2650 | &group_bh); | 2834 | &group_bh); |
2651 | if (status < 0) { | 2835 | if (status < 0) { |
@@ -2680,6 +2864,7 @@ bail: | |||
2680 | int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) | 2864 | int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) |
2681 | { | 2865 | { |
2682 | int status; | 2866 | int status; |
2867 | u64 group_blkno = 0; | ||
2683 | u16 suballoc_bit = 0, suballoc_slot = 0; | 2868 | u16 suballoc_bit = 0, suballoc_slot = 0; |
2684 | struct inode *inode_alloc_inode; | 2869 | struct inode *inode_alloc_inode; |
2685 | struct buffer_head *alloc_bh = NULL; | 2870 | struct buffer_head *alloc_bh = NULL; |
@@ -2687,7 +2872,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) | |||
2687 | mlog_entry("blkno: %llu", (unsigned long long)blkno); | 2872 | mlog_entry("blkno: %llu", (unsigned long long)blkno); |
2688 | 2873 | ||
2689 | status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, | 2874 | status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, |
2690 | &suballoc_bit); | 2875 | &group_blkno, &suballoc_bit); |
2691 | if (status < 0) { | 2876 | if (status < 0) { |
2692 | mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status); | 2877 | mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status); |
2693 | goto bail; | 2878 | goto bail; |
@@ -2715,7 +2900,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) | |||
2715 | } | 2900 | } |
2716 | 2901 | ||
2717 | status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh, | 2902 | status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh, |
2718 | blkno, suballoc_bit, res); | 2903 | group_blkno, blkno, suballoc_bit, res); |
2719 | if (status < 0) | 2904 | if (status < 0) |
2720 | mlog(ML_ERROR, "test suballoc bit failed %d\n", status); | 2905 | mlog(ML_ERROR, "test suballoc bit failed %d\n", status); |
2721 | 2906 | ||
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index a017dd3ee7d9..b8afabfeede4 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -56,6 +56,9 @@ struct ocfs2_alloc_context { | |||
56 | u64 ac_max_block; /* Highest block number to allocate. 0 is | 56 | u64 ac_max_block; /* Highest block number to allocate. 0 is |
57 | is the same as ~0 - unlimited */ | 57 | is the same as ~0 - unlimited */ |
58 | 58 | ||
59 | int ac_find_loc_only; /* hack for reflink operation ordering */ | ||
60 | struct ocfs2_suballoc_result *ac_find_loc_priv; /* */ | ||
61 | |||
59 | struct ocfs2_alloc_reservation *ac_resv; | 62 | struct ocfs2_alloc_reservation *ac_resv; |
60 | }; | 63 | }; |
61 | 64 | ||
@@ -197,4 +200,22 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, | |||
197 | struct ocfs2_alloc_context **meta_ac); | 200 | struct ocfs2_alloc_context **meta_ac); |
198 | 201 | ||
199 | int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res); | 202 | int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res); |
203 | |||
204 | |||
205 | |||
206 | /* | ||
207 | * The following two interfaces are for ocfs2_create_inode_in_orphan(). | ||
208 | */ | ||
209 | int ocfs2_find_new_inode_loc(struct inode *dir, | ||
210 | struct buffer_head *parent_fe_bh, | ||
211 | struct ocfs2_alloc_context *ac, | ||
212 | u64 *fe_blkno); | ||
213 | |||
214 | int ocfs2_claim_new_inode_at_loc(handle_t *handle, | ||
215 | struct inode *dir, | ||
216 | struct ocfs2_alloc_context *ac, | ||
217 | u64 *suballoc_loc, | ||
218 | u16 *suballoc_bit, | ||
219 | u64 di_blkno); | ||
220 | |||
200 | #endif /* _CHAINALLOC_H_ */ | 221 | #endif /* _CHAINALLOC_H_ */ |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index fa1be1b304d1..56f0cb395820 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -162,6 +162,7 @@ enum { | |||
162 | Opt_nointr, | 162 | Opt_nointr, |
163 | Opt_hb_none, | 163 | Opt_hb_none, |
164 | Opt_hb_local, | 164 | Opt_hb_local, |
165 | Opt_hb_global, | ||
165 | Opt_data_ordered, | 166 | Opt_data_ordered, |
166 | Opt_data_writeback, | 167 | Opt_data_writeback, |
167 | Opt_atime_quantum, | 168 | Opt_atime_quantum, |
@@ -177,6 +178,8 @@ enum { | |||
177 | Opt_noacl, | 178 | Opt_noacl, |
178 | Opt_usrquota, | 179 | Opt_usrquota, |
179 | Opt_grpquota, | 180 | Opt_grpquota, |
181 | Opt_coherency_buffered, | ||
182 | Opt_coherency_full, | ||
180 | Opt_resv_level, | 183 | Opt_resv_level, |
181 | Opt_dir_resv_level, | 184 | Opt_dir_resv_level, |
182 | Opt_err, | 185 | Opt_err, |
@@ -190,6 +193,7 @@ static const match_table_t tokens = { | |||
190 | {Opt_nointr, "nointr"}, | 193 | {Opt_nointr, "nointr"}, |
191 | {Opt_hb_none, OCFS2_HB_NONE}, | 194 | {Opt_hb_none, OCFS2_HB_NONE}, |
192 | {Opt_hb_local, OCFS2_HB_LOCAL}, | 195 | {Opt_hb_local, OCFS2_HB_LOCAL}, |
196 | {Opt_hb_global, OCFS2_HB_GLOBAL}, | ||
193 | {Opt_data_ordered, "data=ordered"}, | 197 | {Opt_data_ordered, "data=ordered"}, |
194 | {Opt_data_writeback, "data=writeback"}, | 198 | {Opt_data_writeback, "data=writeback"}, |
195 | {Opt_atime_quantum, "atime_quantum=%u"}, | 199 | {Opt_atime_quantum, "atime_quantum=%u"}, |
@@ -205,6 +209,8 @@ static const match_table_t tokens = { | |||
205 | {Opt_noacl, "noacl"}, | 209 | {Opt_noacl, "noacl"}, |
206 | {Opt_usrquota, "usrquota"}, | 210 | {Opt_usrquota, "usrquota"}, |
207 | {Opt_grpquota, "grpquota"}, | 211 | {Opt_grpquota, "grpquota"}, |
212 | {Opt_coherency_buffered, "coherency=buffered"}, | ||
213 | {Opt_coherency_full, "coherency=full"}, | ||
208 | {Opt_resv_level, "resv_level=%u"}, | 214 | {Opt_resv_level, "resv_level=%u"}, |
209 | {Opt_dir_resv_level, "dir_resv_level=%u"}, | 215 | {Opt_dir_resv_level, "dir_resv_level=%u"}, |
210 | {Opt_err, NULL} | 216 | {Opt_err, NULL} |
@@ -514,11 +520,11 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb) | |||
514 | 520 | ||
515 | mlog_entry_void(); | 521 | mlog_entry_void(); |
516 | 522 | ||
517 | for (i = 0; i < NUM_SYSTEM_INODES; i++) { | 523 | for (i = 0; i < NUM_GLOBAL_SYSTEM_INODES; i++) { |
518 | inode = osb->system_inodes[i]; | 524 | inode = osb->global_system_inodes[i]; |
519 | if (inode) { | 525 | if (inode) { |
520 | iput(inode); | 526 | iput(inode); |
521 | osb->system_inodes[i] = NULL; | 527 | osb->global_system_inodes[i] = NULL; |
522 | } | 528 | } |
523 | } | 529 | } |
524 | 530 | ||
@@ -534,6 +540,20 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb) | |||
534 | osb->root_inode = NULL; | 540 | osb->root_inode = NULL; |
535 | } | 541 | } |
536 | 542 | ||
543 | if (!osb->local_system_inodes) | ||
544 | goto out; | ||
545 | |||
546 | for (i = 0; i < NUM_LOCAL_SYSTEM_INODES * osb->max_slots; i++) { | ||
547 | if (osb->local_system_inodes[i]) { | ||
548 | iput(osb->local_system_inodes[i]); | ||
549 | osb->local_system_inodes[i] = NULL; | ||
550 | } | ||
551 | } | ||
552 | |||
553 | kfree(osb->local_system_inodes); | ||
554 | osb->local_system_inodes = NULL; | ||
555 | |||
556 | out: | ||
537 | mlog_exit(0); | 557 | mlog_exit(0); |
538 | } | 558 | } |
539 | 559 | ||
@@ -608,8 +628,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
608 | int ret = 0; | 628 | int ret = 0; |
609 | struct mount_options parsed_options; | 629 | struct mount_options parsed_options; |
610 | struct ocfs2_super *osb = OCFS2_SB(sb); | 630 | struct ocfs2_super *osb = OCFS2_SB(sb); |
611 | 631 | u32 tmp; | |
612 | lock_kernel(); | ||
613 | 632 | ||
614 | if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || | 633 | if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || |
615 | !ocfs2_check_set_options(sb, &parsed_options)) { | 634 | !ocfs2_check_set_options(sb, &parsed_options)) { |
@@ -617,8 +636,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
617 | goto out; | 636 | goto out; |
618 | } | 637 | } |
619 | 638 | ||
620 | if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) != | 639 | tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | |
621 | (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) { | 640 | OCFS2_MOUNT_HB_NONE; |
641 | if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { | ||
622 | ret = -EINVAL; | 642 | ret = -EINVAL; |
623 | mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); | 643 | mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); |
624 | goto out; | 644 | goto out; |
@@ -717,7 +737,6 @@ unlock_osb: | |||
717 | MS_POSIXACL : 0); | 737 | MS_POSIXACL : 0); |
718 | } | 738 | } |
719 | out: | 739 | out: |
720 | unlock_kernel(); | ||
721 | return ret; | 740 | return ret; |
722 | } | 741 | } |
723 | 742 | ||
@@ -809,23 +828,29 @@ bail: | |||
809 | 828 | ||
810 | static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) | 829 | static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) |
811 | { | 830 | { |
812 | if (ocfs2_mount_local(osb)) { | 831 | u32 hb_enabled = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL; |
813 | if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { | 832 | |
833 | if (osb->s_mount_opt & hb_enabled) { | ||
834 | if (ocfs2_mount_local(osb)) { | ||
814 | mlog(ML_ERROR, "Cannot heartbeat on a locally " | 835 | mlog(ML_ERROR, "Cannot heartbeat on a locally " |
815 | "mounted device.\n"); | 836 | "mounted device.\n"); |
816 | return -EINVAL; | 837 | return -EINVAL; |
817 | } | 838 | } |
818 | } | 839 | if (ocfs2_userspace_stack(osb)) { |
819 | |||
820 | if (ocfs2_userspace_stack(osb)) { | ||
821 | if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { | ||
822 | mlog(ML_ERROR, "Userspace stack expected, but " | 840 | mlog(ML_ERROR, "Userspace stack expected, but " |
823 | "o2cb heartbeat arguments passed to mount\n"); | 841 | "o2cb heartbeat arguments passed to mount\n"); |
824 | return -EINVAL; | 842 | return -EINVAL; |
825 | } | 843 | } |
844 | if (((osb->s_mount_opt & OCFS2_MOUNT_HB_GLOBAL) && | ||
845 | !ocfs2_cluster_o2cb_global_heartbeat(osb)) || | ||
846 | ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) && | ||
847 | ocfs2_cluster_o2cb_global_heartbeat(osb))) { | ||
848 | mlog(ML_ERROR, "Mismatching o2cb heartbeat modes\n"); | ||
849 | return -EINVAL; | ||
850 | } | ||
826 | } | 851 | } |
827 | 852 | ||
828 | if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { | 853 | if (!(osb->s_mount_opt & hb_enabled)) { |
829 | if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && | 854 | if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && |
830 | !ocfs2_userspace_stack(osb)) { | 855 | !ocfs2_userspace_stack(osb)) { |
831 | mlog(ML_ERROR, "Heartbeat has to be started to mount " | 856 | mlog(ML_ERROR, "Heartbeat has to be started to mount " |
@@ -1291,6 +1316,7 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1291 | { | 1316 | { |
1292 | int status; | 1317 | int status; |
1293 | char *p; | 1318 | char *p; |
1319 | u32 tmp; | ||
1294 | 1320 | ||
1295 | mlog_entry("remount: %d, options: \"%s\"\n", is_remount, | 1321 | mlog_entry("remount: %d, options: \"%s\"\n", is_remount, |
1296 | options ? options : "(none)"); | 1322 | options ? options : "(none)"); |
@@ -1322,7 +1348,10 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1322 | mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; | 1348 | mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; |
1323 | break; | 1349 | break; |
1324 | case Opt_hb_none: | 1350 | case Opt_hb_none: |
1325 | mopt->mount_opt &= ~OCFS2_MOUNT_HB_LOCAL; | 1351 | mopt->mount_opt |= OCFS2_MOUNT_HB_NONE; |
1352 | break; | ||
1353 | case Opt_hb_global: | ||
1354 | mopt->mount_opt |= OCFS2_MOUNT_HB_GLOBAL; | ||
1326 | break; | 1355 | break; |
1327 | case Opt_barrier: | 1356 | case Opt_barrier: |
1328 | if (match_int(&args[0], &option)) { | 1357 | if (match_int(&args[0], &option)) { |
@@ -1438,6 +1467,12 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1438 | case Opt_grpquota: | 1467 | case Opt_grpquota: |
1439 | mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; | 1468 | mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; |
1440 | break; | 1469 | break; |
1470 | case Opt_coherency_buffered: | ||
1471 | mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED; | ||
1472 | break; | ||
1473 | case Opt_coherency_full: | ||
1474 | mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED; | ||
1475 | break; | ||
1441 | case Opt_acl: | 1476 | case Opt_acl: |
1442 | mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; | 1477 | mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; |
1443 | mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; | 1478 | mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; |
@@ -1477,6 +1512,15 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1477 | } | 1512 | } |
1478 | } | 1513 | } |
1479 | 1514 | ||
1515 | /* Ensure only one heartbeat mode */ | ||
1516 | tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | | ||
1517 | OCFS2_MOUNT_HB_NONE); | ||
1518 | if (hweight32(tmp) != 1) { | ||
1519 | mlog(ML_ERROR, "Invalid heartbeat mount options\n"); | ||
1520 | status = 0; | ||
1521 | goto bail; | ||
1522 | } | ||
1523 | |||
1480 | status = 1; | 1524 | status = 1; |
1481 | 1525 | ||
1482 | bail: | 1526 | bail: |
@@ -1490,10 +1534,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
1490 | unsigned long opts = osb->s_mount_opt; | 1534 | unsigned long opts = osb->s_mount_opt; |
1491 | unsigned int local_alloc_megs; | 1535 | unsigned int local_alloc_megs; |
1492 | 1536 | ||
1493 | if (opts & OCFS2_MOUNT_HB_LOCAL) | 1537 | if (opts & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL)) { |
1494 | seq_printf(s, ",_netdev,heartbeat=local"); | 1538 | seq_printf(s, ",_netdev"); |
1495 | else | 1539 | if (opts & OCFS2_MOUNT_HB_LOCAL) |
1496 | seq_printf(s, ",heartbeat=none"); | 1540 | seq_printf(s, ",%s", OCFS2_HB_LOCAL); |
1541 | else | ||
1542 | seq_printf(s, ",%s", OCFS2_HB_GLOBAL); | ||
1543 | } else | ||
1544 | seq_printf(s, ",%s", OCFS2_HB_NONE); | ||
1497 | 1545 | ||
1498 | if (opts & OCFS2_MOUNT_NOINTR) | 1546 | if (opts & OCFS2_MOUNT_NOINTR) |
1499 | seq_printf(s, ",nointr"); | 1547 | seq_printf(s, ",nointr"); |
@@ -1536,6 +1584,11 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
1536 | if (opts & OCFS2_MOUNT_GRPQUOTA) | 1584 | if (opts & OCFS2_MOUNT_GRPQUOTA) |
1537 | seq_printf(s, ",grpquota"); | 1585 | seq_printf(s, ",grpquota"); |
1538 | 1586 | ||
1587 | if (opts & OCFS2_MOUNT_COHERENCY_BUFFERED) | ||
1588 | seq_printf(s, ",coherency=buffered"); | ||
1589 | else | ||
1590 | seq_printf(s, ",coherency=full"); | ||
1591 | |||
1539 | if (opts & OCFS2_MOUNT_NOUSERXATTR) | 1592 | if (opts & OCFS2_MOUNT_NOUSERXATTR) |
1540 | seq_printf(s, ",nouser_xattr"); | 1593 | seq_printf(s, ",nouser_xattr"); |
1541 | else | 1594 | else |
@@ -1640,13 +1693,9 @@ static void ocfs2_put_super(struct super_block *sb) | |||
1640 | { | 1693 | { |
1641 | mlog_entry("(0x%p)\n", sb); | 1694 | mlog_entry("(0x%p)\n", sb); |
1642 | 1695 | ||
1643 | lock_kernel(); | ||
1644 | |||
1645 | ocfs2_sync_blockdev(sb); | 1696 | ocfs2_sync_blockdev(sb); |
1646 | ocfs2_dismount_volume(sb, 0); | 1697 | ocfs2_dismount_volume(sb, 0); |
1647 | 1698 | ||
1648 | unlock_kernel(); | ||
1649 | |||
1650 | mlog_exit_void(); | 1699 | mlog_exit_void(); |
1651 | } | 1700 | } |
1652 | 1701 | ||
@@ -1990,6 +2039,36 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu | |||
1990 | return 0; | 2039 | return 0; |
1991 | } | 2040 | } |
1992 | 2041 | ||
2042 | /* Make sure entire volume is addressable by our journal. Requires | ||
2043 | osb_clusters_at_boot to be valid and for the journal to have been | ||
2044 | initialized by ocfs2_journal_init(). */ | ||
2045 | static int ocfs2_journal_addressable(struct ocfs2_super *osb) | ||
2046 | { | ||
2047 | int status = 0; | ||
2048 | u64 max_block = | ||
2049 | ocfs2_clusters_to_blocks(osb->sb, | ||
2050 | osb->osb_clusters_at_boot) - 1; | ||
2051 | |||
2052 | /* 32-bit block number is always OK. */ | ||
2053 | if (max_block <= (u32)~0ULL) | ||
2054 | goto out; | ||
2055 | |||
2056 | /* Volume is "huge", so see if our journal is new enough to | ||
2057 | support it. */ | ||
2058 | if (!(OCFS2_HAS_COMPAT_FEATURE(osb->sb, | ||
2059 | OCFS2_FEATURE_COMPAT_JBD2_SB) && | ||
2060 | jbd2_journal_check_used_features(osb->journal->j_journal, 0, 0, | ||
2061 | JBD2_FEATURE_INCOMPAT_64BIT))) { | ||
2062 | mlog(ML_ERROR, "The journal cannot address the entire volume. " | ||
2063 | "Enable the 'block64' journal option with tunefs.ocfs2"); | ||
2064 | status = -EFBIG; | ||
2065 | goto out; | ||
2066 | } | ||
2067 | |||
2068 | out: | ||
2069 | return status; | ||
2070 | } | ||
2071 | |||
1993 | static int ocfs2_initialize_super(struct super_block *sb, | 2072 | static int ocfs2_initialize_super(struct super_block *sb, |
1994 | struct buffer_head *bh, | 2073 | struct buffer_head *bh, |
1995 | int sector_size, | 2074 | int sector_size, |
@@ -2002,6 +2081,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2002 | struct ocfs2_journal *journal; | 2081 | struct ocfs2_journal *journal; |
2003 | __le32 uuid_net_key; | 2082 | __le32 uuid_net_key; |
2004 | struct ocfs2_super *osb; | 2083 | struct ocfs2_super *osb; |
2084 | u64 total_blocks; | ||
2005 | 2085 | ||
2006 | mlog_entry_void(); | 2086 | mlog_entry_void(); |
2007 | 2087 | ||
@@ -2060,6 +2140,15 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2060 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", | 2140 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", |
2061 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 2141 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); |
2062 | 2142 | ||
2143 | osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); | ||
2144 | if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) { | ||
2145 | mlog(ML_ERROR, "Invalid number of node slots (%u)\n", | ||
2146 | osb->max_slots); | ||
2147 | status = -EINVAL; | ||
2148 | goto bail; | ||
2149 | } | ||
2150 | mlog(0, "max_slots for this device: %u\n", osb->max_slots); | ||
2151 | |||
2063 | ocfs2_orphan_scan_init(osb); | 2152 | ocfs2_orphan_scan_init(osb); |
2064 | 2153 | ||
2065 | status = ocfs2_recovery_init(osb); | 2154 | status = ocfs2_recovery_init(osb); |
@@ -2098,15 +2187,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2098 | goto bail; | 2187 | goto bail; |
2099 | } | 2188 | } |
2100 | 2189 | ||
2101 | osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); | ||
2102 | if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) { | ||
2103 | mlog(ML_ERROR, "Invalid number of node slots (%u)\n", | ||
2104 | osb->max_slots); | ||
2105 | status = -EINVAL; | ||
2106 | goto bail; | ||
2107 | } | ||
2108 | mlog(0, "max_slots for this device: %u\n", osb->max_slots); | ||
2109 | |||
2110 | osb->slot_recovery_generations = | 2190 | osb->slot_recovery_generations = |
2111 | kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations), | 2191 | kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations), |
2112 | GFP_KERNEL); | 2192 | GFP_KERNEL); |
@@ -2149,7 +2229,9 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2149 | goto bail; | 2229 | goto bail; |
2150 | } | 2230 | } |
2151 | 2231 | ||
2152 | if (ocfs2_userspace_stack(osb)) { | 2232 | if (ocfs2_clusterinfo_valid(osb)) { |
2233 | osb->osb_stackflags = | ||
2234 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags; | ||
2153 | memcpy(osb->osb_cluster_stack, | 2235 | memcpy(osb->osb_cluster_stack, |
2154 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, | 2236 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, |
2155 | OCFS2_STACK_LABEL_LEN); | 2237 | OCFS2_STACK_LABEL_LEN); |
@@ -2214,11 +2296,15 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2214 | goto bail; | 2296 | goto bail; |
2215 | } | 2297 | } |
2216 | 2298 | ||
2217 | if (ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(di->i_clusters) - 1) | 2299 | total_blocks = ocfs2_clusters_to_blocks(osb->sb, |
2218 | > (u32)~0UL) { | 2300 | le32_to_cpu(di->i_clusters)); |
2219 | mlog(ML_ERROR, "Volume might try to write to blocks beyond " | 2301 | |
2220 | "what jbd can address in 32 bits.\n"); | 2302 | status = generic_check_addressable(osb->sb->s_blocksize_bits, |
2221 | status = -EINVAL; | 2303 | total_blocks); |
2304 | if (status) { | ||
2305 | mlog(ML_ERROR, "Volume too large " | ||
2306 | "to mount safely on this system"); | ||
2307 | status = -EFBIG; | ||
2222 | goto bail; | 2308 | goto bail; |
2223 | } | 2309 | } |
2224 | 2310 | ||
@@ -2380,6 +2466,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) | |||
2380 | goto finally; | 2466 | goto finally; |
2381 | } | 2467 | } |
2382 | 2468 | ||
2469 | /* Now that journal has been initialized, check to make sure | ||
2470 | entire volume is addressable. */ | ||
2471 | status = ocfs2_journal_addressable(osb); | ||
2472 | if (status) | ||
2473 | goto finally; | ||
2474 | |||
2383 | /* If the journal was unmounted cleanly then we don't want to | 2475 | /* If the journal was unmounted cleanly then we don't want to |
2384 | * recover anything. Otherwise, journal_load will do that | 2476 | * recover anything. Otherwise, journal_load will do that |
2385 | * dirty work for us :) */ | 2477 | * dirty work for us :) */ |
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 32499d213fc4..9975457c981f 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c | |||
@@ -128,7 +128,7 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry, | |||
128 | } | 128 | } |
129 | 129 | ||
130 | /* Fast symlinks can't be large */ | 130 | /* Fast symlinks can't be large */ |
131 | len = strlen(target); | 131 | len = strnlen(target, ocfs2_fast_symlink_chars(inode->i_sb)); |
132 | link = kzalloc(len + 1, GFP_NOFS); | 132 | link = kzalloc(len + 1, GFP_NOFS); |
133 | if (!link) { | 133 | if (!link) { |
134 | status = -ENOMEM; | 134 | status = -ENOMEM; |
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index bfe7190cdbf1..902efb23b6a6 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c | |||
@@ -44,11 +44,6 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
44 | int type, | 44 | int type, |
45 | u32 slot); | 45 | u32 slot); |
46 | 46 | ||
47 | static inline int is_global_system_inode(int type); | ||
48 | static inline int is_in_system_inode_array(struct ocfs2_super *osb, | ||
49 | int type, | ||
50 | u32 slot); | ||
51 | |||
52 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 47 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
53 | static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES]; | 48 | static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES]; |
54 | #endif | 49 | #endif |
@@ -59,11 +54,52 @@ static inline int is_global_system_inode(int type) | |||
59 | type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; | 54 | type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; |
60 | } | 55 | } |
61 | 56 | ||
62 | static inline int is_in_system_inode_array(struct ocfs2_super *osb, | 57 | static struct inode **get_local_system_inode(struct ocfs2_super *osb, |
63 | int type, | 58 | int type, |
64 | u32 slot) | 59 | u32 slot) |
65 | { | 60 | { |
66 | return slot == osb->slot_num || is_global_system_inode(type); | 61 | int index; |
62 | struct inode **local_system_inodes, **free = NULL; | ||
63 | |||
64 | BUG_ON(slot == OCFS2_INVALID_SLOT); | ||
65 | BUG_ON(type < OCFS2_FIRST_LOCAL_SYSTEM_INODE || | ||
66 | type > OCFS2_LAST_LOCAL_SYSTEM_INODE); | ||
67 | |||
68 | spin_lock(&osb->osb_lock); | ||
69 | local_system_inodes = osb->local_system_inodes; | ||
70 | spin_unlock(&osb->osb_lock); | ||
71 | |||
72 | if (unlikely(!local_system_inodes)) { | ||
73 | local_system_inodes = kzalloc(sizeof(struct inode *) * | ||
74 | NUM_LOCAL_SYSTEM_INODES * | ||
75 | osb->max_slots, | ||
76 | GFP_NOFS); | ||
77 | if (!local_system_inodes) { | ||
78 | mlog_errno(-ENOMEM); | ||
79 | /* | ||
80 | * return NULL here so that ocfs2_get_system_file_inode | ||
81 | * will try to create an inode and use it. We will try | ||
82 | * to initialize local_system_inodes next time. | ||
83 | */ | ||
84 | return NULL; | ||
85 | } | ||
86 | |||
87 | spin_lock(&osb->osb_lock); | ||
88 | if (osb->local_system_inodes) { | ||
89 | /* Someone has initialized it for us. */ | ||
90 | free = local_system_inodes; | ||
91 | local_system_inodes = osb->local_system_inodes; | ||
92 | } else | ||
93 | osb->local_system_inodes = local_system_inodes; | ||
94 | spin_unlock(&osb->osb_lock); | ||
95 | if (unlikely(free)) | ||
96 | kfree(free); | ||
97 | } | ||
98 | |||
99 | index = (slot * NUM_LOCAL_SYSTEM_INODES) + | ||
100 | (type - OCFS2_FIRST_LOCAL_SYSTEM_INODE); | ||
101 | |||
102 | return &local_system_inodes[index]; | ||
67 | } | 103 | } |
68 | 104 | ||
69 | struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, | 105 | struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, |
@@ -74,8 +110,10 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
74 | struct inode **arr = NULL; | 110 | struct inode **arr = NULL; |
75 | 111 | ||
76 | /* avoid the lookup if cached in local system file array */ | 112 | /* avoid the lookup if cached in local system file array */ |
77 | if (is_in_system_inode_array(osb, type, slot)) | 113 | if (is_global_system_inode(type)) { |
78 | arr = &(osb->system_inodes[type]); | 114 | arr = &(osb->global_system_inodes[type]); |
115 | } else | ||
116 | arr = get_local_system_inode(osb, type, slot); | ||
79 | 117 | ||
80 | if (arr && ((inode = *arr) != NULL)) { | 118 | if (arr && ((inode = *arr) != NULL)) { |
81 | /* get a ref in addition to the array ref */ | 119 | /* get a ref in addition to the array ref */ |
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index d03469f61801..67cd43914641 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -1286,13 +1286,11 @@ int ocfs2_xattr_get_nolock(struct inode *inode, | |||
1286 | xis.inode_bh = xbs.inode_bh = di_bh; | 1286 | xis.inode_bh = xbs.inode_bh = di_bh; |
1287 | di = (struct ocfs2_dinode *)di_bh->b_data; | 1287 | di = (struct ocfs2_dinode *)di_bh->b_data; |
1288 | 1288 | ||
1289 | down_read(&oi->ip_xattr_sem); | ||
1290 | ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, | 1289 | ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, |
1291 | buffer_size, &xis); | 1290 | buffer_size, &xis); |
1292 | if (ret == -ENODATA && di->i_xattr_loc) | 1291 | if (ret == -ENODATA && di->i_xattr_loc) |
1293 | ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, | 1292 | ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, |
1294 | buffer_size, &xbs); | 1293 | buffer_size, &xbs); |
1295 | up_read(&oi->ip_xattr_sem); | ||
1296 | 1294 | ||
1297 | return ret; | 1295 | return ret; |
1298 | } | 1296 | } |
@@ -1316,8 +1314,10 @@ static int ocfs2_xattr_get(struct inode *inode, | |||
1316 | mlog_errno(ret); | 1314 | mlog_errno(ret); |
1317 | return ret; | 1315 | return ret; |
1318 | } | 1316 | } |
1317 | down_read(&OCFS2_I(inode)->ip_xattr_sem); | ||
1319 | ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, | 1318 | ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, |
1320 | name, buffer, buffer_size); | 1319 | name, buffer, buffer_size); |
1320 | up_read(&OCFS2_I(inode)->ip_xattr_sem); | ||
1321 | 1321 | ||
1322 | ocfs2_inode_unlock(inode, 0); | 1322 | ocfs2_inode_unlock(inode, 0); |
1323 | 1323 | ||
@@ -7081,7 +7081,7 @@ static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, | |||
7081 | goto out; | 7081 | goto out; |
7082 | } | 7082 | } |
7083 | 7083 | ||
7084 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) | 7084 | if (!indexed) |
7085 | ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); | 7085 | ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); |
7086 | else | 7086 | else |
7087 | ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); | 7087 | ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); |