diff options
Diffstat (limited to 'fs')
36 files changed, 1069 insertions, 561 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index ed553c60de82..3165aebb43c8 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
| @@ -5699,7 +5699,7 @@ int ocfs2_remove_btree_range(struct inode *inode, | |||
| 5699 | OCFS2_JOURNAL_ACCESS_WRITE); | 5699 | OCFS2_JOURNAL_ACCESS_WRITE); |
| 5700 | if (ret) { | 5700 | if (ret) { |
| 5701 | mlog_errno(ret); | 5701 | mlog_errno(ret); |
| 5702 | goto out; | 5702 | goto out_commit; |
| 5703 | } | 5703 | } |
| 5704 | 5704 | ||
| 5705 | dquot_free_space_nodirty(inode, | 5705 | dquot_free_space_nodirty(inode, |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index c1efe939c774..78b68af3b0e3 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
| @@ -290,7 +290,15 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
| 290 | } | 290 | } |
| 291 | 291 | ||
| 292 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { | 292 | if (down_read_trylock(&oi->ip_alloc_sem) == 0) { |
| 293 | /* | ||
| 294 | * Unlock the page and cycle ip_alloc_sem so that we don't | ||
| 295 | * busyloop waiting for ip_alloc_sem to unlock | ||
| 296 | */ | ||
| 293 | ret = AOP_TRUNCATED_PAGE; | 297 | ret = AOP_TRUNCATED_PAGE; |
| 298 | unlock_page(page); | ||
| 299 | unlock = 0; | ||
| 300 | down_read(&oi->ip_alloc_sem); | ||
| 301 | up_read(&oi->ip_alloc_sem); | ||
| 294 | goto out_inode_unlock; | 302 | goto out_inode_unlock; |
| 295 | } | 303 | } |
| 296 | 304 | ||
| @@ -563,6 +571,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, | |||
| 563 | { | 571 | { |
| 564 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; | 572 | struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; |
| 565 | int level; | 573 | int level; |
| 574 | wait_queue_head_t *wq = ocfs2_ioend_wq(inode); | ||
| 566 | 575 | ||
| 567 | /* this io's submitter should not have unlocked this before we could */ | 576 | /* this io's submitter should not have unlocked this before we could */ |
| 568 | BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); | 577 | BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); |
| @@ -570,6 +579,15 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, | |||
| 570 | if (ocfs2_iocb_is_sem_locked(iocb)) | 579 | if (ocfs2_iocb_is_sem_locked(iocb)) |
| 571 | ocfs2_iocb_clear_sem_locked(iocb); | 580 | ocfs2_iocb_clear_sem_locked(iocb); |
| 572 | 581 | ||
| 582 | if (ocfs2_iocb_is_unaligned_aio(iocb)) { | ||
| 583 | ocfs2_iocb_clear_unaligned_aio(iocb); | ||
| 584 | |||
| 585 | if (atomic_dec_and_test(&OCFS2_I(inode)->ip_unaligned_aio) && | ||
| 586 | waitqueue_active(wq)) { | ||
| 587 | wake_up_all(wq); | ||
| 588 | } | ||
| 589 | } | ||
| 590 | |||
| 573 | ocfs2_iocb_clear_rw_locked(iocb); | 591 | ocfs2_iocb_clear_rw_locked(iocb); |
| 574 | 592 | ||
| 575 | level = ocfs2_iocb_rw_locked_level(iocb); | 593 | level = ocfs2_iocb_rw_locked_level(iocb); |
| @@ -863,6 +881,12 @@ struct ocfs2_write_ctxt { | |||
| 863 | struct page *w_target_page; | 881 | struct page *w_target_page; |
| 864 | 882 | ||
| 865 | /* | 883 | /* |
| 884 | * w_target_locked is used for page_mkwrite path indicating no unlocking | ||
| 885 | * against w_target_page in ocfs2_write_end_nolock. | ||
| 886 | */ | ||
| 887 | unsigned int w_target_locked:1; | ||
| 888 | |||
| 889 | /* | ||
| 866 | * ocfs2_write_end() uses this to know what the real range to | 890 | * ocfs2_write_end() uses this to know what the real range to |
| 867 | * write in the target should be. | 891 | * write in the target should be. |
| 868 | */ | 892 | */ |
| @@ -895,6 +919,24 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) | |||
| 895 | 919 | ||
| 896 | static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) | 920 | static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) |
| 897 | { | 921 | { |
| 922 | int i; | ||
| 923 | |||
| 924 | /* | ||
| 925 | * w_target_locked is only set to true in the page_mkwrite() case. | ||
| 926 | * The intent is to allow us to lock the target page from write_begin() | ||
| 927 | * to write_end(). The caller must hold a ref on w_target_page. | ||
| 928 | */ | ||
| 929 | if (wc->w_target_locked) { | ||
| 930 | BUG_ON(!wc->w_target_page); | ||
| 931 | for (i = 0; i < wc->w_num_pages; i++) { | ||
| 932 | if (wc->w_target_page == wc->w_pages[i]) { | ||
| 933 | wc->w_pages[i] = NULL; | ||
| 934 | break; | ||
| 935 | } | ||
| 936 | } | ||
| 937 | mark_page_accessed(wc->w_target_page); | ||
| 938 | page_cache_release(wc->w_target_page); | ||
| 939 | } | ||
| 898 | ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); | 940 | ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); |
| 899 | 941 | ||
| 900 | brelse(wc->w_di_bh); | 942 | brelse(wc->w_di_bh); |
| @@ -1132,20 +1174,17 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, | |||
| 1132 | */ | 1174 | */ |
| 1133 | lock_page(mmap_page); | 1175 | lock_page(mmap_page); |
| 1134 | 1176 | ||
| 1177 | /* Exit and let the caller retry */ | ||
| 1135 | if (mmap_page->mapping != mapping) { | 1178 | if (mmap_page->mapping != mapping) { |
| 1179 | WARN_ON(mmap_page->mapping); | ||
| 1136 | unlock_page(mmap_page); | 1180 | unlock_page(mmap_page); |
| 1137 | /* | 1181 | ret = -EAGAIN; |
| 1138 | * Sanity check - the locking in | ||
| 1139 | * ocfs2_pagemkwrite() should ensure | ||
| 1140 | * that this code doesn't trigger. | ||
| 1141 | */ | ||
| 1142 | ret = -EINVAL; | ||
| 1143 | mlog_errno(ret); | ||
| 1144 | goto out; | 1182 | goto out; |
| 1145 | } | 1183 | } |
| 1146 | 1184 | ||
| 1147 | page_cache_get(mmap_page); | 1185 | page_cache_get(mmap_page); |
| 1148 | wc->w_pages[i] = mmap_page; | 1186 | wc->w_pages[i] = mmap_page; |
| 1187 | wc->w_target_locked = true; | ||
| 1149 | } else { | 1188 | } else { |
| 1150 | wc->w_pages[i] = find_or_create_page(mapping, index, | 1189 | wc->w_pages[i] = find_or_create_page(mapping, index, |
| 1151 | GFP_NOFS); | 1190 | GFP_NOFS); |
| @@ -1160,6 +1199,8 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, | |||
| 1160 | wc->w_target_page = wc->w_pages[i]; | 1199 | wc->w_target_page = wc->w_pages[i]; |
| 1161 | } | 1200 | } |
| 1162 | out: | 1201 | out: |
| 1202 | if (ret) | ||
| 1203 | wc->w_target_locked = false; | ||
| 1163 | return ret; | 1204 | return ret; |
| 1164 | } | 1205 | } |
| 1165 | 1206 | ||
| @@ -1817,11 +1858,23 @@ try_again: | |||
| 1817 | */ | 1858 | */ |
| 1818 | ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len, | 1859 | ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len, |
| 1819 | cluster_of_pages, mmap_page); | 1860 | cluster_of_pages, mmap_page); |
| 1820 | if (ret) { | 1861 | if (ret && ret != -EAGAIN) { |
| 1821 | mlog_errno(ret); | 1862 | mlog_errno(ret); |
| 1822 | goto out_quota; | 1863 | goto out_quota; |
| 1823 | } | 1864 | } |
| 1824 | 1865 | ||
| 1866 | /* | ||
| 1867 | * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock | ||
| 1868 | * the target page. In this case, we exit with no error and no target | ||
| 1869 | * page. This will trigger the caller, page_mkwrite(), to re-try | ||
| 1870 | * the operation. | ||
| 1871 | */ | ||
| 1872 | if (ret == -EAGAIN) { | ||
| 1873 | BUG_ON(wc->w_target_page); | ||
| 1874 | ret = 0; | ||
| 1875 | goto out_quota; | ||
| 1876 | } | ||
| 1877 | |||
| 1825 | ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, | 1878 | ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, |
| 1826 | len); | 1879 | len); |
| 1827 | if (ret) { | 1880 | if (ret) { |
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index 75cf3ad987a6..ffb2da370a99 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h | |||
| @@ -78,6 +78,7 @@ enum ocfs2_iocb_lock_bits { | |||
| 78 | OCFS2_IOCB_RW_LOCK = 0, | 78 | OCFS2_IOCB_RW_LOCK = 0, |
| 79 | OCFS2_IOCB_RW_LOCK_LEVEL, | 79 | OCFS2_IOCB_RW_LOCK_LEVEL, |
| 80 | OCFS2_IOCB_SEM, | 80 | OCFS2_IOCB_SEM, |
| 81 | OCFS2_IOCB_UNALIGNED_IO, | ||
| 81 | OCFS2_IOCB_NUM_LOCKS | 82 | OCFS2_IOCB_NUM_LOCKS |
| 82 | }; | 83 | }; |
| 83 | 84 | ||
| @@ -91,4 +92,17 @@ enum ocfs2_iocb_lock_bits { | |||
| 91 | clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private) | 92 | clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private) |
| 92 | #define ocfs2_iocb_is_sem_locked(iocb) \ | 93 | #define ocfs2_iocb_is_sem_locked(iocb) \ |
| 93 | test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private) | 94 | test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private) |
| 95 | |||
| 96 | #define ocfs2_iocb_set_unaligned_aio(iocb) \ | ||
| 97 | set_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private) | ||
| 98 | #define ocfs2_iocb_clear_unaligned_aio(iocb) \ | ||
| 99 | clear_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private) | ||
| 100 | #define ocfs2_iocb_is_unaligned_aio(iocb) \ | ||
| 101 | test_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private) | ||
| 102 | |||
| 103 | #define OCFS2_IOEND_WQ_HASH_SZ 37 | ||
| 104 | #define ocfs2_ioend_wq(v) (&ocfs2__ioend_wq[((unsigned long)(v)) %\ | ||
| 105 | OCFS2_IOEND_WQ_HASH_SZ]) | ||
| 106 | extern wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ]; | ||
| 107 | |||
| 94 | #endif /* OCFS2_FILE_H */ | 108 | #endif /* OCFS2_FILE_H */ |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 9a3e6bbff27b..a4e855e3690e 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
| @@ -216,6 +216,7 @@ struct o2hb_region { | |||
| 216 | 216 | ||
| 217 | struct list_head hr_all_item; | 217 | struct list_head hr_all_item; |
| 218 | unsigned hr_unclean_stop:1, | 218 | unsigned hr_unclean_stop:1, |
| 219 | hr_aborted_start:1, | ||
| 219 | hr_item_pinned:1, | 220 | hr_item_pinned:1, |
| 220 | hr_item_dropped:1; | 221 | hr_item_dropped:1; |
| 221 | 222 | ||
| @@ -254,6 +255,10 @@ struct o2hb_region { | |||
| 254 | * a more complete api that doesn't lead to this sort of fragility. */ | 255 | * a more complete api that doesn't lead to this sort of fragility. */ |
| 255 | atomic_t hr_steady_iterations; | 256 | atomic_t hr_steady_iterations; |
| 256 | 257 | ||
| 258 | /* terminate o2hb thread if it does not reach steady state | ||
| 259 | * (hr_steady_iterations == 0) within hr_unsteady_iterations */ | ||
| 260 | atomic_t hr_unsteady_iterations; | ||
| 261 | |||
| 257 | char hr_dev_name[BDEVNAME_SIZE]; | 262 | char hr_dev_name[BDEVNAME_SIZE]; |
| 258 | 263 | ||
| 259 | unsigned int hr_timeout_ms; | 264 | unsigned int hr_timeout_ms; |
| @@ -324,6 +329,10 @@ static void o2hb_write_timeout(struct work_struct *work) | |||
| 324 | 329 | ||
| 325 | static void o2hb_arm_write_timeout(struct o2hb_region *reg) | 330 | static void o2hb_arm_write_timeout(struct o2hb_region *reg) |
| 326 | { | 331 | { |
| 332 | /* Arm writeout only after thread reaches steady state */ | ||
| 333 | if (atomic_read(&reg->hr_steady_iterations) != 0) | ||
| 334 | return; | ||
| 335 | |||
| 327 | mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", | 336 | mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", |
| 328 | O2HB_MAX_WRITE_TIMEOUT_MS); | 337 | O2HB_MAX_WRITE_TIMEOUT_MS); |
| 329 | 338 | ||
| @@ -537,9 +546,14 @@ static int o2hb_verify_crc(struct o2hb_region *reg, | |||
| 537 | return read == computed; | 546 | return read == computed; |
| 538 | } | 547 | } |
| 539 | 548 | ||
| 540 | /* We want to make sure that nobody is heartbeating on top of us -- | 549 | /* |
| 541 | * this will help detect an invalid configuration. */ | 550 | * Compare the slot data with what we wrote in the last iteration. |
| 542 | static void o2hb_check_last_timestamp(struct o2hb_region *reg) | 551 | * If the match fails, print an appropriate error message. This is to |
| 552 | * detect errors like... another node heartbeating on the same slot, | ||
| 553 | * flaky device that is losing writes, etc. | ||
| 554 | * Returns 1 if check succeeds, 0 otherwise. | ||
| 555 | */ | ||
| 556 | static int o2hb_check_own_slot(struct o2hb_region *reg) | ||
| 543 | { | 557 | { |
| 544 | struct o2hb_disk_slot *slot; | 558 | struct o2hb_disk_slot *slot; |
| 545 | struct o2hb_disk_heartbeat_block *hb_block; | 559 | struct o2hb_disk_heartbeat_block *hb_block; |
| @@ -548,13 +562,13 @@ static void o2hb_check_last_timestamp(struct o2hb_region *reg) | |||
| 548 | slot = &reg->hr_slots[o2nm_this_node()]; | 562 | slot = &reg->hr_slots[o2nm_this_node()]; |
| 549 | /* Don't check on our 1st timestamp */ | 563 | /* Don't check on our 1st timestamp */ |
| 550 | if (!slot->ds_last_time) | 564 | if (!slot->ds_last_time) |
| 551 | return; | 565 | return 0; |
| 552 | 566 | ||
| 553 | hb_block = slot->ds_raw_block; | 567 | hb_block = slot->ds_raw_block; |
| 554 | if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time && | 568 | if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time && |
| 555 | le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation && | 569 | le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation && |
| 556 | hb_block->hb_node == slot->ds_node_num) | 570 | hb_block->hb_node == slot->ds_node_num) |
| 557 | return; | 571 | return 1; |
| 558 | 572 | ||
| 559 | #define ERRSTR1 "Another node is heartbeating on device" | 573 | #define ERRSTR1 "Another node is heartbeating on device" |
| 560 | #define ERRSTR2 "Heartbeat generation mismatch on device" | 574 | #define ERRSTR2 "Heartbeat generation mismatch on device" |
| @@ -574,6 +588,8 @@ static void o2hb_check_last_timestamp(struct o2hb_region *reg) | |||
| 574 | (unsigned long long)slot->ds_last_time, hb_block->hb_node, | 588 | (unsigned long long)slot->ds_last_time, hb_block->hb_node, |
| 575 | (unsigned long long)le64_to_cpu(hb_block->hb_generation), | 589 | (unsigned long long)le64_to_cpu(hb_block->hb_generation), |
| 576 | (unsigned long long)le64_to_cpu(hb_block->hb_seq)); | 590 | (unsigned long long)le64_to_cpu(hb_block->hb_seq)); |
| 591 | |||
| 592 | return 0; | ||
| 577 | } | 593 | } |
| 578 | 594 | ||
| 579 | static inline void o2hb_prepare_block(struct o2hb_region *reg, | 595 | static inline void o2hb_prepare_block(struct o2hb_region *reg, |
| @@ -719,17 +735,24 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) | |||
| 719 | o2nm_node_put(node); | 735 | o2nm_node_put(node); |
| 720 | } | 736 | } |
| 721 | 737 | ||
| 722 | static void o2hb_set_quorum_device(struct o2hb_region *reg, | 738 | static void o2hb_set_quorum_device(struct o2hb_region *reg) |
| 723 | struct o2hb_disk_slot *slot) | ||
| 724 | { | 739 | { |
| 725 | assert_spin_locked(&o2hb_live_lock); | ||
| 726 | |||
| 727 | if (!o2hb_global_heartbeat_active()) | 740 | if (!o2hb_global_heartbeat_active()) |
| 728 | return; | 741 | return; |
| 729 | 742 | ||
| 730 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | 743 | /* Prevent race with o2hb_heartbeat_group_drop_item() */ |
| 744 | if (kthread_should_stop()) | ||
| 745 | return; | ||
| 746 | |||
| 747 | /* Tag region as quorum only after thread reaches steady state */ | ||
| 748 | if (atomic_read(&reg->hr_steady_iterations) != 0) | ||
| 731 | return; | 749 | return; |
| 732 | 750 | ||
| 751 | spin_lock(&o2hb_live_lock); | ||
| 752 | |||
| 753 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
| 754 | goto unlock; | ||
| 755 | |||
| 733 | /* | 756 | /* |
| 734 | * A region can be added to the quorum only when it sees all | 757 | * A region can be added to the quorum only when it sees all |
| 735 | * live nodes heartbeat on it. In other words, the region has been | 758 | * live nodes heartbeat on it. In other words, the region has been |
| @@ -737,13 +760,10 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg, | |||
| 737 | */ | 760 | */ |
| 738 | if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap, | 761 | if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap, |
| 739 | sizeof(o2hb_live_node_bitmap))) | 762 | sizeof(o2hb_live_node_bitmap))) |
| 740 | return; | 763 | goto unlock; |
| 741 | |||
| 742 | if (slot->ds_changed_samples < O2HB_LIVE_THRESHOLD) | ||
| 743 | return; | ||
| 744 | 764 | ||
| 745 | printk(KERN_NOTICE "o2hb: Region %s is now a quorum device\n", | 765 | printk(KERN_NOTICE "o2hb: Region %s (%s) is now a quorum device\n", |
| 746 | config_item_name(&reg->hr_item)); | 766 | config_item_name(&reg->hr_item), reg->hr_dev_name); |
| 747 | 767 | ||
| 748 | set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); | 768 | set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); |
| 749 | 769 | ||
| @@ -754,6 +774,8 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg, | |||
| 754 | if (o2hb_pop_count(&o2hb_quorum_region_bitmap, | 774 | if (o2hb_pop_count(&o2hb_quorum_region_bitmap, |
| 755 | O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF) | 775 | O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF) |
| 756 | o2hb_region_unpin(NULL); | 776 | o2hb_region_unpin(NULL); |
| 777 | unlock: | ||
| 778 | spin_unlock(&o2hb_live_lock); | ||
| 757 | } | 779 | } |
| 758 | 780 | ||
| 759 | static int o2hb_check_slot(struct o2hb_region *reg, | 781 | static int o2hb_check_slot(struct o2hb_region *reg, |
| @@ -925,8 +947,6 @@ fire_callbacks: | |||
| 925 | slot->ds_equal_samples = 0; | 947 | slot->ds_equal_samples = 0; |
| 926 | } | 948 | } |
| 927 | out: | 949 | out: |
| 928 | o2hb_set_quorum_device(reg, slot); | ||
| 929 | |||
| 930 | spin_unlock(&o2hb_live_lock); | 950 | spin_unlock(&o2hb_live_lock); |
| 931 | 951 | ||
| 932 | o2hb_run_event_list(&event); | 952 | o2hb_run_event_list(&event); |
| @@ -957,7 +977,8 @@ static int o2hb_highest_node(unsigned long *nodes, | |||
| 957 | 977 | ||
| 958 | static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | 978 | static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) |
| 959 | { | 979 | { |
| 960 | int i, ret, highest_node, change = 0; | 980 | int i, ret, highest_node; |
| 981 | int membership_change = 0, own_slot_ok = 0; | ||
| 961 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 982 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
| 962 | unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 983 | unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
| 963 | struct o2hb_bio_wait_ctxt write_wc; | 984 | struct o2hb_bio_wait_ctxt write_wc; |
| @@ -966,7 +987,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
| 966 | sizeof(configured_nodes)); | 987 | sizeof(configured_nodes)); |
| 967 | if (ret) { | 988 | if (ret) { |
| 968 | mlog_errno(ret); | 989 | mlog_errno(ret); |
| 969 | return ret; | 990 | goto bail; |
| 970 | } | 991 | } |
| 971 | 992 | ||
| 972 | /* | 993 | /* |
| @@ -982,8 +1003,9 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
| 982 | 1003 | ||
| 983 | highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); | 1004 | highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); |
| 984 | if (highest_node >= O2NM_MAX_NODES) { | 1005 | if (highest_node >= O2NM_MAX_NODES) { |
| 985 | mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); | 1006 | mlog(ML_NOTICE, "o2hb: No configured nodes found!\n"); |
| 986 | return -EINVAL; | 1007 | ret = -EINVAL; |
| 1008 | goto bail; | ||
| 987 | } | 1009 | } |
| 988 | 1010 | ||
| 989 | /* No sense in reading the slots of nodes that don't exist | 1011 | /* No sense in reading the slots of nodes that don't exist |
| @@ -993,29 +1015,27 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
| 993 | ret = o2hb_read_slots(reg, highest_node + 1); | 1015 | ret = o2hb_read_slots(reg, highest_node + 1); |
| 994 | if (ret < 0) { | 1016 | if (ret < 0) { |
| 995 | mlog_errno(ret); | 1017 | mlog_errno(ret); |
| 996 | return ret; | 1018 | goto bail; |
| 997 | } | 1019 | } |
| 998 | 1020 | ||
| 999 | /* With an up to date view of the slots, we can check that no | 1021 | /* With an up to date view of the slots, we can check that no |
| 1000 | * other node has been improperly configured to heartbeat in | 1022 | * other node has been improperly configured to heartbeat in |
| 1001 | * our slot. */ | 1023 | * our slot. */ |
| 1002 | o2hb_check_last_timestamp(reg); | 1024 | own_slot_ok = o2hb_check_own_slot(reg); |
| 1003 | 1025 | ||
| 1004 | /* fill in the proper info for our next heartbeat */ | 1026 | /* fill in the proper info for our next heartbeat */ |
| 1005 | o2hb_prepare_block(reg, reg->hr_generation); | 1027 | o2hb_prepare_block(reg, reg->hr_generation); |
| 1006 | 1028 | ||
| 1007 | /* And fire off the write. Note that we don't wait on this I/O | ||
| 1008 | * until later. */ | ||
| 1009 | ret = o2hb_issue_node_write(reg, &write_wc); | 1029 | ret = o2hb_issue_node_write(reg, &write_wc); |
| 1010 | if (ret < 0) { | 1030 | if (ret < 0) { |
| 1011 | mlog_errno(ret); | 1031 | mlog_errno(ret); |
| 1012 | return ret; | 1032 | goto bail; |
| 1013 | } | 1033 | } |
| 1014 | 1034 | ||
| 1015 | i = -1; | 1035 | i = -1; |
| 1016 | while((i = find_next_bit(configured_nodes, | 1036 | while((i = find_next_bit(configured_nodes, |
| 1017 | O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { | 1037 | O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { |
| 1018 | change |= o2hb_check_slot(reg, &reg->hr_slots[i]); | 1038 | membership_change |= o2hb_check_slot(reg, &reg->hr_slots[i]); |
| 1019 | } | 1039 | } |
| 1020 | 1040 | ||
| 1021 | /* | 1041 | /* |
| @@ -1030,18 +1050,39 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
| 1030 | * disk */ | 1050 | * disk */ |
| 1031 | mlog(ML_ERROR, "Write error %d on device \"%s\"\n", | 1051 | mlog(ML_ERROR, "Write error %d on device \"%s\"\n", |
| 1032 | write_wc.wc_error, reg->hr_dev_name); | 1052 | write_wc.wc_error, reg->hr_dev_name); |
| 1033 | return write_wc.wc_error; | 1053 | ret = write_wc.wc_error; |
| 1054 | goto bail; | ||
| 1034 | } | 1055 | } |
| 1035 | 1056 | ||
| 1036 | o2hb_arm_write_timeout(reg); | 1057 | /* Skip disarming the timeout if own slot has stale/bad data */ |
| 1058 | if (own_slot_ok) { | ||
| 1059 | o2hb_set_quorum_device(reg); | ||
| 1060 | o2hb_arm_write_timeout(reg); | ||
| 1061 | } | ||
| 1037 | 1062 | ||
| 1063 | bail: | ||
| 1038 | /* let the person who launched us know when things are steady */ | 1064 | /* let the person who launched us know when things are steady */ |
| 1039 | if (!change && (atomic_read(&reg->hr_steady_iterations) != 0)) { | 1065 | if (atomic_read(&reg->hr_steady_iterations) != 0) { |
| 1040 | if (atomic_dec_and_test(&reg->hr_steady_iterations)) | 1066 | if (!ret && own_slot_ok && !membership_change) { |
| 1067 | if (atomic_dec_and_test(&reg->hr_steady_iterations)) | ||
| 1068 | wake_up(&o2hb_steady_queue); | ||
| 1069 | } | ||
| 1070 | } | ||
| 1071 | |||
| 1072 | if (atomic_read(&reg->hr_steady_iterations) != 0) { | ||
| 1073 | if (atomic_dec_and_test(&reg->hr_unsteady_iterations)) { | ||
| 1074 | printk(KERN_NOTICE "o2hb: Unable to stabilize " | ||
| 1075 | "heartbeart on region %s (%s)\n", | ||
| 1076 | config_item_name(&reg->hr_item), | ||
| 1077 | reg->hr_dev_name); | ||
| 1078 | atomic_set(&reg->hr_steady_iterations, 0); | ||
| 1079 | reg->hr_aborted_start = 1; | ||
| 1041 | wake_up(&o2hb_steady_queue); | 1080 | wake_up(&o2hb_steady_queue); |
| 1081 | ret = -EIO; | ||
| 1082 | } | ||
| 1042 | } | 1083 | } |
| 1043 | 1084 | ||
| 1044 | return 0; | 1085 | return ret; |
| 1045 | } | 1086 | } |
| 1046 | 1087 | ||
| 1047 | /* Subtract b from a, storing the result in a. a *must* have a larger | 1088 | /* Subtract b from a, storing the result in a. a *must* have a larger |
| @@ -1095,7 +1136,8 @@ static int o2hb_thread(void *data) | |||
| 1095 | /* Pin node */ | 1136 | /* Pin node */ |
| 1096 | o2nm_depend_this_node(); | 1137 | o2nm_depend_this_node(); |
| 1097 | 1138 | ||
| 1098 | while (!kthread_should_stop() && !reg->hr_unclean_stop) { | 1139 | while (!kthread_should_stop() && |
| 1140 | !reg->hr_unclean_stop && !reg->hr_aborted_start) { | ||
| 1099 | /* We track the time spent inside | 1141 | /* We track the time spent inside |
| 1100 | * o2hb_do_disk_heartbeat so that we avoid more than | 1142 | * o2hb_do_disk_heartbeat so that we avoid more than |
| 1101 | * hr_timeout_ms between disk writes. On busy systems | 1143 | * hr_timeout_ms between disk writes. On busy systems |
| @@ -1103,10 +1145,7 @@ static int o2hb_thread(void *data) | |||
| 1103 | * likely to time itself out. */ | 1145 | * likely to time itself out. */ |
| 1104 | do_gettimeofday(&before_hb); | 1146 | do_gettimeofday(&before_hb); |
| 1105 | 1147 | ||
| 1106 | i = 0; | 1148 | ret = o2hb_do_disk_heartbeat(reg); |
| 1107 | do { | ||
| 1108 | ret = o2hb_do_disk_heartbeat(reg); | ||
| 1109 | } while (ret && ++i < 2); | ||
| 1110 | 1149 | ||
| 1111 | do_gettimeofday(&after_hb); | 1150 | do_gettimeofday(&after_hb); |
| 1112 | elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); | 1151 | elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); |
| @@ -1117,7 +1156,8 @@ static int o2hb_thread(void *data) | |||
| 1117 | after_hb.tv_sec, (unsigned long) after_hb.tv_usec, | 1156 | after_hb.tv_sec, (unsigned long) after_hb.tv_usec, |
| 1118 | elapsed_msec); | 1157 | elapsed_msec); |
| 1119 | 1158 | ||
| 1120 | if (elapsed_msec < reg->hr_timeout_ms) { | 1159 | if (!kthread_should_stop() && |
| 1160 | elapsed_msec < reg->hr_timeout_ms) { | ||
| 1121 | /* the kthread api has blocked signals for us so no | 1161 | /* the kthread api has blocked signals for us so no |
| 1122 | * need to record the return value. */ | 1162 | * need to record the return value. */ |
| 1123 | msleep_interruptible(reg->hr_timeout_ms - elapsed_msec); | 1163 | msleep_interruptible(reg->hr_timeout_ms - elapsed_msec); |
| @@ -1134,20 +1174,20 @@ static int o2hb_thread(void *data) | |||
| 1134 | * to timeout on this region when we could just as easily | 1174 | * to timeout on this region when we could just as easily |
| 1135 | * write a clear generation - thus indicating to them that | 1175 | * write a clear generation - thus indicating to them that |
| 1136 | * this node has left this region. | 1176 | * this node has left this region. |
| 1137 | * | 1177 | */ |
| 1138 | * XXX: Should we skip this on unclean_stop? */ | 1178 | if (!reg->hr_unclean_stop && !reg->hr_aborted_start) { |
| 1139 | o2hb_prepare_block(reg, 0); | 1179 | o2hb_prepare_block(reg, 0); |
| 1140 | ret = o2hb_issue_node_write(reg, &write_wc); | 1180 | ret = o2hb_issue_node_write(reg, &write_wc); |
| 1141 | if (ret == 0) { | 1181 | if (ret == 0) |
| 1142 | o2hb_wait_on_io(reg, &write_wc); | 1182 | o2hb_wait_on_io(reg, &write_wc); |
| 1143 | } else { | 1183 | else |
| 1144 | mlog_errno(ret); | 1184 | mlog_errno(ret); |
| 1145 | } | 1185 | } |
| 1146 | 1186 | ||
| 1147 | /* Unpin node */ | 1187 | /* Unpin node */ |
| 1148 | o2nm_undepend_this_node(); | 1188 | o2nm_undepend_this_node(); |
| 1149 | 1189 | ||
| 1150 | mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n"); | 1190 | mlog(ML_HEARTBEAT|ML_KTHREAD, "o2hb thread exiting\n"); |
| 1151 | 1191 | ||
| 1152 | return 0; | 1192 | return 0; |
| 1153 | } | 1193 | } |
| @@ -1158,6 +1198,7 @@ static int o2hb_debug_open(struct inode *inode, struct file *file) | |||
| 1158 | struct o2hb_debug_buf *db = inode->i_private; | 1198 | struct o2hb_debug_buf *db = inode->i_private; |
| 1159 | struct o2hb_region *reg; | 1199 | struct o2hb_region *reg; |
| 1160 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 1200 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
| 1201 | unsigned long lts; | ||
| 1161 | char *buf = NULL; | 1202 | char *buf = NULL; |
| 1162 | int i = -1; | 1203 | int i = -1; |
| 1163 | int out = 0; | 1204 | int out = 0; |
| @@ -1194,9 +1235,11 @@ static int o2hb_debug_open(struct inode *inode, struct file *file) | |||
| 1194 | 1235 | ||
| 1195 | case O2HB_DB_TYPE_REGION_ELAPSED_TIME: | 1236 | case O2HB_DB_TYPE_REGION_ELAPSED_TIME: |
| 1196 | reg = (struct o2hb_region *)db->db_data; | 1237 | reg = (struct o2hb_region *)db->db_data; |
| 1197 | out += snprintf(buf + out, PAGE_SIZE - out, "%u\n", | 1238 | lts = reg->hr_last_timeout_start; |
| 1198 | jiffies_to_msecs(jiffies - | 1239 | /* If 0, it has never been set before */ |
| 1199 | reg->hr_last_timeout_start)); | 1240 | if (lts) |
| 1241 | lts = jiffies_to_msecs(jiffies - lts); | ||
| 1242 | out += snprintf(buf + out, PAGE_SIZE - out, "%lu\n", lts); | ||
| 1200 | goto done; | 1243 | goto done; |
| 1201 | 1244 | ||
| 1202 | case O2HB_DB_TYPE_REGION_PINNED: | 1245 | case O2HB_DB_TYPE_REGION_PINNED: |
| @@ -1426,6 +1469,8 @@ static void o2hb_region_release(struct config_item *item) | |||
| 1426 | struct page *page; | 1469 | struct page *page; |
| 1427 | struct o2hb_region *reg = to_o2hb_region(item); | 1470 | struct o2hb_region *reg = to_o2hb_region(item); |
| 1428 | 1471 | ||
| 1472 | mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name); | ||
| 1473 | |||
| 1429 | if (reg->hr_tmp_block) | 1474 | if (reg->hr_tmp_block) |
| 1430 | kfree(reg->hr_tmp_block); | 1475 | kfree(reg->hr_tmp_block); |
| 1431 | 1476 | ||
| @@ -1792,7 +1837,10 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
| 1792 | live_threshold <<= 1; | 1837 | live_threshold <<= 1; |
| 1793 | spin_unlock(&o2hb_live_lock); | 1838 | spin_unlock(&o2hb_live_lock); |
| 1794 | } | 1839 | } |
| 1795 | atomic_set(&reg->hr_steady_iterations, live_threshold + 1); | 1840 | ++live_threshold; |
| 1841 | atomic_set(&reg->hr_steady_iterations, live_threshold); | ||
| 1842 | /* unsteady_iterations is double the steady_iterations */ | ||
| 1843 | atomic_set(&reg->hr_unsteady_iterations, (live_threshold << 1)); | ||
| 1796 | 1844 | ||
| 1797 | hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", | 1845 | hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", |
| 1798 | reg->hr_item.ci_name); | 1846 | reg->hr_item.ci_name); |
| @@ -1809,14 +1857,12 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
| 1809 | ret = wait_event_interruptible(o2hb_steady_queue, | 1857 | ret = wait_event_interruptible(o2hb_steady_queue, |
| 1810 | atomic_read(&reg->hr_steady_iterations) == 0); | 1858 | atomic_read(&reg->hr_steady_iterations) == 0); |
| 1811 | if (ret) { | 1859 | if (ret) { |
| 1812 | /* We got interrupted (hello ptrace!). Clean up */ | 1860 | atomic_set(&reg->hr_steady_iterations, 0); |
| 1813 | spin_lock(&o2hb_live_lock); | 1861 | reg->hr_aborted_start = 1; |
| 1814 | hb_task = reg->hr_task; | 1862 | } |
| 1815 | reg->hr_task = NULL; | ||
| 1816 | spin_unlock(&o2hb_live_lock); | ||
| 1817 | 1863 | ||
| 1818 | if (hb_task) | 1864 | if (reg->hr_aborted_start) { |
| 1819 | kthread_stop(hb_task); | 1865 | ret = -EIO; |
| 1820 | goto out; | 1866 | goto out; |
| 1821 | } | 1867 | } |
| 1822 | 1868 | ||
| @@ -1833,8 +1879,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
| 1833 | ret = -EIO; | 1879 | ret = -EIO; |
| 1834 | 1880 | ||
| 1835 | if (hb_task && o2hb_global_heartbeat_active()) | 1881 | if (hb_task && o2hb_global_heartbeat_active()) |
| 1836 | printk(KERN_NOTICE "o2hb: Heartbeat started on region %s\n", | 1882 | printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n", |
| 1837 | config_item_name(&reg->hr_item)); | 1883 | config_item_name(&reg->hr_item), reg->hr_dev_name); |
| 1838 | 1884 | ||
| 1839 | out: | 1885 | out: |
| 1840 | if (filp) | 1886 | if (filp) |
| @@ -2092,13 +2138,6 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, | |||
| 2092 | 2138 | ||
| 2093 | /* stop the thread when the user removes the region dir */ | 2139 | /* stop the thread when the user removes the region dir */ |
| 2094 | spin_lock(&o2hb_live_lock); | 2140 | spin_lock(&o2hb_live_lock); |
| 2095 | if (o2hb_global_heartbeat_active()) { | ||
| 2096 | clear_bit(reg->hr_region_num, o2hb_region_bitmap); | ||
| 2097 | clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); | ||
| 2098 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
| 2099 | quorum_region = 1; | ||
| 2100 | clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); | ||
| 2101 | } | ||
| 2102 | hb_task = reg->hr_task; | 2141 | hb_task = reg->hr_task; |
| 2103 | reg->hr_task = NULL; | 2142 | reg->hr_task = NULL; |
| 2104 | reg->hr_item_dropped = 1; | 2143 | reg->hr_item_dropped = 1; |
| @@ -2107,19 +2146,30 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, | |||
| 2107 | if (hb_task) | 2146 | if (hb_task) |
| 2108 | kthread_stop(hb_task); | 2147 | kthread_stop(hb_task); |
| 2109 | 2148 | ||
| 2149 | if (o2hb_global_heartbeat_active()) { | ||
| 2150 | spin_lock(&o2hb_live_lock); | ||
| 2151 | clear_bit(reg->hr_region_num, o2hb_region_bitmap); | ||
| 2152 | clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); | ||
| 2153 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
| 2154 | quorum_region = 1; | ||
| 2155 | clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); | ||
| 2156 | spin_unlock(&o2hb_live_lock); | ||
| 2157 | printk(KERN_NOTICE "o2hb: Heartbeat %s on region %s (%s)\n", | ||
| 2158 | ((atomic_read(&reg->hr_steady_iterations) == 0) ? | ||
| 2159 | "stopped" : "start aborted"), config_item_name(item), | ||
| 2160 | reg->hr_dev_name); | ||
| 2161 | } | ||
| 2162 | |||
| 2110 | /* | 2163 | /* |
| 2111 | * If we're racing a dev_write(), we need to wake them. They will | 2164 | * If we're racing a dev_write(), we need to wake them. They will |
| 2112 | * check reg->hr_task | 2165 | * check reg->hr_task |
| 2113 | */ | 2166 | */ |
| 2114 | if (atomic_read(&reg->hr_steady_iterations) != 0) { | 2167 | if (atomic_read(&reg->hr_steady_iterations) != 0) { |
| 2168 | reg->hr_aborted_start = 1; | ||
| 2115 | atomic_set(&reg->hr_steady_iterations, 0); | 2169 | atomic_set(&reg->hr_steady_iterations, 0); |
| 2116 | wake_up(&o2hb_steady_queue); | 2170 | wake_up(&o2hb_steady_queue); |
| 2117 | } | 2171 | } |
| 2118 | 2172 | ||
| 2119 | if (o2hb_global_heartbeat_active()) | ||
| 2120 | printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n", | ||
| 2121 | config_item_name(&reg->hr_item)); | |||
| 2122 | |||
| 2123 | config_item_put(item); | 2173 | config_item_put(item); |
| 2124 | 2174 | ||
| 2125 | if (!o2hb_global_heartbeat_active() || !quorum_region) | 2175 | if (!o2hb_global_heartbeat_active() || !quorum_region) |
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index 3a5835904b3d..dc45deb19e68 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c | |||
| @@ -47,6 +47,7 @@ | |||
| 47 | #define SC_DEBUG_NAME "sock_containers" | 47 | #define SC_DEBUG_NAME "sock_containers" |
| 48 | #define NST_DEBUG_NAME "send_tracking" | 48 | #define NST_DEBUG_NAME "send_tracking" |
| 49 | #define STATS_DEBUG_NAME "stats" | 49 | #define STATS_DEBUG_NAME "stats" |
| 50 | #define NODES_DEBUG_NAME "connected_nodes" | ||
| 50 | 51 | ||
| 51 | #define SHOW_SOCK_CONTAINERS 0 | 52 | #define SHOW_SOCK_CONTAINERS 0 |
| 52 | #define SHOW_SOCK_STATS 1 | 53 | #define SHOW_SOCK_STATS 1 |
| @@ -55,6 +56,7 @@ static struct dentry *o2net_dentry; | |||
| 55 | static struct dentry *sc_dentry; | 56 | static struct dentry *sc_dentry; |
| 56 | static struct dentry *nst_dentry; | 57 | static struct dentry *nst_dentry; |
| 57 | static struct dentry *stats_dentry; | 58 | static struct dentry *stats_dentry; |
| 59 | static struct dentry *nodes_dentry; | ||
| 58 | 60 | ||
| 59 | static DEFINE_SPINLOCK(o2net_debug_lock); | 61 | static DEFINE_SPINLOCK(o2net_debug_lock); |
| 60 | 62 | ||
| @@ -491,53 +493,87 @@ static const struct file_operations sc_seq_fops = { | |||
| 491 | .release = sc_fop_release, | 493 | .release = sc_fop_release, |
| 492 | }; | 494 | }; |
| 493 | 495 | ||
| 494 | int o2net_debugfs_init(void) | 496 | static int o2net_fill_bitmap(char *buf, int len) |
| 495 | { | 497 | { |
| 496 | o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL); | 498 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
| 497 | if (!o2net_dentry) { | 499 | int i = -1, out = 0; |
| 498 | mlog_errno(-ENOMEM); | ||
| 499 | goto bail; | ||
| 500 | } | ||
| 501 | 500 | ||
| 502 | nst_dentry = debugfs_create_file(NST_DEBUG_NAME, S_IFREG|S_IRUSR, | 501 | o2net_fill_node_map(map, sizeof(map)); |
| 503 | o2net_dentry, NULL, | ||
| 504 | &nst_seq_fops); | ||
| 505 | if (!nst_dentry) { | ||
| 506 | mlog_errno(-ENOMEM); | ||
| 507 | goto bail; | ||
| 508 | } | ||
| 509 | 502 | ||
| 510 | sc_dentry = debugfs_create_file(SC_DEBUG_NAME, S_IFREG|S_IRUSR, | 503 | while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) |
| 511 | o2net_dentry, NULL, | 504 | out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); |
| 512 | &sc_seq_fops); | 505 | out += snprintf(buf + out, PAGE_SIZE - out, "\n"); |
| 513 | if (!sc_dentry) { | ||
| 514 | mlog_errno(-ENOMEM); | ||
| 515 | goto bail; | ||
| 516 | } | ||
| 517 | 506 | ||
| 518 | stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, S_IFREG|S_IRUSR, | 507 | return out; |
| 519 | o2net_dentry, NULL, | 508 | } |
| 520 | &stats_seq_fops); | 509 | |
| 521 | if (!stats_dentry) { | 510 | static int nodes_fop_open(struct inode *inode, struct file *file) |
| 522 | mlog_errno(-ENOMEM); | 511 | { |
| 523 | goto bail; | 512 | char *buf; |
| 524 | } | 513 | |
| 514 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
| 515 | if (!buf) | ||
| 516 | return -ENOMEM; | ||
| 517 | |||
| 518 | i_size_write(inode, o2net_fill_bitmap(buf, PAGE_SIZE)); | ||
| 519 | |||
| 520 | file->private_data = buf; | ||
| 525 | 521 | ||
| 526 | return 0; | 522 | return 0; |
| 527 | bail: | ||
| 528 | debugfs_remove(stats_dentry); | ||
| 529 | debugfs_remove(sc_dentry); | ||
| 530 | debugfs_remove(nst_dentry); | ||
| 531 | debugfs_remove(o2net_dentry); | ||
| 532 | return -ENOMEM; | ||
| 533 | } | 523 | } |
| 534 | 524 | ||
| 525 | static int o2net_debug_release(struct inode *inode, struct file *file) | ||
| 526 | { | ||
| 527 | kfree(file->private_data); | ||
| 528 | return 0; | ||
| 529 | } | ||
| 530 | |||
| 531 | static ssize_t o2net_debug_read(struct file *file, char __user *buf, | ||
| 532 | size_t nbytes, loff_t *ppos) | ||
| 533 | { | ||
| 534 | return simple_read_from_buffer(buf, nbytes, ppos, file->private_data, | ||
| 535 | i_size_read(file->f_mapping->host)); | ||
| 536 | } | ||
| 537 | |||
| 538 | static const struct file_operations nodes_fops = { | ||
| 539 | .open = nodes_fop_open, | ||
| 540 | .release = o2net_debug_release, | ||
| 541 | .read = o2net_debug_read, | ||
| 542 | .llseek = generic_file_llseek, | ||
| 543 | }; | ||
| 544 | |||
| 535 | void o2net_debugfs_exit(void) | 545 | void o2net_debugfs_exit(void) |
| 536 | { | 546 | { |
| 547 | debugfs_remove(nodes_dentry); | ||
| 537 | debugfs_remove(stats_dentry); | 548 | debugfs_remove(stats_dentry); |
| 538 | debugfs_remove(sc_dentry); | 549 | debugfs_remove(sc_dentry); |
| 539 | debugfs_remove(nst_dentry); | 550 | debugfs_remove(nst_dentry); |
| 540 | debugfs_remove(o2net_dentry); | 551 | debugfs_remove(o2net_dentry); |
| 541 | } | 552 | } |
| 542 | 553 | ||
| 554 | int o2net_debugfs_init(void) | ||
| 555 | { | ||
| 556 | mode_t mode = S_IFREG|S_IRUSR; | ||
| 557 | |||
| 558 | o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL); | ||
| 559 | if (o2net_dentry) | ||
| 560 | nst_dentry = debugfs_create_file(NST_DEBUG_NAME, mode, | ||
| 561 | o2net_dentry, NULL, &nst_seq_fops); | ||
| 562 | if (nst_dentry) | ||
| 563 | sc_dentry = debugfs_create_file(SC_DEBUG_NAME, mode, | ||
| 564 | o2net_dentry, NULL, &sc_seq_fops); | ||
| 565 | if (sc_dentry) | ||
| 566 | stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, mode, | ||
| 567 | o2net_dentry, NULL, &stats_seq_fops); | ||
| 568 | if (stats_dentry) | ||
| 569 | nodes_dentry = debugfs_create_file(NODES_DEBUG_NAME, mode, | ||
| 570 | o2net_dentry, NULL, &nodes_fops); | ||
| 571 | if (nodes_dentry) | ||
| 572 | return 0; | ||
| 573 | |||
| 574 | o2net_debugfs_exit(); | ||
| 575 | mlog_errno(-ENOMEM); | ||
| 576 | return -ENOMEM; | ||
| 577 | } | ||
| 578 | |||
| 543 | #endif /* CONFIG_DEBUG_FS */ | 579 | #endif /* CONFIG_DEBUG_FS */ |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index ad7d0c155de4..044e7b58d31c 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
| @@ -546,7 +546,7 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
| 546 | } | 546 | } |
| 547 | 547 | ||
| 548 | if (was_valid && !valid) { | 548 | if (was_valid && !valid) { |
| 549 | printk(KERN_NOTICE "o2net: no longer connected to " | 549 | printk(KERN_NOTICE "o2net: No longer connected to " |
| 550 | SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); | 550 | SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); |
| 551 | o2net_complete_nodes_nsw(nn); | 551 | o2net_complete_nodes_nsw(nn); |
| 552 | } | 552 | } |
| @@ -556,7 +556,7 @@ static void o2net_set_nn_state(struct o2net_node *nn, | |||
| 556 | cancel_delayed_work(&nn->nn_connect_expired); | 556 | cancel_delayed_work(&nn->nn_connect_expired); |
| 557 | printk(KERN_NOTICE "o2net: %s " SC_NODEF_FMT "\n", | 557 | printk(KERN_NOTICE "o2net: %s " SC_NODEF_FMT "\n", |
| 558 | o2nm_this_node() > sc->sc_node->nd_num ? | 558 | o2nm_this_node() > sc->sc_node->nd_num ? |
| 559 | "connected to" : "accepted connection from", | 559 | "Connected to" : "Accepted connection from", |
| 560 | SC_NODEF_ARGS(sc)); | 560 | SC_NODEF_ARGS(sc)); |
| 561 | } | 561 | } |
| 562 | 562 | ||
| @@ -644,7 +644,7 @@ static void o2net_state_change(struct sock *sk) | |||
| 644 | o2net_sc_queue_work(sc, &sc->sc_connect_work); | 644 | o2net_sc_queue_work(sc, &sc->sc_connect_work); |
| 645 | break; | 645 | break; |
| 646 | default: | 646 | default: |
| 647 | printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT | 647 | printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT |
| 648 | " shutdown, state %d\n", | 648 | " shutdown, state %d\n", |
| 649 | SC_NODEF_ARGS(sc), sk->sk_state); | 649 | SC_NODEF_ARGS(sc), sk->sk_state); |
| 650 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); | 650 | o2net_sc_queue_work(sc, &sc->sc_shutdown_work); |
| @@ -1035,6 +1035,25 @@ static int o2net_tx_can_proceed(struct o2net_node *nn, | |||
| 1035 | return ret; | 1035 | return ret; |
| 1036 | } | 1036 | } |
| 1037 | 1037 | ||
| 1038 | /* Get a map of all nodes to which this node is currently connected to */ | ||
| 1039 | void o2net_fill_node_map(unsigned long *map, unsigned bytes) | ||
| 1040 | { | ||
| 1041 | struct o2net_sock_container *sc; | ||
| 1042 | int node, ret; | ||
| 1043 | |||
| 1044 | BUG_ON(bytes < (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long))); | ||
| 1045 | |||
| 1046 | memset(map, 0, bytes); | ||
| 1047 | for (node = 0; node < O2NM_MAX_NODES; ++node) { | ||
| 1048 | o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret); | ||
| 1049 | if (!ret) { | ||
| 1050 | set_bit(node, map); | ||
| 1051 | sc_put(sc); | ||
| 1052 | } | ||
| 1053 | } | ||
| 1054 | } | ||
| 1055 | EXPORT_SYMBOL_GPL(o2net_fill_node_map); | ||
| 1056 | |||
| 1038 | int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | 1057 | int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, |
| 1039 | size_t caller_veclen, u8 target_node, int *status) | 1058 | size_t caller_veclen, u8 target_node, int *status) |
| 1040 | { | 1059 | { |
| @@ -1285,11 +1304,11 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) | |||
| 1285 | struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); | 1304 | struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); |
| 1286 | 1305 | ||
| 1287 | if (hand->protocol_version != cpu_to_be64(O2NET_PROTOCOL_VERSION)) { | 1306 | if (hand->protocol_version != cpu_to_be64(O2NET_PROTOCOL_VERSION)) { |
| 1288 | mlog(ML_NOTICE, SC_NODEF_FMT " advertised net protocol " | 1307 | printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " Advertised net " |
| 1289 | "version %llu but %llu is required, disconnecting\n", | 1308 | "protocol version %llu but %llu is required. " |
| 1290 | SC_NODEF_ARGS(sc), | 1309 | "Disconnecting.\n", SC_NODEF_ARGS(sc), |
| 1291 | (unsigned long long)be64_to_cpu(hand->protocol_version), | 1310 | (unsigned long long)be64_to_cpu(hand->protocol_version), |
| 1292 | O2NET_PROTOCOL_VERSION); | 1311 | O2NET_PROTOCOL_VERSION); |
| 1293 | 1312 | ||
| 1294 | /* don't bother reconnecting if its the wrong version. */ | 1313 | /* don't bother reconnecting if its the wrong version. */ |
| 1295 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | 1314 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); |
| @@ -1303,33 +1322,33 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) | |||
| 1303 | */ | 1322 | */ |
| 1304 | if (be32_to_cpu(hand->o2net_idle_timeout_ms) != | 1323 | if (be32_to_cpu(hand->o2net_idle_timeout_ms) != |
| 1305 | o2net_idle_timeout()) { | 1324 | o2net_idle_timeout()) { |
| 1306 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " | 1325 | printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a network " |
| 1307 | "%u ms, but we use %u ms locally. disconnecting\n", | 1326 | "idle timeout of %u ms, but we use %u ms locally. " |
| 1308 | SC_NODEF_ARGS(sc), | 1327 | "Disconnecting.\n", SC_NODEF_ARGS(sc), |
| 1309 | be32_to_cpu(hand->o2net_idle_timeout_ms), | 1328 | be32_to_cpu(hand->o2net_idle_timeout_ms), |
| 1310 | o2net_idle_timeout()); | 1329 | o2net_idle_timeout()); |
| 1311 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | 1330 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); |
| 1312 | return -1; | 1331 | return -1; |
| 1313 | } | 1332 | } |
| 1314 | 1333 | ||
| 1315 | if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != | 1334 | if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != |
| 1316 | o2net_keepalive_delay()) { | 1335 | o2net_keepalive_delay()) { |
| 1317 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " | 1336 | printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a keepalive " |
| 1318 | "%u ms, but we use %u ms locally. disconnecting\n", | 1337 | "delay of %u ms, but we use %u ms locally. " |
| 1319 | SC_NODEF_ARGS(sc), | 1338 | "Disconnecting.\n", SC_NODEF_ARGS(sc), |
| 1320 | be32_to_cpu(hand->o2net_keepalive_delay_ms), | 1339 | be32_to_cpu(hand->o2net_keepalive_delay_ms), |
| 1321 | o2net_keepalive_delay()); | 1340 | o2net_keepalive_delay()); |
| 1322 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | 1341 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); |
| 1323 | return -1; | 1342 | return -1; |
| 1324 | } | 1343 | } |
| 1325 | 1344 | ||
| 1326 | if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) != | 1345 | if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) != |
| 1327 | O2HB_MAX_WRITE_TIMEOUT_MS) { | 1346 | O2HB_MAX_WRITE_TIMEOUT_MS) { |
| 1328 | mlog(ML_NOTICE, SC_NODEF_FMT " uses a heartbeat timeout of " | 1347 | printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a heartbeat " |
| 1329 | "%u ms, but we use %u ms locally. disconnecting\n", | 1348 | "timeout of %u ms, but we use %u ms locally. " |
| 1330 | SC_NODEF_ARGS(sc), | 1349 | "Disconnecting.\n", SC_NODEF_ARGS(sc), |
| 1331 | be32_to_cpu(hand->o2hb_heartbeat_timeout_ms), | 1350 | be32_to_cpu(hand->o2hb_heartbeat_timeout_ms), |
| 1332 | O2HB_MAX_WRITE_TIMEOUT_MS); | 1351 | O2HB_MAX_WRITE_TIMEOUT_MS); |
| 1333 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); | 1352 | o2net_ensure_shutdown(nn, sc, -ENOTCONN); |
| 1334 | return -1; | 1353 | return -1; |
| 1335 | } | 1354 | } |
| @@ -1540,28 +1559,16 @@ static void o2net_idle_timer(unsigned long data) | |||
| 1540 | { | 1559 | { |
| 1541 | struct o2net_sock_container *sc = (struct o2net_sock_container *)data; | 1560 | struct o2net_sock_container *sc = (struct o2net_sock_container *)data; |
| 1542 | struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); | 1561 | struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); |
| 1543 | |||
| 1544 | #ifdef CONFIG_DEBUG_FS | 1562 | #ifdef CONFIG_DEBUG_FS |
| 1545 | ktime_t now = ktime_get(); | 1563 | unsigned long msecs = ktime_to_ms(ktime_get()) - |
| 1564 | ktime_to_ms(sc->sc_tv_timer); | ||
| 1565 | #else | ||
| 1566 | unsigned long msecs = o2net_idle_timeout(); | ||
| 1546 | #endif | 1567 | #endif |
| 1547 | 1568 | ||
| 1548 | printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " | 1569 | printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been " |
| 1549 | "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), | 1570 | "idle for %lu.%lu secs, shutting it down.\n", SC_NODEF_ARGS(sc), |
| 1550 | o2net_idle_timeout() / 1000, | 1571 | msecs / 1000, msecs % 1000); |
| 1551 | o2net_idle_timeout() % 1000); | ||
| 1552 | |||
| 1553 | #ifdef CONFIG_DEBUG_FS | ||
| 1554 | mlog(ML_NOTICE, "Here are some times that might help debug the " | ||
| 1555 | "situation: (Timer: %lld, Now %lld, DataReady %lld, Advance %lld-%lld, " | ||
| 1556 | "Key 0x%08x, Func %u, FuncTime %lld-%lld)\n", | ||
| 1557 | (long long)ktime_to_us(sc->sc_tv_timer), (long long)ktime_to_us(now), | ||
| 1558 | (long long)ktime_to_us(sc->sc_tv_data_ready), | ||
| 1559 | (long long)ktime_to_us(sc->sc_tv_advance_start), | ||
| 1560 | (long long)ktime_to_us(sc->sc_tv_advance_stop), | ||
| 1561 | sc->sc_msg_key, sc->sc_msg_type, | ||
| 1562 | (long long)ktime_to_us(sc->sc_tv_func_start), | ||
| 1563 | (long long)ktime_to_us(sc->sc_tv_func_stop)); | ||
| 1564 | #endif | ||
| 1565 | 1572 | ||
| 1566 | /* | 1573 | /* |
| 1567 | * Initialize the nn_timeout so that the next connection attempt | 1574 | * Initialize the nn_timeout so that the next connection attempt |
| @@ -1694,8 +1701,8 @@ static void o2net_start_connect(struct work_struct *work) | |||
| 1694 | 1701 | ||
| 1695 | out: | 1702 | out: |
| 1696 | if (ret) { | 1703 | if (ret) { |
| 1697 | mlog(ML_NOTICE, "connect attempt to " SC_NODEF_FMT " failed " | 1704 | printk(KERN_NOTICE "o2net: Connect attempt to " SC_NODEF_FMT |
| 1698 | "with errno %d\n", SC_NODEF_ARGS(sc), ret); | 1705 | " failed with errno %d\n", SC_NODEF_ARGS(sc), ret); |
| 1699 | /* 0 err so that another will be queued and attempted | 1706 | /* 0 err so that another will be queued and attempted |
| 1700 | * from set_nn_state */ | 1707 | * from set_nn_state */ |
| 1701 | if (sc) | 1708 | if (sc) |
| @@ -1718,8 +1725,8 @@ static void o2net_connect_expired(struct work_struct *work) | |||
| 1718 | 1725 | ||
| 1719 | spin_lock(&nn->nn_lock); | 1726 | spin_lock(&nn->nn_lock); |
| 1720 | if (!nn->nn_sc_valid) { | 1727 | if (!nn->nn_sc_valid) { |
| 1721 | mlog(ML_ERROR, "no connection established with node %u after " | 1728 | printk(KERN_NOTICE "o2net: No connection established with " |
| 1722 | "%u.%u seconds, giving up and returning errors.\n", | 1729 | "node %u after %u.%u seconds, giving up.\n", |
| 1723 | o2net_num_from_nn(nn), | 1730 | o2net_num_from_nn(nn), |
| 1724 | o2net_idle_timeout() / 1000, | 1731 | o2net_idle_timeout() / 1000, |
| 1725 | o2net_idle_timeout() % 1000); | 1732 | o2net_idle_timeout() % 1000); |
| @@ -1862,21 +1869,21 @@ static int o2net_accept_one(struct socket *sock) | |||
| 1862 | 1869 | ||
| 1863 | node = o2nm_get_node_by_ip(sin.sin_addr.s_addr); | 1870 | node = o2nm_get_node_by_ip(sin.sin_addr.s_addr); |
| 1864 | if (node == NULL) { | 1871 | if (node == NULL) { |
| 1865 | mlog(ML_NOTICE, "attempt to connect from unknown node at %pI4:%d\n", | 1872 | printk(KERN_NOTICE "o2net: Attempt to connect from unknown " |
| 1866 | &sin.sin_addr.s_addr, ntohs(sin.sin_port)); | 1873 | "node at %pI4:%d\n", &sin.sin_addr.s_addr, |
| 1874 | ntohs(sin.sin_port)); | ||
| 1867 | ret = -EINVAL; | 1875 | ret = -EINVAL; |
| 1868 | goto out; | 1876 | goto out; |
| 1869 | } | 1877 | } |
| 1870 | 1878 | ||
| 1871 | if (o2nm_this_node() >= node->nd_num) { | 1879 | if (o2nm_this_node() >= node->nd_num) { |
| 1872 | local_node = o2nm_get_node_by_num(o2nm_this_node()); | 1880 | local_node = o2nm_get_node_by_num(o2nm_this_node()); |
| 1873 | mlog(ML_NOTICE, "unexpected connect attempt seen at node '%s' (" | 1881 | printk(KERN_NOTICE "o2net: Unexpected connect attempt seen " |
| 1874 | "%u, %pI4:%d) from node '%s' (%u, %pI4:%d)\n", | 1882 | "at node '%s' (%u, %pI4:%d) from node '%s' (%u, " |
| 1875 | local_node->nd_name, local_node->nd_num, | 1883 | "%pI4:%d)\n", local_node->nd_name, local_node->nd_num, |
| 1876 | &(local_node->nd_ipv4_address), | 1884 | &(local_node->nd_ipv4_address), |
| 1877 | ntohs(local_node->nd_ipv4_port), | 1885 | ntohs(local_node->nd_ipv4_port), node->nd_name, |
| 1878 | node->nd_name, node->nd_num, &sin.sin_addr.s_addr, | 1886 | node->nd_num, &sin.sin_addr.s_addr, ntohs(sin.sin_port)); |
| 1879 | ntohs(sin.sin_port)); | ||
| 1880 | ret = -EINVAL; | 1887 | ret = -EINVAL; |
| 1881 | goto out; | 1888 | goto out; |
| 1882 | } | 1889 | } |
| @@ -1901,10 +1908,10 @@ static int o2net_accept_one(struct socket *sock) | |||
| 1901 | ret = 0; | 1908 | ret = 0; |
| 1902 | spin_unlock(&nn->nn_lock); | 1909 | spin_unlock(&nn->nn_lock); |
| 1903 | if (ret) { | 1910 | if (ret) { |
| 1904 | mlog(ML_NOTICE, "attempt to connect from node '%s' at " | 1911 | printk(KERN_NOTICE "o2net: Attempt to connect from node '%s' " |
| 1905 | "%pI4:%d but it already has an open connection\n", | 1912 | "at %pI4:%d but it already has an open connection\n", |
| 1906 | node->nd_name, &sin.sin_addr.s_addr, | 1913 | node->nd_name, &sin.sin_addr.s_addr, |
| 1907 | ntohs(sin.sin_port)); | 1914 | ntohs(sin.sin_port)); |
| 1908 | goto out; | 1915 | goto out; |
| 1909 | } | 1916 | } |
| 1910 | 1917 | ||
| @@ -1984,7 +1991,7 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port) | |||
| 1984 | 1991 | ||
| 1985 | ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); | 1992 | ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); |
| 1986 | if (ret < 0) { | 1993 | if (ret < 0) { |
| 1987 | mlog(ML_ERROR, "unable to create socket, ret=%d\n", ret); | 1994 | printk(KERN_ERR "o2net: Error %d while creating socket\n", ret); |
| 1988 | goto out; | 1995 | goto out; |
| 1989 | } | 1996 | } |
| 1990 | 1997 | ||
| @@ -2001,16 +2008,15 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port) | |||
| 2001 | sock->sk->sk_reuse = 1; | 2008 | sock->sk->sk_reuse = 1; |
| 2002 | ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); | 2009 | ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); |
| 2003 | if (ret < 0) { | 2010 | if (ret < 0) { |
| 2004 | mlog(ML_ERROR, "unable to bind socket at %pI4:%u, " | 2011 | printk(KERN_ERR "o2net: Error %d while binding socket at " |
| 2005 | "ret=%d\n", &addr, ntohs(port), ret); | 2012 | "%pI4:%u\n", ret, &addr, ntohs(port)); |
| 2006 | goto out; | 2013 | goto out; |
| 2007 | } | 2014 | } |
| 2008 | 2015 | ||
| 2009 | ret = sock->ops->listen(sock, 64); | 2016 | ret = sock->ops->listen(sock, 64); |
| 2010 | if (ret < 0) { | 2017 | if (ret < 0) |
| 2011 | mlog(ML_ERROR, "unable to listen on %pI4:%u, ret=%d\n", | 2018 | printk(KERN_ERR "o2net: Error %d while listening on %pI4:%u\n", |
| 2012 | &addr, ntohs(port), ret); | 2019 | ret, &addr, ntohs(port)); |
| 2013 | } | ||
| 2014 | 2020 | ||
| 2015 | out: | 2021 | out: |
| 2016 | if (ret) { | 2022 | if (ret) { |
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h index fd6179eb26d4..5bada2a69b50 100644 --- a/fs/ocfs2/cluster/tcp.h +++ b/fs/ocfs2/cluster/tcp.h | |||
| @@ -106,6 +106,8 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, | |||
| 106 | struct list_head *unreg_list); | 106 | struct list_head *unreg_list); |
| 107 | void o2net_unregister_handler_list(struct list_head *list); | 107 | void o2net_unregister_handler_list(struct list_head *list); |
| 108 | 108 | ||
| 109 | void o2net_fill_node_map(unsigned long *map, unsigned bytes); | ||
| 110 | |||
| 109 | struct o2nm_node; | 111 | struct o2nm_node; |
| 110 | int o2net_register_hb_callbacks(void); | 112 | int o2net_register_hb_callbacks(void); |
| 111 | void o2net_unregister_hb_callbacks(void); | 113 | void o2net_unregister_hb_callbacks(void); |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index e2878b5895fb..8fe4e2892ab9 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
| @@ -1184,8 +1184,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, | |||
| 1184 | if (pde) | 1184 | if (pde) |
| 1185 | le16_add_cpu(&pde->rec_len, | 1185 | le16_add_cpu(&pde->rec_len, |
| 1186 | le16_to_cpu(de->rec_len)); | 1186 | le16_to_cpu(de->rec_len)); |
| 1187 | else | 1187 | de->inode = 0; |
| 1188 | de->inode = 0; | ||
| 1189 | dir->i_version++; | 1188 | dir->i_version++; |
| 1190 | ocfs2_journal_dirty(handle, bh); | 1189 | ocfs2_journal_dirty(handle, bh); |
| 1191 | goto bail; | 1190 | goto bail; |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index d602abb51b61..a5952ceecba5 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
| @@ -859,8 +859,8 @@ void dlm_complete_recovery_thread(struct dlm_ctxt *dlm); | |||
| 859 | void dlm_wait_for_recovery(struct dlm_ctxt *dlm); | 859 | void dlm_wait_for_recovery(struct dlm_ctxt *dlm); |
| 860 | void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); | 860 | void dlm_kick_recovery_thread(struct dlm_ctxt *dlm); |
| 861 | int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node); | 861 | int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node); |
| 862 | int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout); | 862 | void dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout); |
| 863 | int dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout); | 863 | void dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout); |
| 864 | 864 | ||
| 865 | void dlm_put(struct dlm_ctxt *dlm); | 865 | void dlm_put(struct dlm_ctxt *dlm); |
| 866 | struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm); | 866 | struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm); |
| @@ -877,9 +877,8 @@ static inline void dlm_lockres_get(struct dlm_lock_resource *res) | |||
| 877 | kref_get(&res->refs); | 877 | kref_get(&res->refs); |
| 878 | } | 878 | } |
| 879 | void dlm_lockres_put(struct dlm_lock_resource *res); | 879 | void dlm_lockres_put(struct dlm_lock_resource *res); |
| 880 | void __dlm_unhash_lockres(struct dlm_lock_resource *res); | 880 | void __dlm_unhash_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); |
| 881 | void __dlm_insert_lockres(struct dlm_ctxt *dlm, | 881 | void __dlm_insert_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); |
| 882 | struct dlm_lock_resource *res); | ||
| 883 | struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, | 882 | struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, |
| 884 | const char *name, | 883 | const char *name, |
| 885 | unsigned int len, | 884 | unsigned int len, |
| @@ -902,46 +901,15 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, | |||
| 902 | const char *name, | 901 | const char *name, |
| 903 | unsigned int namelen); | 902 | unsigned int namelen); |
| 904 | 903 | ||
| 905 | #define dlm_lockres_set_refmap_bit(bit,res) \ | 904 | void dlm_lockres_set_refmap_bit(struct dlm_ctxt *dlm, |
| 906 | __dlm_lockres_set_refmap_bit(bit,res,__FILE__,__LINE__) | 905 | struct dlm_lock_resource *res, int bit); |
| 907 | #define dlm_lockres_clear_refmap_bit(bit,res) \ | 906 | void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm, |
| 908 | __dlm_lockres_clear_refmap_bit(bit,res,__FILE__,__LINE__) | 907 | struct dlm_lock_resource *res, int bit); |
| 909 | 908 | ||
| 910 | static inline void __dlm_lockres_set_refmap_bit(int bit, | 909 | void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, |
| 911 | struct dlm_lock_resource *res, | 910 | struct dlm_lock_resource *res); |
| 912 | const char *file, | 911 | void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, |
| 913 | int line) | 912 | struct dlm_lock_resource *res); |
| 914 | { | ||
| 915 | //printk("%s:%d:%.*s: setting bit %d\n", file, line, | ||
| 916 | // res->lockname.len, res->lockname.name, bit); | ||
| 917 | set_bit(bit, res->refmap); | ||
| 918 | } | ||
| 919 | |||
| 920 | static inline void __dlm_lockres_clear_refmap_bit(int bit, | ||
| 921 | struct dlm_lock_resource *res, | ||
| 922 | const char *file, | ||
| 923 | int line) | ||
| 924 | { | ||
| 925 | //printk("%s:%d:%.*s: clearing bit %d\n", file, line, | ||
| 926 | // res->lockname.len, res->lockname.name, bit); | ||
| 927 | clear_bit(bit, res->refmap); | ||
| 928 | } | ||
| 929 | |||
| 930 | void __dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, | ||
| 931 | struct dlm_lock_resource *res, | ||
| 932 | const char *file, | ||
| 933 | int line); | ||
| 934 | void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, | ||
| 935 | struct dlm_lock_resource *res, | ||
| 936 | int new_lockres, | ||
| 937 | const char *file, | ||
| 938 | int line); | ||
| 939 | #define dlm_lockres_drop_inflight_ref(d,r) \ | ||
| 940 | __dlm_lockres_drop_inflight_ref(d,r,__FILE__,__LINE__) | ||
| 941 | #define dlm_lockres_grab_inflight_ref(d,r) \ | ||
| 942 | __dlm_lockres_grab_inflight_ref(d,r,0,__FILE__,__LINE__) | ||
| 943 | #define dlm_lockres_grab_inflight_ref_new(d,r) \ | ||
| 944 | __dlm_lockres_grab_inflight_ref(d,r,1,__FILE__,__LINE__) | ||
| 945 | 913 | ||
| 946 | void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 914 | void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
| 947 | void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 915 | void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 6ed6b95dcf93..92f2ead0fab6 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
| @@ -157,16 +157,18 @@ static int dlm_protocol_compare(struct dlm_protocol_version *existing, | |||
| 157 | 157 | ||
| 158 | static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); | 158 | static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm); |
| 159 | 159 | ||
| 160 | void __dlm_unhash_lockres(struct dlm_lock_resource *lockres) | 160 | void __dlm_unhash_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) |
| 161 | { | 161 | { |
| 162 | if (!hlist_unhashed(&lockres->hash_node)) { | 162 | if (hlist_unhashed(&res->hash_node)) |
| 163 | hlist_del_init(&lockres->hash_node); | 163 | return; |
| 164 | dlm_lockres_put(lockres); | 164 | |
| 165 | } | 165 | mlog(0, "%s: Unhash res %.*s\n", dlm->name, res->lockname.len, |
| 166 | res->lockname.name); | ||
| 167 | hlist_del_init(&res->hash_node); | ||
| 168 | dlm_lockres_put(res); | ||
| 166 | } | 169 | } |
| 167 | 170 | ||
| 168 | void __dlm_insert_lockres(struct dlm_ctxt *dlm, | 171 | void __dlm_insert_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) |
| 169 | struct dlm_lock_resource *res) | ||
| 170 | { | 172 | { |
| 171 | struct hlist_head *bucket; | 173 | struct hlist_head *bucket; |
| 172 | struct qstr *q; | 174 | struct qstr *q; |
| @@ -180,6 +182,9 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm, | |||
| 180 | dlm_lockres_get(res); | 182 | dlm_lockres_get(res); |
| 181 | 183 | ||
| 182 | hlist_add_head(&res->hash_node, bucket); | 184 | hlist_add_head(&res->hash_node, bucket); |
| 185 | |||
| 186 | mlog(0, "%s: Hash res %.*s\n", dlm->name, res->lockname.len, | ||
| 187 | res->lockname.name); | ||
| 183 | } | 188 | } |
| 184 | 189 | ||
| 185 | struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, | 190 | struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, |
| @@ -539,17 +544,17 @@ again: | |||
| 539 | 544 | ||
| 540 | static void __dlm_print_nodes(struct dlm_ctxt *dlm) | 545 | static void __dlm_print_nodes(struct dlm_ctxt *dlm) |
| 541 | { | 546 | { |
| 542 | int node = -1; | 547 | int node = -1, num = 0; |
| 543 | 548 | ||
| 544 | assert_spin_locked(&dlm->spinlock); | 549 | assert_spin_locked(&dlm->spinlock); |
| 545 | 550 | ||
| 546 | printk(KERN_NOTICE "o2dlm: Nodes in domain %s: ", dlm->name); | 551 | printk("( "); |
| 547 | |||
| 548 | while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, | 552 | while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, |
| 549 | node + 1)) < O2NM_MAX_NODES) { | 553 | node + 1)) < O2NM_MAX_NODES) { |
| 550 | printk("%d ", node); | 554 | printk("%d ", node); |
| 555 | ++num; | ||
| 551 | } | 556 | } |
| 552 | printk("\n"); | 557 | printk(") %u nodes\n", num); |
| 553 | } | 558 | } |
| 554 | 559 | ||
| 555 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, | 560 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, |
| @@ -566,11 +571,10 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 566 | 571 | ||
| 567 | node = exit_msg->node_idx; | 572 | node = exit_msg->node_idx; |
| 568 | 573 | ||
| 569 | printk(KERN_NOTICE "o2dlm: Node %u leaves domain %s\n", node, dlm->name); | ||
| 570 | |||
| 571 | spin_lock(&dlm->spinlock); | 574 | spin_lock(&dlm->spinlock); |
| 572 | clear_bit(node, dlm->domain_map); | 575 | clear_bit(node, dlm->domain_map); |
| 573 | clear_bit(node, dlm->exit_domain_map); | 576 | clear_bit(node, dlm->exit_domain_map); |
| 577 | printk(KERN_NOTICE "o2dlm: Node %u leaves domain %s ", node, dlm->name); | ||
| 574 | __dlm_print_nodes(dlm); | 578 | __dlm_print_nodes(dlm); |
| 575 | 579 | ||
| 576 | /* notify anything attached to the heartbeat events */ | 580 | /* notify anything attached to the heartbeat events */ |
| @@ -755,6 +759,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) | |||
| 755 | 759 | ||
| 756 | dlm_mark_domain_leaving(dlm); | 760 | dlm_mark_domain_leaving(dlm); |
| 757 | dlm_leave_domain(dlm); | 761 | dlm_leave_domain(dlm); |
| 762 | printk(KERN_NOTICE "o2dlm: Leaving domain %s\n", dlm->name); | ||
| 758 | dlm_force_free_mles(dlm); | 763 | dlm_force_free_mles(dlm); |
| 759 | dlm_complete_dlm_shutdown(dlm); | 764 | dlm_complete_dlm_shutdown(dlm); |
| 760 | } | 765 | } |
| @@ -970,7 +975,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 970 | clear_bit(assert->node_idx, dlm->exit_domain_map); | 975 | clear_bit(assert->node_idx, dlm->exit_domain_map); |
| 971 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); | 976 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); |
| 972 | 977 | ||
| 973 | printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n", | 978 | printk(KERN_NOTICE "o2dlm: Node %u joins domain %s ", |
| 974 | assert->node_idx, dlm->name); | 979 | assert->node_idx, dlm->name); |
| 975 | __dlm_print_nodes(dlm); | 980 | __dlm_print_nodes(dlm); |
| 976 | 981 | ||
| @@ -1701,8 +1706,10 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) | |||
| 1701 | bail: | 1706 | bail: |
| 1702 | spin_lock(&dlm->spinlock); | 1707 | spin_lock(&dlm->spinlock); |
| 1703 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); | 1708 | __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); |
| 1704 | if (!status) | 1709 | if (!status) { |
| 1710 | printk(KERN_NOTICE "o2dlm: Joining domain %s ", dlm->name); | ||
| 1705 | __dlm_print_nodes(dlm); | 1711 | __dlm_print_nodes(dlm); |
| 1712 | } | ||
| 1706 | spin_unlock(&dlm->spinlock); | 1713 | spin_unlock(&dlm->spinlock); |
| 1707 | 1714 | ||
| 1708 | if (ctxt) { | 1715 | if (ctxt) { |
| @@ -2131,13 +2138,6 @@ struct dlm_ctxt * dlm_register_domain(const char *domain, | |||
| 2131 | goto leave; | 2138 | goto leave; |
| 2132 | } | 2139 | } |
| 2133 | 2140 | ||
| 2134 | if (!o2hb_check_local_node_heartbeating()) { | ||
| 2135 | mlog(ML_ERROR, "the local node has not been configured, or is " | ||
| 2136 | "not heartbeating\n"); | ||
| 2137 | ret = -EPROTO; | ||
| 2138 | goto leave; | ||
| 2139 | } | ||
| 2140 | |||
| 2141 | mlog(0, "register called for domain \"%s\"\n", domain); | 2141 | mlog(0, "register called for domain \"%s\"\n", domain); |
| 2142 | 2142 | ||
| 2143 | retry: | 2143 | retry: |
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 8d39e0fd66f7..975810b98492 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c | |||
| @@ -183,10 +183,6 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm, | |||
| 183 | kick_thread = 1; | 183 | kick_thread = 1; |
| 184 | } | 184 | } |
| 185 | } | 185 | } |
| 186 | /* reduce the inflight count, this may result in the lockres | ||
| 187 | * being purged below during calc_usage */ | ||
| 188 | if (lock->ml.node == dlm->node_num) | ||
| 189 | dlm_lockres_drop_inflight_ref(dlm, res); | ||
| 190 | 186 | ||
| 191 | spin_unlock(&res->spinlock); | 187 | spin_unlock(&res->spinlock); |
| 192 | wake_up(&res->wq); | 188 | wake_up(&res->wq); |
| @@ -231,10 +227,16 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm, | |||
| 231 | lock->ml.type, res->lockname.len, | 227 | lock->ml.type, res->lockname.len, |
| 232 | res->lockname.name, flags); | 228 | res->lockname.name, flags); |
| 233 | 229 | ||
| 230 | /* | ||
| 231 | * Wait if resource is getting recovered, remastered, etc. | ||
| 232 | * If the resource was remastered and new owner is self, then exit. | ||
| 233 | */ | ||
| 234 | spin_lock(&res->spinlock); | 234 | spin_lock(&res->spinlock); |
| 235 | |||
| 236 | /* will exit this call with spinlock held */ | ||
| 237 | __dlm_wait_on_lockres(res); | 235 | __dlm_wait_on_lockres(res); |
| 236 | if (res->owner == dlm->node_num) { | ||
| 237 | spin_unlock(&res->spinlock); | ||
| 238 | return DLM_RECOVERING; | ||
| 239 | } | ||
| 238 | res->state |= DLM_LOCK_RES_IN_PROGRESS; | 240 | res->state |= DLM_LOCK_RES_IN_PROGRESS; |
| 239 | 241 | ||
| 240 | /* add lock to local (secondary) queue */ | 242 | /* add lock to local (secondary) queue */ |
| @@ -319,27 +321,23 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm, | |||
| 319 | tmpret = o2net_send_message(DLM_CREATE_LOCK_MSG, dlm->key, &create, | 321 | tmpret = o2net_send_message(DLM_CREATE_LOCK_MSG, dlm->key, &create, |
| 320 | sizeof(create), res->owner, &status); | 322 | sizeof(create), res->owner, &status); |
| 321 | if (tmpret >= 0) { | 323 | if (tmpret >= 0) { |
| 322 | // successfully sent and received | 324 | ret = status; |
| 323 | ret = status; // this is already a dlm_status | ||
| 324 | if (ret == DLM_REJECTED) { | 325 | if (ret == DLM_REJECTED) { |
| 325 | mlog(ML_ERROR, "%s:%.*s: BUG. this is a stale lockres " | 326 | mlog(ML_ERROR, "%s: res %.*s, Stale lockres no longer " |
| 326 | "no longer owned by %u. that node is coming back " | 327 | "owned by node %u. That node is coming back up " |
| 327 | "up currently.\n", dlm->name, create.namelen, | 328 | "currently.\n", dlm->name, create.namelen, |
| 328 | create.name, res->owner); | 329 | create.name, res->owner); |
| 329 | dlm_print_one_lock_resource(res); | 330 | dlm_print_one_lock_resource(res); |
| 330 | BUG(); | 331 | BUG(); |
| 331 | } | 332 | } |
| 332 | } else { | 333 | } else { |
| 333 | mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " | 334 | mlog(ML_ERROR, "%s: res %.*s, Error %d send CREATE LOCK to " |
| 334 | "node %u\n", tmpret, DLM_CREATE_LOCK_MSG, dlm->key, | 335 | "node %u\n", dlm->name, create.namelen, create.name, |
| 335 | res->owner); | 336 | tmpret, res->owner); |
| 336 | if (dlm_is_host_down(tmpret)) { | 337 | if (dlm_is_host_down(tmpret)) |
| 337 | ret = DLM_RECOVERING; | 338 | ret = DLM_RECOVERING; |
| 338 | mlog(0, "node %u died so returning DLM_RECOVERING " | 339 | else |
| 339 | "from lock message!\n", res->owner); | ||
| 340 | } else { | ||
| 341 | ret = dlm_err_to_dlm_status(tmpret); | 340 | ret = dlm_err_to_dlm_status(tmpret); |
| 342 | } | ||
| 343 | } | 341 | } |
| 344 | 342 | ||
| 345 | return ret; | 343 | return ret; |
| @@ -440,7 +438,7 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie, | |||
| 440 | /* zero memory only if kernel-allocated */ | 438 | /* zero memory only if kernel-allocated */ |
| 441 | lksb = kzalloc(sizeof(*lksb), GFP_NOFS); | 439 | lksb = kzalloc(sizeof(*lksb), GFP_NOFS); |
| 442 | if (!lksb) { | 440 | if (!lksb) { |
| 443 | kfree(lock); | 441 | kmem_cache_free(dlm_lock_cache, lock); |
| 444 | return NULL; | 442 | return NULL; |
| 445 | } | 443 | } |
| 446 | kernel_allocated = 1; | 444 | kernel_allocated = 1; |
| @@ -718,18 +716,10 @@ retry_lock: | |||
| 718 | 716 | ||
| 719 | if (status == DLM_RECOVERING || status == DLM_MIGRATING || | 717 | if (status == DLM_RECOVERING || status == DLM_MIGRATING || |
| 720 | status == DLM_FORWARD) { | 718 | status == DLM_FORWARD) { |
| 721 | mlog(0, "retrying lock with migration/" | ||
| 722 | "recovery/in progress\n"); | ||
| 723 | msleep(100); | 719 | msleep(100); |
| 724 | /* no waiting for dlm_reco_thread */ | ||
| 725 | if (recovery) { | 720 | if (recovery) { |
| 726 | if (status != DLM_RECOVERING) | 721 | if (status != DLM_RECOVERING) |
| 727 | goto retry_lock; | 722 | goto retry_lock; |
| 728 | |||
| 729 | mlog(0, "%s: got RECOVERING " | ||
| 730 | "for $RECOVERY lock, master " | ||
| 731 | "was %u\n", dlm->name, | ||
| 732 | res->owner); | ||
| 733 | /* wait to see the node go down, then | 723 | /* wait to see the node go down, then |
| 734 | * drop down and allow the lockres to | 724 | * drop down and allow the lockres to |
| 735 | * get cleaned up. need to remaster. */ | 725 | * get cleaned up. need to remaster. */ |
| @@ -741,6 +731,14 @@ retry_lock: | |||
| 741 | } | 731 | } |
| 742 | } | 732 | } |
| 743 | 733 | ||
| 734 | /* Inflight taken in dlm_get_lock_resource() is dropped here */ | ||
| 735 | spin_lock(&res->spinlock); | ||
| 736 | dlm_lockres_drop_inflight_ref(dlm, res); | ||
| 737 | spin_unlock(&res->spinlock); | ||
| 738 | |||
| 739 | dlm_lockres_calc_usage(dlm, res); | ||
| 740 | dlm_kick_thread(dlm, res); | ||
| 741 | |||
| 744 | if (status != DLM_NORMAL) { | 742 | if (status != DLM_NORMAL) { |
| 745 | lock->lksb->flags &= ~DLM_LKSB_GET_LVB; | 743 | lock->lksb->flags &= ~DLM_LKSB_GET_LVB; |
| 746 | if (status != DLM_NOTQUEUED) | 744 | if (status != DLM_NOTQUEUED) |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 11eefb8c12e9..005261c333b0 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
| @@ -631,39 +631,54 @@ error: | |||
| 631 | return NULL; | 631 | return NULL; |
| 632 | } | 632 | } |
| 633 | 633 | ||
| 634 | void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, | 634 | void dlm_lockres_set_refmap_bit(struct dlm_ctxt *dlm, |
| 635 | struct dlm_lock_resource *res, | 635 | struct dlm_lock_resource *res, int bit) |
| 636 | int new_lockres, | ||
| 637 | const char *file, | ||
| 638 | int line) | ||
| 639 | { | 636 | { |
| 640 | if (!new_lockres) | 637 | assert_spin_locked(&res->spinlock); |
| 641 | assert_spin_locked(&res->spinlock); | 638 | |
| 639 | mlog(0, "res %.*s, set node %u, %ps()\n", res->lockname.len, | ||
| 640 | res->lockname.name, bit, __builtin_return_address(0)); | ||
| 641 | |||
| 642 | set_bit(bit, res->refmap); | ||
| 643 | } | ||
| 644 | |||
| 645 | void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm, | ||
| 646 | struct dlm_lock_resource *res, int bit) | ||
| 647 | { | ||
| 648 | assert_spin_locked(&res->spinlock); | ||
| 649 | |||
| 650 | mlog(0, "res %.*s, clr node %u, %ps()\n", res->lockname.len, | ||
| 651 | res->lockname.name, bit, __builtin_return_address(0)); | ||
| 652 | |||
| 653 | clear_bit(bit, res->refmap); | ||
| 654 | } | ||
| 655 | |||
| 656 | |||
| 657 | void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm, | ||
| 658 | struct dlm_lock_resource *res) | ||
| 659 | { | ||
| 660 | assert_spin_locked(&res->spinlock); | ||
| 642 | 661 | ||
| 643 | if (!test_bit(dlm->node_num, res->refmap)) { | ||
| 644 | BUG_ON(res->inflight_locks != 0); | ||
| 645 | dlm_lockres_set_refmap_bit(dlm->node_num, res); | ||
| 646 | } | ||
| 647 | res->inflight_locks++; | 662 | res->inflight_locks++; |
| 648 | mlog(0, "%s:%.*s: inflight++: now %u\n", | 663 | |
| 649 | dlm->name, res->lockname.len, res->lockname.name, | 664 | mlog(0, "%s: res %.*s, inflight++: now %u, %ps()\n", dlm->name, |
| 650 | res->inflight_locks); | 665 | res->lockname.len, res->lockname.name, res->inflight_locks, |
| 666 | __builtin_return_address(0)); | ||
| 651 | } | 667 | } |
| 652 | 668 | ||
| 653 | void __dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, | 669 | void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm, |
| 654 | struct dlm_lock_resource *res, | 670 | struct dlm_lock_resource *res) |
| 655 | const char *file, | ||
| 656 | int line) | ||
| 657 | { | 671 | { |
| 658 | assert_spin_locked(&res->spinlock); | 672 | assert_spin_locked(&res->spinlock); |
| 659 | 673 | ||
| 660 | BUG_ON(res->inflight_locks == 0); | 674 | BUG_ON(res->inflight_locks == 0); |
| 675 | |||
| 661 | res->inflight_locks--; | 676 | res->inflight_locks--; |
| 662 | mlog(0, "%s:%.*s: inflight--: now %u\n", | 677 | |
| 663 | dlm->name, res->lockname.len, res->lockname.name, | 678 | mlog(0, "%s: res %.*s, inflight--: now %u, %ps()\n", dlm->name, |
| 664 | res->inflight_locks); | 679 | res->lockname.len, res->lockname.name, res->inflight_locks, |
| 665 | if (res->inflight_locks == 0) | 680 | __builtin_return_address(0)); |
| 666 | dlm_lockres_clear_refmap_bit(dlm->node_num, res); | 681 | |
| 667 | wake_up(&res->wq); | 682 | wake_up(&res->wq); |
| 668 | } | 683 | } |
| 669 | 684 | ||
| @@ -697,7 +712,6 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, | |||
| 697 | unsigned int hash; | 712 | unsigned int hash; |
| 698 | int tries = 0; | 713 | int tries = 0; |
| 699 | int bit, wait_on_recovery = 0; | 714 | int bit, wait_on_recovery = 0; |
| 700 | int drop_inflight_if_nonlocal = 0; | ||
| 701 | 715 | ||
| 702 | BUG_ON(!lockid); | 716 | BUG_ON(!lockid); |
| 703 | 717 | ||
| @@ -709,36 +723,33 @@ lookup: | |||
| 709 | spin_lock(&dlm->spinlock); | 723 | spin_lock(&dlm->spinlock); |
| 710 | tmpres = __dlm_lookup_lockres_full(dlm, lockid, namelen, hash); | 724 | tmpres = __dlm_lookup_lockres_full(dlm, lockid, namelen, hash); |
| 711 | if (tmpres) { | 725 | if (tmpres) { |
| 712 | int dropping_ref = 0; | ||
| 713 | |||
| 714 | spin_unlock(&dlm->spinlock); | 726 | spin_unlock(&dlm->spinlock); |
| 715 | |||
| 716 | spin_lock(&tmpres->spinlock); | 727 | spin_lock(&tmpres->spinlock); |
| 717 | /* We wait for the other thread that is mastering the resource */ | 728 | /* Wait on the thread that is mastering the resource */ |
| 718 | if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { | 729 | if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { |
| 719 | __dlm_wait_on_lockres(tmpres); | 730 | __dlm_wait_on_lockres(tmpres); |
| 720 | BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN); | 731 | BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN); |
| 732 | spin_unlock(&tmpres->spinlock); | ||
| 733 | dlm_lockres_put(tmpres); | ||
| 734 | tmpres = NULL; | ||
| 735 | goto lookup; | ||
| 721 | } | 736 | } |
| 722 | 737 | ||
| 723 | if (tmpres->owner == dlm->node_num) { | 738 | /* Wait on the resource purge to complete before continuing */ |
| 724 | BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF); | 739 | if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) { |
| 725 | dlm_lockres_grab_inflight_ref(dlm, tmpres); | 740 | BUG_ON(tmpres->owner == dlm->node_num); |
| 726 | } else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF) | 741 | __dlm_wait_on_lockres_flags(tmpres, |
| 727 | dropping_ref = 1; | 742 | DLM_LOCK_RES_DROPPING_REF); |
| 728 | spin_unlock(&tmpres->spinlock); | ||
| 729 | |||
| 730 | /* wait until done messaging the master, drop our ref to allow | ||
| 731 | * the lockres to be purged, start over. */ | ||
| 732 | if (dropping_ref) { | ||
| 733 | spin_lock(&tmpres->spinlock); | ||
| 734 | __dlm_wait_on_lockres_flags(tmpres, DLM_LOCK_RES_DROPPING_REF); | ||
| 735 | spin_unlock(&tmpres->spinlock); | 743 | spin_unlock(&tmpres->spinlock); |
| 736 | dlm_lockres_put(tmpres); | 744 | dlm_lockres_put(tmpres); |
| 737 | tmpres = NULL; | 745 | tmpres = NULL; |
| 738 | goto lookup; | 746 | goto lookup; |
| 739 | } | 747 | } |
| 740 | 748 | ||
| 741 | mlog(0, "found in hash!\n"); | 749 | /* Grab inflight ref to pin the resource */ |
| 750 | dlm_lockres_grab_inflight_ref(dlm, tmpres); | ||
| 751 | |||
| 752 | spin_unlock(&tmpres->spinlock); | ||
| 742 | if (res) | 753 | if (res) |
| 743 | dlm_lockres_put(res); | 754 | dlm_lockres_put(res); |
| 744 | res = tmpres; | 755 | res = tmpres; |
| @@ -829,8 +840,8 @@ lookup: | |||
| 829 | * but they might own this lockres. wait on them. */ | 840 | * but they might own this lockres. wait on them. */ |
| 830 | bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); | 841 | bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); |
| 831 | if (bit < O2NM_MAX_NODES) { | 842 | if (bit < O2NM_MAX_NODES) { |
| 832 | mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to " | 843 | mlog(0, "%s: res %.*s, At least one node (%d) " |
| 833 | "recover before lock mastery can begin\n", | 844 | "to recover before lock mastery can begin\n", |
| 834 | dlm->name, namelen, (char *)lockid, bit); | 845 | dlm->name, namelen, (char *)lockid, bit); |
| 835 | wait_on_recovery = 1; | 846 | wait_on_recovery = 1; |
| 836 | } | 847 | } |
| @@ -843,12 +854,11 @@ lookup: | |||
| 843 | 854 | ||
| 844 | /* finally add the lockres to its hash bucket */ | 855 | /* finally add the lockres to its hash bucket */ |
| 845 | __dlm_insert_lockres(dlm, res); | 856 | __dlm_insert_lockres(dlm, res); |
| 846 | /* since this lockres is new it doesn't not require the spinlock */ | ||
| 847 | dlm_lockres_grab_inflight_ref_new(dlm, res); | ||
| 848 | 857 | ||
| 849 | /* if this node does not become the master make sure to drop | 858 | /* Grab inflight ref to pin the resource */ |
| 850 | * this inflight reference below */ | 859 | spin_lock(&res->spinlock); |
| 851 | drop_inflight_if_nonlocal = 1; | 860 | dlm_lockres_grab_inflight_ref(dlm, res); |
| 861 | spin_unlock(&res->spinlock); | ||
| 852 | 862 | ||
| 853 | /* get an extra ref on the mle in case this is a BLOCK | 863 | /* get an extra ref on the mle in case this is a BLOCK |
| 854 | * if so, the creator of the BLOCK may try to put the last | 864 | * if so, the creator of the BLOCK may try to put the last |
| @@ -864,8 +874,8 @@ redo_request: | |||
| 864 | * dlm spinlock would be detectable be a change on the mle, | 874 | * dlm spinlock would be detectable be a change on the mle, |
| 865 | * so we only need to clear out the recovery map once. */ | 875 | * so we only need to clear out the recovery map once. */ |
| 866 | if (dlm_is_recovery_lock(lockid, namelen)) { | 876 | if (dlm_is_recovery_lock(lockid, namelen)) { |
| 867 | mlog(ML_NOTICE, "%s: recovery map is not empty, but " | 877 | mlog(0, "%s: Recovery map is not empty, but must " |
| 868 | "must master $RECOVERY lock now\n", dlm->name); | 878 | "master $RECOVERY lock now\n", dlm->name); |
| 869 | if (!dlm_pre_master_reco_lockres(dlm, res)) | 879 | if (!dlm_pre_master_reco_lockres(dlm, res)) |
| 870 | wait_on_recovery = 0; | 880 | wait_on_recovery = 0; |
| 871 | else { | 881 | else { |
| @@ -883,8 +893,8 @@ redo_request: | |||
| 883 | spin_lock(&dlm->spinlock); | 893 | spin_lock(&dlm->spinlock); |
| 884 | bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); | 894 | bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); |
| 885 | if (bit < O2NM_MAX_NODES) { | 895 | if (bit < O2NM_MAX_NODES) { |
| 886 | mlog(ML_NOTICE, "%s:%.*s: at least one node (%d) to " | 896 | mlog(0, "%s: res %.*s, At least one node (%d) " |
| 887 | "recover before lock mastery can begin\n", | 897 | "to recover before lock mastery can begin\n", |
| 888 | dlm->name, namelen, (char *)lockid, bit); | 898 | dlm->name, namelen, (char *)lockid, bit); |
| 889 | wait_on_recovery = 1; | 899 | wait_on_recovery = 1; |
| 890 | } else | 900 | } else |
| @@ -913,8 +923,8 @@ redo_request: | |||
| 913 | * yet, keep going until it does. this is how the | 923 | * yet, keep going until it does. this is how the |
| 914 | * master will know that asserts are needed back to | 924 | * master will know that asserts are needed back to |
| 915 | * the lower nodes. */ | 925 | * the lower nodes. */ |
| 916 | mlog(0, "%s:%.*s: requests only up to %u but master " | 926 | mlog(0, "%s: res %.*s, Requests only up to %u but " |
| 917 | "is %u, keep going\n", dlm->name, namelen, | 927 | "master is %u, keep going\n", dlm->name, namelen, |
| 918 | lockid, nodenum, mle->master); | 928 | lockid, nodenum, mle->master); |
| 919 | } | 929 | } |
| 920 | } | 930 | } |
| @@ -924,13 +934,12 @@ wait: | |||
| 924 | ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked); | 934 | ret = dlm_wait_for_lock_mastery(dlm, res, mle, &blocked); |
| 925 | if (ret < 0) { | 935 | if (ret < 0) { |
| 926 | wait_on_recovery = 1; | 936 | wait_on_recovery = 1; |
| 927 | mlog(0, "%s:%.*s: node map changed, redo the " | 937 | mlog(0, "%s: res %.*s, Node map changed, redo the master " |
| 928 | "master request now, blocked=%d\n", | 938 | "request now, blocked=%d\n", dlm->name, res->lockname.len, |
| 929 | dlm->name, res->lockname.len, | ||
| 930 | res->lockname.name, blocked); | 939 | res->lockname.name, blocked); |
| 931 | if (++tries > 20) { | 940 | if (++tries > 20) { |
| 932 | mlog(ML_ERROR, "%s:%.*s: spinning on " | 941 | mlog(ML_ERROR, "%s: res %.*s, Spinning on " |
| 933 | "dlm_wait_for_lock_mastery, blocked=%d\n", | 942 | "dlm_wait_for_lock_mastery, blocked = %d\n", |
| 934 | dlm->name, res->lockname.len, | 943 | dlm->name, res->lockname.len, |
| 935 | res->lockname.name, blocked); | 944 | res->lockname.name, blocked); |
| 936 | dlm_print_one_lock_resource(res); | 945 | dlm_print_one_lock_resource(res); |
| @@ -940,7 +949,8 @@ wait: | |||
| 940 | goto redo_request; | 949 | goto redo_request; |
| 941 | } | 950 | } |
| 942 | 951 | ||
| 943 | mlog(0, "lockres mastered by %u\n", res->owner); | 952 | mlog(0, "%s: res %.*s, Mastered by %u\n", dlm->name, res->lockname.len, |
| 953 | res->lockname.name, res->owner); | ||
| 944 | /* make sure we never continue without this */ | 954 | /* make sure we never continue without this */ |
| 945 | BUG_ON(res->owner == O2NM_MAX_NODES); | 955 | BUG_ON(res->owner == O2NM_MAX_NODES); |
| 946 | 956 | ||
| @@ -952,8 +962,6 @@ wait: | |||
| 952 | 962 | ||
| 953 | wake_waiters: | 963 | wake_waiters: |
| 954 | spin_lock(&res->spinlock); | 964 | spin_lock(&res->spinlock); |
| 955 | if (res->owner != dlm->node_num && drop_inflight_if_nonlocal) | ||
| 956 | dlm_lockres_drop_inflight_ref(dlm, res); | ||
| 957 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; | 965 | res->state &= ~DLM_LOCK_RES_IN_PROGRESS; |
| 958 | spin_unlock(&res->spinlock); | 966 | spin_unlock(&res->spinlock); |
| 959 | wake_up(&res->wq); | 967 | wake_up(&res->wq); |
| @@ -1426,9 +1434,7 @@ way_up_top: | |||
| 1426 | } | 1434 | } |
| 1427 | 1435 | ||
| 1428 | if (res->owner == dlm->node_num) { | 1436 | if (res->owner == dlm->node_num) { |
| 1429 | mlog(0, "%s:%.*s: setting bit %u in refmap\n", | 1437 | dlm_lockres_set_refmap_bit(dlm, res, request->node_idx); |
| 1430 | dlm->name, namelen, name, request->node_idx); | ||
| 1431 | dlm_lockres_set_refmap_bit(request->node_idx, res); | ||
| 1432 | spin_unlock(&res->spinlock); | 1438 | spin_unlock(&res->spinlock); |
| 1433 | response = DLM_MASTER_RESP_YES; | 1439 | response = DLM_MASTER_RESP_YES; |
| 1434 | if (mle) | 1440 | if (mle) |
| @@ -1493,10 +1499,8 @@ way_up_top: | |||
| 1493 | * go back and clean the mles on any | 1499 | * go back and clean the mles on any |
| 1494 | * other nodes */ | 1500 | * other nodes */ |
| 1495 | dispatch_assert = 1; | 1501 | dispatch_assert = 1; |
| 1496 | dlm_lockres_set_refmap_bit(request->node_idx, res); | 1502 | dlm_lockres_set_refmap_bit(dlm, res, |
| 1497 | mlog(0, "%s:%.*s: setting bit %u in refmap\n", | 1503 | request->node_idx); |
| 1498 | dlm->name, namelen, name, | ||
| 1499 | request->node_idx); | ||
| 1500 | } else | 1504 | } else |
| 1501 | response = DLM_MASTER_RESP_NO; | 1505 | response = DLM_MASTER_RESP_NO; |
| 1502 | } else { | 1506 | } else { |
| @@ -1702,7 +1706,7 @@ again: | |||
| 1702 | "lockres, set the bit in the refmap\n", | 1706 | "lockres, set the bit in the refmap\n", |
| 1703 | namelen, lockname, to); | 1707 | namelen, lockname, to); |
| 1704 | spin_lock(&res->spinlock); | 1708 | spin_lock(&res->spinlock); |
| 1705 | dlm_lockres_set_refmap_bit(to, res); | 1709 | dlm_lockres_set_refmap_bit(dlm, res, to); |
| 1706 | spin_unlock(&res->spinlock); | 1710 | spin_unlock(&res->spinlock); |
| 1707 | } | 1711 | } |
| 1708 | } | 1712 | } |
| @@ -2187,8 +2191,6 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | |||
| 2187 | namelen = res->lockname.len; | 2191 | namelen = res->lockname.len; |
| 2188 | BUG_ON(namelen > O2NM_MAX_NAME_LEN); | 2192 | BUG_ON(namelen > O2NM_MAX_NAME_LEN); |
| 2189 | 2193 | ||
| 2190 | mlog(0, "%s:%.*s: sending deref to %d\n", | ||
| 2191 | dlm->name, namelen, lockname, res->owner); | ||
| 2192 | memset(&deref, 0, sizeof(deref)); | 2194 | memset(&deref, 0, sizeof(deref)); |
| 2193 | deref.node_idx = dlm->node_num; | 2195 | deref.node_idx = dlm->node_num; |
| 2194 | deref.namelen = namelen; | 2196 | deref.namelen = namelen; |
| @@ -2197,14 +2199,12 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | |||
| 2197 | ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key, | 2199 | ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key, |
| 2198 | &deref, sizeof(deref), res->owner, &r); | 2200 | &deref, sizeof(deref), res->owner, &r); |
| 2199 | if (ret < 0) | 2201 | if (ret < 0) |
| 2200 | mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " | 2202 | mlog(ML_ERROR, "%s: res %.*s, error %d send DEREF to node %u\n", |
| 2201 | "node %u\n", ret, DLM_DEREF_LOCKRES_MSG, dlm->key, | 2203 | dlm->name, namelen, lockname, ret, res->owner); |
| 2202 | res->owner); | ||
| 2203 | else if (r < 0) { | 2204 | else if (r < 0) { |
| 2204 | /* BAD. other node says I did not have a ref. */ | 2205 | /* BAD. other node says I did not have a ref. */ |
| 2205 | mlog(ML_ERROR,"while dropping ref on %s:%.*s " | 2206 | mlog(ML_ERROR, "%s: res %.*s, DEREF to node %u got %d\n", |
| 2206 | "(master=%u) got %d.\n", dlm->name, namelen, | 2207 | dlm->name, namelen, lockname, res->owner, r); |
| 2207 | lockname, res->owner, r); | ||
| 2208 | dlm_print_one_lock_resource(res); | 2208 | dlm_print_one_lock_resource(res); |
| 2209 | BUG(); | 2209 | BUG(); |
| 2210 | } | 2210 | } |
| @@ -2260,7 +2260,7 @@ int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 2260 | else { | 2260 | else { |
| 2261 | BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); | 2261 | BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); |
| 2262 | if (test_bit(node, res->refmap)) { | 2262 | if (test_bit(node, res->refmap)) { |
| 2263 | dlm_lockres_clear_refmap_bit(node, res); | 2263 | dlm_lockres_clear_refmap_bit(dlm, res, node); |
| 2264 | cleared = 1; | 2264 | cleared = 1; |
| 2265 | } | 2265 | } |
| 2266 | } | 2266 | } |
| @@ -2320,7 +2320,7 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) | |||
| 2320 | BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); | 2320 | BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF); |
| 2321 | if (test_bit(node, res->refmap)) { | 2321 | if (test_bit(node, res->refmap)) { |
| 2322 | __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); | 2322 | __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG); |
| 2323 | dlm_lockres_clear_refmap_bit(node, res); | 2323 | dlm_lockres_clear_refmap_bit(dlm, res, node); |
| 2324 | cleared = 1; | 2324 | cleared = 1; |
| 2325 | } | 2325 | } |
| 2326 | spin_unlock(&res->spinlock); | 2326 | spin_unlock(&res->spinlock); |
| @@ -2802,7 +2802,8 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, | |||
| 2802 | BUG_ON(!list_empty(&lock->bast_list)); | 2802 | BUG_ON(!list_empty(&lock->bast_list)); |
| 2803 | BUG_ON(lock->ast_pending); | 2803 | BUG_ON(lock->ast_pending); |
| 2804 | BUG_ON(lock->bast_pending); | 2804 | BUG_ON(lock->bast_pending); |
| 2805 | dlm_lockres_clear_refmap_bit(lock->ml.node, res); | 2805 | dlm_lockres_clear_refmap_bit(dlm, res, |
| 2806 | lock->ml.node); | ||
| 2806 | list_del_init(&lock->list); | 2807 | list_del_init(&lock->list); |
| 2807 | dlm_lock_put(lock); | 2808 | dlm_lock_put(lock); |
| 2808 | /* In a normal unlock, we would have added a | 2809 | /* In a normal unlock, we would have added a |
| @@ -2823,7 +2824,7 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, | |||
| 2823 | mlog(0, "%s:%.*s: node %u had a ref to this " | 2824 | mlog(0, "%s:%.*s: node %u had a ref to this " |
| 2824 | "migrating lockres, clearing\n", dlm->name, | 2825 | "migrating lockres, clearing\n", dlm->name, |
| 2825 | res->lockname.len, res->lockname.name, bit); | 2826 | res->lockname.len, res->lockname.name, bit); |
| 2826 | dlm_lockres_clear_refmap_bit(bit, res); | 2827 | dlm_lockres_clear_refmap_bit(dlm, res, bit); |
| 2827 | } | 2828 | } |
| 2828 | bit++; | 2829 | bit++; |
| 2829 | } | 2830 | } |
| @@ -2916,9 +2917,9 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm, | |||
| 2916 | &migrate, sizeof(migrate), nodenum, | 2917 | &migrate, sizeof(migrate), nodenum, |
| 2917 | &status); | 2918 | &status); |
| 2918 | if (ret < 0) { | 2919 | if (ret < 0) { |
| 2919 | mlog(ML_ERROR, "Error %d when sending message %u (key " | 2920 | mlog(ML_ERROR, "%s: res %.*s, Error %d send " |
| 2920 | "0x%x) to node %u\n", ret, DLM_MIGRATE_REQUEST_MSG, | 2921 | "MIGRATE_REQUEST to node %u\n", dlm->name, |
| 2921 | dlm->key, nodenum); | 2922 | migrate.namelen, migrate.name, ret, nodenum); |
| 2922 | if (!dlm_is_host_down(ret)) { | 2923 | if (!dlm_is_host_down(ret)) { |
| 2923 | mlog(ML_ERROR, "unhandled error=%d!\n", ret); | 2924 | mlog(ML_ERROR, "unhandled error=%d!\n", ret); |
| 2924 | BUG(); | 2925 | BUG(); |
| @@ -2937,7 +2938,7 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm, | |||
| 2937 | dlm->name, res->lockname.len, res->lockname.name, | 2938 | dlm->name, res->lockname.len, res->lockname.name, |
| 2938 | nodenum); | 2939 | nodenum); |
| 2939 | spin_lock(&res->spinlock); | 2940 | spin_lock(&res->spinlock); |
| 2940 | dlm_lockres_set_refmap_bit(nodenum, res); | 2941 | dlm_lockres_set_refmap_bit(dlm, res, nodenum); |
| 2941 | spin_unlock(&res->spinlock); | 2942 | spin_unlock(&res->spinlock); |
| 2942 | } | 2943 | } |
| 2943 | } | 2944 | } |
| @@ -3271,7 +3272,7 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, | |||
| 3271 | * mastery reference here since old_master will briefly have | 3272 | * mastery reference here since old_master will briefly have |
| 3272 | * a reference after the migration completes */ | 3273 | * a reference after the migration completes */ |
| 3273 | spin_lock(&res->spinlock); | 3274 | spin_lock(&res->spinlock); |
| 3274 | dlm_lockres_set_refmap_bit(old_master, res); | 3275 | dlm_lockres_set_refmap_bit(dlm, res, old_master); |
| 3275 | spin_unlock(&res->spinlock); | 3276 | spin_unlock(&res->spinlock); |
| 3276 | 3277 | ||
| 3277 | mlog(0, "now time to do a migrate request to other nodes\n"); | 3278 | mlog(0, "now time to do a migrate request to other nodes\n"); |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 7efab6d28a21..01ebfd0bdad7 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
| @@ -362,40 +362,38 @@ static int dlm_is_node_recovered(struct dlm_ctxt *dlm, u8 node) | |||
| 362 | } | 362 | } |
| 363 | 363 | ||
| 364 | 364 | ||
| 365 | int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout) | 365 | void dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout) |
| 366 | { | 366 | { |
| 367 | if (timeout) { | 367 | if (dlm_is_node_dead(dlm, node)) |
| 368 | mlog(ML_NOTICE, "%s: waiting %dms for notification of " | 368 | return; |
| 369 | "death of node %u\n", dlm->name, timeout, node); | 369 | |
| 370 | printk(KERN_NOTICE "o2dlm: Waiting on the death of node %u in " | ||
| 371 | "domain %s\n", node, dlm->name); | ||
| 372 | |||
| 373 | if (timeout) | ||
| 370 | wait_event_timeout(dlm->dlm_reco_thread_wq, | 374 | wait_event_timeout(dlm->dlm_reco_thread_wq, |
| 371 | dlm_is_node_dead(dlm, node), | 375 | dlm_is_node_dead(dlm, node), |
| 372 | msecs_to_jiffies(timeout)); | 376 | msecs_to_jiffies(timeout)); |
| 373 | } else { | 377 | else |
| 374 | mlog(ML_NOTICE, "%s: waiting indefinitely for notification " | ||
| 375 | "of death of node %u\n", dlm->name, node); | ||
| 376 | wait_event(dlm->dlm_reco_thread_wq, | 378 | wait_event(dlm->dlm_reco_thread_wq, |
| 377 | dlm_is_node_dead(dlm, node)); | 379 | dlm_is_node_dead(dlm, node)); |
| 378 | } | ||
| 379 | /* for now, return 0 */ | ||
| 380 | return 0; | ||
| 381 | } | 380 | } |
| 382 | 381 | ||
| 383 | int dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout) | 382 | void dlm_wait_for_node_recovery(struct dlm_ctxt *dlm, u8 node, int timeout) |
| 384 | { | 383 | { |
| 385 | if (timeout) { | 384 | if (dlm_is_node_recovered(dlm, node)) |
| 386 | mlog(0, "%s: waiting %dms for notification of " | 385 | return; |
| 387 | "recovery of node %u\n", dlm->name, timeout, node); | 386 | |
| 387 | printk(KERN_NOTICE "o2dlm: Waiting on the recovery of node %u in " | ||
| 388 | "domain %s\n", node, dlm->name); | ||
| 389 | |||
| 390 | if (timeout) | ||
| 388 | wait_event_timeout(dlm->dlm_reco_thread_wq, | 391 | wait_event_timeout(dlm->dlm_reco_thread_wq, |
| 389 | dlm_is_node_recovered(dlm, node), | 392 | dlm_is_node_recovered(dlm, node), |
| 390 | msecs_to_jiffies(timeout)); | 393 | msecs_to_jiffies(timeout)); |
| 391 | } else { | 394 | else |
| 392 | mlog(0, "%s: waiting indefinitely for notification " | ||
| 393 | "of recovery of node %u\n", dlm->name, node); | ||
| 394 | wait_event(dlm->dlm_reco_thread_wq, | 395 | wait_event(dlm->dlm_reco_thread_wq, |
| 395 | dlm_is_node_recovered(dlm, node)); | 396 | dlm_is_node_recovered(dlm, node)); |
| 396 | } | ||
| 397 | /* for now, return 0 */ | ||
| 398 | return 0; | ||
| 399 | } | 397 | } |
| 400 | 398 | ||
| 401 | /* callers of the top-level api calls (dlmlock/dlmunlock) should | 399 | /* callers of the top-level api calls (dlmlock/dlmunlock) should |
| @@ -430,6 +428,8 @@ static void dlm_begin_recovery(struct dlm_ctxt *dlm) | |||
| 430 | { | 428 | { |
| 431 | spin_lock(&dlm->spinlock); | 429 | spin_lock(&dlm->spinlock); |
| 432 | BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE); | 430 | BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE); |
| 431 | printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n", | ||
| 432 | dlm->name, dlm->reco.dead_node); | ||
| 433 | dlm->reco.state |= DLM_RECO_STATE_ACTIVE; | 433 | dlm->reco.state |= DLM_RECO_STATE_ACTIVE; |
| 434 | spin_unlock(&dlm->spinlock); | 434 | spin_unlock(&dlm->spinlock); |
| 435 | } | 435 | } |
| @@ -440,9 +440,18 @@ static void dlm_end_recovery(struct dlm_ctxt *dlm) | |||
| 440 | BUG_ON(!(dlm->reco.state & DLM_RECO_STATE_ACTIVE)); | 440 | BUG_ON(!(dlm->reco.state & DLM_RECO_STATE_ACTIVE)); |
| 441 | dlm->reco.state &= ~DLM_RECO_STATE_ACTIVE; | 441 | dlm->reco.state &= ~DLM_RECO_STATE_ACTIVE; |
| 442 | spin_unlock(&dlm->spinlock); | 442 | spin_unlock(&dlm->spinlock); |
| 443 | printk(KERN_NOTICE "o2dlm: End recovery on domain %s\n", dlm->name); | ||
| 443 | wake_up(&dlm->reco.event); | 444 | wake_up(&dlm->reco.event); |
| 444 | } | 445 | } |
| 445 | 446 | ||
| 447 | static void dlm_print_recovery_master(struct dlm_ctxt *dlm) | ||
| 448 | { | ||
| 449 | printk(KERN_NOTICE "o2dlm: Node %u (%s) is the Recovery Master for the " | ||
| 450 | "dead node %u in domain %s\n", dlm->reco.new_master, | ||
| 451 | (dlm->node_num == dlm->reco.new_master ? "me" : "he"), | ||
| 452 | dlm->reco.dead_node, dlm->name); | ||
| 453 | } | ||
| 454 | |||
| 446 | static int dlm_do_recovery(struct dlm_ctxt *dlm) | 455 | static int dlm_do_recovery(struct dlm_ctxt *dlm) |
| 447 | { | 456 | { |
| 448 | int status = 0; | 457 | int status = 0; |
| @@ -505,9 +514,8 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) | |||
| 505 | } | 514 | } |
| 506 | mlog(0, "another node will master this recovery session.\n"); | 515 | mlog(0, "another node will master this recovery session.\n"); |
| 507 | } | 516 | } |
| 508 | mlog(0, "dlm=%s (%d), new_master=%u, this node=%u, dead_node=%u\n", | 517 | |
| 509 | dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), dlm->reco.new_master, | 518 | dlm_print_recovery_master(dlm); |
| 510 | dlm->node_num, dlm->reco.dead_node); | ||
| 511 | 519 | ||
| 512 | /* it is safe to start everything back up here | 520 | /* it is safe to start everything back up here |
| 513 | * because all of the dead node's lock resources | 521 | * because all of the dead node's lock resources |
| @@ -518,15 +526,13 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) | |||
| 518 | return 0; | 526 | return 0; |
| 519 | 527 | ||
| 520 | master_here: | 528 | master_here: |
| 521 | mlog(ML_NOTICE, "(%d) Node %u is the Recovery Master for the Dead Node " | 529 | dlm_print_recovery_master(dlm); |
| 522 | "%u for Domain %s\n", task_pid_nr(dlm->dlm_reco_thread_task), | ||
| 523 | dlm->node_num, dlm->reco.dead_node, dlm->name); | ||
| 524 | 530 | ||
| 525 | status = dlm_remaster_locks(dlm, dlm->reco.dead_node); | 531 | status = dlm_remaster_locks(dlm, dlm->reco.dead_node); |
| 526 | if (status < 0) { | 532 | if (status < 0) { |
| 527 | /* we should never hit this anymore */ | 533 | /* we should never hit this anymore */ |
| 528 | mlog(ML_ERROR, "error %d remastering locks for node %u, " | 534 | mlog(ML_ERROR, "%s: Error %d remastering locks for node %u, " |
| 529 | "retrying.\n", status, dlm->reco.dead_node); | 535 | "retrying.\n", dlm->name, status, dlm->reco.dead_node); |
| 530 | /* yield a bit to allow any final network messages | 536 | /* yield a bit to allow any final network messages |
| 531 | * to get handled on remaining nodes */ | 537 | * to get handled on remaining nodes */ |
| 532 | msleep(100); | 538 | msleep(100); |
| @@ -567,7 +573,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) | |||
| 567 | BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT); | 573 | BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT); |
| 568 | ndata->state = DLM_RECO_NODE_DATA_REQUESTING; | 574 | ndata->state = DLM_RECO_NODE_DATA_REQUESTING; |
| 569 | 575 | ||
| 570 | mlog(0, "requesting lock info from node %u\n", | 576 | mlog(0, "%s: Requesting lock info from node %u\n", dlm->name, |
| 571 | ndata->node_num); | 577 | ndata->node_num); |
| 572 | 578 | ||
| 573 | if (ndata->node_num == dlm->node_num) { | 579 | if (ndata->node_num == dlm->node_num) { |
| @@ -640,7 +646,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) | |||
| 640 | spin_unlock(&dlm_reco_state_lock); | 646 | spin_unlock(&dlm_reco_state_lock); |
| 641 | } | 647 | } |
| 642 | 648 | ||
| 643 | mlog(0, "done requesting all lock info\n"); | 649 | mlog(0, "%s: Done requesting all lock info\n", dlm->name); |
| 644 | 650 | ||
| 645 | /* nodes should be sending reco data now | 651 | /* nodes should be sending reco data now |
| 646 | * just need to wait */ | 652 | * just need to wait */ |
| @@ -802,10 +808,9 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, | |||
| 802 | 808 | ||
| 803 | /* negative status is handled by caller */ | 809 | /* negative status is handled by caller */ |
| 804 | if (ret < 0) | 810 | if (ret < 0) |
| 805 | mlog(ML_ERROR, "Error %d when sending message %u (key " | 811 | mlog(ML_ERROR, "%s: Error %d send LOCK_REQUEST to node %u " |
| 806 | "0x%x) to node %u\n", ret, DLM_LOCK_REQUEST_MSG, | 812 | "to recover dead node %u\n", dlm->name, ret, |
| 807 | dlm->key, request_from); | 813 | request_from, dead_node); |
| 808 | |||
| 809 | // return from here, then | 814 | // return from here, then |
| 810 | // sleep until all received or error | 815 | // sleep until all received or error |
| 811 | return ret; | 816 | return ret; |
| @@ -956,9 +961,9 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to) | |||
| 956 | ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg, | 961 | ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg, |
| 957 | sizeof(done_msg), send_to, &tmpret); | 962 | sizeof(done_msg), send_to, &tmpret); |
| 958 | if (ret < 0) { | 963 | if (ret < 0) { |
| 959 | mlog(ML_ERROR, "Error %d when sending message %u (key " | 964 | mlog(ML_ERROR, "%s: Error %d send RECO_DATA_DONE to node %u " |
| 960 | "0x%x) to node %u\n", ret, DLM_RECO_DATA_DONE_MSG, | 965 | "to recover dead node %u\n", dlm->name, ret, send_to, |
| 961 | dlm->key, send_to); | 966 | dead_node); |
| 962 | if (!dlm_is_host_down(ret)) { | 967 | if (!dlm_is_host_down(ret)) { |
| 963 | BUG(); | 968 | BUG(); |
| 964 | } | 969 | } |
| @@ -1127,9 +1132,11 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm, | |||
| 1127 | if (ret < 0) { | 1132 | if (ret < 0) { |
| 1128 | /* XXX: negative status is not handled. | 1133 | /* XXX: negative status is not handled. |
| 1129 | * this will end up killing this node. */ | 1134 | * this will end up killing this node. */ |
| 1130 | mlog(ML_ERROR, "Error %d when sending message %u (key " | 1135 | mlog(ML_ERROR, "%s: res %.*s, Error %d send MIG_LOCKRES to " |
| 1131 | "0x%x) to node %u\n", ret, DLM_MIG_LOCKRES_MSG, | 1136 | "node %u (%s)\n", dlm->name, mres->lockname_len, |
| 1132 | dlm->key, send_to); | 1137 | mres->lockname, ret, send_to, |
| 1138 | (orig_flags & DLM_MRES_MIGRATION ? | ||
| 1139 | "migration" : "recovery")); | ||
| 1133 | } else { | 1140 | } else { |
| 1134 | /* might get an -ENOMEM back here */ | 1141 | /* might get an -ENOMEM back here */ |
| 1135 | ret = status; | 1142 | ret = status; |
| @@ -1767,7 +1774,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
| 1767 | dlm->name, mres->lockname_len, mres->lockname, | 1774 | dlm->name, mres->lockname_len, mres->lockname, |
| 1768 | from); | 1775 | from); |
| 1769 | spin_lock(&res->spinlock); | 1776 | spin_lock(&res->spinlock); |
| 1770 | dlm_lockres_set_refmap_bit(from, res); | 1777 | dlm_lockres_set_refmap_bit(dlm, res, from); |
| 1771 | spin_unlock(&res->spinlock); | 1778 | spin_unlock(&res->spinlock); |
| 1772 | added++; | 1779 | added++; |
| 1773 | break; | 1780 | break; |
| @@ -1965,7 +1972,7 @@ skip_lvb: | |||
| 1965 | mlog(0, "%s:%.*s: added lock for node %u, " | 1972 | mlog(0, "%s:%.*s: added lock for node %u, " |
| 1966 | "setting refmap bit\n", dlm->name, | 1973 | "setting refmap bit\n", dlm->name, |
| 1967 | res->lockname.len, res->lockname.name, ml->node); | 1974 | res->lockname.len, res->lockname.name, ml->node); |
| 1968 | dlm_lockres_set_refmap_bit(ml->node, res); | 1975 | dlm_lockres_set_refmap_bit(dlm, res, ml->node); |
| 1969 | added++; | 1976 | added++; |
| 1970 | } | 1977 | } |
| 1971 | spin_unlock(&res->spinlock); | 1978 | spin_unlock(&res->spinlock); |
| @@ -2084,6 +2091,9 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, | |||
| 2084 | 2091 | ||
| 2085 | list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { | 2092 | list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { |
| 2086 | if (res->owner == dead_node) { | 2093 | if (res->owner == dead_node) { |
| 2094 | mlog(0, "%s: res %.*s, Changing owner from %u to %u\n", | ||
| 2095 | dlm->name, res->lockname.len, res->lockname.name, | ||
| 2096 | res->owner, new_master); | ||
| 2087 | list_del_init(&res->recovering); | 2097 | list_del_init(&res->recovering); |
| 2088 | spin_lock(&res->spinlock); | 2098 | spin_lock(&res->spinlock); |
| 2089 | /* new_master has our reference from | 2099 | /* new_master has our reference from |
| @@ -2105,40 +2115,30 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm, | |||
| 2105 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { | 2115 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { |
| 2106 | bucket = dlm_lockres_hash(dlm, i); | 2116 | bucket = dlm_lockres_hash(dlm, i); |
| 2107 | hlist_for_each_entry(res, hash_iter, bucket, hash_node) { | 2117 | hlist_for_each_entry(res, hash_iter, bucket, hash_node) { |
| 2108 | if (res->state & DLM_LOCK_RES_RECOVERING) { | 2118 | if (!(res->state & DLM_LOCK_RES_RECOVERING)) |
| 2109 | if (res->owner == dead_node) { | 2119 | continue; |
| 2110 | mlog(0, "(this=%u) res %.*s owner=%u " | ||
| 2111 | "was not on recovering list, but " | ||
| 2112 | "clearing state anyway\n", | ||
| 2113 | dlm->node_num, res->lockname.len, | ||
| 2114 | res->lockname.name, new_master); | ||
| 2115 | } else if (res->owner == dlm->node_num) { | ||
| 2116 | mlog(0, "(this=%u) res %.*s owner=%u " | ||
| 2117 | "was not on recovering list, " | ||
| 2118 | "owner is THIS node, clearing\n", | ||
| 2119 | dlm->node_num, res->lockname.len, | ||
| 2120 | res->lockname.name, new_master); | ||
| 2121 | } else | ||
| 2122 | continue; | ||
| 2123 | 2120 | ||
| 2124 | if (!list_empty(&res->recovering)) { | 2121 | if (res->owner != dead_node && |
| 2125 | mlog(0, "%s:%.*s: lockres was " | 2122 | res->owner != dlm->node_num) |
| 2126 | "marked RECOVERING, owner=%u\n", | 2123 | continue; |
| 2127 | dlm->name, res->lockname.len, | 2124 | |
| 2128 | res->lockname.name, res->owner); | 2125 | if (!list_empty(&res->recovering)) { |
| 2129 | list_del_init(&res->recovering); | 2126 | list_del_init(&res->recovering); |
| 2130 | dlm_lockres_put(res); | 2127 | dlm_lockres_put(res); |
| 2131 | } | ||
| 2132 | spin_lock(&res->spinlock); | ||
| 2133 | /* new_master has our reference from | ||
| 2134 | * the lock state sent during recovery */ | ||
| 2135 | dlm_change_lockres_owner(dlm, res, new_master); | ||
| 2136 | res->state &= ~DLM_LOCK_RES_RECOVERING; | ||
| 2137 | if (__dlm_lockres_has_locks(res)) | ||
| 2138 | __dlm_dirty_lockres(dlm, res); | ||
| 2139 | spin_unlock(&res->spinlock); | ||
| 2140 | wake_up(&res->wq); | ||
| 2141 | } | 2128 | } |
| 2129 | |||
| 2130 | /* new_master has our reference from | ||
| 2131 | * the lock state sent during recovery */ | ||
| 2132 | mlog(0, "%s: res %.*s, Changing owner from %u to %u\n", | ||
| 2133 | dlm->name, res->lockname.len, res->lockname.name, | ||
| 2134 | res->owner, new_master); | ||
| 2135 | spin_lock(&res->spinlock); | ||
| 2136 | dlm_change_lockres_owner(dlm, res, new_master); | ||
| 2137 | res->state &= ~DLM_LOCK_RES_RECOVERING; | ||
| 2138 | if (__dlm_lockres_has_locks(res)) | ||
| 2139 | __dlm_dirty_lockres(dlm, res); | ||
| 2140 | spin_unlock(&res->spinlock); | ||
| 2141 | wake_up(&res->wq); | ||
| 2142 | } | 2142 | } |
| 2143 | } | 2143 | } |
| 2144 | } | 2144 | } |
| @@ -2252,12 +2252,12 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, | |||
| 2252 | res->lockname.len, res->lockname.name, freed, dead_node); | 2252 | res->lockname.len, res->lockname.name, freed, dead_node); |
| 2253 | __dlm_print_one_lock_resource(res); | 2253 | __dlm_print_one_lock_resource(res); |
| 2254 | } | 2254 | } |
| 2255 | dlm_lockres_clear_refmap_bit(dead_node, res); | 2255 | dlm_lockres_clear_refmap_bit(dlm, res, dead_node); |
| 2256 | } else if (test_bit(dead_node, res->refmap)) { | 2256 | } else if (test_bit(dead_node, res->refmap)) { |
| 2257 | mlog(0, "%s:%.*s: dead node %u had a ref, but had " | 2257 | mlog(0, "%s:%.*s: dead node %u had a ref, but had " |
| 2258 | "no locks and had not purged before dying\n", dlm->name, | 2258 | "no locks and had not purged before dying\n", dlm->name, |
| 2259 | res->lockname.len, res->lockname.name, dead_node); | 2259 | res->lockname.len, res->lockname.name, dead_node); |
| 2260 | dlm_lockres_clear_refmap_bit(dead_node, res); | 2260 | dlm_lockres_clear_refmap_bit(dlm, res, dead_node); |
| 2261 | } | 2261 | } |
| 2262 | 2262 | ||
| 2263 | /* do not kick thread yet */ | 2263 | /* do not kick thread yet */ |
| @@ -2324,9 +2324,9 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | |||
| 2324 | dlm_revalidate_lvb(dlm, res, dead_node); | 2324 | dlm_revalidate_lvb(dlm, res, dead_node); |
| 2325 | if (res->owner == dead_node) { | 2325 | if (res->owner == dead_node) { |
| 2326 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { | 2326 | if (res->state & DLM_LOCK_RES_DROPPING_REF) { |
| 2327 | mlog(ML_NOTICE, "Ignore %.*s for " | 2327 | mlog(ML_NOTICE, "%s: res %.*s, Skip " |
| 2328 | "recovery as it is being freed\n", | 2328 | "recovery as it is being freed\n", |
| 2329 | res->lockname.len, | 2329 | dlm->name, res->lockname.len, |
| 2330 | res->lockname.name); | 2330 | res->lockname.name); |
| 2331 | } else | 2331 | } else |
| 2332 | dlm_move_lockres_to_recovery_list(dlm, | 2332 | dlm_move_lockres_to_recovery_list(dlm, |
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 1d6d1d22c471..e73c833fc2a1 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c | |||
| @@ -94,24 +94,26 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res) | |||
| 94 | { | 94 | { |
| 95 | int bit; | 95 | int bit; |
| 96 | 96 | ||
| 97 | assert_spin_locked(&res->spinlock); | ||
| 98 | |||
| 97 | if (__dlm_lockres_has_locks(res)) | 99 | if (__dlm_lockres_has_locks(res)) |
| 98 | return 0; | 100 | return 0; |
| 99 | 101 | ||
| 102 | /* Locks are in the process of being created */ | ||
| 103 | if (res->inflight_locks) | ||
| 104 | return 0; | ||
| 105 | |||
| 100 | if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) | 106 | if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) |
| 101 | return 0; | 107 | return 0; |
| 102 | 108 | ||
| 103 | if (res->state & DLM_LOCK_RES_RECOVERING) | 109 | if (res->state & DLM_LOCK_RES_RECOVERING) |
| 104 | return 0; | 110 | return 0; |
| 105 | 111 | ||
| 112 | /* Another node has this resource with this node as the master */ | ||
| 106 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | 113 | bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); |
| 107 | if (bit < O2NM_MAX_NODES) | 114 | if (bit < O2NM_MAX_NODES) |
| 108 | return 0; | 115 | return 0; |
| 109 | 116 | ||
| 110 | /* | ||
| 111 | * since the bit for dlm->node_num is not set, inflight_locks better | ||
| 112 | * be zero | ||
| 113 | */ | ||
| 114 | BUG_ON(res->inflight_locks != 0); | ||
| 115 | return 1; | 117 | return 1; |
| 116 | } | 118 | } |
| 117 | 119 | ||
| @@ -185,8 +187,6 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm, | |||
| 185 | /* clear our bit from the master's refmap, ignore errors */ | 187 | /* clear our bit from the master's refmap, ignore errors */ |
| 186 | ret = dlm_drop_lockres_ref(dlm, res); | 188 | ret = dlm_drop_lockres_ref(dlm, res); |
| 187 | if (ret < 0) { | 189 | if (ret < 0) { |
| 188 | mlog(ML_ERROR, "%s: deref %.*s failed %d\n", dlm->name, | ||
| 189 | res->lockname.len, res->lockname.name, ret); | ||
| 190 | if (!dlm_is_host_down(ret)) | 190 | if (!dlm_is_host_down(ret)) |
| 191 | BUG(); | 191 | BUG(); |
| 192 | } | 192 | } |
| @@ -209,7 +209,7 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm, | |||
| 209 | BUG(); | 209 | BUG(); |
| 210 | } | 210 | } |
| 211 | 211 | ||
| 212 | __dlm_unhash_lockres(res); | 212 | __dlm_unhash_lockres(dlm, res); |
| 213 | 213 | ||
| 214 | /* lockres is not in the hash now. drop the flag and wake up | 214 | /* lockres is not in the hash now. drop the flag and wake up |
| 215 | * any processes waiting in dlm_get_lock_resource. */ | 215 | * any processes waiting in dlm_get_lock_resource. */ |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index e1ed5e502ff2..81a4cd22f80b 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
| @@ -1692,7 +1692,7 @@ int ocfs2_open_lock(struct inode *inode) | |||
| 1692 | mlog(0, "inode %llu take PRMODE open lock\n", | 1692 | mlog(0, "inode %llu take PRMODE open lock\n", |
| 1693 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 1693 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
| 1694 | 1694 | ||
| 1695 | if (ocfs2_mount_local(osb)) | 1695 | if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) |
| 1696 | goto out; | 1696 | goto out; |
| 1697 | 1697 | ||
| 1698 | lockres = &OCFS2_I(inode)->ip_open_lockres; | 1698 | lockres = &OCFS2_I(inode)->ip_open_lockres; |
| @@ -1718,6 +1718,12 @@ int ocfs2_try_open_lock(struct inode *inode, int write) | |||
| 1718 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 1718 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
| 1719 | write ? "EXMODE" : "PRMODE"); | 1719 | write ? "EXMODE" : "PRMODE"); |
| 1720 | 1720 | ||
| 1721 | if (ocfs2_is_hard_readonly(osb)) { | ||
| 1722 | if (write) | ||
| 1723 | status = -EROFS; | ||
| 1724 | goto out; | ||
| 1725 | } | ||
| 1726 | |||
| 1721 | if (ocfs2_mount_local(osb)) | 1727 | if (ocfs2_mount_local(osb)) |
| 1722 | goto out; | 1728 | goto out; |
| 1723 | 1729 | ||
| @@ -2298,7 +2304,7 @@ int ocfs2_inode_lock_full_nested(struct inode *inode, | |||
| 2298 | if (ocfs2_is_hard_readonly(osb)) { | 2304 | if (ocfs2_is_hard_readonly(osb)) { |
| 2299 | if (ex) | 2305 | if (ex) |
| 2300 | status = -EROFS; | 2306 | status = -EROFS; |
| 2301 | goto bail; | 2307 | goto getbh; |
| 2302 | } | 2308 | } |
| 2303 | 2309 | ||
| 2304 | if (ocfs2_mount_local(osb)) | 2310 | if (ocfs2_mount_local(osb)) |
| @@ -2356,7 +2362,7 @@ local: | |||
| 2356 | mlog_errno(status); | 2362 | mlog_errno(status); |
| 2357 | goto bail; | 2363 | goto bail; |
| 2358 | } | 2364 | } |
| 2359 | 2365 | getbh: | |
| 2360 | if (ret_bh) { | 2366 | if (ret_bh) { |
| 2361 | status = ocfs2_assign_bh(inode, ret_bh, local_bh); | 2367 | status = ocfs2_assign_bh(inode, ret_bh, local_bh); |
| 2362 | if (status < 0) { | 2368 | if (status < 0) { |
| @@ -2628,8 +2634,11 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex) | |||
| 2628 | 2634 | ||
| 2629 | BUG_ON(!dl); | 2635 | BUG_ON(!dl); |
| 2630 | 2636 | ||
| 2631 | if (ocfs2_is_hard_readonly(osb)) | 2637 | if (ocfs2_is_hard_readonly(osb)) { |
| 2632 | return -EROFS; | 2638 | if (ex) |
| 2639 | return -EROFS; | ||
| 2640 | return 0; | ||
| 2641 | } | ||
| 2633 | 2642 | ||
| 2634 | if (ocfs2_mount_local(osb)) | 2643 | if (ocfs2_mount_local(osb)) |
| 2635 | return 0; | 2644 | return 0; |
| @@ -2647,7 +2656,7 @@ void ocfs2_dentry_unlock(struct dentry *dentry, int ex) | |||
| 2647 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; | 2656 | struct ocfs2_dentry_lock *dl = dentry->d_fsdata; |
| 2648 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); | 2657 | struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); |
| 2649 | 2658 | ||
| 2650 | if (!ocfs2_mount_local(osb)) | 2659 | if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) |
| 2651 | ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); | 2660 | ocfs2_cluster_unlock(osb, &dl->dl_lockres, level); |
| 2652 | } | 2661 | } |
| 2653 | 2662 | ||
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 23457b491e8c..2f5b92ef0e53 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c | |||
| @@ -832,6 +832,102 @@ out: | |||
| 832 | return ret; | 832 | return ret; |
| 833 | } | 833 | } |
| 834 | 834 | ||
| 835 | int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin) | ||
| 836 | { | ||
| 837 | struct inode *inode = file->f_mapping->host; | ||
| 838 | int ret; | ||
| 839 | unsigned int is_last = 0, is_data = 0; | ||
| 840 | u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; | ||
| 841 | u32 cpos, cend, clen, hole_size; | ||
| 842 | u64 extoff, extlen; | ||
| 843 | struct buffer_head *di_bh = NULL; | ||
| 844 | struct ocfs2_extent_rec rec; | ||
| 845 | |||
| 846 | BUG_ON(origin != SEEK_DATA && origin != SEEK_HOLE); | ||
| 847 | |||
| 848 | ret = ocfs2_inode_lock(inode, &di_bh, 0); | ||
| 849 | if (ret) { | ||
| 850 | mlog_errno(ret); | ||
| 851 | goto out; | ||
| 852 | } | ||
| 853 | |||
| 854 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 855 | |||
| 856 | if (*offset >= inode->i_size) { | ||
| 857 | ret = -ENXIO; | ||
| 858 | goto out_unlock; | ||
| 859 | } | ||
| 860 | |||
| 861 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
| 862 | if (origin == SEEK_HOLE) | ||
| 863 | *offset = inode->i_size; | ||
| 864 | goto out_unlock; | ||
| 865 | } | ||
| 866 | |||
| 867 | clen = 0; | ||
| 868 | cpos = *offset >> cs_bits; | ||
| 869 | cend = ocfs2_clusters_for_bytes(inode->i_sb, inode->i_size); | ||
| 870 | |||
| 871 | while (cpos < cend && !is_last) { | ||
| 872 | ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size, | ||
| 873 | &rec, &is_last); | ||
| 874 | if (ret) { | ||
| 875 | mlog_errno(ret); | ||
| 876 | goto out_unlock; | ||
| 877 | } | ||
| 878 | |||
| 879 | extoff = cpos; | ||
| 880 | extoff <<= cs_bits; | ||
| 881 | |||
| 882 | if (rec.e_blkno == 0ULL) { | ||
| 883 | clen = hole_size; | ||
| 884 | is_data = 0; | ||
| 885 | } else { | ||
| 886 | clen = le16_to_cpu(rec.e_leaf_clusters) - | ||
| 887 | (cpos - le32_to_cpu(rec.e_cpos)); | ||
| 888 | is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ? 0 : 1; | ||
| 889 | } | ||
| 890 | |||
| 891 | if ((!is_data && origin == SEEK_HOLE) || | ||
| 892 | (is_data && origin == SEEK_DATA)) { | ||
| 893 | if (extoff > *offset) | ||
| 894 | *offset = extoff; | ||
| 895 | goto out_unlock; | ||
| 896 | } | ||
| 897 | |||
| 898 | if (!is_last) | ||
| 899 | cpos += clen; | ||
| 900 | } | ||
| 901 | |||
| 902 | if (origin == SEEK_HOLE) { | ||
| 903 | extoff = cpos; | ||
| 904 | extoff <<= cs_bits; | ||
| 905 | extlen = clen; | ||
| 906 | extlen <<= cs_bits; | ||
| 907 | |||
| 908 | if ((extoff + extlen) > inode->i_size) | ||
| 909 | extlen = inode->i_size - extoff; | ||
| 910 | extoff += extlen; | ||
| 911 | if (extoff > *offset) | ||
| 912 | *offset = extoff; | ||
| 913 | goto out_unlock; | ||
| 914 | } | ||
| 915 | |||
| 916 | ret = -ENXIO; | ||
| 917 | |||
| 918 | out_unlock: | ||
| 919 | |||
| 920 | brelse(di_bh); | ||
| 921 | |||
| 922 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 923 | |||
| 924 | ocfs2_inode_unlock(inode, 0); | ||
| 925 | out: | ||
| 926 | if (ret && ret != -ENXIO) | ||
| 927 | ret = -ENXIO; | ||
| 928 | return ret; | ||
| 929 | } | ||
| 930 | |||
| 835 | int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, | 931 | int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, |
| 836 | struct buffer_head *bhs[], int flags, | 932 | struct buffer_head *bhs[], int flags, |
| 837 | int (*validate)(struct super_block *sb, | 933 | int (*validate)(struct super_block *sb, |
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h index e79d41c2c909..67ea57d2fd59 100644 --- a/fs/ocfs2/extent_map.h +++ b/fs/ocfs2/extent_map.h | |||
| @@ -53,6 +53,8 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, | |||
| 53 | int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 53 | int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
| 54 | u64 map_start, u64 map_len); | 54 | u64 map_start, u64 map_len); |
| 55 | 55 | ||
| 56 | int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin); | ||
| 57 | |||
| 56 | int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, | 58 | int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, |
| 57 | u32 *p_cluster, u32 *num_clusters, | 59 | u32 *p_cluster, u32 *num_clusters, |
| 58 | struct ocfs2_extent_list *el, | 60 | struct ocfs2_extent_list *el, |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index de4ea1af041b..6e396683c3d4 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -1950,6 +1950,9 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | |||
| 1950 | if (ret < 0) | 1950 | if (ret < 0) |
| 1951 | mlog_errno(ret); | 1951 | mlog_errno(ret); |
| 1952 | 1952 | ||
| 1953 | if (file->f_flags & O_SYNC) | ||
| 1954 | handle->h_sync = 1; | ||
| 1955 | |||
| 1953 | ocfs2_commit_trans(osb, handle); | 1956 | ocfs2_commit_trans(osb, handle); |
| 1954 | 1957 | ||
| 1955 | out_inode_unlock: | 1958 | out_inode_unlock: |
| @@ -2052,6 +2055,23 @@ out: | |||
| 2052 | return ret; | 2055 | return ret; |
| 2053 | } | 2056 | } |
| 2054 | 2057 | ||
| 2058 | static void ocfs2_aiodio_wait(struct inode *inode) | ||
| 2059 | { | ||
| 2060 | wait_queue_head_t *wq = ocfs2_ioend_wq(inode); | ||
| 2061 | |||
| 2062 | wait_event(*wq, (atomic_read(&OCFS2_I(inode)->ip_unaligned_aio) == 0)); | ||
| 2063 | } | ||
| 2064 | |||
| 2065 | static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos) | ||
| 2066 | { | ||
| 2067 | int blockmask = inode->i_sb->s_blocksize - 1; | ||
| 2068 | loff_t final_size = pos + count; | ||
| 2069 | |||
| 2070 | if ((pos & blockmask) || (final_size & blockmask)) | ||
| 2071 | return 1; | ||
| 2072 | return 0; | ||
| 2073 | } | ||
| 2074 | |||
| 2055 | static int ocfs2_prepare_inode_for_refcount(struct inode *inode, | 2075 | static int ocfs2_prepare_inode_for_refcount(struct inode *inode, |
| 2056 | struct file *file, | 2076 | struct file *file, |
| 2057 | loff_t pos, size_t count, | 2077 | loff_t pos, size_t count, |
| @@ -2230,6 +2250,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
| 2230 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2250 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 2231 | int full_coherency = !(osb->s_mount_opt & | 2251 | int full_coherency = !(osb->s_mount_opt & |
| 2232 | OCFS2_MOUNT_COHERENCY_BUFFERED); | 2252 | OCFS2_MOUNT_COHERENCY_BUFFERED); |
| 2253 | int unaligned_dio = 0; | ||
| 2233 | 2254 | ||
| 2234 | trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, | 2255 | trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, |
| 2235 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 2256 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
| @@ -2297,6 +2318,10 @@ relock: | |||
| 2297 | goto out; | 2318 | goto out; |
| 2298 | } | 2319 | } |
| 2299 | 2320 | ||
| 2321 | if (direct_io && !is_sync_kiocb(iocb)) | ||
| 2322 | unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_left, | ||
| 2323 | *ppos); | ||
| 2324 | |||
| 2300 | /* | 2325 | /* |
| 2301 | * We can't complete the direct I/O as requested, fall back to | 2326 | * We can't complete the direct I/O as requested, fall back to |
| 2302 | * buffered I/O. | 2327 | * buffered I/O. |
| @@ -2311,6 +2336,18 @@ relock: | |||
| 2311 | goto relock; | 2336 | goto relock; |
| 2312 | } | 2337 | } |
| 2313 | 2338 | ||
| 2339 | if (unaligned_dio) { | ||
| 2340 | /* | ||
| 2341 | * Wait on previous unaligned aio to complete before | ||
| 2342 | * proceeding. | ||
| 2343 | */ | ||
| 2344 | ocfs2_aiodio_wait(inode); | ||
| 2345 | |||
| 2346 | /* Mark the iocb as needing a decrement in ocfs2_dio_end_io */ | ||
| 2347 | atomic_inc(&OCFS2_I(inode)->ip_unaligned_aio); | ||
| 2348 | ocfs2_iocb_set_unaligned_aio(iocb); | ||
| 2349 | } | ||
| 2350 | |||
| 2314 | /* | 2351 | /* |
| 2315 | * To later detect whether a journal commit for sync writes is | 2352 | * To later detect whether a journal commit for sync writes is |
| 2316 | * necessary, we sample i_size, and cluster count here. | 2353 | * necessary, we sample i_size, and cluster count here. |
| @@ -2382,8 +2419,12 @@ out_dio: | |||
| 2382 | if ((ret == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) { | 2419 | if ((ret == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) { |
| 2383 | rw_level = -1; | 2420 | rw_level = -1; |
| 2384 | have_alloc_sem = 0; | 2421 | have_alloc_sem = 0; |
| 2422 | unaligned_dio = 0; | ||
| 2385 | } | 2423 | } |
| 2386 | 2424 | ||
| 2425 | if (unaligned_dio) | ||
| 2426 | atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio); | ||
| 2427 | |||
| 2387 | out: | 2428 | out: |
| 2388 | if (rw_level != -1) | 2429 | if (rw_level != -1) |
| 2389 | ocfs2_rw_unlock(inode, rw_level); | 2430 | ocfs2_rw_unlock(inode, rw_level); |
| @@ -2591,6 +2632,57 @@ bail: | |||
| 2591 | return ret; | 2632 | return ret; |
| 2592 | } | 2633 | } |
| 2593 | 2634 | ||
| 2635 | /* Refer generic_file_llseek_unlocked() */ | ||
| 2636 | static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int origin) | ||
| 2637 | { | ||
| 2638 | struct inode *inode = file->f_mapping->host; | ||
| 2639 | int ret = 0; | ||
| 2640 | |||
| 2641 | mutex_lock(&inode->i_mutex); | ||
| 2642 | |||
| 2643 | switch (origin) { | ||
| 2644 | case SEEK_SET: | ||
| 2645 | break; | ||
| 2646 | case SEEK_END: | ||
| 2647 | offset += inode->i_size; | ||
| 2648 | break; | ||
| 2649 | case SEEK_CUR: | ||
| 2650 | if (offset == 0) { | ||
| 2651 | offset = file->f_pos; | ||
| 2652 | goto out; | ||
| 2653 | } | ||
| 2654 | offset += file->f_pos; | ||
| 2655 | break; | ||
| 2656 | case SEEK_DATA: | ||
| 2657 | case SEEK_HOLE: | ||
| 2658 | ret = ocfs2_seek_data_hole_offset(file, &offset, origin); | ||
| 2659 | if (ret) | ||
| 2660 | goto out; | ||
| 2661 | break; | ||
| 2662 | default: | ||
| 2663 | ret = -EINVAL; | ||
| 2664 | goto out; | ||
| 2665 | } | ||
| 2666 | |||
| 2667 | if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) | ||
| 2668 | ret = -EINVAL; | ||
| 2669 | if (!ret && offset > inode->i_sb->s_maxbytes) | ||
| 2670 | ret = -EINVAL; | ||
| 2671 | if (ret) | ||
| 2672 | goto out; | ||
| 2673 | |||
| 2674 | if (offset != file->f_pos) { | ||
| 2675 | file->f_pos = offset; | ||
| 2676 | file->f_version = 0; | ||
| 2677 | } | ||
| 2678 | |||
| 2679 | out: | ||
| 2680 | mutex_unlock(&inode->i_mutex); | ||
| 2681 | if (ret) | ||
| 2682 | return ret; | ||
| 2683 | return offset; | ||
| 2684 | } | ||
| 2685 | |||
| 2594 | const struct inode_operations ocfs2_file_iops = { | 2686 | const struct inode_operations ocfs2_file_iops = { |
| 2595 | .setattr = ocfs2_setattr, | 2687 | .setattr = ocfs2_setattr, |
| 2596 | .getattr = ocfs2_getattr, | 2688 | .getattr = ocfs2_getattr, |
| @@ -2615,7 +2707,7 @@ const struct inode_operations ocfs2_special_file_iops = { | |||
| 2615 | * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks! | 2707 | * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks! |
| 2616 | */ | 2708 | */ |
| 2617 | const struct file_operations ocfs2_fops = { | 2709 | const struct file_operations ocfs2_fops = { |
| 2618 | .llseek = generic_file_llseek, | 2710 | .llseek = ocfs2_file_llseek, |
| 2619 | .read = do_sync_read, | 2711 | .read = do_sync_read, |
| 2620 | .write = do_sync_write, | 2712 | .write = do_sync_write, |
| 2621 | .mmap = ocfs2_mmap, | 2713 | .mmap = ocfs2_mmap, |
| @@ -2663,7 +2755,7 @@ const struct file_operations ocfs2_dops = { | |||
| 2663 | * the cluster. | 2755 | * the cluster. |
| 2664 | */ | 2756 | */ |
| 2665 | const struct file_operations ocfs2_fops_no_plocks = { | 2757 | const struct file_operations ocfs2_fops_no_plocks = { |
| 2666 | .llseek = generic_file_llseek, | 2758 | .llseek = ocfs2_file_llseek, |
| 2667 | .read = do_sync_read, | 2759 | .read = do_sync_read, |
| 2668 | .write = do_sync_write, | 2760 | .write = do_sync_write, |
| 2669 | .mmap = ocfs2_mmap, | 2761 | .mmap = ocfs2_mmap, |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index a22d2c098890..17454a904d7b 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
| @@ -951,7 +951,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode, | |||
| 951 | trace_ocfs2_cleanup_delete_inode( | 951 | trace_ocfs2_cleanup_delete_inode( |
| 952 | (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data); | 952 | (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data); |
| 953 | if (sync_data) | 953 | if (sync_data) |
| 954 | write_inode_now(inode, 1); | 954 | filemap_write_and_wait(inode->i_mapping); |
| 955 | truncate_inode_pages(&inode->i_data, 0); | 955 | truncate_inode_pages(&inode->i_data, 0); |
| 956 | } | 956 | } |
| 957 | 957 | ||
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 1c508b149b3a..88924a3133fa 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
| @@ -43,6 +43,9 @@ struct ocfs2_inode_info | |||
| 43 | /* protects extended attribute changes on this inode */ | 43 | /* protects extended attribute changes on this inode */ |
| 44 | struct rw_semaphore ip_xattr_sem; | 44 | struct rw_semaphore ip_xattr_sem; |
| 45 | 45 | ||
| 46 | /* Number of outstanding AIO's which are not page aligned */ | ||
| 47 | atomic_t ip_unaligned_aio; | ||
| 48 | |||
| 46 | /* These fields are protected by ip_lock */ | 49 | /* These fields are protected by ip_lock */ |
| 47 | spinlock_t ip_lock; | 50 | spinlock_t ip_lock; |
| 48 | u32 ip_open_count; | 51 | u32 ip_open_count; |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index bc91072b7219..726ff265b296 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
| @@ -122,7 +122,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, | |||
| 122 | if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) & | 122 | if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) & |
| 123 | (OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) { | 123 | (OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) { |
| 124 | if (!capable(CAP_LINUX_IMMUTABLE)) | 124 | if (!capable(CAP_LINUX_IMMUTABLE)) |
| 125 | goto bail_unlock; | 125 | goto bail_commit; |
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | ocfs2_inode->ip_attr = flags; | 128 | ocfs2_inode->ip_attr = flags; |
| @@ -132,6 +132,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, | |||
| 132 | if (status < 0) | 132 | if (status < 0) |
| 133 | mlog_errno(status); | 133 | mlog_errno(status); |
| 134 | 134 | ||
| 135 | bail_commit: | ||
| 135 | ocfs2_commit_trans(osb, handle); | 136 | ocfs2_commit_trans(osb, handle); |
| 136 | bail_unlock: | 137 | bail_unlock: |
| 137 | ocfs2_inode_unlock(inode, 1); | 138 | ocfs2_inode_unlock(inode, 1); |
| @@ -381,7 +382,7 @@ int ocfs2_info_handle_freeinode(struct inode *inode, | |||
| 381 | if (!oifi) { | 382 | if (!oifi) { |
| 382 | status = -ENOMEM; | 383 | status = -ENOMEM; |
| 383 | mlog_errno(status); | 384 | mlog_errno(status); |
| 384 | goto bail; | 385 | goto out_err; |
| 385 | } | 386 | } |
| 386 | 387 | ||
| 387 | if (o2info_from_user(*oifi, req)) | 388 | if (o2info_from_user(*oifi, req)) |
| @@ -431,7 +432,7 @@ bail: | |||
| 431 | o2info_set_request_error(&oifi->ifi_req, req); | 432 | o2info_set_request_error(&oifi->ifi_req, req); |
| 432 | 433 | ||
| 433 | kfree(oifi); | 434 | kfree(oifi); |
| 434 | 435 | out_err: | |
| 435 | return status; | 436 | return status; |
| 436 | } | 437 | } |
| 437 | 438 | ||
| @@ -666,7 +667,7 @@ int ocfs2_info_handle_freefrag(struct inode *inode, | |||
| 666 | if (!oiff) { | 667 | if (!oiff) { |
| 667 | status = -ENOMEM; | 668 | status = -ENOMEM; |
| 668 | mlog_errno(status); | 669 | mlog_errno(status); |
| 669 | goto bail; | 670 | goto out_err; |
| 670 | } | 671 | } |
| 671 | 672 | ||
| 672 | if (o2info_from_user(*oiff, req)) | 673 | if (o2info_from_user(*oiff, req)) |
| @@ -716,7 +717,7 @@ bail: | |||
| 716 | o2info_set_request_error(&oiff->iff_req, req); | 717 | o2info_set_request_error(&oiff->iff_req, req); |
| 717 | 718 | ||
| 718 | kfree(oiff); | 719 | kfree(oiff); |
| 719 | 720 | out_err: | |
| 720 | return status; | 721 | return status; |
| 721 | } | 722 | } |
| 722 | 723 | ||
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 295d56454e8b..0a42ae96dca7 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
| @@ -1544,9 +1544,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
| 1544 | /* we need to run complete recovery for offline orphan slots */ | 1544 | /* we need to run complete recovery for offline orphan slots */ |
| 1545 | ocfs2_replay_map_set_state(osb, REPLAY_NEEDED); | 1545 | ocfs2_replay_map_set_state(osb, REPLAY_NEEDED); |
| 1546 | 1546 | ||
| 1547 | mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n", | 1547 | printk(KERN_NOTICE "ocfs2: Begin replay journal (node %d, slot %d) on "\ |
| 1548 | node_num, slot_num, | 1548 | "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev), |
| 1549 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 1549 | MINOR(osb->sb->s_dev)); |
| 1550 | 1550 | ||
| 1551 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); | 1551 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); |
| 1552 | 1552 | ||
| @@ -1601,6 +1601,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
| 1601 | 1601 | ||
| 1602 | jbd2_journal_destroy(journal); | 1602 | jbd2_journal_destroy(journal); |
| 1603 | 1603 | ||
| 1604 | printk(KERN_NOTICE "ocfs2: End replay journal (node %d, slot %d) on "\ | ||
| 1605 | "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev), | ||
| 1606 | MINOR(osb->sb->s_dev)); | ||
| 1604 | done: | 1607 | done: |
| 1605 | /* drop the lock on this nodes journal */ | 1608 | /* drop the lock on this nodes journal */ |
| 1606 | if (got_lock) | 1609 | if (got_lock) |
| @@ -1808,6 +1811,20 @@ static inline unsigned long ocfs2_orphan_scan_timeout(void) | |||
| 1808 | * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This | 1811 | * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This |
| 1809 | * is done to catch any orphans that are left over in orphan directories. | 1812 | * is done to catch any orphans that are left over in orphan directories. |
| 1810 | * | 1813 | * |
| 1814 | * It scans all slots, even ones that are in use. It does so to handle the | ||
| 1815 | * case described below: | ||
| 1816 | * | ||
| 1817 | * Node 1 has an inode it was using. The dentry went away due to memory | ||
| 1818 | * pressure. Node 1 closes the inode, but it's on the free list. The node | ||
| 1819 | * has the open lock. | ||
| 1820 | * Node 2 unlinks the inode. It grabs the dentry lock to notify others, | ||
| 1821 | * but node 1 has no dentry and doesn't get the message. It trylocks the | ||
| 1822 | * open lock, sees that another node has a PR, and does nothing. | ||
| 1823 | * Later node 2 runs its orphan dir. It igets the inode, trylocks the | ||
| 1824 | * open lock, sees the PR still, and does nothing. | ||
| 1825 | * Basically, we have to trigger an orphan iput on node 1. The only way | ||
| 1826 | * for this to happen is if node 1 runs node 2's orphan dir. | ||
| 1827 | * | ||
| 1811 | * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT | 1828 | * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT |
| 1812 | * seconds. It gets an EX lock on os_lockres and checks sequence number | 1829 | * seconds. It gets an EX lock on os_lockres and checks sequence number |
| 1813 | * stored in LVB. If the sequence number has changed, it means some other | 1830 | * stored in LVB. If the sequence number has changed, it means some other |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 68cf2f6d3c6a..a3385b63ff5e 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
| @@ -441,10 +441,11 @@ static inline int ocfs2_mknod_credits(struct super_block *sb, int is_dir, | |||
| 441 | #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2) | 441 | #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2) |
| 442 | 442 | ||
| 443 | /* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota | 443 | /* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota |
| 444 | * update on dir + index leaf + dx root update for free list */ | 444 | * update on dir + index leaf + dx root update for free list + |
| 445 | * previous dirblock update in the free list */ | ||
| 445 | static inline int ocfs2_link_credits(struct super_block *sb) | 446 | static inline int ocfs2_link_credits(struct super_block *sb) |
| 446 | { | 447 | { |
| 447 | return 2*OCFS2_INODE_UPDATE_CREDITS + 3 + | 448 | return 2*OCFS2_INODE_UPDATE_CREDITS + 4 + |
| 448 | ocfs2_quota_trans_credits(sb); | 449 | ocfs2_quota_trans_credits(sb); |
| 449 | } | 450 | } |
| 450 | 451 | ||
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 3e9393ca39eb..9cd41083e991 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
| @@ -61,7 +61,7 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) | |||
| 61 | static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, | 61 | static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, |
| 62 | struct page *page) | 62 | struct page *page) |
| 63 | { | 63 | { |
| 64 | int ret; | 64 | int ret = VM_FAULT_NOPAGE; |
| 65 | struct inode *inode = file->f_path.dentry->d_inode; | 65 | struct inode *inode = file->f_path.dentry->d_inode; |
| 66 | struct address_space *mapping = inode->i_mapping; | 66 | struct address_space *mapping = inode->i_mapping; |
| 67 | loff_t pos = page_offset(page); | 67 | loff_t pos = page_offset(page); |
| @@ -71,32 +71,25 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, | |||
| 71 | void *fsdata; | 71 | void *fsdata; |
| 72 | loff_t size = i_size_read(inode); | 72 | loff_t size = i_size_read(inode); |
| 73 | 73 | ||
| 74 | /* | ||
| 75 | * Another node might have truncated while we were waiting on | ||
| 76 | * cluster locks. | ||
| 77 | * We don't check size == 0 before the shift. This is borrowed | ||
| 78 | * from do_generic_file_read. | ||
| 79 | */ | ||
| 80 | last_index = (size - 1) >> PAGE_CACHE_SHIFT; | 74 | last_index = (size - 1) >> PAGE_CACHE_SHIFT; |
| 81 | if (unlikely(!size || page->index > last_index)) { | ||
| 82 | ret = -EINVAL; | ||
| 83 | goto out; | ||
| 84 | } | ||
| 85 | 75 | ||
| 86 | /* | 76 | /* |
| 87 | * The i_size check above doesn't catch the case where nodes | 77 | * There are cases that lead to the page no longer belonging to the |
| 88 | * truncated and then re-extended the file. We'll re-check the | 78 | * mapping. |
| 89 | * page mapping after taking the page lock inside of | 79 | * 1) pagecache truncates locally due to memory pressure. |
| 90 | * ocfs2_write_begin_nolock(). | 80 | * 2) pagecache truncates when another is taking EX lock against |
| 81 | * inode lock. see ocfs2_data_convert_worker. | ||
| 82 | * | ||
| 83 | * The i_size check doesn't catch the case where nodes truncated and | ||
| 84 | * then re-extended the file. We'll re-check the page mapping after | ||
| 85 | * taking the page lock inside of ocfs2_write_begin_nolock(). | ||
| 86 | * | ||
| | | 87 | * Let the VM retry in these cases. | ||
| 91 | */ | 88 | */ |
| 92 | if (!PageUptodate(page) || page->mapping != inode->i_mapping) { | 89 | if ((page->mapping != inode->i_mapping) || |
| 93 | /* | 90 | (!PageUptodate(page)) || |
| 94 | * the page has been umapped in ocfs2_data_downconvert_worker. | 91 | (page_offset(page) >= size)) |
| 95 | * So return 0 here and let VFS retry. | ||
| 96 | */ | ||
| 97 | ret = 0; | ||
| 98 | goto out; | 92 | goto out; |
| 99 | } | ||
| 100 | 93 | ||
| 101 | /* | 94 | /* |
| 102 | * Call ocfs2_write_begin() and ocfs2_write_end() to take | 95 | * Call ocfs2_write_begin() and ocfs2_write_end() to take |
| @@ -116,17 +109,21 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, | |||
| 116 | if (ret) { | 109 | if (ret) { |
| 117 | if (ret != -ENOSPC) | 110 | if (ret != -ENOSPC) |
| 118 | mlog_errno(ret); | 111 | mlog_errno(ret); |
| 112 | if (ret == -ENOMEM) | ||
| 113 | ret = VM_FAULT_OOM; | ||
| 114 | else | ||
| 115 | ret = VM_FAULT_SIGBUS; | ||
| 119 | goto out; | 116 | goto out; |
| 120 | } | 117 | } |
| 121 | 118 | ||
| 122 | ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, | 119 | if (!locked_page) { |
| 123 | fsdata); | 120 | ret = VM_FAULT_NOPAGE; |
| 124 | if (ret < 0) { | ||
| 125 | mlog_errno(ret); | ||
| 126 | goto out; | 121 | goto out; |
| 127 | } | 122 | } |
| 123 | ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, | ||
| 124 | fsdata); | ||
| 128 | BUG_ON(ret != len); | 125 | BUG_ON(ret != len); |
| 129 | ret = 0; | 126 | ret = VM_FAULT_LOCKED; |
| 130 | out: | 127 | out: |
| 131 | return ret; | 128 | return ret; |
| 132 | } | 129 | } |
| @@ -168,8 +165,6 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 168 | 165 | ||
| 169 | out: | 166 | out: |
| 170 | ocfs2_unblock_signals(&oldset); | 167 | ocfs2_unblock_signals(&oldset); |
| 171 | if (ret) | ||
| 172 | ret = VM_FAULT_SIGBUS; | ||
| 173 | return ret; | 168 | return ret; |
| 174 | } | 169 | } |
| 175 | 170 | ||
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index d53cb706f14c..184c76b8c293 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c | |||
| @@ -745,7 +745,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, | |||
| 745 | */ | 745 | */ |
| 746 | ocfs2_probe_alloc_group(inode, gd_bh, &goal_bit, len, move_max_hop, | 746 | ocfs2_probe_alloc_group(inode, gd_bh, &goal_bit, len, move_max_hop, |
| 747 | new_phys_cpos); | 747 | new_phys_cpos); |
| 748 | if (!new_phys_cpos) { | 748 | if (!*new_phys_cpos) { |
| 749 | ret = -ENOSPC; | 749 | ret = -ENOSPC; |
| 750 | goto out_commit; | 750 | goto out_commit; |
| 751 | } | 751 | } |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 409285854f64..d355e6e36b36 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
| @@ -836,18 +836,65 @@ static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb, | |||
| 836 | 836 | ||
| 837 | static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) | 837 | static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) |
| 838 | { | 838 | { |
| 839 | __test_and_set_bit_le(bit, bitmap); | 839 | __set_bit_le(bit, bitmap); |
| 840 | } | 840 | } |
| 841 | #define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr)) | 841 | #define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr)) |
| 842 | 842 | ||
| 843 | static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap) | 843 | static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap) |
| 844 | { | 844 | { |
| 845 | __test_and_clear_bit_le(bit, bitmap); | 845 | __clear_bit_le(bit, bitmap); |
| 846 | } | 846 | } |
| 847 | #define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr)) | 847 | #define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr)) |
| 848 | 848 | ||
| 849 | #define ocfs2_test_bit test_bit_le | 849 | #define ocfs2_test_bit test_bit_le |
| 850 | #define ocfs2_find_next_zero_bit find_next_zero_bit_le | 850 | #define ocfs2_find_next_zero_bit find_next_zero_bit_le |
| 851 | #define ocfs2_find_next_bit find_next_bit_le | 851 | #define ocfs2_find_next_bit find_next_bit_le |
| 852 | |||
| 853 | static inline void *correct_addr_and_bit_unaligned(int *bit, void *addr) | ||
| 854 | { | ||
| 855 | #if BITS_PER_LONG == 64 | ||
| 856 | *bit += ((unsigned long) addr & 7UL) << 3; | ||
| 857 | addr = (void *) ((unsigned long) addr & ~7UL); | ||
| 858 | #elif BITS_PER_LONG == 32 | ||
| 859 | *bit += ((unsigned long) addr & 3UL) << 3; | ||
| 860 | addr = (void *) ((unsigned long) addr & ~3UL); | ||
| 861 | #else | ||
| 862 | #error "how many bits you are?!" | ||
| 863 | #endif | ||
| 864 | return addr; | ||
| 865 | } | ||
| 866 | |||
| 867 | static inline void ocfs2_set_bit_unaligned(int bit, void *bitmap) | ||
| 868 | { | ||
| 869 | bitmap = correct_addr_and_bit_unaligned(&bit, bitmap); | ||
| 870 | ocfs2_set_bit(bit, bitmap); | ||
| 871 | } | ||
| 872 | |||
| 873 | static inline void ocfs2_clear_bit_unaligned(int bit, void *bitmap) | ||
| 874 | { | ||
| 875 | bitmap = correct_addr_and_bit_unaligned(&bit, bitmap); | ||
| 876 | ocfs2_clear_bit(bit, bitmap); | ||
| 877 | } | ||
| 878 | |||
| 879 | static inline int ocfs2_test_bit_unaligned(int bit, void *bitmap) | ||
| 880 | { | ||
| 881 | bitmap = correct_addr_and_bit_unaligned(&bit, bitmap); | ||
| 882 | return ocfs2_test_bit(bit, bitmap); | ||
| 883 | } | ||
| 884 | |||
| 885 | static inline int ocfs2_find_next_zero_bit_unaligned(void *bitmap, int max, | ||
| 886 | int start) | ||
| 887 | { | ||
| 888 | int fix = 0, ret, tmpmax; | ||
| 889 | bitmap = correct_addr_and_bit_unaligned(&fix, bitmap); | ||
| 890 | tmpmax = max + fix; | ||
| 891 | start += fix; | ||
| 892 | |||
| 893 | ret = ocfs2_find_next_zero_bit(bitmap, tmpmax, start) - fix; | ||
| 894 | if (ret > max) | ||
| 895 | return max; | ||
| 896 | return ret; | ||
| 897 | } | ||
| 898 | |||
| 852 | #endif /* OCFS2_H */ | 899 | #endif /* OCFS2_H */ |
| 853 | 900 | ||
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index dc8007fc9247..f100bf70a906 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c | |||
| @@ -404,7 +404,9 @@ struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery( | |||
| 404 | int status = 0; | 404 | int status = 0; |
| 405 | struct ocfs2_quota_recovery *rec; | 405 | struct ocfs2_quota_recovery *rec; |
| 406 | 406 | ||
| 407 | mlog(ML_NOTICE, "Beginning quota recovery in slot %u\n", slot_num); | 407 | printk(KERN_NOTICE "ocfs2: Beginning quota recovery on device (%s) for " |
| 408 | "slot %u\n", osb->dev_str, slot_num); | ||
| 409 | |||
| 408 | rec = ocfs2_alloc_quota_recovery(); | 410 | rec = ocfs2_alloc_quota_recovery(); |
| 409 | if (!rec) | 411 | if (!rec) |
| 410 | return ERR_PTR(-ENOMEM); | 412 | return ERR_PTR(-ENOMEM); |
| @@ -549,8 +551,8 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, | |||
| 549 | goto out_commit; | 551 | goto out_commit; |
| 550 | } | 552 | } |
| 551 | lock_buffer(qbh); | 553 | lock_buffer(qbh); |
| 552 | WARN_ON(!ocfs2_test_bit(bit, dchunk->dqc_bitmap)); | 554 | WARN_ON(!ocfs2_test_bit_unaligned(bit, dchunk->dqc_bitmap)); |
| 553 | ocfs2_clear_bit(bit, dchunk->dqc_bitmap); | 555 | ocfs2_clear_bit_unaligned(bit, dchunk->dqc_bitmap); |
| 554 | le32_add_cpu(&dchunk->dqc_free, 1); | 556 | le32_add_cpu(&dchunk->dqc_free, 1); |
| 555 | unlock_buffer(qbh); | 557 | unlock_buffer(qbh); |
| 556 | ocfs2_journal_dirty(handle, qbh); | 558 | ocfs2_journal_dirty(handle, qbh); |
| @@ -596,7 +598,9 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, | |||
| 596 | struct inode *lqinode; | 598 | struct inode *lqinode; |
| 597 | unsigned int flags; | 599 | unsigned int flags; |
| 598 | 600 | ||
| 599 | mlog(ML_NOTICE, "Finishing quota recovery in slot %u\n", slot_num); | 601 | printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for " |
| 602 | "slot %u\n", osb->dev_str, slot_num); | ||
| 603 | |||
| 600 | mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); | 604 | mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); |
| 601 | for (type = 0; type < MAXQUOTAS; type++) { | 605 | for (type = 0; type < MAXQUOTAS; type++) { |
| 602 | if (list_empty(&(rec->r_list[type]))) | 606 | if (list_empty(&(rec->r_list[type]))) |
| @@ -612,8 +616,9 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, | |||
| 612 | /* Someone else is holding the lock? Then he must be | 616 | /* Someone else is holding the lock? Then he must be |
| 613 | * doing the recovery. Just skip the file... */ | 617 | * doing the recovery. Just skip the file... */ |
| 614 | if (status == -EAGAIN) { | 618 | if (status == -EAGAIN) { |
| 615 | mlog(ML_NOTICE, "skipping quota recovery for slot %d " | 619 | printk(KERN_NOTICE "ocfs2: Skipping quota recovery on " |
| 616 | "because quota file is locked.\n", slot_num); | 620 | "device (%s) for slot %d because quota file is " |
| 621 | "locked.\n", osb->dev_str, slot_num); | ||
| 617 | status = 0; | 622 | status = 0; |
| 618 | goto out_put; | 623 | goto out_put; |
| 619 | } else if (status < 0) { | 624 | } else if (status < 0) { |
| @@ -944,7 +949,7 @@ static struct ocfs2_quota_chunk *ocfs2_find_free_entry(struct super_block *sb, | |||
| 944 | * ol_quota_entries_per_block(sb); | 949 | * ol_quota_entries_per_block(sb); |
| 945 | } | 950 | } |
| 946 | 951 | ||
| 947 | found = ocfs2_find_next_zero_bit(dchunk->dqc_bitmap, len, 0); | 952 | found = ocfs2_find_next_zero_bit_unaligned(dchunk->dqc_bitmap, len, 0); |
| 948 | /* We failed? */ | 953 | /* We failed? */ |
| 949 | if (found == len) { | 954 | if (found == len) { |
| 950 | mlog(ML_ERROR, "Did not find empty entry in chunk %d with %u" | 955 | mlog(ML_ERROR, "Did not find empty entry in chunk %d with %u" |
| @@ -1208,7 +1213,7 @@ static void olq_alloc_dquot(struct buffer_head *bh, void *private) | |||
| 1208 | struct ocfs2_local_disk_chunk *dchunk; | 1213 | struct ocfs2_local_disk_chunk *dchunk; |
| 1209 | 1214 | ||
| 1210 | dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; | 1215 | dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; |
| 1211 | ocfs2_set_bit(*offset, dchunk->dqc_bitmap); | 1216 | ocfs2_set_bit_unaligned(*offset, dchunk->dqc_bitmap); |
| 1212 | le32_add_cpu(&dchunk->dqc_free, -1); | 1217 | le32_add_cpu(&dchunk->dqc_free, -1); |
| 1213 | } | 1218 | } |
| 1214 | 1219 | ||
| @@ -1289,7 +1294,7 @@ int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot) | |||
| 1289 | (od->dq_chunk->qc_headerbh->b_data); | 1294 | (od->dq_chunk->qc_headerbh->b_data); |
| 1290 | /* Mark structure as freed */ | 1295 | /* Mark structure as freed */ |
| 1291 | lock_buffer(od->dq_chunk->qc_headerbh); | 1296 | lock_buffer(od->dq_chunk->qc_headerbh); |
| 1292 | ocfs2_clear_bit(offset, dchunk->dqc_bitmap); | 1297 | ocfs2_clear_bit_unaligned(offset, dchunk->dqc_bitmap); |
| 1293 | le32_add_cpu(&dchunk->dqc_free, 1); | 1298 | le32_add_cpu(&dchunk->dqc_free, 1); |
| 1294 | unlock_buffer(od->dq_chunk->qc_headerbh); | 1299 | unlock_buffer(od->dq_chunk->qc_headerbh); |
| 1295 | ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh); | 1300 | ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh); |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index 26fc0014d509..1424c151cccc 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
| @@ -493,8 +493,8 @@ int ocfs2_find_slot(struct ocfs2_super *osb) | |||
| 493 | goto bail; | 493 | goto bail; |
| 494 | } | 494 | } |
| 495 | } else | 495 | } else |
| 496 | mlog(ML_NOTICE, "slot %d is already allocated to this node!\n", | 496 | printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already " |
| 497 | slot); | 497 | "allocated to this node!\n", slot, osb->dev_str); |
| 498 | 498 | ||
| 499 | ocfs2_set_slot(si, slot, osb->node_num); | 499 | ocfs2_set_slot(si, slot, osb->node_num); |
| 500 | osb->slot_num = slot; | 500 | osb->slot_num = slot; |
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 19965b00c43c..94368017edb3 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include "cluster/masklog.h" | 28 | #include "cluster/masklog.h" |
| 29 | #include "cluster/nodemanager.h" | 29 | #include "cluster/nodemanager.h" |
| 30 | #include "cluster/heartbeat.h" | 30 | #include "cluster/heartbeat.h" |
| 31 | #include "cluster/tcp.h" | ||
| 31 | 32 | ||
| 32 | #include "stackglue.h" | 33 | #include "stackglue.h" |
| 33 | 34 | ||
| @@ -256,6 +257,61 @@ static void o2cb_dump_lksb(struct ocfs2_dlm_lksb *lksb) | |||
| 256 | } | 257 | } |
| 257 | 258 | ||
| 258 | /* | 259 | /* |
| 260 | * Check if this node is heartbeating and is connected to all other | ||
| 261 | * heartbeating nodes. | ||
| 262 | */ | ||
| 263 | static int o2cb_cluster_check(void) | ||
| 264 | { | ||
| 265 | u8 node_num; | ||
| 266 | int i; | ||
| 267 | unsigned long hbmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 268 | unsigned long netmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 269 | |||
| 270 | node_num = o2nm_this_node(); | ||
| 271 | if (node_num == O2NM_MAX_NODES) { | ||
| 272 | printk(KERN_ERR "o2cb: This node has not been configured.\n"); | ||
| 273 | return -EINVAL; | ||
| 274 | } | ||
| 275 | |||
| 276 | /* | ||
| 277 | * o2dlm expects o2net sockets to be created. If not, then | ||
| 278 | * dlm_join_domain() fails with a stack of errors which are both cryptic | ||
| 279 | * and incomplete. The idea here is to detect upfront whether we have | ||
| 280 | * managed to connect to all nodes or not. If not, then list the nodes | ||
| 281 | * to allow the user to check the configuration (incorrect IP, firewall, | ||
| | | 282 | * etc.) Yes, this is racy. But it's not the end of the world. | ||
| 283 | */ | ||
| 284 | #define O2CB_MAP_STABILIZE_COUNT 60 | ||
| 285 | for (i = 0; i < O2CB_MAP_STABILIZE_COUNT; ++i) { | ||
| 286 | o2hb_fill_node_map(hbmap, sizeof(hbmap)); | ||
| 287 | if (!test_bit(node_num, hbmap)) { | ||
| 288 | printk(KERN_ERR "o2cb: %s heartbeat has not been " | ||
| 289 | "started.\n", (o2hb_global_heartbeat_active() ? | ||
| 290 | "Global" : "Local")); | ||
| 291 | return -EINVAL; | ||
| 292 | } | ||
| 293 | o2net_fill_node_map(netmap, sizeof(netmap)); | ||
| 294 | /* Force set the current node to allow easy compare */ | ||
| 295 | set_bit(node_num, netmap); | ||
| 296 | if (!memcmp(hbmap, netmap, sizeof(hbmap))) | ||
| 297 | return 0; | ||
| 298 | if (i < O2CB_MAP_STABILIZE_COUNT) | ||
| 299 | msleep(1000); | ||
| 300 | } | ||
| 301 | |||
| 302 | printk(KERN_ERR "o2cb: This node could not connect to nodes:"); | ||
| 303 | i = -1; | ||
| 304 | while ((i = find_next_bit(hbmap, O2NM_MAX_NODES, | ||
| 305 | i + 1)) < O2NM_MAX_NODES) { | ||
| 306 | if (!test_bit(i, netmap)) | ||
| 307 | printk(" %u", i); | ||
| 308 | } | ||
| 309 | printk(".\n"); | ||
| 310 | |||
| 311 | return -ENOTCONN; | ||
| 312 | } | ||
| 313 | |||
| 314 | /* | ||
| 259 | * Called from the dlm when it's about to evict a node. This is how the | 315 | * Called from the dlm when it's about to evict a node. This is how the |
| 260 | * classic stack signals node death. | 316 | * classic stack signals node death. |
| 261 | */ | 317 | */ |
| @@ -263,8 +319,8 @@ static void o2dlm_eviction_cb(int node_num, void *data) | |||
| 263 | { | 319 | { |
| 264 | struct ocfs2_cluster_connection *conn = data; | 320 | struct ocfs2_cluster_connection *conn = data; |
| 265 | 321 | ||
| 266 | mlog(ML_NOTICE, "o2dlm has evicted node %d from group %.*s\n", | 322 | printk(KERN_NOTICE "o2cb: o2dlm has evicted node %d from domain %.*s\n", |
| 267 | node_num, conn->cc_namelen, conn->cc_name); | 323 | node_num, conn->cc_namelen, conn->cc_name); |
| 268 | 324 | ||
| 269 | conn->cc_recovery_handler(node_num, conn->cc_recovery_data); | 325 | conn->cc_recovery_handler(node_num, conn->cc_recovery_data); |
| 270 | } | 326 | } |
| @@ -280,12 +336,11 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) | |||
| 280 | BUG_ON(conn == NULL); | 336 | BUG_ON(conn == NULL); |
| 281 | BUG_ON(conn->cc_proto == NULL); | 337 | BUG_ON(conn->cc_proto == NULL); |
| 282 | 338 | ||
| 283 | /* for now we only have one cluster/node, make sure we see it | 339 | /* Ensure cluster stack is up and all nodes are connected */ |
| 284 | * in the heartbeat universe */ | 340 | rc = o2cb_cluster_check(); |
| 285 | if (!o2hb_check_local_node_heartbeating()) { | 341 | if (rc) { |
| 286 | if (o2hb_global_heartbeat_active()) | 342 | printk(KERN_ERR "o2cb: Cluster check failed. Fix errors " |
| 287 | mlog(ML_ERROR, "Global heartbeat not started\n"); | 343 | "before retrying.\n"); |
| 288 | rc = -EINVAL; | ||
| 289 | goto out; | 344 | goto out; |
| 290 | } | 345 | } |
| 291 | 346 | ||
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 56f61027236b..4994f8b0e604 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -54,6 +54,7 @@ | |||
| 54 | #include "ocfs1_fs_compat.h" | 54 | #include "ocfs1_fs_compat.h" |
| 55 | 55 | ||
| 56 | #include "alloc.h" | 56 | #include "alloc.h" |
| 57 | #include "aops.h" | ||
| 57 | #include "blockcheck.h" | 58 | #include "blockcheck.h" |
| 58 | #include "dlmglue.h" | 59 | #include "dlmglue.h" |
| 59 | #include "export.h" | 60 | #include "export.h" |
| @@ -1107,9 +1108,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 1107 | 1108 | ||
| 1108 | ocfs2_set_ro_flag(osb, 1); | 1109 | ocfs2_set_ro_flag(osb, 1); |
| 1109 | 1110 | ||
| 1110 | printk(KERN_NOTICE "Readonly device detected. No cluster " | 1111 | printk(KERN_NOTICE "ocfs2: Readonly device (%s) detected. " |
| 1111 | "services will be utilized for this mount. Recovery " | 1112 | "Cluster services will not be used for this mount. " |
| 1112 | "will be skipped.\n"); | 1113 | "Recovery will be skipped.\n", osb->dev_str); |
| 1113 | } | 1114 | } |
| 1114 | 1115 | ||
| 1115 | if (!ocfs2_is_hard_readonly(osb)) { | 1116 | if (!ocfs2_is_hard_readonly(osb)) { |
| @@ -1616,12 +1617,17 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
| 1616 | return 0; | 1617 | return 0; |
| 1617 | } | 1618 | } |
| 1618 | 1619 | ||
| 1620 | wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ]; | ||
| 1621 | |||
| 1619 | static int __init ocfs2_init(void) | 1622 | static int __init ocfs2_init(void) |
| 1620 | { | 1623 | { |
| 1621 | int status; | 1624 | int status, i; |
| 1622 | 1625 | ||
| 1623 | ocfs2_print_version(); | 1626 | ocfs2_print_version(); |
| 1624 | 1627 | ||
| 1628 | for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++) | ||
| 1629 | init_waitqueue_head(&ocfs2__ioend_wq[i]); | ||
| 1630 | |||
| 1625 | status = init_ocfs2_uptodate_cache(); | 1631 | status = init_ocfs2_uptodate_cache(); |
| 1626 | if (status < 0) { | 1632 | if (status < 0) { |
| 1627 | mlog_errno(status); | 1633 | mlog_errno(status); |
| @@ -1760,7 +1766,7 @@ static void ocfs2_inode_init_once(void *data) | |||
| 1760 | ocfs2_extent_map_init(&oi->vfs_inode); | 1766 | ocfs2_extent_map_init(&oi->vfs_inode); |
| 1761 | INIT_LIST_HEAD(&oi->ip_io_markers); | 1767 | INIT_LIST_HEAD(&oi->ip_io_markers); |
| 1762 | oi->ip_dir_start_lookup = 0; | 1768 | oi->ip_dir_start_lookup = 0; |
| 1763 | 1769 | atomic_set(&oi->ip_unaligned_aio, 0); | |
| 1764 | init_rwsem(&oi->ip_alloc_sem); | 1770 | init_rwsem(&oi->ip_alloc_sem); |
| 1765 | init_rwsem(&oi->ip_xattr_sem); | 1771 | init_rwsem(&oi->ip_xattr_sem); |
| 1766 | mutex_init(&oi->ip_io_mutex); | 1772 | mutex_init(&oi->ip_io_mutex); |
| @@ -1974,7 +1980,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
| 1974 | * If we failed before we got a uuid_str yet, we can't stop | 1980 | * If we failed before we got a uuid_str yet, we can't stop |
| 1975 | * heartbeat. Otherwise, do it. | 1981 | * heartbeat. Otherwise, do it. |
| 1976 | */ | 1982 | */ |
| 1977 | if (!mnt_err && !ocfs2_mount_local(osb) && osb->uuid_str) | 1983 | if (!mnt_err && !ocfs2_mount_local(osb) && osb->uuid_str && |
| 1984 | !ocfs2_is_hard_readonly(osb)) | ||
| 1978 | hangup_needed = 1; | 1985 | hangup_needed = 1; |
| 1979 | 1986 | ||
| 1980 | if (osb->cconn) | 1987 | if (osb->cconn) |
| @@ -2353,7 +2360,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 2353 | mlog_errno(status); | 2360 | mlog_errno(status); |
| 2354 | goto bail; | 2361 | goto bail; |
| 2355 | } | 2362 | } |
| 2356 | cleancache_init_shared_fs((char *)&uuid_net_key, sb); | 2363 | cleancache_init_shared_fs((char *)&di->id2.i_super.s_uuid, sb); |
| 2357 | 2364 | ||
| 2358 | bail: | 2365 | bail: |
| 2359 | return status; | 2366 | return status; |
| @@ -2462,8 +2469,8 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) | |||
| 2462 | goto finally; | 2469 | goto finally; |
| 2463 | } | 2470 | } |
| 2464 | } else { | 2471 | } else { |
| 2465 | mlog(ML_NOTICE, "File system was not unmounted cleanly, " | 2472 | printk(KERN_NOTICE "ocfs2: File system on device (%s) was not " |
| 2466 | "recovering volume.\n"); | 2473 | "unmounted cleanly, recovering it.\n", osb->dev_str); |
| 2467 | } | 2474 | } |
| 2468 | 2475 | ||
| 2469 | local = ocfs2_mount_local(osb); | 2476 | local = ocfs2_mount_local(osb); |
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 194fb22ef79d..aa9e8777b09a 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
| @@ -2376,16 +2376,18 @@ static int ocfs2_remove_value_outside(struct inode*inode, | |||
| 2376 | } | 2376 | } |
| 2377 | 2377 | ||
| 2378 | ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); | 2378 | ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); |
| 2379 | if (ret < 0) { | ||
| 2380 | mlog_errno(ret); | ||
| 2381 | break; | ||
| 2382 | } | ||
| 2383 | 2379 | ||
| 2384 | ocfs2_commit_trans(osb, ctxt.handle); | 2380 | ocfs2_commit_trans(osb, ctxt.handle); |
| 2385 | if (ctxt.meta_ac) { | 2381 | if (ctxt.meta_ac) { |
| 2386 | ocfs2_free_alloc_context(ctxt.meta_ac); | 2382 | ocfs2_free_alloc_context(ctxt.meta_ac); |
| 2387 | ctxt.meta_ac = NULL; | 2383 | ctxt.meta_ac = NULL; |
| 2388 | } | 2384 | } |
| 2385 | |||
| 2386 | if (ret < 0) { | ||
| 2387 | mlog_errno(ret); | ||
| 2388 | break; | ||
| 2389 | } | ||
| 2390 | |||
| 2389 | } | 2391 | } |
| 2390 | 2392 | ||
| 2391 | if (ctxt.meta_ac) | 2393 | if (ctxt.meta_ac) |
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index b6c4b3795c4a..76e4266d2e7e 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c | |||
| @@ -42,6 +42,8 @@ xfs_acl_from_disk(struct xfs_acl *aclp) | |||
| 42 | int count, i; | 42 | int count, i; |
| 43 | 43 | ||
| 44 | count = be32_to_cpu(aclp->acl_cnt); | 44 | count = be32_to_cpu(aclp->acl_cnt); |
| 45 | if (count > XFS_ACL_MAX_ENTRIES) | ||
| 46 | return ERR_PTR(-EFSCORRUPTED); | ||
| 45 | 47 | ||
| 46 | acl = posix_acl_alloc(count, GFP_KERNEL); | 48 | acl = posix_acl_alloc(count, GFP_KERNEL); |
| 47 | if (!acl) | 49 | if (!acl) |
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index d4906e7c9787..c1b55e596551 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c | |||
| @@ -110,6 +110,7 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags) | |||
| 110 | /* | 110 | /* |
| 111 | * Query whether the requested number of additional bytes of extended | 111 | * Query whether the requested number of additional bytes of extended |
| 112 | * attribute space will be able to fit inline. | 112 | * attribute space will be able to fit inline. |
| 113 | * | ||
| 113 | * Returns zero if not, else the di_forkoff fork offset to be used in the | 114 | * Returns zero if not, else the di_forkoff fork offset to be used in the |
| 114 | * literal area for attribute data once the new bytes have been added. | 115 | * literal area for attribute data once the new bytes have been added. |
| 115 | * | 116 | * |
| @@ -122,7 +123,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) | |||
| 122 | int offset; | 123 | int offset; |
| 123 | int minforkoff; /* lower limit on valid forkoff locations */ | 124 | int minforkoff; /* lower limit on valid forkoff locations */ |
| 124 | int maxforkoff; /* upper limit on valid forkoff locations */ | 125 | int maxforkoff; /* upper limit on valid forkoff locations */ |
| 125 | int dsize; | 126 | int dsize; |
| 126 | xfs_mount_t *mp = dp->i_mount; | 127 | xfs_mount_t *mp = dp->i_mount; |
| 127 | 128 | ||
| 128 | offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */ | 129 | offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */ |
| @@ -136,47 +137,60 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) | |||
| 136 | return (offset >= minforkoff) ? minforkoff : 0; | 137 | return (offset >= minforkoff) ? minforkoff : 0; |
| 137 | } | 138 | } |
| 138 | 139 | ||
| 139 | if (!(mp->m_flags & XFS_MOUNT_ATTR2)) { | 140 | /* |
| 140 | if (bytes <= XFS_IFORK_ASIZE(dp)) | 141 | * If the requested number of bytes is smaller than or equal to the |
| 141 | return dp->i_d.di_forkoff; | 142 | * current attribute fork size we can always proceed. |
| 143 | * | ||
| 144 | * Note that if_bytes in the data fork might actually be larger than | ||
| 145 | * the current data fork size due to delalloc extents. In that | ||
| 146 | * case either the extent count will go down when they are converted | ||
| 147 | * to real extents, or the delalloc conversion will take care of the | ||
| 148 | * literal area rebalancing. | ||
| 149 | */ | ||
| 150 | if (bytes <= XFS_IFORK_ASIZE(dp)) | ||
| 151 | return dp->i_d.di_forkoff; | ||
| 152 | |||
| 153 | /* | ||
| 154 | * For attr2 we can try to move the forkoff if there is space in the | ||
| 155 | * literal area, but for the old format we are done if there is no | ||
| 156 | * space in the fixed attribute fork. | ||
| 157 | */ | ||
| 158 | if (!(mp->m_flags & XFS_MOUNT_ATTR2)) | ||
| 142 | return 0; | 159 | return 0; |
| 143 | } | ||
| 144 | 160 | ||
| 145 | dsize = dp->i_df.if_bytes; | 161 | dsize = dp->i_df.if_bytes; |
| 146 | 162 | ||
| 147 | switch (dp->i_d.di_format) { | 163 | switch (dp->i_d.di_format) { |
| 148 | case XFS_DINODE_FMT_EXTENTS: | 164 | case XFS_DINODE_FMT_EXTENTS: |
| 149 | /* | 165 | /* |
| 150 | * If there is no attr fork and the data fork is extents, | 166 | * If there is no attr fork and the data fork is extents, |
| 151 | * determine if creating the default attr fork will result | 167 | * determine if creating the default attr fork will result |
| 152 | * in the extents form migrating to btree. If so, the | 168 | * in the extents form migrating to btree. If so, the |
| 153 | * minimum offset only needs to be the space required for | 169 | * minimum offset only needs to be the space required for |
| 154 | * the btree root. | 170 | * the btree root. |
| 155 | */ | 171 | */ |
| 156 | if (!dp->i_d.di_forkoff && dp->i_df.if_bytes > | 172 | if (!dp->i_d.di_forkoff && dp->i_df.if_bytes > |
| 157 | xfs_default_attroffset(dp)) | 173 | xfs_default_attroffset(dp)) |
| 158 | dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS); | 174 | dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS); |
| 159 | break; | 175 | break; |
| 160 | |||
| 161 | case XFS_DINODE_FMT_BTREE: | 176 | case XFS_DINODE_FMT_BTREE: |
| 162 | /* | 177 | /* |
| 163 | * If have data btree then keep forkoff if we have one, | 178 | * If we have a data btree then keep forkoff if we have one, |
| 164 | * otherwise we are adding a new attr, so then we set | 179 | * otherwise we are adding a new attr, so then we set |
| 165 | * minforkoff to where the btree root can finish so we have | 180 | * minforkoff to where the btree root can finish so we have |
| 166 | * plenty of room for attrs | 181 | * plenty of room for attrs |
| 167 | */ | 182 | */ |
| 168 | if (dp->i_d.di_forkoff) { | 183 | if (dp->i_d.di_forkoff) { |
| 169 | if (offset < dp->i_d.di_forkoff) | 184 | if (offset < dp->i_d.di_forkoff) |
| 170 | return 0; | 185 | return 0; |
| 171 | else | 186 | return dp->i_d.di_forkoff; |
| 172 | return dp->i_d.di_forkoff; | 187 | } |
| 173 | } else | 188 | dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot); |
| 174 | dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot); | ||
| 175 | break; | 189 | break; |
| 176 | } | 190 | } |
| 177 | 191 | ||
| 178 | /* | 192 | /* |
| 179 | * A data fork btree root must have space for at least | 193 | * A data fork btree root must have space for at least |
| 180 | * MINDBTPTRS key/ptr pairs if the data fork is small or empty. | 194 | * MINDBTPTRS key/ptr pairs if the data fork is small or empty. |
| 181 | */ | 195 | */ |
| 182 | minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS)); | 196 | minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS)); |
| @@ -186,10 +200,10 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes) | |||
| 186 | maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); | 200 | maxforkoff = XFS_LITINO(mp) - XFS_BMDR_SPACE_CALC(MINABTPTRS); |
| 187 | maxforkoff = maxforkoff >> 3; /* rounded down */ | 201 | maxforkoff = maxforkoff >> 3; /* rounded down */ |
| 188 | 202 | ||
| 189 | if (offset >= minforkoff && offset < maxforkoff) | ||
| 190 | return offset; | ||
| 191 | if (offset >= maxforkoff) | 203 | if (offset >= maxforkoff) |
| 192 | return maxforkoff; | 204 | return maxforkoff; |
| 205 | if (offset >= minforkoff) | ||
| 206 | return offset; | ||
| 193 | return 0; | 207 | return 0; |
| 194 | } | 208 | } |
| 195 | 209 | ||
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c0237c602f11..755ee8164880 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
| @@ -2835,6 +2835,27 @@ corrupt_out: | |||
| 2835 | return XFS_ERROR(EFSCORRUPTED); | 2835 | return XFS_ERROR(EFSCORRUPTED); |
| 2836 | } | 2836 | } |
| 2837 | 2837 | ||
| 2838 | void | ||
| 2839 | xfs_promote_inode( | ||
| 2840 | struct xfs_inode *ip) | ||
| 2841 | { | ||
| 2842 | struct xfs_buf *bp; | ||
| 2843 | |||
| 2844 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); | ||
| 2845 | |||
| 2846 | bp = xfs_incore(ip->i_mount->m_ddev_targp, ip->i_imap.im_blkno, | ||
| 2847 | ip->i_imap.im_len, XBF_TRYLOCK); | ||
| 2848 | if (!bp) | ||
| 2849 | return; | ||
| 2850 | |||
| 2851 | if (XFS_BUF_ISDELAYWRITE(bp)) { | ||
| 2852 | xfs_buf_delwri_promote(bp); | ||
| 2853 | wake_up_process(ip->i_mount->m_ddev_targp->bt_task); | ||
| 2854 | } | ||
| 2855 | |||
| 2856 | xfs_buf_relse(bp); | ||
| 2857 | } | ||
| 2858 | |||
| 2838 | /* | 2859 | /* |
| 2839 | * Return a pointer to the extent record at file index idx. | 2860 | * Return a pointer to the extent record at file index idx. |
| 2840 | */ | 2861 | */ |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 760140d1dd66..b4cd4739f98e 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
| @@ -498,6 +498,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); | |||
| 498 | void xfs_iext_realloc(xfs_inode_t *, int, int); | 498 | void xfs_iext_realloc(xfs_inode_t *, int, int); |
| 499 | void xfs_iunpin_wait(xfs_inode_t *); | 499 | void xfs_iunpin_wait(xfs_inode_t *); |
| 500 | int xfs_iflush(xfs_inode_t *, uint); | 500 | int xfs_iflush(xfs_inode_t *, uint); |
| 501 | void xfs_promote_inode(struct xfs_inode *); | ||
| 501 | void xfs_lock_inodes(xfs_inode_t **, int, uint); | 502 | void xfs_lock_inodes(xfs_inode_t **, int, uint); |
| 502 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); | 503 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); |
| 503 | 504 | ||
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index aa3dc1a4d53d..be5c51d8f757 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c | |||
| @@ -770,6 +770,17 @@ restart: | |||
| 770 | if (!xfs_iflock_nowait(ip)) { | 770 | if (!xfs_iflock_nowait(ip)) { |
| 771 | if (!(sync_mode & SYNC_WAIT)) | 771 | if (!(sync_mode & SYNC_WAIT)) |
| 772 | goto out; | 772 | goto out; |
| 773 | |||
| 774 | /* | ||
| 775 | * If we only have a single dirty inode in a cluster there is | ||
| 776 | * a fair chance that the AIL push may have pushed it into | ||
| 777 | * the buffer, but xfsbufd won't touch it until 30 seconds | ||
| 778 | * from now, and thus we will lock up here. | ||
| 779 | * | ||
| 780 | * Promote the inode buffer to the front of the delwri list | ||
| 781 | * and wake up xfsbufd now. | ||
| 782 | */ | ||
| 783 | xfs_promote_inode(ip); | ||
| 773 | xfs_iflock(ip); | 784 | xfs_iflock(ip); |
| 774 | } | 785 | } |
| 775 | 786 | ||
