diff options
39 files changed, 1894 insertions, 173 deletions
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index 1f7ae144f6d8..5393e6611691 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt | |||
| @@ -87,3 +87,10 @@ dir_resv_level= (*) By default, directory reservations will scale with file | |||
| 87 | reservations - users should rarely need to change this | 87 | reservations - users should rarely need to change this |
| 88 | value. If allocation reservations are turned off, this | 88 | value. If allocation reservations are turned off, this |
| 89 | option will have no effect. | 89 | option will have no effect. |
| 90 | coherency=full (*) Disallow concurrent O_DIRECT writes. The cluster inode | ||
| 91 | lock will be taken to force other nodes to drop their | ||
| 92 | caches, so full cluster coherency is guaranteed even | ||
| 93 | for O_DIRECT writes. | ||
| 94 | coherency=buffered Allow concurrent O_DIRECT writes without taking the EX | ||
| 95 | lock among nodes, which improves performance at the risk | ||
| 96 | of reading stale data on other nodes. | ||
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 5dbf4dba03c4..a367dd044280 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
| @@ -1849,8 +1849,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
| 1849 | goto failed_mount; | 1849 | goto failed_mount; |
| 1850 | } | 1850 | } |
| 1851 | 1851 | ||
| 1852 | if (le32_to_cpu(es->s_blocks_count) > | 1852 | if (generic_check_addressable(sb->s_blocksize_bits, |
| 1853 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { | 1853 | le32_to_cpu(es->s_blocks_count))) { |
| 1854 | ext3_msg(sb, KERN_ERR, | 1854 | ext3_msg(sb, KERN_ERR, |
| 1855 | "error: filesystem is too large to mount safely"); | 1855 | "error: filesystem is too large to mount safely"); |
| 1856 | if (sizeof(sector_t) < 8) | 1856 | if (sizeof(sector_t) < 8) |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 26147746c272..7f47c366bf15 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
| @@ -2831,15 +2831,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 2831 | * Test whether we have more sectors than will fit in sector_t, | 2831 | * Test whether we have more sectors than will fit in sector_t, |
| 2832 | * and whether the max offset is addressable by the page cache. | 2832 | * and whether the max offset is addressable by the page cache. |
| 2833 | */ | 2833 | */ |
| 2834 | if ((ext4_blocks_count(es) > | 2834 | ret = generic_check_addressable(sb->s_blocksize_bits, |
| 2835 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || | 2835 | ext4_blocks_count(es)); |
| 2836 | (ext4_blocks_count(es) > | 2836 | if (ret) { |
| 2837 | (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { | ||
| 2838 | ext4_msg(sb, KERN_ERR, "filesystem" | 2837 | ext4_msg(sb, KERN_ERR, "filesystem" |
| 2839 | " too large to mount safely on this system"); | 2838 | " too large to mount safely on this system"); |
| 2840 | if (sizeof(sector_t) < 8) | 2839 | if (sizeof(sector_t) < 8) |
| 2841 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); | 2840 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); |
| 2842 | ret = -EFBIG; | ||
| 2843 | goto failed_mount; | 2841 | goto failed_mount; |
| 2844 | } | 2842 | } |
| 2845 | 2843 | ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0e8014ea6b94..262419f83d80 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
| @@ -1371,6 +1371,10 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, | |||
| 1371 | 1371 | ||
| 1372 | if (!compat && !ro && !incompat) | 1372 | if (!compat && !ro && !incompat) |
| 1373 | return 1; | 1373 | return 1; |
| 1374 | /* Load journal superblock if it is not loaded yet. */ | ||
| 1375 | if (journal->j_format_version == 0 && | ||
| 1376 | journal_get_superblock(journal) != 0) | ||
| 1377 | return 0; | ||
| 1374 | if (journal->j_format_version == 1) | 1378 | if (journal->j_format_version == 1) |
| 1375 | return 0; | 1379 | return 0; |
| 1376 | 1380 | ||
diff --git a/fs/libfs.c b/fs/libfs.c index 0a9da95317f7..62baa0387d6e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
| @@ -913,6 +913,35 @@ int generic_file_fsync(struct file *file, int datasync) | |||
| 913 | } | 913 | } |
| 914 | EXPORT_SYMBOL(generic_file_fsync); | 914 | EXPORT_SYMBOL(generic_file_fsync); |
| 915 | 915 | ||
| 916 | /** | ||
| 917 | * generic_check_addressable - Check addressability of file system | ||
| 918 | * @blocksize_bits: log of file system block size | ||
| 919 | * @num_blocks: number of blocks in file system | ||
| 920 | * | ||
| 921 | * Determine whether a file system with @num_blocks blocks (and a | ||
| 922 | * block size of 2**@blocksize_bits) is addressable by the sector_t | ||
| 923 | * and page cache of the system. Return 0 if so and -EFBIG otherwise. | ||
| 924 | */ | ||
| 925 | int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) | ||
| 926 | { | ||
| 927 | u64 last_fs_block = num_blocks - 1; | ||
| 928 | u64 last_fs_page = | ||
| 929 | last_fs_block >> (PAGE_CACHE_SHIFT - blocksize_bits); | ||
| 930 | |||
| 931 | if (unlikely(num_blocks == 0)) | ||
| 932 | return 0; | ||
| 933 | |||
| 934 | if ((blocksize_bits < 9) || (blocksize_bits > PAGE_CACHE_SHIFT)) | ||
| 935 | return -EINVAL; | ||
| 936 | |||
| 937 | if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) || | ||
| 938 | (last_fs_page > (pgoff_t)(~0ULL))) { | ||
| 939 | return -EFBIG; | ||
| 940 | } | ||
| 941 | return 0; | ||
| 942 | } | ||
| 943 | EXPORT_SYMBOL(generic_check_addressable); | ||
| 944 | |||
| 916 | /* | 945 | /* |
| 917 | * No-op implementation of ->fsync for in-memory filesystems. | 946 | * No-op implementation of ->fsync for in-memory filesystems. |
| 918 | */ | 947 | */ |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 0de69c9a08be..5cfeee118158 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
| @@ -883,8 +883,8 @@ struct ocfs2_write_ctxt { | |||
| 883 | * out in so that future reads from that region will get | 883 | * out in so that future reads from that region will get |
| 884 | * zero's. | 884 | * zero's. |
| 885 | */ | 885 | */ |
| 886 | struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; | ||
| 887 | unsigned int w_num_pages; | 886 | unsigned int w_num_pages; |
| 887 | struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; | ||
| 888 | struct page *w_target_page; | 888 | struct page *w_target_page; |
| 889 | 889 | ||
| 890 | /* | 890 | /* |
| @@ -1642,7 +1642,8 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh, | |||
| 1642 | return ret; | 1642 | return ret; |
| 1643 | } | 1643 | } |
| 1644 | 1644 | ||
| 1645 | int ocfs2_write_begin_nolock(struct address_space *mapping, | 1645 | int ocfs2_write_begin_nolock(struct file *filp, |
| 1646 | struct address_space *mapping, | ||
| 1646 | loff_t pos, unsigned len, unsigned flags, | 1647 | loff_t pos, unsigned len, unsigned flags, |
| 1647 | struct page **pagep, void **fsdata, | 1648 | struct page **pagep, void **fsdata, |
| 1648 | struct buffer_head *di_bh, struct page *mmap_page) | 1649 | struct buffer_head *di_bh, struct page *mmap_page) |
| @@ -1692,7 +1693,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
| 1692 | mlog_errno(ret); | 1693 | mlog_errno(ret); |
| 1693 | goto out; | 1694 | goto out; |
| 1694 | } else if (ret == 1) { | 1695 | } else if (ret == 1) { |
| 1695 | ret = ocfs2_refcount_cow(inode, di_bh, | 1696 | ret = ocfs2_refcount_cow(inode, filp, di_bh, |
| 1696 | wc->w_cpos, wc->w_clen, UINT_MAX); | 1697 | wc->w_cpos, wc->w_clen, UINT_MAX); |
| 1697 | if (ret) { | 1698 | if (ret) { |
| 1698 | mlog_errno(ret); | 1699 | mlog_errno(ret); |
| @@ -1854,7 +1855,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, | |||
| 1854 | */ | 1855 | */ |
| 1855 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 1856 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
| 1856 | 1857 | ||
| 1857 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, | 1858 | ret = ocfs2_write_begin_nolock(file, mapping, pos, len, flags, pagep, |
| 1858 | fsdata, di_bh, NULL); | 1859 | fsdata, di_bh, NULL); |
| 1859 | if (ret) { | 1860 | if (ret) { |
| 1860 | mlog_errno(ret); | 1861 | mlog_errno(ret); |
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index c48e93ffc513..7606f663da6d 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h | |||
| @@ -48,7 +48,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping, | |||
| 48 | loff_t pos, unsigned len, unsigned copied, | 48 | loff_t pos, unsigned len, unsigned copied, |
| 49 | struct page *page, void *fsdata); | 49 | struct page *page, void *fsdata); |
| 50 | 50 | ||
| 51 | int ocfs2_write_begin_nolock(struct address_space *mapping, | 51 | int ocfs2_write_begin_nolock(struct file *filp, |
| 52 | struct address_space *mapping, | ||
| 52 | loff_t pos, unsigned len, unsigned flags, | 53 | loff_t pos, unsigned len, unsigned flags, |
| 53 | struct page **pagep, void **fsdata, | 54 | struct page **pagep, void **fsdata, |
| 54 | struct buffer_head *di_bh, struct page *mmap_page); | 55 | struct buffer_head *di_bh, struct page *mmap_page); |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 41d5f1f92d56..52c7557f3e25 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
| @@ -62,10 +62,51 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | |||
| 62 | static LIST_HEAD(o2hb_node_events); | 62 | static LIST_HEAD(o2hb_node_events); |
| 63 | static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); | 63 | static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); |
| 64 | 64 | ||
| 65 | /* | ||
| 66 | * In global heartbeat, we maintain a series of region bitmaps. | ||
| 67 | * - o2hb_region_bitmap allows us to limit the region number to max region. | ||
| 68 | * - o2hb_live_region_bitmap tracks live regions (seen steady iterations). | ||
| 69 | * - o2hb_quorum_region_bitmap tracks live regions that have seen all nodes | ||
| 70 | * heartbeat on it. | ||
| 71 | * - o2hb_failed_region_bitmap tracks the regions that have seen io timeouts. | ||
| 72 | */ | ||
| 73 | static unsigned long o2hb_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
| 74 | static unsigned long o2hb_live_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
| 75 | static unsigned long o2hb_quorum_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
| 76 | static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
| 77 | |||
| 78 | #define O2HB_DB_TYPE_LIVENODES 0 | ||
| 79 | #define O2HB_DB_TYPE_LIVEREGIONS 1 | ||
| 80 | #define O2HB_DB_TYPE_QUORUMREGIONS 2 | ||
| 81 | #define O2HB_DB_TYPE_FAILEDREGIONS 3 | ||
| 82 | #define O2HB_DB_TYPE_REGION_LIVENODES 4 | ||
| 83 | #define O2HB_DB_TYPE_REGION_NUMBER 5 | ||
| 84 | #define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6 | ||
| 85 | struct o2hb_debug_buf { | ||
| 86 | int db_type; | ||
| 87 | int db_size; | ||
| 88 | int db_len; | ||
| 89 | void *db_data; | ||
| 90 | }; | ||
| 91 | |||
| 92 | static struct o2hb_debug_buf *o2hb_db_livenodes; | ||
| 93 | static struct o2hb_debug_buf *o2hb_db_liveregions; | ||
| 94 | static struct o2hb_debug_buf *o2hb_db_quorumregions; | ||
| 95 | static struct o2hb_debug_buf *o2hb_db_failedregions; | ||
| 96 | |||
| 65 | #define O2HB_DEBUG_DIR "o2hb" | 97 | #define O2HB_DEBUG_DIR "o2hb" |
| 66 | #define O2HB_DEBUG_LIVENODES "livenodes" | 98 | #define O2HB_DEBUG_LIVENODES "livenodes" |
| 99 | #define O2HB_DEBUG_LIVEREGIONS "live_regions" | ||
| 100 | #define O2HB_DEBUG_QUORUMREGIONS "quorum_regions" | ||
| 101 | #define O2HB_DEBUG_FAILEDREGIONS "failed_regions" | ||
| 102 | #define O2HB_DEBUG_REGION_NUMBER "num" | ||
| 103 | #define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms" | ||
| 104 | |||
| 67 | static struct dentry *o2hb_debug_dir; | 105 | static struct dentry *o2hb_debug_dir; |
| 68 | static struct dentry *o2hb_debug_livenodes; | 106 | static struct dentry *o2hb_debug_livenodes; |
| 107 | static struct dentry *o2hb_debug_liveregions; | ||
| 108 | static struct dentry *o2hb_debug_quorumregions; | ||
| 109 | static struct dentry *o2hb_debug_failedregions; | ||
| 69 | 110 | ||
| 70 | static LIST_HEAD(o2hb_all_regions); | 111 | static LIST_HEAD(o2hb_all_regions); |
| 71 | 112 | ||
| @@ -77,7 +118,19 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type); | |||
| 77 | 118 | ||
| 78 | #define O2HB_DEFAULT_BLOCK_BITS 9 | 119 | #define O2HB_DEFAULT_BLOCK_BITS 9 |
| 79 | 120 | ||
| 121 | enum o2hb_heartbeat_modes { | ||
| 122 | O2HB_HEARTBEAT_LOCAL = 0, | ||
| 123 | O2HB_HEARTBEAT_GLOBAL, | ||
| 124 | O2HB_HEARTBEAT_NUM_MODES, | ||
| 125 | }; | ||
| 126 | |||
| 127 | char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = { | ||
| 128 | "local", /* O2HB_HEARTBEAT_LOCAL */ | ||
| 129 | "global", /* O2HB_HEARTBEAT_GLOBAL */ | ||
| 130 | }; | ||
| 131 | |||
| 80 | unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; | 132 | unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; |
| 133 | unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; | ||
| 81 | 134 | ||
| 82 | /* Only sets a new threshold if there are no active regions. | 135 | /* Only sets a new threshold if there are no active regions. |
| 83 | * | 136 | * |
| @@ -94,6 +147,22 @@ static void o2hb_dead_threshold_set(unsigned int threshold) | |||
| 94 | } | 147 | } |
| 95 | } | 148 | } |
| 96 | 149 | ||
| 150 | static int o2hb_global_hearbeat_mode_set(unsigned int hb_mode) | ||
| 151 | { | ||
| 152 | int ret = -1; | ||
| 153 | |||
| 154 | if (hb_mode < O2HB_HEARTBEAT_NUM_MODES) { | ||
| 155 | spin_lock(&o2hb_live_lock); | ||
| 156 | if (list_empty(&o2hb_all_regions)) { | ||
| 157 | o2hb_heartbeat_mode = hb_mode; | ||
| 158 | ret = 0; | ||
| 159 | } | ||
| 160 | spin_unlock(&o2hb_live_lock); | ||
| 161 | } | ||
| 162 | |||
| 163 | return ret; | ||
| 164 | } | ||
| 165 | |||
| 97 | struct o2hb_node_event { | 166 | struct o2hb_node_event { |
| 98 | struct list_head hn_item; | 167 | struct list_head hn_item; |
| 99 | enum o2hb_callback_type hn_event_type; | 168 | enum o2hb_callback_type hn_event_type; |
| @@ -135,6 +204,18 @@ struct o2hb_region { | |||
| 135 | struct block_device *hr_bdev; | 204 | struct block_device *hr_bdev; |
| 136 | struct o2hb_disk_slot *hr_slots; | 205 | struct o2hb_disk_slot *hr_slots; |
| 137 | 206 | ||
| 207 | /* live node map of this region */ | ||
| 208 | unsigned long hr_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 209 | unsigned int hr_region_num; | ||
| 210 | |||
| 211 | struct dentry *hr_debug_dir; | ||
| 212 | struct dentry *hr_debug_livenodes; | ||
| 213 | struct dentry *hr_debug_regnum; | ||
| 214 | struct dentry *hr_debug_elapsed_time; | ||
| 215 | struct o2hb_debug_buf *hr_db_livenodes; | ||
| 216 | struct o2hb_debug_buf *hr_db_regnum; | ||
| 217 | struct o2hb_debug_buf *hr_db_elapsed_time; | ||
| 218 | |||
| 138 | /* let the person setting up hb wait for it to return until it | 219 | /* let the person setting up hb wait for it to return until it |
| 139 | * has reached a 'steady' state. This will be fixed when we have | 220 | * has reached a 'steady' state. This will be fixed when we have |
| 140 | * a more complete api that doesn't lead to this sort of fragility. */ | 221 | * a more complete api that doesn't lead to this sort of fragility. */ |
| @@ -163,8 +244,19 @@ struct o2hb_bio_wait_ctxt { | |||
| 163 | int wc_error; | 244 | int wc_error; |
| 164 | }; | 245 | }; |
| 165 | 246 | ||
| 247 | static int o2hb_pop_count(void *map, int count) | ||
| 248 | { | ||
| 249 | int i = -1, pop = 0; | ||
| 250 | |||
| 251 | while ((i = find_next_bit(map, count, i + 1)) < count) | ||
| 252 | pop++; | ||
| 253 | return pop; | ||
| 254 | } | ||
| 255 | |||
| 166 | static void o2hb_write_timeout(struct work_struct *work) | 256 | static void o2hb_write_timeout(struct work_struct *work) |
| 167 | { | 257 | { |
| 258 | int failed, quorum; | ||
| 259 | unsigned long flags; | ||
| 168 | struct o2hb_region *reg = | 260 | struct o2hb_region *reg = |
| 169 | container_of(work, struct o2hb_region, | 261 | container_of(work, struct o2hb_region, |
| 170 | hr_write_timeout_work.work); | 262 | hr_write_timeout_work.work); |
| @@ -172,6 +264,28 @@ static void o2hb_write_timeout(struct work_struct *work) | |||
| 172 | mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " | 264 | mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " |
| 173 | "milliseconds\n", reg->hr_dev_name, | 265 | "milliseconds\n", reg->hr_dev_name, |
| 174 | jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); | 266 | jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); |
| 267 | |||
| 268 | if (o2hb_global_heartbeat_active()) { | ||
| 269 | spin_lock_irqsave(&o2hb_live_lock, flags); | ||
| 270 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
| 271 | set_bit(reg->hr_region_num, o2hb_failed_region_bitmap); | ||
| 272 | failed = o2hb_pop_count(&o2hb_failed_region_bitmap, | ||
| 273 | O2NM_MAX_REGIONS); | ||
| 274 | quorum = o2hb_pop_count(&o2hb_quorum_region_bitmap, | ||
| 275 | O2NM_MAX_REGIONS); | ||
| 276 | spin_unlock_irqrestore(&o2hb_live_lock, flags); | ||
| 277 | |||
| 278 | mlog(ML_HEARTBEAT, "Number of regions %d, failed regions %d\n", | ||
| 279 | quorum, failed); | ||
| 280 | |||
| 281 | /* | ||
| 282 | * Fence if the number of failed regions >= half the number | ||
| 283 | * of quorum regions | ||
| 284 | */ | ||
| 285 | if ((failed << 1) < quorum) | ||
| 286 | return; | ||
| 287 | } | ||
| 288 | |||
| 175 | o2quo_disk_timeout(); | 289 | o2quo_disk_timeout(); |
| 176 | } | 290 | } |
| 177 | 291 | ||
| @@ -180,6 +294,11 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg) | |||
| 180 | mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", | 294 | mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", |
| 181 | O2HB_MAX_WRITE_TIMEOUT_MS); | 295 | O2HB_MAX_WRITE_TIMEOUT_MS); |
| 182 | 296 | ||
| 297 | if (o2hb_global_heartbeat_active()) { | ||
| 298 | spin_lock(&o2hb_live_lock); | ||
| 299 | clear_bit(reg->hr_region_num, o2hb_failed_region_bitmap); | ||
| 300 | spin_unlock(&o2hb_live_lock); | ||
| 301 | } | ||
| 183 | cancel_delayed_work(®->hr_write_timeout_work); | 302 | cancel_delayed_work(®->hr_write_timeout_work); |
| 184 | reg->hr_last_timeout_start = jiffies; | 303 | reg->hr_last_timeout_start = jiffies; |
| 185 | schedule_delayed_work(®->hr_write_timeout_work, | 304 | schedule_delayed_work(®->hr_write_timeout_work, |
| @@ -513,6 +632,8 @@ static void o2hb_queue_node_event(struct o2hb_node_event *event, | |||
| 513 | { | 632 | { |
| 514 | assert_spin_locked(&o2hb_live_lock); | 633 | assert_spin_locked(&o2hb_live_lock); |
| 515 | 634 | ||
| 635 | BUG_ON((!node) && (type != O2HB_NODE_DOWN_CB)); | ||
| 636 | |||
| 516 | event->hn_event_type = type; | 637 | event->hn_event_type = type; |
| 517 | event->hn_node = node; | 638 | event->hn_node = node; |
| 518 | event->hn_node_num = node_num; | 639 | event->hn_node_num = node_num; |
| @@ -554,6 +675,35 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) | |||
| 554 | o2nm_node_put(node); | 675 | o2nm_node_put(node); |
| 555 | } | 676 | } |
| 556 | 677 | ||
| 678 | static void o2hb_set_quorum_device(struct o2hb_region *reg, | ||
| 679 | struct o2hb_disk_slot *slot) | ||
| 680 | { | ||
| 681 | assert_spin_locked(&o2hb_live_lock); | ||
| 682 | |||
| 683 | if (!o2hb_global_heartbeat_active()) | ||
| 684 | return; | ||
| 685 | |||
| 686 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
| 687 | return; | ||
| 688 | |||
| 689 | /* | ||
| 690 | * A region can be added to the quorum only when it sees all | ||
| 691 | * live nodes heartbeat on it. In other words, the region has been | ||
| 692 | * added to all nodes. | ||
| 693 | */ | ||
| 694 | if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap, | ||
| 695 | sizeof(o2hb_live_node_bitmap))) | ||
| 696 | return; | ||
| 697 | |||
| 698 | if (slot->ds_changed_samples < O2HB_LIVE_THRESHOLD) | ||
| 699 | return; | ||
| 700 | |||
| 701 | printk(KERN_NOTICE "o2hb: Region %s is now a quorum device\n", | ||
| 702 | config_item_name(®->hr_item)); | ||
| 703 | |||
| 704 | set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); | ||
| 705 | } | ||
| 706 | |||
| 557 | static int o2hb_check_slot(struct o2hb_region *reg, | 707 | static int o2hb_check_slot(struct o2hb_region *reg, |
| 558 | struct o2hb_disk_slot *slot) | 708 | struct o2hb_disk_slot *slot) |
| 559 | { | 709 | { |
| @@ -565,14 +715,22 @@ static int o2hb_check_slot(struct o2hb_region *reg, | |||
| 565 | u64 cputime; | 715 | u64 cputime; |
| 566 | unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; | 716 | unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; |
| 567 | unsigned int slot_dead_ms; | 717 | unsigned int slot_dead_ms; |
| 718 | int tmp; | ||
| 568 | 719 | ||
| 569 | memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); | 720 | memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); |
| 570 | 721 | ||
| 571 | /* Is this correct? Do we assume that the node doesn't exist | 722 | /* |
| 572 | * if we're not configured for him? */ | 723 | * If a node is no longer configured but is still in the livemap, we |
| 724 | * may need to clear that bit from the livemap. | ||
| 725 | */ | ||
| 573 | node = o2nm_get_node_by_num(slot->ds_node_num); | 726 | node = o2nm_get_node_by_num(slot->ds_node_num); |
| 574 | if (!node) | 727 | if (!node) { |
| 575 | return 0; | 728 | spin_lock(&o2hb_live_lock); |
| 729 | tmp = test_bit(slot->ds_node_num, o2hb_live_node_bitmap); | ||
| 730 | spin_unlock(&o2hb_live_lock); | ||
| 731 | if (!tmp) | ||
| 732 | return 0; | ||
| 733 | } | ||
| 576 | 734 | ||
| 577 | if (!o2hb_verify_crc(reg, hb_block)) { | 735 | if (!o2hb_verify_crc(reg, hb_block)) { |
| 578 | /* all paths from here will drop o2hb_live_lock for | 736 | /* all paths from here will drop o2hb_live_lock for |
| @@ -639,8 +797,12 @@ fire_callbacks: | |||
| 639 | mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n", | 797 | mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n", |
| 640 | slot->ds_node_num, (long long)slot->ds_last_generation); | 798 | slot->ds_node_num, (long long)slot->ds_last_generation); |
| 641 | 799 | ||
| 800 | set_bit(slot->ds_node_num, reg->hr_live_node_bitmap); | ||
| 801 | |||
| 642 | /* first on the list generates a callback */ | 802 | /* first on the list generates a callback */ |
| 643 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { | 803 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { |
| 804 | mlog(ML_HEARTBEAT, "o2hb: Add node %d to live nodes " | ||
| 805 | "bitmap\n", slot->ds_node_num); | ||
| 644 | set_bit(slot->ds_node_num, o2hb_live_node_bitmap); | 806 | set_bit(slot->ds_node_num, o2hb_live_node_bitmap); |
| 645 | 807 | ||
| 646 | o2hb_queue_node_event(&event, O2HB_NODE_UP_CB, node, | 808 | o2hb_queue_node_event(&event, O2HB_NODE_UP_CB, node, |
| @@ -684,13 +846,18 @@ fire_callbacks: | |||
| 684 | mlog(ML_HEARTBEAT, "Node %d left my region\n", | 846 | mlog(ML_HEARTBEAT, "Node %d left my region\n", |
| 685 | slot->ds_node_num); | 847 | slot->ds_node_num); |
| 686 | 848 | ||
| 849 | clear_bit(slot->ds_node_num, reg->hr_live_node_bitmap); | ||
| 850 | |||
| 687 | /* last off the live_slot generates a callback */ | 851 | /* last off the live_slot generates a callback */ |
| 688 | list_del_init(&slot->ds_live_item); | 852 | list_del_init(&slot->ds_live_item); |
| 689 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { | 853 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { |
| 854 | mlog(ML_HEARTBEAT, "o2hb: Remove node %d from live " | ||
| 855 | "nodes bitmap\n", slot->ds_node_num); | ||
| 690 | clear_bit(slot->ds_node_num, o2hb_live_node_bitmap); | 856 | clear_bit(slot->ds_node_num, o2hb_live_node_bitmap); |
| 691 | 857 | ||
| 692 | o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node, | 858 | /* node can be null */ |
| 693 | slot->ds_node_num); | 859 | o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, |
| 860 | node, slot->ds_node_num); | ||
| 694 | 861 | ||
| 695 | changed = 1; | 862 | changed = 1; |
| 696 | } | 863 | } |
| @@ -706,11 +873,14 @@ fire_callbacks: | |||
| 706 | slot->ds_equal_samples = 0; | 873 | slot->ds_equal_samples = 0; |
| 707 | } | 874 | } |
| 708 | out: | 875 | out: |
| 876 | o2hb_set_quorum_device(reg, slot); | ||
| 877 | |||
| 709 | spin_unlock(&o2hb_live_lock); | 878 | spin_unlock(&o2hb_live_lock); |
| 710 | 879 | ||
| 711 | o2hb_run_event_list(&event); | 880 | o2hb_run_event_list(&event); |
| 712 | 881 | ||
| 713 | o2nm_node_put(node); | 882 | if (node) |
| 883 | o2nm_node_put(node); | ||
| 714 | return changed; | 884 | return changed; |
| 715 | } | 885 | } |
| 716 | 886 | ||
| @@ -737,6 +907,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
| 737 | { | 907 | { |
| 738 | int i, ret, highest_node, change = 0; | 908 | int i, ret, highest_node, change = 0; |
| 739 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 909 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
| 910 | unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
| 740 | struct o2hb_bio_wait_ctxt write_wc; | 911 | struct o2hb_bio_wait_ctxt write_wc; |
| 741 | 912 | ||
| 742 | ret = o2nm_configured_node_map(configured_nodes, | 913 | ret = o2nm_configured_node_map(configured_nodes, |
| @@ -746,6 +917,17 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
| 746 | return ret; | 917 | return ret; |
| 747 | } | 918 | } |
| 748 | 919 | ||
| 920 | /* | ||
| 921 | * If a node is not configured but is in the livemap, we still need | ||
| 922 | * to read the slot so as to be able to remove it from the livemap. | ||
| 923 | */ | ||
| 924 | o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap)); | ||
| 925 | i = -1; | ||
| 926 | while ((i = find_next_bit(live_node_bitmap, | ||
| 927 | O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { | ||
| 928 | set_bit(i, configured_nodes); | ||
| 929 | } | ||
| 930 | |||
| 749 | highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); | 931 | highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); |
| 750 | if (highest_node >= O2NM_MAX_NODES) { | 932 | if (highest_node >= O2NM_MAX_NODES) { |
| 751 | mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); | 933 | mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); |
| @@ -917,21 +1099,59 @@ static int o2hb_thread(void *data) | |||
| 917 | #ifdef CONFIG_DEBUG_FS | 1099 | #ifdef CONFIG_DEBUG_FS |
| 918 | static int o2hb_debug_open(struct inode *inode, struct file *file) | 1100 | static int o2hb_debug_open(struct inode *inode, struct file *file) |
| 919 | { | 1101 | { |
| 1102 | struct o2hb_debug_buf *db = inode->i_private; | ||
| 1103 | struct o2hb_region *reg; | ||
| 920 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 1104 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
| 921 | char *buf = NULL; | 1105 | char *buf = NULL; |
| 922 | int i = -1; | 1106 | int i = -1; |
| 923 | int out = 0; | 1107 | int out = 0; |
| 924 | 1108 | ||
| 1109 | /* max_nodes should be the largest bitmap we pass here */ | ||
| 1110 | BUG_ON(sizeof(map) < db->db_size); | ||
| 1111 | |||
| 925 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | 1112 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); |
| 926 | if (!buf) | 1113 | if (!buf) |
| 927 | goto bail; | 1114 | goto bail; |
| 928 | 1115 | ||
| 929 | o2hb_fill_node_map(map, sizeof(map)); | 1116 | switch (db->db_type) { |
| 1117 | case O2HB_DB_TYPE_LIVENODES: | ||
| 1118 | case O2HB_DB_TYPE_LIVEREGIONS: | ||
| 1119 | case O2HB_DB_TYPE_QUORUMREGIONS: | ||
| 1120 | case O2HB_DB_TYPE_FAILEDREGIONS: | ||
| 1121 | spin_lock(&o2hb_live_lock); | ||
| 1122 | memcpy(map, db->db_data, db->db_size); | ||
| 1123 | spin_unlock(&o2hb_live_lock); | ||
| 1124 | break; | ||
| 1125 | |||
| 1126 | case O2HB_DB_TYPE_REGION_LIVENODES: | ||
| 1127 | spin_lock(&o2hb_live_lock); | ||
| 1128 | reg = (struct o2hb_region *)db->db_data; | ||
| 1129 | memcpy(map, reg->hr_live_node_bitmap, db->db_size); | ||
| 1130 | spin_unlock(&o2hb_live_lock); | ||
| 1131 | break; | ||
| 1132 | |||
| 1133 | case O2HB_DB_TYPE_REGION_NUMBER: | ||
| 1134 | reg = (struct o2hb_region *)db->db_data; | ||
| 1135 | out += snprintf(buf + out, PAGE_SIZE - out, "%d\n", | ||
| 1136 | reg->hr_region_num); | ||
| 1137 | goto done; | ||
| 1138 | |||
| 1139 | case O2HB_DB_TYPE_REGION_ELAPSED_TIME: | ||
| 1140 | reg = (struct o2hb_region *)db->db_data; | ||
| 1141 | out += snprintf(buf + out, PAGE_SIZE - out, "%u\n", | ||
| 1142 | jiffies_to_msecs(jiffies - | ||
| 1143 | reg->hr_last_timeout_start)); | ||
| 1144 | goto done; | ||
| 1145 | |||
| 1146 | default: | ||
| 1147 | goto done; | ||
| 1148 | } | ||
| 930 | 1149 | ||
| 931 | while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) | 1150 | while ((i = find_next_bit(map, db->db_len, i + 1)) < db->db_len) |
| 932 | out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); | 1151 | out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); |
| 933 | out += snprintf(buf + out, PAGE_SIZE - out, "\n"); | 1152 | out += snprintf(buf + out, PAGE_SIZE - out, "\n"); |
| 934 | 1153 | ||
| 1154 | done: | ||
| 935 | i_size_write(inode, out); | 1155 | i_size_write(inode, out); |
| 936 | 1156 | ||
| 937 | file->private_data = buf; | 1157 | file->private_data = buf; |
| @@ -978,10 +1198,104 @@ static const struct file_operations o2hb_debug_fops = { | |||
| 978 | 1198 | ||
| 979 | void o2hb_exit(void) | 1199 | void o2hb_exit(void) |
| 980 | { | 1200 | { |
| 981 | if (o2hb_debug_livenodes) | 1201 | kfree(o2hb_db_livenodes); |
| 982 | debugfs_remove(o2hb_debug_livenodes); | 1202 | kfree(o2hb_db_liveregions); |
| 983 | if (o2hb_debug_dir) | 1203 | kfree(o2hb_db_quorumregions); |
| 984 | debugfs_remove(o2hb_debug_dir); | 1204 | kfree(o2hb_db_failedregions); |
| 1205 | debugfs_remove(o2hb_debug_failedregions); | ||
| 1206 | debugfs_remove(o2hb_debug_quorumregions); | ||
| 1207 | debugfs_remove(o2hb_debug_liveregions); | ||
| 1208 | debugfs_remove(o2hb_debug_livenodes); | ||
| 1209 | debugfs_remove(o2hb_debug_dir); | ||
| 1210 | } | ||
| 1211 | |||
| 1212 | static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir, | ||
| 1213 | struct o2hb_debug_buf **db, int db_len, | ||
| 1214 | int type, int size, int len, void *data) | ||
| 1215 | { | ||
| 1216 | *db = kmalloc(db_len, GFP_KERNEL); | ||
| 1217 | if (!*db) | ||
| 1218 | return NULL; | ||
| 1219 | |||
| 1220 | (*db)->db_type = type; | ||
| 1221 | (*db)->db_size = size; | ||
| 1222 | (*db)->db_len = len; | ||
| 1223 | (*db)->db_data = data; | ||
| 1224 | |||
| 1225 | return debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db, | ||
| 1226 | &o2hb_debug_fops); | ||
| 1227 | } | ||
| 1228 | |||
| 1229 | static int o2hb_debug_init(void) | ||
| 1230 | { | ||
| 1231 | int ret = -ENOMEM; | ||
| 1232 | |||
| 1233 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); | ||
| 1234 | if (!o2hb_debug_dir) { | ||
| 1235 | mlog_errno(ret); | ||
| 1236 | goto bail; | ||
| 1237 | } | ||
| 1238 | |||
| 1239 | o2hb_debug_livenodes = o2hb_debug_create(O2HB_DEBUG_LIVENODES, | ||
| 1240 | o2hb_debug_dir, | ||
| 1241 | &o2hb_db_livenodes, | ||
| 1242 | sizeof(*o2hb_db_livenodes), | ||
| 1243 | O2HB_DB_TYPE_LIVENODES, | ||
| 1244 | sizeof(o2hb_live_node_bitmap), | ||
| 1245 | O2NM_MAX_NODES, | ||
| 1246 | o2hb_live_node_bitmap); | ||
| 1247 | if (!o2hb_debug_livenodes) { | ||
| 1248 | mlog_errno(ret); | ||
| 1249 | goto bail; | ||
| 1250 | } | ||
| 1251 | |||
| 1252 | o2hb_debug_liveregions = o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS, | ||
| 1253 | o2hb_debug_dir, | ||
| 1254 | &o2hb_db_liveregions, | ||
| 1255 | sizeof(*o2hb_db_liveregions), | ||
| 1256 | O2HB_DB_TYPE_LIVEREGIONS, | ||
| 1257 | sizeof(o2hb_live_region_bitmap), | ||
| 1258 | O2NM_MAX_REGIONS, | ||
| 1259 | o2hb_live_region_bitmap); | ||
| 1260 | if (!o2hb_debug_liveregions) { | ||
| 1261 | mlog_errno(ret); | ||
| 1262 | goto bail; | ||
| 1263 | } | ||
| 1264 | |||
| 1265 | o2hb_debug_quorumregions = | ||
| 1266 | o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS, | ||
| 1267 | o2hb_debug_dir, | ||
| 1268 | &o2hb_db_quorumregions, | ||
| 1269 | sizeof(*o2hb_db_quorumregions), | ||
| 1270 | O2HB_DB_TYPE_QUORUMREGIONS, | ||
| 1271 | sizeof(o2hb_quorum_region_bitmap), | ||
| 1272 | O2NM_MAX_REGIONS, | ||
| 1273 | o2hb_quorum_region_bitmap); | ||
| 1274 | if (!o2hb_debug_quorumregions) { | ||
| 1275 | mlog_errno(ret); | ||
| 1276 | goto bail; | ||
| 1277 | } | ||
| 1278 | |||
| 1279 | o2hb_debug_failedregions = | ||
| 1280 | o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS, | ||
| 1281 | o2hb_debug_dir, | ||
| 1282 | &o2hb_db_failedregions, | ||
| 1283 | sizeof(*o2hb_db_failedregions), | ||
| 1284 | O2HB_DB_TYPE_FAILEDREGIONS, | ||
| 1285 | sizeof(o2hb_failed_region_bitmap), | ||
| 1286 | O2NM_MAX_REGIONS, | ||
| 1287 | o2hb_failed_region_bitmap); | ||
| 1288 | if (!o2hb_debug_failedregions) { | ||
| 1289 | mlog_errno(ret); | ||
| 1290 | goto bail; | ||
| 1291 | } | ||
| 1292 | |||
| 1293 | ret = 0; | ||
| 1294 | bail: | ||
| 1295 | if (ret) | ||
| 1296 | o2hb_exit(); | ||
| 1297 | |||
| 1298 | return ret; | ||
| 985 | } | 1299 | } |
| 986 | 1300 | ||
| 987 | int o2hb_init(void) | 1301 | int o2hb_init(void) |
| @@ -997,24 +1311,12 @@ int o2hb_init(void) | |||
| 997 | INIT_LIST_HEAD(&o2hb_node_events); | 1311 | INIT_LIST_HEAD(&o2hb_node_events); |
| 998 | 1312 | ||
| 999 | memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); | 1313 | memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); |
| 1314 | memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap)); | ||
| 1315 | memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap)); | ||
| 1316 | memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap)); | ||
| 1317 | memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap)); | ||
| 1000 | 1318 | ||
| 1001 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); | 1319 | return o2hb_debug_init(); |
| 1002 | if (!o2hb_debug_dir) { | ||
| 1003 | mlog_errno(-ENOMEM); | ||
| 1004 | return -ENOMEM; | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | o2hb_debug_livenodes = debugfs_create_file(O2HB_DEBUG_LIVENODES, | ||
| 1008 | S_IFREG|S_IRUSR, | ||
| 1009 | o2hb_debug_dir, NULL, | ||
| 1010 | &o2hb_debug_fops); | ||
| 1011 | if (!o2hb_debug_livenodes) { | ||
| 1012 | mlog_errno(-ENOMEM); | ||
| 1013 | debugfs_remove(o2hb_debug_dir); | ||
| 1014 | return -ENOMEM; | ||
| 1015 | } | ||
| 1016 | |||
| 1017 | return 0; | ||
| 1018 | } | 1320 | } |
| 1019 | 1321 | ||
| 1020 | /* if we're already in a callback then we're already serialized by the sem */ | 1322 | /* if we're already in a callback then we're already serialized by the sem */ |
| @@ -1078,6 +1380,13 @@ static void o2hb_region_release(struct config_item *item) | |||
| 1078 | if (reg->hr_slots) | 1380 | if (reg->hr_slots) |
| 1079 | kfree(reg->hr_slots); | 1381 | kfree(reg->hr_slots); |
| 1080 | 1382 | ||
| 1383 | kfree(reg->hr_db_regnum); | ||
| 1384 | kfree(reg->hr_db_livenodes); | ||
| 1385 | debugfs_remove(reg->hr_debug_livenodes); | ||
| 1386 | debugfs_remove(reg->hr_debug_regnum); | ||
| 1387 | debugfs_remove(reg->hr_debug_elapsed_time); | ||
| 1388 | debugfs_remove(reg->hr_debug_dir); | ||
| 1389 | |||
| 1081 | spin_lock(&o2hb_live_lock); | 1390 | spin_lock(&o2hb_live_lock); |
| 1083 | list_del(&reg->hr_all_item); | 1391 | list_del(&reg->hr_all_item); |
| 1083 | spin_unlock(&o2hb_live_lock); | 1392 | spin_unlock(&o2hb_live_lock); |
| @@ -1441,6 +1750,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
| 1441 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ | 1750 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ |
| 1442 | spin_lock(&o2hb_live_lock); | 1751 | spin_lock(&o2hb_live_lock); |
| 1443 | hb_task = reg->hr_task; | 1752 | hb_task = reg->hr_task; |
| 1753 | if (o2hb_global_heartbeat_active()) | ||
| 1754 | set_bit(reg->hr_region_num, o2hb_live_region_bitmap); | ||
| 1444 | spin_unlock(&o2hb_live_lock); | 1755 | spin_unlock(&o2hb_live_lock); |
| 1445 | 1756 | ||
| 1446 | if (hb_task) | 1757 | if (hb_task) |
| @@ -1448,6 +1759,10 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
| 1448 | else | 1759 | else |
| 1449 | ret = -EIO; | 1760 | ret = -EIO; |
| 1450 | 1761 | ||
| 1762 | if (hb_task && o2hb_global_heartbeat_active()) | ||
| 1763 | printk(KERN_NOTICE "o2hb: Heartbeat started on region %s\n", | ||
| 1764 | config_item_name(&reg->hr_item)); | ||
| 1765 | |||
| 1451 | out: | 1766 | out: |
| 1452 | if (filp) | 1767 | if (filp) |
| 1453 | fput(filp); | 1768 | fput(filp); |
| @@ -1586,21 +1901,94 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group | |||
| 1586 | : NULL; | 1901 | : NULL; |
| 1587 | } | 1902 | } |
| 1588 | 1903 | ||
| 1904 | static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | ||
| 1905 | { | ||
| 1906 | int ret = -ENOMEM; | ||
| 1907 | |||
| 1908 | reg->hr_debug_dir = | ||
| 1909 | debugfs_create_dir(config_item_name(&reg->hr_item), dir); | ||
| 1910 | if (!reg->hr_debug_dir) { | ||
| 1911 | mlog_errno(ret); | ||
| 1912 | goto bail; | ||
| 1913 | } | ||
| 1914 | |||
| 1915 | reg->hr_debug_livenodes = | ||
| 1916 | o2hb_debug_create(O2HB_DEBUG_LIVENODES, | ||
| 1917 | reg->hr_debug_dir, | ||
| 1918 | &(reg->hr_db_livenodes), | ||
| 1919 | sizeof(*(reg->hr_db_livenodes)), | ||
| 1920 | O2HB_DB_TYPE_REGION_LIVENODES, | ||
| 1921 | sizeof(reg->hr_live_node_bitmap), | ||
| 1922 | O2NM_MAX_NODES, reg); | ||
| 1923 | if (!reg->hr_debug_livenodes) { | ||
| 1924 | mlog_errno(ret); | ||
| 1925 | goto bail; | ||
| 1926 | } | ||
| 1927 | |||
| 1928 | reg->hr_debug_regnum = | ||
| 1929 | o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER, | ||
| 1930 | reg->hr_debug_dir, | ||
| 1931 | &(reg->hr_db_regnum), | ||
| 1932 | sizeof(*(reg->hr_db_regnum)), | ||
| 1933 | O2HB_DB_TYPE_REGION_NUMBER, | ||
| 1934 | 0, O2NM_MAX_NODES, reg); | ||
| 1935 | if (!reg->hr_debug_regnum) { | ||
| 1936 | mlog_errno(ret); | ||
| 1937 | goto bail; | ||
| 1938 | } | ||
| 1939 | |||
| 1940 | reg->hr_debug_elapsed_time = | ||
| 1941 | o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME, | ||
| 1942 | reg->hr_debug_dir, | ||
| 1943 | &(reg->hr_db_elapsed_time), | ||
| 1944 | sizeof(*(reg->hr_db_elapsed_time)), | ||
| 1945 | O2HB_DB_TYPE_REGION_ELAPSED_TIME, | ||
| 1946 | 0, 0, reg); | ||
| 1947 | if (!reg->hr_debug_elapsed_time) { | ||
| 1948 | mlog_errno(ret); | ||
| 1949 | goto bail; | ||
| 1950 | } | ||
| 1951 | |||
| 1952 | ret = 0; | ||
| 1953 | bail: | ||
| 1954 | return ret; | ||
| 1955 | } | ||
| 1956 | |||
| 1589 | static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group, | 1957 | static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group, |
| 1590 | const char *name) | 1958 | const char *name) |
| 1591 | { | 1959 | { |
| 1592 | struct o2hb_region *reg = NULL; | 1960 | struct o2hb_region *reg = NULL; |
| 1961 | int ret; | ||
| 1593 | 1962 | ||
| 1594 | reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL); | 1963 | reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL); |
| 1595 | if (reg == NULL) | 1964 | if (reg == NULL) |
| 1596 | return ERR_PTR(-ENOMEM); | 1965 | return ERR_PTR(-ENOMEM); |
| 1597 | 1966 | ||
| 1598 | config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type); | 1967 | if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) |
| 1968 | return ERR_PTR(-ENAMETOOLONG); | ||
| 1599 | 1969 | ||
| 1600 | spin_lock(&o2hb_live_lock); | 1970 | spin_lock(&o2hb_live_lock); |
| 1971 | reg->hr_region_num = 0; | ||
| 1972 | if (o2hb_global_heartbeat_active()) { | ||
| 1973 | reg->hr_region_num = find_first_zero_bit(o2hb_region_bitmap, | ||
| 1974 | O2NM_MAX_REGIONS); | ||
| 1975 | if (reg->hr_region_num >= O2NM_MAX_REGIONS) { | ||
| 1976 | spin_unlock(&o2hb_live_lock); | ||
| 1977 | return ERR_PTR(-EFBIG); | ||
| 1978 | } | ||
| 1979 | set_bit(reg->hr_region_num, o2hb_region_bitmap); | ||
| 1980 | } | ||
| 1601 | list_add_tail(&reg->hr_all_item, &o2hb_all_regions); | 1981 | list_add_tail(&reg->hr_all_item, &o2hb_all_regions); |
| 1602 | spin_unlock(&o2hb_live_lock); | 1982 | spin_unlock(&o2hb_live_lock); |
| 1603 | 1983 | ||
| 1984 | config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type); | ||
| 1985 | |||
| 1986 | ret = o2hb_debug_region_init(reg, o2hb_debug_dir); | ||
| 1987 | if (ret) { | ||
| 1988 | config_item_put(&reg->hr_item); | ||
| 1989 | return ERR_PTR(ret); | ||
| 1990 | } | ||
| 1991 | |||
| 1604 | return &reg->hr_item; | 1992 | return &reg->hr_item; |
| 1605 | } | 1993 | } |
| 1606 | 1994 | ||
| @@ -1612,6 +2000,10 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, | |||
| 1612 | 2000 | ||
| 1613 | /* stop the thread when the user removes the region dir */ | 2001 | /* stop the thread when the user removes the region dir */ |
| 1614 | spin_lock(&o2hb_live_lock); | 2002 | spin_lock(&o2hb_live_lock); |
| 2003 | if (o2hb_global_heartbeat_active()) { | ||
| 2004 | clear_bit(reg->hr_region_num, o2hb_region_bitmap); | ||
| 2005 | clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); | ||
| 2006 | } | ||
| 1615 | hb_task = reg->hr_task; | 2007 | hb_task = reg->hr_task; |
| 1616 | reg->hr_task = NULL; | 2008 | reg->hr_task = NULL; |
| 1617 | spin_unlock(&o2hb_live_lock); | 2009 | spin_unlock(&o2hb_live_lock); |
| @@ -1628,6 +2020,9 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, | |||
| 1628 | wake_up(&o2hb_steady_queue); | 2020 | wake_up(&o2hb_steady_queue); |
| 1629 | } | 2021 | } |
| 1630 | 2022 | ||
| 2023 | if (o2hb_global_heartbeat_active()) | ||
| 2024 | printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n", | ||
| 2025 | config_item_name(&reg->hr_item)); | ||
| 1631 | config_item_put(item); | 2026 | config_item_put(item); |
| 1632 | } | 2027 | } |
| 1633 | 2028 | ||
| @@ -1688,6 +2083,41 @@ static ssize_t o2hb_heartbeat_group_threshold_store(struct o2hb_heartbeat_group | |||
| 1688 | return count; | 2083 | return count; |
| 1689 | } | 2084 | } |
| 1690 | 2085 | ||
| 2086 | static | ||
| 2087 | ssize_t o2hb_heartbeat_group_mode_show(struct o2hb_heartbeat_group *group, | ||
| 2088 | char *page) | ||
| 2089 | { | ||
| 2090 | return sprintf(page, "%s\n", | ||
| 2091 | o2hb_heartbeat_mode_desc[o2hb_heartbeat_mode]); | ||
| 2092 | } | ||
| 2093 | |||
| 2094 | static | ||
| 2095 | ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, | ||
| 2096 | const char *page, size_t count) | ||
| 2097 | { | ||
| 2098 | unsigned int i; | ||
| 2099 | int ret; | ||
| 2100 | size_t len; | ||
| 2101 | |||
| 2102 | len = (page[count - 1] == '\n') ? count - 1 : count; | ||
| 2103 | if (!len) | ||
| 2104 | return -EINVAL; | ||
| 2105 | |||
| 2106 | for (i = 0; i < O2HB_HEARTBEAT_NUM_MODES; ++i) { | ||
| 2107 | if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len)) | ||
| 2108 | continue; | ||
| 2109 | |||
| 2110 | ret = o2hb_global_hearbeat_mode_set(i); | ||
| 2111 | if (!ret) | ||
| 2112 | printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n", | ||
| 2113 | o2hb_heartbeat_mode_desc[i]); | ||
| 2114 | return count; | ||
| 2115 | } | ||
| 2116 | |||
| 2117 | return -EINVAL; | ||
| 2118 | |||
| 2119 | } | ||
| 2120 | |||
| 1691 | static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = { | 2121 | static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = { |
| 1692 | .attr = { .ca_owner = THIS_MODULE, | 2122 | .attr = { .ca_owner = THIS_MODULE, |
| 1693 | .ca_name = "dead_threshold", | 2123 | .ca_name = "dead_threshold", |
| @@ -1696,8 +2126,17 @@ static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold | |||
| 1696 | .store = o2hb_heartbeat_group_threshold_store, | 2126 | .store = o2hb_heartbeat_group_threshold_store, |
| 1697 | }; | 2127 | }; |
| 1698 | 2128 | ||
| 2129 | static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_mode = { | ||
| 2130 | .attr = { .ca_owner = THIS_MODULE, | ||
| 2131 | .ca_name = "mode", | ||
| 2132 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
| 2133 | .show = o2hb_heartbeat_group_mode_show, | ||
| 2134 | .store = o2hb_heartbeat_group_mode_store, | ||
| 2135 | }; | ||
| 2136 | |||
| 1699 | static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { | 2137 | static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { |
| 1700 | &o2hb_heartbeat_group_attr_threshold.attr, | 2138 | &o2hb_heartbeat_group_attr_threshold.attr, |
| 2139 | &o2hb_heartbeat_group_attr_mode.attr, | ||
| 1701 | NULL, | 2140 | NULL, |
| 1702 | }; | 2141 | }; |
| 1703 | 2142 | ||
| @@ -1963,3 +2402,34 @@ void o2hb_stop_all_regions(void) | |||
| 1963 | spin_unlock(&o2hb_live_lock); | 2402 | spin_unlock(&o2hb_live_lock); |
| 1964 | } | 2403 | } |
| 1965 | EXPORT_SYMBOL_GPL(o2hb_stop_all_regions); | 2404 | EXPORT_SYMBOL_GPL(o2hb_stop_all_regions); |
| 2405 | |||
| 2406 | int o2hb_get_all_regions(char *region_uuids, u8 max_regions) | ||
| 2407 | { | ||
| 2408 | struct o2hb_region *reg; | ||
| 2409 | int numregs = 0; | ||
| 2410 | char *p; | ||
| 2411 | |||
| 2412 | spin_lock(&o2hb_live_lock); | ||
| 2413 | |||
| 2414 | p = region_uuids; | ||
| 2415 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { | ||
| 2416 | mlog(0, "Region: %s\n", config_item_name(&reg->hr_item)); | ||
| 2417 | if (numregs < max_regions) { | ||
| 2418 | memcpy(p, config_item_name(&reg->hr_item), | ||
| 2419 | O2HB_MAX_REGION_NAME_LEN); | ||
| 2420 | p += O2HB_MAX_REGION_NAME_LEN; | ||
| 2421 | } | ||
| 2422 | numregs++; | ||
| 2423 | } | ||
| 2424 | |||
| 2425 | spin_unlock(&o2hb_live_lock); | ||
| 2426 | |||
| 2427 | return numregs; | ||
| 2428 | } | ||
| 2429 | EXPORT_SYMBOL_GPL(o2hb_get_all_regions); | ||
| 2430 | |||
| 2431 | int o2hb_global_heartbeat_active(void) | ||
| 2432 | { | ||
| 2433 | return (o2hb_heartbeat_mode == O2HB_HEARTBEAT_GLOBAL); | ||
| 2434 | } | ||
| 2435 | EXPORT_SYMBOL(o2hb_global_heartbeat_active); | ||
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index 2f1649253b49..00ad8e8fea51 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h | |||
| @@ -31,6 +31,8 @@ | |||
| 31 | 31 | ||
| 32 | #define O2HB_REGION_TIMEOUT_MS 2000 | 32 | #define O2HB_REGION_TIMEOUT_MS 2000 |
| 33 | 33 | ||
| 34 | #define O2HB_MAX_REGION_NAME_LEN 32 | ||
| 35 | |||
| 34 | /* number of changes to be seen as live */ | 36 | /* number of changes to be seen as live */ |
| 35 | #define O2HB_LIVE_THRESHOLD 2 | 37 | #define O2HB_LIVE_THRESHOLD 2 |
| 36 | /* number of equal samples to be seen as dead */ | 38 | /* number of equal samples to be seen as dead */ |
| @@ -81,5 +83,7 @@ int o2hb_check_node_heartbeating(u8 node_num); | |||
| 81 | int o2hb_check_node_heartbeating_from_callback(u8 node_num); | 83 | int o2hb_check_node_heartbeating_from_callback(u8 node_num); |
| 82 | int o2hb_check_local_node_heartbeating(void); | 84 | int o2hb_check_local_node_heartbeating(void); |
| 83 | void o2hb_stop_all_regions(void); | 85 | void o2hb_stop_all_regions(void); |
| 86 | int o2hb_get_all_regions(char *region_uuids, u8 numregions); | ||
| 87 | int o2hb_global_heartbeat_active(void); | ||
| 84 | 88 | ||
| 85 | #endif /* O2CLUSTER_HEARTBEAT_H */ | 89 | #endif /* O2CLUSTER_HEARTBEAT_H */ |
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index fd96e2a2fa56..ea2ed9f56c94 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h | |||
| @@ -119,7 +119,8 @@ | |||
| 119 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ | 119 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ |
| 120 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ | 120 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ |
| 121 | #define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ | 121 | #define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ |
| 122 | #define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */ | 122 | #define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */ |
| 123 | #define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */ | ||
| 123 | 124 | ||
| 124 | #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) | 125 | #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) |
| 125 | #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) | 126 | #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) |
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index ed0c9f367fed..bb240647ca5f 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
| @@ -711,6 +711,8 @@ static struct config_item *o2nm_node_group_make_item(struct config_group *group, | |||
| 711 | config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); | 711 | config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); |
| 712 | spin_lock_init(&node->nd_lock); | 712 | spin_lock_init(&node->nd_lock); |
| 713 | 713 | ||
| 714 | mlog(ML_CLUSTER, "o2nm: Registering node %s\n", name); | ||
| 715 | |||
| 714 | return &node->nd_item; | 716 | return &node->nd_item; |
| 715 | } | 717 | } |
| 716 | 718 | ||
| @@ -744,6 +746,9 @@ static void o2nm_node_group_drop_item(struct config_group *group, | |||
| 744 | } | 746 | } |
| 745 | write_unlock(&cluster->cl_nodes_lock); | 747 | write_unlock(&cluster->cl_nodes_lock); |
| 746 | 748 | ||
| 749 | mlog(ML_CLUSTER, "o2nm: Unregistered node %s\n", | ||
| 750 | config_item_name(&node->nd_item)); | ||
| 751 | |||
| 747 | config_item_put(item); | 752 | config_item_put(item); |
| 748 | } | 753 | } |
| 749 | 754 | ||
diff --git a/fs/ocfs2/cluster/ocfs2_nodemanager.h b/fs/ocfs2/cluster/ocfs2_nodemanager.h index 5b9854bad571..49b594325bec 100644 --- a/fs/ocfs2/cluster/ocfs2_nodemanager.h +++ b/fs/ocfs2/cluster/ocfs2_nodemanager.h | |||
| @@ -36,4 +36,10 @@ | |||
| 36 | /* host name, group name, cluster name all 64 bytes */ | 36 | /* host name, group name, cluster name all 64 bytes */ |
| 37 | #define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN | 37 | #define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN |
| 38 | 38 | ||
| 39 | /* | ||
| 40 | * Maximum number of global heartbeat regions allowed. | ||
| 41 | * **CAUTION** Changing this number will break dlm compatibility. | ||
| 42 | */ | ||
| 43 | #define O2NM_MAX_REGIONS 32 | ||
| 44 | |||
| 39 | #endif /* _OCFS2_NODEMANAGER_H */ | 45 | #endif /* _OCFS2_NODEMANAGER_H */ |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index cbe2f057cc28..9aa426e42123 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
| @@ -1696,6 +1696,9 @@ static void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num, | |||
| 1696 | { | 1696 | { |
| 1697 | o2quo_hb_down(node_num); | 1697 | o2quo_hb_down(node_num); |
| 1698 | 1698 | ||
| 1699 | if (!node) | ||
| 1700 | return; | ||
| 1701 | |||
| 1699 | if (node_num != o2nm_this_node()) | 1702 | if (node_num != o2nm_this_node()) |
| 1700 | o2net_disconnect_node(node); | 1703 | o2net_disconnect_node(node); |
| 1701 | 1704 | ||
| @@ -1709,6 +1712,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, | |||
| 1709 | 1712 | ||
| 1710 | o2quo_hb_up(node_num); | 1713 | o2quo_hb_up(node_num); |
| 1711 | 1714 | ||
| 1715 | BUG_ON(!node); | ||
| 1716 | |||
| 1712 | /* ensure an immediate connect attempt */ | 1717 | /* ensure an immediate connect attempt */ |
| 1713 | nn->nn_last_connect_attempt = jiffies - | 1718 | nn->nn_last_connect_attempt = jiffies - |
| 1714 | (msecs_to_jiffies(o2net_reconnect_delay()) + 1); | 1719 | (msecs_to_jiffies(o2net_reconnect_delay()) + 1); |
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index b4957c7d9fe2..edaded48e7e9 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c | |||
| @@ -40,6 +40,14 @@ | |||
| 40 | #include "inode.h" | 40 | #include "inode.h" |
| 41 | #include "super.h" | 41 | #include "super.h" |
| 42 | 42 | ||
| 43 | void ocfs2_dentry_attach_gen(struct dentry *dentry) | ||
| 44 | { | ||
| 45 | unsigned long gen = | ||
| 46 | OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen; | ||
| 47 | BUG_ON(dentry->d_inode); | ||
| 48 | dentry->d_fsdata = (void *)gen; | ||
| 49 | } | ||
| 50 | |||
| 43 | 51 | ||
| 44 | static int ocfs2_dentry_revalidate(struct dentry *dentry, | 52 | static int ocfs2_dentry_revalidate(struct dentry *dentry, |
| 45 | struct nameidata *nd) | 53 | struct nameidata *nd) |
| @@ -51,11 +59,20 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, | |||
| 51 | mlog_entry("(0x%p, '%.*s')\n", dentry, | 59 | mlog_entry("(0x%p, '%.*s')\n", dentry, |
| 52 | dentry->d_name.len, dentry->d_name.name); | 60 | dentry->d_name.len, dentry->d_name.name); |
| 53 | 61 | ||
| 54 | /* Never trust a negative dentry - force a new lookup. */ | 62 | /* For a negative dentry - |
| 63 | * check the generation number of the parent and compare with the | ||
| 64 | * one stored in the inode. | ||
| 65 | */ | ||
| 55 | if (inode == NULL) { | 66 | if (inode == NULL) { |
| 56 | mlog(0, "negative dentry: %.*s\n", dentry->d_name.len, | 67 | unsigned long gen = (unsigned long) dentry->d_fsdata; |
| 57 | dentry->d_name.name); | 68 | unsigned long pgen = |
| 58 | goto bail; | 69 | OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen; |
| 70 | mlog(0, "negative dentry: %.*s parent gen: %lu " | ||
| 71 | "dentry gen: %lu\n", | ||
| 72 | dentry->d_name.len, dentry->d_name.name, pgen, gen); | ||
| 73 | if (gen != pgen) | ||
| 74 | goto bail; | ||
| 75 | goto valid; | ||
| 59 | } | 76 | } |
| 60 | 77 | ||
| 61 | BUG_ON(!osb); | 78 | BUG_ON(!osb); |
| @@ -96,6 +113,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, | |||
| 96 | goto bail; | 113 | goto bail; |
| 97 | } | 114 | } |
| 98 | 115 | ||
| 116 | valid: | ||
| 99 | ret = 1; | 117 | ret = 1; |
| 100 | 118 | ||
| 101 | bail: | 119 | bail: |
| @@ -227,6 +245,12 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, | |||
| 227 | if (!inode) | 245 | if (!inode) |
| 228 | return 0; | 246 | return 0; |
| 229 | 247 | ||
| 248 | if (!dentry->d_inode && dentry->d_fsdata) { | ||
| 249 | /* Converting a negative dentry to positive | ||
| 250 | Clear dentry->d_fsdata */ | ||
| 251 | dentry->d_fsdata = dl = NULL; | ||
| 252 | } | ||
| 253 | |||
| 230 | if (dl) { | 254 | if (dl) { |
| 231 | mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, | 255 | mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, |
| 232 | " \"%.*s\": old parent: %llu, new: %llu\n", | 256 | " \"%.*s\": old parent: %llu, new: %llu\n", |
| @@ -452,6 +476,7 @@ static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode) | |||
| 452 | 476 | ||
| 453 | out: | 477 | out: |
| 454 | iput(inode); | 478 | iput(inode); |
| 479 | ocfs2_dentry_attach_gen(dentry); | ||
| 455 | } | 480 | } |
| 456 | 481 | ||
| 457 | /* | 482 | /* |
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h index f5dd1789acf1..b79eff709958 100644 --- a/fs/ocfs2/dcache.h +++ b/fs/ocfs2/dcache.h | |||
| @@ -64,5 +64,6 @@ void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target, | |||
| 64 | struct inode *old_dir, struct inode *new_dir); | 64 | struct inode *old_dir, struct inode *new_dir); |
| 65 | 65 | ||
| 66 | extern spinlock_t dentry_attach_lock; | 66 | extern spinlock_t dentry_attach_lock; |
| 67 | void ocfs2_dentry_attach_gen(struct dentry *dentry); | ||
| 67 | 68 | ||
| 68 | #endif /* OCFS2_DCACHE_H */ | 69 | #endif /* OCFS2_DCACHE_H */ |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 765298908f1d..b36d0bf77a5a 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
| @@ -445,7 +445,9 @@ enum { | |||
| 445 | DLM_LOCK_REQUEST_MSG, /* 515 */ | 445 | DLM_LOCK_REQUEST_MSG, /* 515 */ |
| 446 | DLM_RECO_DATA_DONE_MSG, /* 516 */ | 446 | DLM_RECO_DATA_DONE_MSG, /* 516 */ |
| 447 | DLM_BEGIN_RECO_MSG, /* 517 */ | 447 | DLM_BEGIN_RECO_MSG, /* 517 */ |
| 448 | DLM_FINALIZE_RECO_MSG /* 518 */ | 448 | DLM_FINALIZE_RECO_MSG, /* 518 */ |
| 449 | DLM_QUERY_REGION, /* 519 */ | ||
| 450 | DLM_QUERY_NODEINFO, /* 520 */ | ||
| 449 | }; | 451 | }; |
| 450 | 452 | ||
| 451 | struct dlm_reco_node_data | 453 | struct dlm_reco_node_data |
| @@ -727,6 +729,31 @@ struct dlm_cancel_join | |||
| 727 | u8 domain[O2NM_MAX_NAME_LEN]; | 729 | u8 domain[O2NM_MAX_NAME_LEN]; |
| 728 | }; | 730 | }; |
| 729 | 731 | ||
| 732 | struct dlm_query_region { | ||
| 733 | u8 qr_node; | ||
| 734 | u8 qr_numregions; | ||
| 735 | u8 qr_namelen; | ||
| 736 | u8 pad1; | ||
| 737 | u8 qr_domain[O2NM_MAX_NAME_LEN]; | ||
| 738 | u8 qr_regions[O2HB_MAX_REGION_NAME_LEN * O2NM_MAX_REGIONS]; | ||
| 739 | }; | ||
| 740 | |||
| 741 | struct dlm_node_info { | ||
| 742 | u8 ni_nodenum; | ||
| 743 | u8 pad1; | ||
| 744 | u16 ni_ipv4_port; | ||
| 745 | u32 ni_ipv4_address; | ||
| 746 | }; | ||
| 747 | |||
| 748 | struct dlm_query_nodeinfo { | ||
| 749 | u8 qn_nodenum; | ||
| 750 | u8 qn_numnodes; | ||
| 751 | u8 qn_namelen; | ||
| 752 | u8 pad1; | ||
| 753 | u8 qn_domain[O2NM_MAX_NAME_LEN]; | ||
| 754 | struct dlm_node_info qn_nodes[O2NM_MAX_NODES]; | ||
| 755 | }; | ||
| 756 | |||
| 730 | struct dlm_exit_domain | 757 | struct dlm_exit_domain |
| 731 | { | 758 | { |
| 732 | u8 node_idx; | 759 | u8 node_idx; |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 901ca52bf86b..272ec8631a51 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
| @@ -493,7 +493,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | |||
| 493 | struct hlist_head *bucket; | 493 | struct hlist_head *bucket; |
| 494 | struct hlist_node *list; | 494 | struct hlist_node *list; |
| 495 | int i, out = 0; | 495 | int i, out = 0; |
| 496 | unsigned long total = 0, longest = 0, bktcnt; | 496 | unsigned long total = 0, longest = 0, bucket_count = 0; |
| 497 | 497 | ||
| 498 | out += snprintf(db->buf + out, db->len - out, | 498 | out += snprintf(db->buf + out, db->len - out, |
| 499 | "Dumping MLEs for Domain: %s\n", dlm->name); | 499 | "Dumping MLEs for Domain: %s\n", dlm->name); |
| @@ -505,13 +505,13 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | |||
| 505 | mle = hlist_entry(list, struct dlm_master_list_entry, | 505 | mle = hlist_entry(list, struct dlm_master_list_entry, |
| 506 | master_hash_node); | 506 | master_hash_node); |
| 507 | ++total; | 507 | ++total; |
| 508 | ++bktcnt; | 508 | ++bucket_count; |
| 509 | if (db->len - out < 200) | 509 | if (db->len - out < 200) |
| 510 | continue; | 510 | continue; |
| 511 | out += dump_mle(mle, db->buf + out, db->len - out); | 511 | out += dump_mle(mle, db->buf + out, db->len - out); |
| 512 | } | 512 | } |
| 513 | longest = max(longest, bktcnt); | 513 | longest = max(longest, bucket_count); |
| 514 | bktcnt = 0; | 514 | bucket_count = 0; |
| 515 | } | 515 | } |
| 516 | spin_unlock(&dlm->master_lock); | 516 | spin_unlock(&dlm->master_lock); |
| 517 | 517 | ||
| @@ -782,7 +782,9 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | |||
| 782 | 782 | ||
| 783 | /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ | 783 | /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ |
| 784 | out += snprintf(db->buf + out, db->len - out, | 784 | out += snprintf(db->buf + out, db->len - out, |
| 785 | "Domain: %s Key: 0x%08x\n", dlm->name, dlm->key); | 785 | "Domain: %s Key: 0x%08x Protocol: %d.%d\n", |
| 786 | dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major, | ||
| 787 | dlm->dlm_locking_proto.pv_minor); | ||
| 786 | 788 | ||
| 787 | /* Thread Pid: xxx Node: xxx State: xxxxx */ | 789 | /* Thread Pid: xxx Node: xxx State: xxxxx */ |
| 788 | out += snprintf(db->buf + out, db->len - out, | 790 | out += snprintf(db->buf + out, db->len - out, |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 11a5c87fd7f7..58a93b953735 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
| @@ -128,10 +128,14 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); | |||
| 128 | * will have a negotiated version with the same major number and a minor | 128 | * will have a negotiated version with the same major number and a minor |
| 129 | * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should | 129 | * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should |
| 130 | * be used to determine what a running domain is actually using. | 130 | * be used to determine what a running domain is actually using. |
| 131 | * | ||
| 132 | * New in version 1.1: | ||
| 133 | * - Message DLM_QUERY_REGION added to support global heartbeat | ||
| 134 | * - Message DLM_QUERY_NODEINFO added to allow online node removes | ||
| 131 | */ | 135 | */ |
| 132 | static const struct dlm_protocol_version dlm_protocol = { | 136 | static const struct dlm_protocol_version dlm_protocol = { |
| 133 | .pv_major = 1, | 137 | .pv_major = 1, |
| 134 | .pv_minor = 0, | 138 | .pv_minor = 1, |
| 135 | }; | 139 | }; |
| 136 | 140 | ||
| 137 | #define DLM_DOMAIN_BACKOFF_MS 200 | 141 | #define DLM_DOMAIN_BACKOFF_MS 200 |
| @@ -142,6 +146,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 142 | void **ret_data); | 146 | void **ret_data); |
| 143 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, | 147 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, |
| 144 | void **ret_data); | 148 | void **ret_data); |
| 149 | static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | ||
| 150 | void *data, void **ret_data); | ||
| 145 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, | 151 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, |
| 146 | void **ret_data); | 152 | void **ret_data); |
| 147 | static int dlm_protocol_compare(struct dlm_protocol_version *existing, | 153 | static int dlm_protocol_compare(struct dlm_protocol_version *existing, |
| @@ -921,6 +927,370 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, | |||
| 921 | return 0; | 927 | return 0; |
| 922 | } | 928 | } |
| 923 | 929 | ||
| 930 | static int dlm_match_regions(struct dlm_ctxt *dlm, | ||
| 931 | struct dlm_query_region *qr) | ||
| 932 | { | ||
| 933 | char *local = NULL, *remote = qr->qr_regions; | ||
| 934 | char *l, *r; | ||
| 935 | int localnr, i, j, foundit; | ||
| 936 | int status = 0; | ||
| 937 | |||
| 938 | if (!o2hb_global_heartbeat_active()) { | ||
| 939 | if (qr->qr_numregions) { | ||
| 940 | mlog(ML_ERROR, "Domain %s: Joining node %d has global " | ||
| 941 | "heartbeat enabled but local node %d does not\n", | ||
| 942 | qr->qr_domain, qr->qr_node, dlm->node_num); | ||
| 943 | status = -EINVAL; | ||
| 944 | } | ||
| 945 | goto bail; | ||
| 946 | } | ||
| 947 | |||
| 948 | if (o2hb_global_heartbeat_active() && !qr->qr_numregions) { | ||
| 949 | mlog(ML_ERROR, "Domain %s: Local node %d has global " | ||
| 950 | "heartbeat enabled but joining node %d does not\n", | ||
| 951 | qr->qr_domain, dlm->node_num, qr->qr_node); | ||
| 952 | status = -EINVAL; | ||
| 953 | goto bail; | ||
| 954 | } | ||
| 955 | |||
| 956 | r = remote; | ||
| 957 | for (i = 0; i < qr->qr_numregions; ++i) { | ||
| 958 | mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, r); | ||
| 959 | r += O2HB_MAX_REGION_NAME_LEN; | ||
| 960 | } | ||
| 961 | |||
| 962 | local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); | ||
| 963 | if (!local) { | ||
| 964 | status = -ENOMEM; | ||
| 965 | goto bail; | ||
| 966 | } | ||
| 967 | |||
| 968 | localnr = o2hb_get_all_regions(local, O2NM_MAX_REGIONS); | ||
| 969 | |||
| 970 | /* compare local regions with remote */ | ||
| 971 | l = local; | ||
| 972 | for (i = 0; i < localnr; ++i) { | ||
| 973 | foundit = 0; | ||
| 974 | r = remote; | ||
| 975 | for (j = 0; j <= qr->qr_numregions; ++j) { | ||
| 976 | if (!memcmp(l, r, O2HB_MAX_REGION_NAME_LEN)) { | ||
| 977 | foundit = 1; | ||
| 978 | break; | ||
| 979 | } | ||
| 980 | r += O2HB_MAX_REGION_NAME_LEN; | ||
| 981 | } | ||
| 982 | if (!foundit) { | ||
| 983 | status = -EINVAL; | ||
| 984 | mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " | ||
| 985 | "in local node %d but not in joining node %d\n", | ||
| 986 | qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, l, | ||
| 987 | dlm->node_num, qr->qr_node); | ||
| 988 | goto bail; | ||
| 989 | } | ||
| 990 | l += O2HB_MAX_REGION_NAME_LEN; | ||
| 991 | } | ||
| 992 | |||
| 993 | /* compare remote with local regions */ | ||
| 994 | r = remote; | ||
| 995 | for (i = 0; i < qr->qr_numregions; ++i) { | ||
| 996 | foundit = 0; | ||
| 997 | l = local; | ||
| 998 | for (j = 0; j < localnr; ++j) { | ||
| 999 | if (!memcmp(r, l, O2HB_MAX_REGION_NAME_LEN)) { | ||
| 1000 | foundit = 1; | ||
| 1001 | break; | ||
| 1002 | } | ||
| 1003 | l += O2HB_MAX_REGION_NAME_LEN; | ||
| 1004 | } | ||
| 1005 | if (!foundit) { | ||
| 1006 | status = -EINVAL; | ||
| 1007 | mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " | ||
| 1008 | "in joining node %d but not in local node %d\n", | ||
| 1009 | qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, r, | ||
| 1010 | qr->qr_node, dlm->node_num); | ||
| 1011 | goto bail; | ||
| 1012 | } | ||
| 1013 | r += O2HB_MAX_REGION_NAME_LEN; | ||
| 1014 | } | ||
| 1015 | |||
| 1016 | bail: | ||
| 1017 | kfree(local); | ||
| 1018 | |||
| 1019 | return status; | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map) | ||
| 1023 | { | ||
| 1024 | struct dlm_query_region *qr = NULL; | ||
| 1025 | int status, ret = 0, i; | ||
| 1026 | char *p; | ||
| 1027 | |||
| 1028 | if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) | ||
| 1029 | goto bail; | ||
| 1030 | |||
| 1031 | qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL); | ||
| 1032 | if (!qr) { | ||
| 1033 | ret = -ENOMEM; | ||
| 1034 | mlog_errno(ret); | ||
| 1035 | goto bail; | ||
| 1036 | } | ||
| 1037 | |||
| 1038 | qr->qr_node = dlm->node_num; | ||
| 1039 | qr->qr_namelen = strlen(dlm->name); | ||
| 1040 | memcpy(qr->qr_domain, dlm->name, qr->qr_namelen); | ||
| 1041 | /* if local hb, the numregions will be zero */ | ||
| 1042 | if (o2hb_global_heartbeat_active()) | ||
| 1043 | qr->qr_numregions = o2hb_get_all_regions(qr->qr_regions, | ||
| 1044 | O2NM_MAX_REGIONS); | ||
| 1045 | |||
| 1046 | p = qr->qr_regions; | ||
| 1047 | for (i = 0; i < qr->qr_numregions; ++i, p += O2HB_MAX_REGION_NAME_LEN) | ||
| 1048 | mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, p); | ||
| 1049 | |||
| 1050 | i = -1; | ||
| 1051 | while ((i = find_next_bit(node_map, O2NM_MAX_NODES, | ||
| 1052 | i + 1)) < O2NM_MAX_NODES) { | ||
| 1053 | if (i == dlm->node_num) | ||
| 1054 | continue; | ||
| 1055 | |||
| 1056 | mlog(0, "Sending regions to node %d\n", i); | ||
| 1057 | |||
| 1058 | ret = o2net_send_message(DLM_QUERY_REGION, DLM_MOD_KEY, qr, | ||
| 1059 | sizeof(struct dlm_query_region), | ||
| 1060 | i, &status); | ||
| 1061 | if (ret >= 0) | ||
| 1062 | ret = status; | ||
| 1063 | if (ret) { | ||
| 1064 | mlog(ML_ERROR, "Region mismatch %d, node %d\n", | ||
| 1065 | ret, i); | ||
| 1066 | break; | ||
| 1067 | } | ||
| 1068 | } | ||
| 1069 | |||
| 1070 | bail: | ||
| 1071 | kfree(qr); | ||
| 1072 | return ret; | ||
| 1073 | } | ||
| 1074 | |||
| 1075 | static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | ||
| 1076 | void *data, void **ret_data) | ||
| 1077 | { | ||
| 1078 | struct dlm_query_region *qr; | ||
| 1079 | struct dlm_ctxt *dlm = NULL; | ||
| 1080 | int status = 0; | ||
| 1081 | int locked = 0; | ||
| 1082 | |||
| 1083 | qr = (struct dlm_query_region *) msg->buf; | ||
| 1084 | |||
| 1085 | mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node, | ||
| 1086 | qr->qr_domain); | ||
| 1087 | |||
| 1088 | status = -EINVAL; | ||
| 1089 | |||
| 1090 | spin_lock(&dlm_domain_lock); | ||
| 1091 | dlm = __dlm_lookup_domain_full(qr->qr_domain, qr->qr_namelen); | ||
| 1092 | if (!dlm) { | ||
| 1093 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | ||
| 1094 | "before join domain\n", qr->qr_node, qr->qr_domain); | ||
| 1095 | goto bail; | ||
| 1096 | } | ||
| 1097 | |||
| 1098 | spin_lock(&dlm->spinlock); | ||
| 1099 | locked = 1; | ||
| 1100 | if (dlm->joining_node != qr->qr_node) { | ||
| 1101 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | ||
| 1102 | "but joining node is %d\n", qr->qr_node, qr->qr_domain, | ||
| 1103 | dlm->joining_node); | ||
| 1104 | goto bail; | ||
| 1105 | } | ||
| 1106 | |||
| 1107 | /* Support for global heartbeat was added in 1.1 */ | ||
| 1108 | if (dlm->dlm_locking_proto.pv_major == 1 && | ||
| 1109 | dlm->dlm_locking_proto.pv_minor == 0) { | ||
| 1110 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | ||
| 1111 | "but active dlm protocol is %d.%d\n", qr->qr_node, | ||
| 1112 | qr->qr_domain, dlm->dlm_locking_proto.pv_major, | ||
| 1113 | dlm->dlm_locking_proto.pv_minor); | ||
| 1114 | goto bail; | ||
| 1115 | } | ||
| 1116 | |||
| 1117 | status = dlm_match_regions(dlm, qr); | ||
| 1118 | |||
| 1119 | bail: | ||
| 1120 | if (locked) | ||
| 1121 | spin_unlock(&dlm->spinlock); | ||
| 1122 | spin_unlock(&dlm_domain_lock); | ||
| 1123 | |||
| 1124 | return status; | ||
| 1125 | } | ||
| 1126 | |||
| 1127 | static int dlm_match_nodes(struct dlm_ctxt *dlm, struct dlm_query_nodeinfo *qn) | ||
| 1128 | { | ||
| 1129 | struct o2nm_node *local; | ||
| 1130 | struct dlm_node_info *remote; | ||
| 1131 | int i, j; | ||
| 1132 | int status = 0; | ||
| 1133 | |||
| 1134 | for (j = 0; j < qn->qn_numnodes; ++j) | ||
| 1135 | mlog(0, "Node %3d, %pI4:%u\n", qn->qn_nodes[j].ni_nodenum, | ||
| 1136 | &(qn->qn_nodes[j].ni_ipv4_address), | ||
| 1137 | ntohs(qn->qn_nodes[j].ni_ipv4_port)); | ||
| 1138 | |||
| 1139 | for (i = 0; i < O2NM_MAX_NODES && !status; ++i) { | ||
| 1140 | local = o2nm_get_node_by_num(i); | ||
| 1141 | remote = NULL; | ||
| 1142 | for (j = 0; j < qn->qn_numnodes; ++j) { | ||
| 1143 | if (qn->qn_nodes[j].ni_nodenum == i) { | ||
| 1144 | remote = &(qn->qn_nodes[j]); | ||
| 1145 | break; | ||
| 1146 | } | ||
| 1147 | } | ||
| 1148 | |||
| 1149 | if (!local && !remote) | ||
| 1150 | continue; | ||
| 1151 | |||
| 1152 | if ((local && !remote) || (!local && remote)) | ||
| 1153 | status = -EINVAL; | ||
| 1154 | |||
| 1155 | if (!status && | ||
| 1156 | ((remote->ni_nodenum != local->nd_num) || | ||
| 1157 | (remote->ni_ipv4_port != local->nd_ipv4_port) || | ||
| 1158 | (remote->ni_ipv4_address != local->nd_ipv4_address))) | ||
| 1159 | status = -EINVAL; | ||
| 1160 | |||
| 1161 | if (status) { | ||
| 1162 | if (remote && !local) | ||
| 1163 | mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " | ||
| 1164 | "registered in joining node %d but not in " | ||
| 1165 | "local node %d\n", qn->qn_domain, | ||
| 1166 | remote->ni_nodenum, | ||
| 1167 | &(remote->ni_ipv4_address), | ||
| 1168 | ntohs(remote->ni_ipv4_port), | ||
| 1169 | qn->qn_nodenum, dlm->node_num); | ||
| 1170 | if (local && !remote) | ||
| 1171 | mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " | ||
| 1172 | "registered in local node %d but not in " | ||
| 1173 | "joining node %d\n", qn->qn_domain, | ||
| 1174 | local->nd_num, &(local->nd_ipv4_address), | ||
| 1175 | ntohs(local->nd_ipv4_port), | ||
| 1176 | dlm->node_num, qn->qn_nodenum); | ||
| 1177 | BUG_ON((!local && !remote)); | ||
| 1178 | } | ||
| 1179 | |||
| 1180 | if (local) | ||
| 1181 | o2nm_node_put(local); | ||
| 1182 | } | ||
| 1183 | |||
| 1184 | return status; | ||
| 1185 | } | ||
| 1186 | |||
| 1187 | static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map) | ||
| 1188 | { | ||
| 1189 | struct dlm_query_nodeinfo *qn = NULL; | ||
| 1190 | struct o2nm_node *node; | ||
| 1191 | int ret = 0, status, count, i; | ||
| 1192 | |||
| 1193 | if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) | ||
| 1194 | goto bail; | ||
| 1195 | |||
| 1196 | qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL); | ||
| 1197 | if (!qn) { | ||
| 1198 | ret = -ENOMEM; | ||
| 1199 | mlog_errno(ret); | ||
| 1200 | goto bail; | ||
| 1201 | } | ||
| 1202 | |||
| 1203 | for (i = 0, count = 0; i < O2NM_MAX_NODES; ++i) { | ||
| 1204 | node = o2nm_get_node_by_num(i); | ||
| 1205 | if (!node) | ||
| 1206 | continue; | ||
| 1207 | qn->qn_nodes[count].ni_nodenum = node->nd_num; | ||
| 1208 | qn->qn_nodes[count].ni_ipv4_port = node->nd_ipv4_port; | ||
| 1209 | qn->qn_nodes[count].ni_ipv4_address = node->nd_ipv4_address; | ||
| 1210 | mlog(0, "Node %3d, %pI4:%u\n", node->nd_num, | ||
| 1211 | &(node->nd_ipv4_address), ntohs(node->nd_ipv4_port)); | ||
| 1212 | ++count; | ||
| 1213 | o2nm_node_put(node); | ||
| 1214 | } | ||
| 1215 | |||
| 1216 | qn->qn_nodenum = dlm->node_num; | ||
| 1217 | qn->qn_numnodes = count; | ||
| 1218 | qn->qn_namelen = strlen(dlm->name); | ||
| 1219 | memcpy(qn->qn_domain, dlm->name, qn->qn_namelen); | ||
| 1220 | |||
| 1221 | i = -1; | ||
| 1222 | while ((i = find_next_bit(node_map, O2NM_MAX_NODES, | ||
| 1223 | i + 1)) < O2NM_MAX_NODES) { | ||
| 1224 | if (i == dlm->node_num) | ||
| 1225 | continue; | ||
| 1226 | |||
| 1227 | mlog(0, "Sending nodeinfo to node %d\n", i); | ||
| 1228 | |||
| 1229 | ret = o2net_send_message(DLM_QUERY_NODEINFO, DLM_MOD_KEY, | ||
| 1230 | qn, sizeof(struct dlm_query_nodeinfo), | ||
| 1231 | i, &status); | ||
| 1232 | if (ret >= 0) | ||
| 1233 | ret = status; | ||
| 1234 | if (ret) { | ||
| 1235 | mlog(ML_ERROR, "node mismatch %d, node %d\n", ret, i); | ||
| 1236 | break; | ||
| 1237 | } | ||
| 1238 | } | ||
| 1239 | |||
| 1240 | bail: | ||
| 1241 | kfree(qn); | ||
| 1242 | return ret; | ||
| 1243 | } | ||
| 1244 | |||
| 1245 | static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len, | ||
| 1246 | void *data, void **ret_data) | ||
| 1247 | { | ||
| 1248 | struct dlm_query_nodeinfo *qn; | ||
| 1249 | struct dlm_ctxt *dlm = NULL; | ||
| 1250 | int locked = 0, status = -EINVAL; | ||
| 1251 | |||
| 1252 | qn = (struct dlm_query_nodeinfo *) msg->buf; | ||
| 1253 | |||
| 1254 | mlog(0, "Node %u queries nodes on domain %s\n", qn->qn_nodenum, | ||
| 1255 | qn->qn_domain); | ||
| 1256 | |||
| 1257 | spin_lock(&dlm_domain_lock); | ||
| 1258 | dlm = __dlm_lookup_domain_full(qn->qn_domain, qn->qn_namelen); | ||
| 1259 | if (!dlm) { | ||
| 1260 | mlog(ML_ERROR, "Node %d queried nodes on domain %s before " | ||
| 1261 | "join domain\n", qn->qn_nodenum, qn->qn_domain); | ||
| 1262 | goto bail; | ||
| 1263 | } | ||
| 1264 | |||
| 1265 | spin_lock(&dlm->spinlock); | ||
| 1266 | locked = 1; | ||
| 1267 | if (dlm->joining_node != qn->qn_nodenum) { | ||
| 1268 | mlog(ML_ERROR, "Node %d queried nodes on domain %s but " | ||
| 1269 | "joining node is %d\n", qn->qn_nodenum, qn->qn_domain, | ||
| 1270 | dlm->joining_node); | ||
| 1271 | goto bail; | ||
| 1272 | } | ||
| 1273 | |||
| 1274 | /* Support for node query was added in 1.1 */ | ||
| 1275 | if (dlm->dlm_locking_proto.pv_major == 1 && | ||
| 1276 | dlm->dlm_locking_proto.pv_minor == 0) { | ||
| 1277 | mlog(ML_ERROR, "Node %d queried nodes on domain %s " | ||
| 1278 | "but active dlm protocol is %d.%d\n", qn->qn_nodenum, | ||
| 1279 | qn->qn_domain, dlm->dlm_locking_proto.pv_major, | ||
| 1280 | dlm->dlm_locking_proto.pv_minor); | ||
| 1281 | goto bail; | ||
| 1282 | } | ||
| 1283 | |||
| 1284 | status = dlm_match_nodes(dlm, qn); | ||
| 1285 | |||
| 1286 | bail: | ||
| 1287 | if (locked) | ||
| 1288 | spin_unlock(&dlm->spinlock); | ||
| 1289 | spin_unlock(&dlm_domain_lock); | ||
| 1290 | |||
| 1291 | return status; | ||
| 1292 | } | ||
| 1293 | |||
| 924 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, | 1294 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, |
| 925 | void **ret_data) | 1295 | void **ret_data) |
| 926 | { | 1296 | { |
| @@ -1241,6 +1611,20 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) | |||
| 1241 | set_bit(dlm->node_num, dlm->domain_map); | 1611 | set_bit(dlm->node_num, dlm->domain_map); |
| 1242 | spin_unlock(&dlm->spinlock); | 1612 | spin_unlock(&dlm->spinlock); |
| 1243 | 1613 | ||
| 1614 | /* Support for global heartbeat and node info was added in 1.1 */ | ||
| 1615 | if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) { | ||
| 1616 | status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map); | ||
| 1617 | if (status) { | ||
| 1618 | mlog_errno(status); | ||
| 1619 | goto bail; | ||
| 1620 | } | ||
| 1621 | status = dlm_send_regions(dlm, ctxt->yes_resp_map); | ||
| 1622 | if (status) { | ||
| 1623 | mlog_errno(status); | ||
| 1624 | goto bail; | ||
| 1625 | } | ||
| 1626 | } | ||
| 1627 | |||
| 1244 | dlm_send_join_asserts(dlm, ctxt->yes_resp_map); | 1628 | dlm_send_join_asserts(dlm, ctxt->yes_resp_map); |
| 1245 | 1629 | ||
| 1246 | /* Joined state *must* be set before the joining node | 1630 | /* Joined state *must* be set before the joining node |
| @@ -1807,7 +2191,21 @@ static int dlm_register_net_handlers(void) | |||
| 1807 | sizeof(struct dlm_cancel_join), | 2191 | sizeof(struct dlm_cancel_join), |
| 1808 | dlm_cancel_join_handler, | 2192 | dlm_cancel_join_handler, |
| 1809 | NULL, NULL, &dlm_join_handlers); | 2193 | NULL, NULL, &dlm_join_handlers); |
| 2194 | if (status) | ||
| 2195 | goto bail; | ||
| 2196 | |||
| 2197 | status = o2net_register_handler(DLM_QUERY_REGION, DLM_MOD_KEY, | ||
| 2198 | sizeof(struct dlm_query_region), | ||
| 2199 | dlm_query_region_handler, | ||
| 2200 | NULL, NULL, &dlm_join_handlers); | ||
| 1810 | 2201 | ||
| 2202 | if (status) | ||
| 2203 | goto bail; | ||
| 2204 | |||
| 2205 | status = o2net_register_handler(DLM_QUERY_NODEINFO, DLM_MOD_KEY, | ||
| 2206 | sizeof(struct dlm_query_nodeinfo), | ||
| 2207 | dlm_query_nodeinfo_handler, | ||
| 2208 | NULL, NULL, &dlm_join_handlers); | ||
| 1811 | bail: | 2209 | bail: |
| 1812 | if (status < 0) | 2210 | if (status < 0) |
| 1813 | dlm_unregister_net_handlers(); | 2211 | dlm_unregister_net_handlers(); |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 5e02a893f46e..e8d94d722ecb 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
| @@ -3635,10 +3635,18 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
| 3635 | { | 3635 | { |
| 3636 | struct inode *inode; | 3636 | struct inode *inode; |
| 3637 | struct address_space *mapping; | 3637 | struct address_space *mapping; |
| 3638 | struct ocfs2_inode_info *oi; | ||
| 3638 | 3639 | ||
| 3639 | inode = ocfs2_lock_res_inode(lockres); | 3640 | inode = ocfs2_lock_res_inode(lockres); |
| 3640 | mapping = inode->i_mapping; | 3641 | mapping = inode->i_mapping; |
| 3641 | 3642 | ||
| 3643 | if (S_ISDIR(inode->i_mode)) { | ||
| 3644 | oi = OCFS2_I(inode); | ||
| 3645 | oi->ip_dir_lock_gen++; | ||
| 3646 | mlog(0, "generation: %u\n", oi->ip_dir_lock_gen); | ||
| 3647 | goto out; | ||
| 3648 | } | ||
| 3649 | |||
| 3642 | if (!S_ISREG(inode->i_mode)) | 3650 | if (!S_ISREG(inode->i_mode)) |
| 3643 | goto out; | 3651 | goto out; |
| 3644 | 3652 | ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 9a03c151b5ce..9e8cc4346b76 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -64,12 +64,6 @@ | |||
| 64 | 64 | ||
| 65 | #include "buffer_head_io.h" | 65 | #include "buffer_head_io.h" |
| 66 | 66 | ||
| 67 | static int ocfs2_sync_inode(struct inode *inode) | ||
| 68 | { | ||
| 69 | filemap_fdatawrite(inode->i_mapping); | ||
| 70 | return sync_mapping_buffers(inode->i_mapping); | ||
| 71 | } | ||
| 72 | |||
| 73 | static int ocfs2_init_file_private(struct inode *inode, struct file *file) | 67 | static int ocfs2_init_file_private(struct inode *inode, struct file *file) |
| 74 | { | 68 | { |
| 75 | struct ocfs2_file_private *fp; | 69 | struct ocfs2_file_private *fp; |
| @@ -180,16 +174,12 @@ static int ocfs2_sync_file(struct file *file, int datasync) | |||
| 180 | { | 174 | { |
| 181 | int err = 0; | 175 | int err = 0; |
| 182 | journal_t *journal; | 176 | journal_t *journal; |
| 183 | struct dentry *dentry = file->f_path.dentry; | ||
| 184 | struct inode *inode = file->f_mapping->host; | 177 | struct inode *inode = file->f_mapping->host; |
| 185 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 178 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 186 | 179 | ||
| 187 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, | 180 | mlog_entry("(0x%p, %d, 0x%p, '%.*s')\n", file, datasync, |
| 188 | dentry->d_name.len, dentry->d_name.name); | 181 | file->f_path.dentry, file->f_path.dentry->d_name.len, |
| 189 | 182 | file->f_path.dentry->d_name.name); | |
| 190 | err = ocfs2_sync_inode(dentry->d_inode); | ||
| 191 | if (err) | ||
| 192 | goto bail; | ||
| 193 | 183 | ||
| 194 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { | 184 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { |
| 195 | /* | 185 | /* |
| @@ -370,7 +360,7 @@ static int ocfs2_cow_file_pos(struct inode *inode, | |||
| 370 | if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) | 360 | if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) |
| 371 | goto out; | 361 | goto out; |
| 372 | 362 | ||
| 373 | return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1); | 363 | return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1); |
| 374 | 364 | ||
| 375 | out: | 365 | out: |
| 376 | return status; | 366 | return status; |
| @@ -913,8 +903,8 @@ static int ocfs2_zero_extend_get_range(struct inode *inode, | |||
| 913 | zero_clusters = last_cpos - zero_cpos; | 903 | zero_clusters = last_cpos - zero_cpos; |
| 914 | 904 | ||
| 915 | if (needs_cow) { | 905 | if (needs_cow) { |
| 916 | rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters, | 906 | rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos, |
| 917 | UINT_MAX); | 907 | zero_clusters, UINT_MAX); |
| 918 | if (rc) { | 908 | if (rc) { |
| 919 | mlog_errno(rc); | 909 | mlog_errno(rc); |
| 920 | goto out; | 910 | goto out; |
| @@ -2062,6 +2052,7 @@ out: | |||
| 2062 | } | 2052 | } |
| 2063 | 2053 | ||
| 2064 | static int ocfs2_prepare_inode_for_refcount(struct inode *inode, | 2054 | static int ocfs2_prepare_inode_for_refcount(struct inode *inode, |
| 2055 | struct file *file, | ||
| 2065 | loff_t pos, size_t count, | 2056 | loff_t pos, size_t count, |
| 2066 | int *meta_level) | 2057 | int *meta_level) |
| 2067 | { | 2058 | { |
| @@ -2079,7 +2070,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode, | |||
| 2079 | 2070 | ||
| 2080 | *meta_level = 1; | 2071 | *meta_level = 1; |
| 2081 | 2072 | ||
| 2082 | ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX); | 2073 | ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX); |
| 2083 | if (ret) | 2074 | if (ret) |
| 2084 | mlog_errno(ret); | 2075 | mlog_errno(ret); |
| 2085 | out: | 2076 | out: |
| @@ -2087,7 +2078,7 @@ out: | |||
| 2087 | return ret; | 2078 | return ret; |
| 2088 | } | 2079 | } |
| 2089 | 2080 | ||
| 2090 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | 2081 | static int ocfs2_prepare_inode_for_write(struct file *file, |
| 2091 | loff_t *ppos, | 2082 | loff_t *ppos, |
| 2092 | size_t count, | 2083 | size_t count, |
| 2093 | int appending, | 2084 | int appending, |
| @@ -2095,6 +2086,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
| 2095 | int *has_refcount) | 2086 | int *has_refcount) |
| 2096 | { | 2087 | { |
| 2097 | int ret = 0, meta_level = 0; | 2088 | int ret = 0, meta_level = 0; |
| 2089 | struct dentry *dentry = file->f_path.dentry; | ||
| 2098 | struct inode *inode = dentry->d_inode; | 2090 | struct inode *inode = dentry->d_inode; |
| 2099 | loff_t saved_pos, end; | 2091 | loff_t saved_pos, end; |
| 2100 | 2092 | ||
| @@ -2150,6 +2142,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
| 2150 | meta_level = -1; | 2142 | meta_level = -1; |
| 2151 | 2143 | ||
| 2152 | ret = ocfs2_prepare_inode_for_refcount(inode, | 2144 | ret = ocfs2_prepare_inode_for_refcount(inode, |
| 2145 | file, | ||
| 2153 | saved_pos, | 2146 | saved_pos, |
| 2154 | count, | 2147 | count, |
| 2155 | &meta_level); | 2148 | &meta_level); |
| @@ -2232,6 +2225,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
| 2232 | struct file *file = iocb->ki_filp; | 2225 | struct file *file = iocb->ki_filp; |
| 2233 | struct inode *inode = file->f_path.dentry->d_inode; | 2226 | struct inode *inode = file->f_path.dentry->d_inode; |
| 2234 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2227 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 2228 | int full_coherency = !(osb->s_mount_opt & | ||
| 2229 | OCFS2_MOUNT_COHERENCY_BUFFERED); | ||
| 2235 | 2230 | ||
| 2236 | mlog_entry("(0x%p, %u, '%.*s')\n", file, | 2231 | mlog_entry("(0x%p, %u, '%.*s')\n", file, |
| 2237 | (unsigned int)nr_segs, | 2232 | (unsigned int)nr_segs, |
| @@ -2255,16 +2250,39 @@ relock: | |||
| 2255 | have_alloc_sem = 1; | 2250 | have_alloc_sem = 1; |
| 2256 | } | 2251 | } |
| 2257 | 2252 | ||
| 2258 | /* concurrent O_DIRECT writes are allowed */ | 2253 | /* |
| 2259 | rw_level = !direct_io; | 2254 | * Concurrent O_DIRECT writes are allowed with |
| 2255 | * mount_option "coherency=buffered". | ||
| 2256 | */ | ||
| 2257 | rw_level = (!direct_io || full_coherency); | ||
| 2258 | |||
| 2260 | ret = ocfs2_rw_lock(inode, rw_level); | 2259 | ret = ocfs2_rw_lock(inode, rw_level); |
| 2261 | if (ret < 0) { | 2260 | if (ret < 0) { |
| 2262 | mlog_errno(ret); | 2261 | mlog_errno(ret); |
| 2263 | goto out_sems; | 2262 | goto out_sems; |
| 2264 | } | 2263 | } |
| 2265 | 2264 | ||
| 2265 | /* | ||
| 2266 | * O_DIRECT writes with "coherency=full" need to take EX cluster | ||
| 2267 | * inode_lock to guarantee coherency. | ||
| 2268 | */ | ||
| 2269 | if (direct_io && full_coherency) { | ||
| 2270 | /* | ||
| 2271 | * We need to take and drop the inode lock to force | ||
| 2272 | * other nodes to drop their caches. Buffered I/O | ||
| 2273 | * already does this in write_begin(). | ||
| 2274 | */ | ||
| 2275 | ret = ocfs2_inode_lock(inode, NULL, 1); | ||
| 2276 | if (ret < 0) { | ||
| 2277 | mlog_errno(ret); | ||
| 2278 | goto out_sems; | ||
| 2279 | } | ||
| 2280 | |||
| 2281 | ocfs2_inode_unlock(inode, 1); | ||
| 2282 | } | ||
| 2283 | |||
| 2266 | can_do_direct = direct_io; | 2284 | can_do_direct = direct_io; |
| 2267 | ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos, | 2285 | ret = ocfs2_prepare_inode_for_write(file, ppos, |
| 2268 | iocb->ki_left, appending, | 2286 | iocb->ki_left, appending, |
| 2269 | &can_do_direct, &has_refcount); | 2287 | &can_do_direct, &has_refcount); |
| 2270 | if (ret < 0) { | 2288 | if (ret < 0) { |
| @@ -2312,17 +2330,6 @@ relock: | |||
| 2312 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, | 2330 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, |
| 2313 | ppos, count, ocount); | 2331 | ppos, count, ocount); |
| 2314 | if (written < 0) { | 2332 | if (written < 0) { |
| 2315 | /* | ||
| 2316 | * direct write may have instantiated a few | ||
| 2317 | * blocks outside i_size. Trim these off again. | ||
| 2318 | * Don't need i_size_read because we hold i_mutex. | ||
| 2319 | * | ||
| 2320 | * XXX(truncate): this looks buggy because ocfs2 did not | ||
| 2321 | * actually implement ->truncate. Take a look at | ||
| 2322 | * the new truncate sequence and update this accordingly | ||
| 2323 | */ | ||
| 2324 | if (*ppos + count > inode->i_size) | ||
| 2325 | truncate_setsize(inode, inode->i_size); | ||
| 2326 | ret = written; | 2333 | ret = written; |
| 2327 | goto out_dio; | 2334 | goto out_dio; |
| 2328 | } | 2335 | } |
| @@ -2394,7 +2401,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, | |||
| 2394 | { | 2401 | { |
| 2395 | int ret; | 2402 | int ret; |
| 2396 | 2403 | ||
| 2397 | ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, | 2404 | ret = ocfs2_prepare_inode_for_write(out, &sd->pos, |
| 2398 | sd->total_len, 0, NULL, NULL); | 2405 | sd->total_len, 0, NULL, NULL); |
| 2399 | if (ret < 0) { | 2406 | if (ret < 0) { |
| 2400 | mlog_errno(ret); | 2407 | mlog_errno(ret); |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index eece3e05d9d0..f935fd6600dd 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
| @@ -335,6 +335,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
| 335 | else | 335 | else |
| 336 | inode->i_fop = &ocfs2_dops_no_plocks; | 336 | inode->i_fop = &ocfs2_dops_no_plocks; |
| 337 | i_size_write(inode, le64_to_cpu(fe->i_size)); | 337 | i_size_write(inode, le64_to_cpu(fe->i_size)); |
| 338 | OCFS2_I(inode)->ip_dir_lock_gen = 1; | ||
| 338 | break; | 339 | break; |
| 339 | case S_IFLNK: | 340 | case S_IFLNK: |
| 340 | if (ocfs2_inode_is_fast_symlink(inode)) | 341 | if (ocfs2_inode_is_fast_symlink(inode)) |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 6de5a869db30..1c508b149b3a 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
| @@ -46,30 +46,28 @@ struct ocfs2_inode_info | |||
| 46 | /* These fields are protected by ip_lock */ | 46 | /* These fields are protected by ip_lock */ |
| 47 | spinlock_t ip_lock; | 47 | spinlock_t ip_lock; |
| 48 | u32 ip_open_count; | 48 | u32 ip_open_count; |
| 49 | u32 ip_clusters; | ||
| 50 | struct list_head ip_io_markers; | 49 | struct list_head ip_io_markers; |
| 50 | u32 ip_clusters; | ||
| 51 | 51 | ||
| 52 | u16 ip_dyn_features; | ||
| 52 | struct mutex ip_io_mutex; | 53 | struct mutex ip_io_mutex; |
| 53 | |||
| 54 | u32 ip_flags; /* see below */ | 54 | u32 ip_flags; /* see below */ |
| 55 | u32 ip_attr; /* inode attributes */ | 55 | u32 ip_attr; /* inode attributes */ |
| 56 | u16 ip_dyn_features; | ||
| 57 | 56 | ||
| 58 | /* protected by recovery_lock. */ | 57 | /* protected by recovery_lock. */ |
| 59 | struct inode *ip_next_orphan; | 58 | struct inode *ip_next_orphan; |
| 60 | 59 | ||
| 61 | u32 ip_dir_start_lookup; | ||
| 62 | |||
| 63 | struct ocfs2_caching_info ip_metadata_cache; | 60 | struct ocfs2_caching_info ip_metadata_cache; |
| 64 | |||
| 65 | struct ocfs2_extent_map ip_extent_map; | 61 | struct ocfs2_extent_map ip_extent_map; |
| 66 | |||
| 67 | struct inode vfs_inode; | 62 | struct inode vfs_inode; |
| 68 | struct jbd2_inode ip_jinode; | 63 | struct jbd2_inode ip_jinode; |
| 69 | 64 | ||
| 65 | u32 ip_dir_start_lookup; | ||
| 66 | |||
| 70 | /* Only valid if the inode is the dir. */ | 67 | /* Only valid if the inode is the dir. */ |
| 71 | u32 ip_last_used_slot; | 68 | u32 ip_last_used_slot; |
| 72 | u64 ip_last_used_group; | 69 | u64 ip_last_used_group; |
| 70 | u32 ip_dir_lock_gen; | ||
| 73 | 71 | ||
| 74 | struct ocfs2_alloc_reservation ip_la_data_resv; | 72 | struct ocfs2_alloc_reservation ip_la_data_resv; |
| 75 | }; | 73 | }; |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 7d9d9c132cef..7a4868196152 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
| @@ -26,6 +26,26 @@ | |||
| 26 | 26 | ||
| 27 | #include <linux/ext2_fs.h> | 27 | #include <linux/ext2_fs.h> |
| 28 | 28 | ||
| 29 | #define o2info_from_user(a, b) \ | ||
| 30 | copy_from_user(&(a), (b), sizeof(a)) | ||
| 31 | #define o2info_to_user(a, b) \ | ||
| 32 | copy_to_user((typeof(a) __user *)b, &(a), sizeof(a)) | ||
| 33 | |||
| 34 | /* | ||
| 35 | * This call is void because we are already reporting an error that may | ||
| 36 | * be -EFAULT. The error will be returned from the ioctl(2) call. It's | ||
| 37 | * just a best-effort to tell userspace that this request caused the error. | ||
| 38 | */ | ||
| 39 | static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq, | ||
| 40 | struct ocfs2_info_request __user *req) | ||
| 41 | { | ||
| 42 | kreq->ir_flags |= OCFS2_INFO_FL_ERROR; | ||
| 43 | (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags)); | ||
| 44 | } | ||
| 45 | |||
| 46 | #define o2info_set_request_error(a, b) \ | ||
| 47 | __o2info_set_request_error((struct ocfs2_info_request *)&(a), b) | ||
| 48 | |||
| 29 | static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) | 49 | static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) |
| 30 | { | 50 | { |
| 31 | int status; | 51 | int status; |
| @@ -109,6 +129,328 @@ bail: | |||
| 109 | return status; | 129 | return status; |
| 110 | } | 130 | } |
| 111 | 131 | ||
| 132 | int ocfs2_info_handle_blocksize(struct inode *inode, | ||
| 133 | struct ocfs2_info_request __user *req) | ||
| 134 | { | ||
| 135 | int status = -EFAULT; | ||
| 136 | struct ocfs2_info_blocksize oib; | ||
| 137 | |||
| 138 | if (o2info_from_user(oib, req)) | ||
| 139 | goto bail; | ||
| 140 | |||
| 141 | oib.ib_blocksize = inode->i_sb->s_blocksize; | ||
| 142 | oib.ib_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
| 143 | |||
| 144 | if (o2info_to_user(oib, req)) | ||
| 145 | goto bail; | ||
| 146 | |||
| 147 | status = 0; | ||
| 148 | bail: | ||
| 149 | if (status) | ||
| 150 | o2info_set_request_error(oib, req); | ||
| 151 | |||
| 152 | return status; | ||
| 153 | } | ||
| 154 | |||
| 155 | int ocfs2_info_handle_clustersize(struct inode *inode, | ||
| 156 | struct ocfs2_info_request __user *req) | ||
| 157 | { | ||
| 158 | int status = -EFAULT; | ||
| 159 | struct ocfs2_info_clustersize oic; | ||
| 160 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 161 | |||
| 162 | if (o2info_from_user(oic, req)) | ||
| 163 | goto bail; | ||
| 164 | |||
| 165 | oic.ic_clustersize = osb->s_clustersize; | ||
| 166 | oic.ic_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
| 167 | |||
| 168 | if (o2info_to_user(oic, req)) | ||
| 169 | goto bail; | ||
| 170 | |||
| 171 | status = 0; | ||
| 172 | bail: | ||
| 173 | if (status) | ||
| 174 | o2info_set_request_error(oic, req); | ||
| 175 | |||
| 176 | return status; | ||
| 177 | } | ||
| 178 | |||
| 179 | int ocfs2_info_handle_maxslots(struct inode *inode, | ||
| 180 | struct ocfs2_info_request __user *req) | ||
| 181 | { | ||
| 182 | int status = -EFAULT; | ||
| 183 | struct ocfs2_info_maxslots oim; | ||
| 184 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 185 | |||
| 186 | if (o2info_from_user(oim, req)) | ||
| 187 | goto bail; | ||
| 188 | |||
| 189 | oim.im_max_slots = osb->max_slots; | ||
| 190 | oim.im_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
| 191 | |||
| 192 | if (o2info_to_user(oim, req)) | ||
| 193 | goto bail; | ||
| 194 | |||
| 195 | status = 0; | ||
| 196 | bail: | ||
| 197 | if (status) | ||
| 198 | o2info_set_request_error(oim, req); | ||
| 199 | |||
| 200 | return status; | ||
| 201 | } | ||
| 202 | |||
| 203 | int ocfs2_info_handle_label(struct inode *inode, | ||
| 204 | struct ocfs2_info_request __user *req) | ||
| 205 | { | ||
| 206 | int status = -EFAULT; | ||
| 207 | struct ocfs2_info_label oil; | ||
| 208 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 209 | |||
| 210 | if (o2info_from_user(oil, req)) | ||
| 211 | goto bail; | ||
| 212 | |||
| 213 | memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); | ||
| 214 | oil.il_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
| 215 | |||
| 216 | if (o2info_to_user(oil, req)) | ||
| 217 | goto bail; | ||
| 218 | |||
| 219 | status = 0; | ||
| 220 | bail: | ||
| 221 | if (status) | ||
| 222 | o2info_set_request_error(oil, req); | ||
| 223 | |||
| 224 | return status; | ||
| 225 | } | ||
| 226 | |||
| 227 | int ocfs2_info_handle_uuid(struct inode *inode, | ||
| 228 | struct ocfs2_info_request __user *req) | ||
| 229 | { | ||
| 230 | int status = -EFAULT; | ||
| 231 | struct ocfs2_info_uuid oiu; | ||
| 232 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 233 | |||
| 234 | if (o2info_from_user(oiu, req)) | ||
| 235 | goto bail; | ||
| 236 | |||
| 237 | memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); | ||
| 238 | oiu.iu_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
| 239 | |||
| 240 | if (o2info_to_user(oiu, req)) | ||
| 241 | goto bail; | ||
| 242 | |||
| 243 | status = 0; | ||
| 244 | bail: | ||
| 245 | if (status) | ||
| 246 | o2info_set_request_error(oiu, req); | ||
| 247 | |||
| 248 | return status; | ||
| 249 | } | ||
| 250 | |||
| 251 | int ocfs2_info_handle_fs_features(struct inode *inode, | ||
| 252 | struct ocfs2_info_request __user *req) | ||
| 253 | { | ||
| 254 | int status = -EFAULT; | ||
| 255 | struct ocfs2_info_fs_features oif; | ||
| 256 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 257 | |||
| 258 | if (o2info_from_user(oif, req)) | ||
| 259 | goto bail; | ||
| 260 | |||
| 261 | oif.if_compat_features = osb->s_feature_compat; | ||
| 262 | oif.if_incompat_features = osb->s_feature_incompat; | ||
| 263 | oif.if_ro_compat_features = osb->s_feature_ro_compat; | ||
| 264 | oif.if_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
| 265 | |||
| 266 | if (o2info_to_user(oif, req)) | ||
| 267 | goto bail; | ||
| 268 | |||
| 269 | status = 0; | ||
| 270 | bail: | ||
| 271 | if (status) | ||
| 272 | o2info_set_request_error(oif, req); | ||
| 273 | |||
| 274 | return status; | ||
| 275 | } | ||
| 276 | |||
| 277 | int ocfs2_info_handle_journal_size(struct inode *inode, | ||
| 278 | struct ocfs2_info_request __user *req) | ||
| 279 | { | ||
| 280 | int status = -EFAULT; | ||
| 281 | struct ocfs2_info_journal_size oij; | ||
| 282 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 283 | |||
| 284 | if (o2info_from_user(oij, req)) | ||
| 285 | goto bail; | ||
| 286 | |||
| 287 | oij.ij_journal_size = osb->journal->j_inode->i_size; | ||
| 288 | |||
| 289 | oij.ij_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
| 290 | |||
| 291 | if (o2info_to_user(oij, req)) | ||
| 292 | goto bail; | ||
| 293 | |||
| 294 | status = 0; | ||
| 295 | bail: | ||
| 296 | if (status) | ||
| 297 | o2info_set_request_error(oij, req); | ||
| 298 | |||
| 299 | return status; | ||
| 300 | } | ||
| 301 | |||
| 302 | int ocfs2_info_handle_unknown(struct inode *inode, | ||
| 303 | struct ocfs2_info_request __user *req) | ||
| 304 | { | ||
| 305 | int status = -EFAULT; | ||
| 306 | struct ocfs2_info_request oir; | ||
| 307 | |||
| 308 | if (o2info_from_user(oir, req)) | ||
| 309 | goto bail; | ||
| 310 | |||
| 311 | oir.ir_flags &= ~OCFS2_INFO_FL_FILLED; | ||
| 312 | |||
| 313 | if (o2info_to_user(oir, req)) | ||
| 314 | goto bail; | ||
| 315 | |||
| 316 | status = 0; | ||
| 317 | bail: | ||
| 318 | if (status) | ||
| 319 | o2info_set_request_error(oir, req); | ||
| 320 | |||
| 321 | return status; | ||
| 322 | } | ||
| 323 | |||
| 324 | /* | ||
| 325 | * Validate and distinguish OCFS2_IOC_INFO requests. | ||
| 326 | * | ||
| 327 | * - validate the magic number. | ||
| 328 | * - distinguish different requests. | ||
| 329 | * - validate size of different requests. | ||
| 330 | */ | ||
| 331 | int ocfs2_info_handle_request(struct inode *inode, | ||
| 332 | struct ocfs2_info_request __user *req) | ||
| 333 | { | ||
| 334 | int status = -EFAULT; | ||
| 335 | struct ocfs2_info_request oir; | ||
| 336 | |||
| 337 | if (o2info_from_user(oir, req)) | ||
| 338 | goto bail; | ||
| 339 | |||
| 340 | status = -EINVAL; | ||
| 341 | if (oir.ir_magic != OCFS2_INFO_MAGIC) | ||
| 342 | goto bail; | ||
| 343 | |||
| 344 | switch (oir.ir_code) { | ||
| 345 | case OCFS2_INFO_BLOCKSIZE: | ||
| 346 | if (oir.ir_size == sizeof(struct ocfs2_info_blocksize)) | ||
| 347 | status = ocfs2_info_handle_blocksize(inode, req); | ||
| 348 | break; | ||
| 349 | case OCFS2_INFO_CLUSTERSIZE: | ||
| 350 | if (oir.ir_size == sizeof(struct ocfs2_info_clustersize)) | ||
| 351 | status = ocfs2_info_handle_clustersize(inode, req); | ||
| 352 | break; | ||
| 353 | case OCFS2_INFO_MAXSLOTS: | ||
| 354 | if (oir.ir_size == sizeof(struct ocfs2_info_maxslots)) | ||
| 355 | status = ocfs2_info_handle_maxslots(inode, req); | ||
| 356 | break; | ||
| 357 | case OCFS2_INFO_LABEL: | ||
| 358 | if (oir.ir_size == sizeof(struct ocfs2_info_label)) | ||
| 359 | status = ocfs2_info_handle_label(inode, req); | ||
| 360 | break; | ||
| 361 | case OCFS2_INFO_UUID: | ||
| 362 | if (oir.ir_size == sizeof(struct ocfs2_info_uuid)) | ||
| 363 | status = ocfs2_info_handle_uuid(inode, req); | ||
| 364 | break; | ||
| 365 | case OCFS2_INFO_FS_FEATURES: | ||
| 366 | if (oir.ir_size == sizeof(struct ocfs2_info_fs_features)) | ||
| 367 | status = ocfs2_info_handle_fs_features(inode, req); | ||
| 368 | break; | ||
| 369 | case OCFS2_INFO_JOURNAL_SIZE: | ||
| 370 | if (oir.ir_size == sizeof(struct ocfs2_info_journal_size)) | ||
| 371 | status = ocfs2_info_handle_journal_size(inode, req); | ||
| 372 | break; | ||
| 373 | default: | ||
| 374 | status = ocfs2_info_handle_unknown(inode, req); | ||
| 375 | break; | ||
| 376 | } | ||
| 377 | |||
| 378 | bail: | ||
| 379 | return status; | ||
| 380 | } | ||
| 381 | |||
| 382 | int ocfs2_get_request_ptr(struct ocfs2_info *info, int idx, | ||
| 383 | u64 *req_addr, int compat_flag) | ||
| 384 | { | ||
| 385 | int status = -EFAULT; | ||
| 386 | u64 __user *bp = NULL; | ||
| 387 | |||
| 388 | if (compat_flag) { | ||
| 389 | #ifdef CONFIG_COMPAT | ||
| 390 | /* | ||
| 391 | * pointer bp stores the base address of a pointer array, | ||
| 392 | * which collects the addresses of all the separate requests. | ||
| 393 | */ | ||
| 394 | bp = (u64 __user *)(unsigned long)compat_ptr(info->oi_requests); | ||
| 395 | #else | ||
| 396 | BUG(); | ||
| 397 | #endif | ||
| 398 | } else | ||
| 399 | bp = (u64 __user *)(unsigned long)(info->oi_requests); | ||
| 400 | |||
| 401 | if (o2info_from_user(*req_addr, bp + idx)) | ||
| 402 | goto bail; | ||
| 403 | |||
| 404 | status = 0; | ||
| 405 | bail: | ||
| 406 | return status; | ||
| 407 | } | ||
| 408 | |||
| 409 | /* | ||
| 410 | * OCFS2_IOC_INFO handles an array of requests passed from userspace. | ||
| 411 | * | ||
| 412 | * ocfs2_info_handle() receives a large info aggregation, grabs and | ||
| 413 | * validates the request count from the header, then breaks it into small | ||
| 414 | * pieces; later, specific handlers can handle them one by one. | ||
| 415 | * | ||
| 416 | * Idea here is to make each separate request small enough to ensure | ||
| 417 | * a better backward&forward compatibility, since a small piece of | ||
| 418 | * request will be less likely to be broken if the disk layout gets changed. | ||
| 419 | */ | ||
| 420 | int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info, | ||
| 421 | int compat_flag) | ||
| 422 | { | ||
| 423 | int i, status = 0; | ||
| 424 | u64 req_addr; | ||
| 425 | struct ocfs2_info_request __user *reqp; | ||
| 426 | |||
| 427 | if ((info->oi_count > OCFS2_INFO_MAX_REQUEST) || | ||
| 428 | (!info->oi_requests)) { | ||
| 429 | status = -EINVAL; | ||
| 430 | goto bail; | ||
| 431 | } | ||
| 432 | |||
| 433 | for (i = 0; i < info->oi_count; i++) { | ||
| 434 | |||
| 435 | status = ocfs2_get_request_ptr(info, i, &req_addr, compat_flag); | ||
| 436 | if (status) | ||
| 437 | break; | ||
| 438 | |||
| 439 | reqp = (struct ocfs2_info_request *)(unsigned long)req_addr; | ||
| 440 | if (!reqp) { | ||
| 441 | status = -EINVAL; | ||
| 442 | goto bail; | ||
| 443 | } | ||
| 444 | |||
| 445 | status = ocfs2_info_handle_request(inode, reqp); | ||
| 446 | if (status) | ||
| 447 | break; | ||
| 448 | } | ||
| 449 | |||
| 450 | bail: | ||
| 451 | return status; | ||
| 452 | } | ||
| 453 | |||
| 112 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 454 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
| 113 | { | 455 | { |
| 114 | struct inode *inode = filp->f_path.dentry->d_inode; | 456 | struct inode *inode = filp->f_path.dentry->d_inode; |
| @@ -120,6 +462,7 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
| 120 | struct reflink_arguments args; | 462 | struct reflink_arguments args; |
| 121 | const char *old_path, *new_path; | 463 | const char *old_path, *new_path; |
| 122 | bool preserve; | 464 | bool preserve; |
| 465 | struct ocfs2_info info; | ||
| 123 | 466 | ||
| 124 | switch (cmd) { | 467 | switch (cmd) { |
| 125 | case OCFS2_IOC_GETFLAGS: | 468 | case OCFS2_IOC_GETFLAGS: |
| @@ -174,6 +517,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
| 174 | preserve = (args.preserve != 0); | 517 | preserve = (args.preserve != 0); |
| 175 | 518 | ||
| 176 | return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); | 519 | return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); |
| 520 | case OCFS2_IOC_INFO: | ||
| 521 | if (copy_from_user(&info, (struct ocfs2_info __user *)arg, | ||
| 522 | sizeof(struct ocfs2_info))) | ||
| 523 | return -EFAULT; | ||
| 524 | |||
| 525 | return ocfs2_info_handle(inode, &info, 0); | ||
| 177 | default: | 526 | default: |
| 178 | return -ENOTTY; | 527 | return -ENOTTY; |
| 179 | } | 528 | } |
| @@ -185,6 +534,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
| 185 | bool preserve; | 534 | bool preserve; |
| 186 | struct reflink_arguments args; | 535 | struct reflink_arguments args; |
| 187 | struct inode *inode = file->f_path.dentry->d_inode; | 536 | struct inode *inode = file->f_path.dentry->d_inode; |
| 537 | struct ocfs2_info info; | ||
| 188 | 538 | ||
| 189 | switch (cmd) { | 539 | switch (cmd) { |
| 190 | case OCFS2_IOC32_GETFLAGS: | 540 | case OCFS2_IOC32_GETFLAGS: |
| @@ -209,6 +559,12 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
| 209 | 559 | ||
| 210 | return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), | 560 | return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), |
| 211 | compat_ptr(args.new_path), preserve); | 561 | compat_ptr(args.new_path), preserve); |
| 562 | case OCFS2_IOC_INFO: | ||
| 563 | if (copy_from_user(&info, (struct ocfs2_info __user *)arg, | ||
| 564 | sizeof(struct ocfs2_info))) | ||
| 565 | return -EFAULT; | ||
| 566 | |||
| 567 | return ocfs2_info_handle(inode, &info, 1); | ||
| 212 | default: | 568 | default: |
| 213 | return -ENOIOCTLCMD; | 569 | return -ENOIOCTLCMD; |
| 214 | } | 570 | } |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 9b57c0350ff9..faa2303dbf0a 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
| @@ -301,7 +301,6 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
| 301 | { | 301 | { |
| 302 | int status = 0; | 302 | int status = 0; |
| 303 | unsigned int flushed; | 303 | unsigned int flushed; |
| 304 | unsigned long old_id; | ||
| 305 | struct ocfs2_journal *journal = NULL; | 304 | struct ocfs2_journal *journal = NULL; |
| 306 | 305 | ||
| 307 | mlog_entry_void(); | 306 | mlog_entry_void(); |
| @@ -326,7 +325,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
| 326 | goto finally; | 325 | goto finally; |
| 327 | } | 326 | } |
| 328 | 327 | ||
| 329 | old_id = ocfs2_inc_trans_id(journal); | 328 | ocfs2_inc_trans_id(journal); |
| 330 | 329 | ||
| 331 | flushed = atomic_read(&journal->j_num_trans); | 330 | flushed = atomic_read(&journal->j_num_trans); |
| 332 | atomic_set(&journal->j_num_trans, 0); | 331 | atomic_set(&journal->j_num_trans, 0); |
| @@ -342,9 +341,6 @@ finally: | |||
| 342 | return status; | 341 | return status; |
| 343 | } | 342 | } |
| 344 | 343 | ||
| 345 | /* pass it NULL and it will allocate a new handle object for you. If | ||
| 346 | * you pass it a handle however, it may still return error, in which | ||
| 347 | * case it has free'd the passed handle for you. */ | ||
| 348 | handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) | 344 | handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) |
| 349 | { | 345 | { |
| 350 | journal_t *journal = osb->journal->j_journal; | 346 | journal_t *journal = osb->journal->j_journal; |
| @@ -1888,6 +1884,8 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) | |||
| 1888 | 1884 | ||
| 1889 | os = &osb->osb_orphan_scan; | 1885 | os = &osb->osb_orphan_scan; |
| 1890 | 1886 | ||
| 1887 | mlog(0, "Begin orphan scan\n"); | ||
| 1888 | |||
| 1891 | if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) | 1889 | if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) |
| 1892 | goto out; | 1890 | goto out; |
| 1893 | 1891 | ||
| @@ -1920,6 +1918,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) | |||
| 1920 | unlock: | 1918 | unlock: |
| 1921 | ocfs2_orphan_scan_unlock(osb, seqno); | 1919 | ocfs2_orphan_scan_unlock(osb, seqno); |
| 1922 | out: | 1920 | out: |
| 1921 | mlog(0, "Orphan scan completed\n"); | ||
| 1923 | return; | 1922 | return; |
| 1924 | } | 1923 | } |
| 1925 | 1924 | ||
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index b5baaa8e710f..43e56b97f9c0 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
| @@ -67,11 +67,12 @@ struct ocfs2_journal { | |||
| 67 | struct buffer_head *j_bh; /* Journal disk inode block */ | 67 | struct buffer_head *j_bh; /* Journal disk inode block */ |
| 68 | atomic_t j_num_trans; /* Number of transactions | 68 | atomic_t j_num_trans; /* Number of transactions |
| 69 | * currently in the system. */ | 69 | * currently in the system. */ |
| 70 | spinlock_t j_lock; | ||
| 70 | unsigned long j_trans_id; | 71 | unsigned long j_trans_id; |
| 71 | struct rw_semaphore j_trans_barrier; | 72 | struct rw_semaphore j_trans_barrier; |
| 72 | wait_queue_head_t j_checkpointed; | 73 | wait_queue_head_t j_checkpointed; |
| 73 | 74 | ||
| 74 | spinlock_t j_lock; | 75 | /* both fields protected by j_lock*/ |
| 75 | struct list_head j_la_cleanups; | 76 | struct list_head j_la_cleanups; |
| 76 | struct work_struct j_recovery_work; | 77 | struct work_struct j_recovery_work; |
| 77 | }; | 78 | }; |
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 4c18f4ad93b4..7e32db9c2c99 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
| @@ -59,10 +59,11 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) | |||
| 59 | return ret; | 59 | return ret; |
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, | 62 | static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, |
| 63 | struct page *page) | 63 | struct page *page) |
| 64 | { | 64 | { |
| 65 | int ret; | 65 | int ret; |
| 66 | struct inode *inode = file->f_path.dentry->d_inode; | ||
| 66 | struct address_space *mapping = inode->i_mapping; | 67 | struct address_space *mapping = inode->i_mapping; |
| 67 | loff_t pos = page_offset(page); | 68 | loff_t pos = page_offset(page); |
| 68 | unsigned int len = PAGE_CACHE_SIZE; | 69 | unsigned int len = PAGE_CACHE_SIZE; |
| @@ -111,7 +112,7 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, | |||
| 111 | if (page->index == last_index) | 112 | if (page->index == last_index) |
| 112 | len = ((size - 1) & ~PAGE_CACHE_MASK) + 1; | 113 | len = ((size - 1) & ~PAGE_CACHE_MASK) + 1; |
| 113 | 114 | ||
| 114 | ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, | 115 | ret = ocfs2_write_begin_nolock(file, mapping, pos, len, 0, &locked_page, |
| 115 | &fsdata, di_bh, page); | 116 | &fsdata, di_bh, page); |
| 116 | if (ret) { | 117 | if (ret) { |
| 117 | if (ret != -ENOSPC) | 118 | if (ret != -ENOSPC) |
| @@ -159,7 +160,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 159 | */ | 160 | */ |
| 160 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 161 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
| 161 | 162 | ||
| 162 | ret = __ocfs2_page_mkwrite(inode, di_bh, page); | 163 | ret = __ocfs2_page_mkwrite(vma->vm_file, di_bh, page); |
| 163 | 164 | ||
| 164 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 165 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
| 165 | 166 | ||
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index a00dda2e4f16..e7bde21149ae 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
| @@ -171,7 +171,8 @@ bail_add: | |||
| 171 | ret = ERR_PTR(status); | 171 | ret = ERR_PTR(status); |
| 172 | goto bail_unlock; | 172 | goto bail_unlock; |
| 173 | } | 173 | } |
| 174 | } | 174 | } else |
| 175 | ocfs2_dentry_attach_gen(dentry); | ||
| 175 | 176 | ||
| 176 | bail_unlock: | 177 | bail_unlock: |
| 177 | /* Don't drop the cluster lock until *after* the d_add -- | 178 | /* Don't drop the cluster lock until *after* the d_add -- |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index c67003b6b5a2..d8408217e3bd 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
| @@ -150,26 +150,33 @@ typedef void (*ocfs2_lock_callback)(int status, unsigned long data); | |||
| 150 | struct ocfs2_lock_res { | 150 | struct ocfs2_lock_res { |
| 151 | void *l_priv; | 151 | void *l_priv; |
| 152 | struct ocfs2_lock_res_ops *l_ops; | 152 | struct ocfs2_lock_res_ops *l_ops; |
| 153 | spinlock_t l_lock; | 153 | |
| 154 | 154 | ||
| 155 | struct list_head l_blocked_list; | 155 | struct list_head l_blocked_list; |
| 156 | struct list_head l_mask_waiters; | 156 | struct list_head l_mask_waiters; |
| 157 | 157 | ||
| 158 | enum ocfs2_lock_type l_type; | ||
| 159 | unsigned long l_flags; | 158 | unsigned long l_flags; |
| 160 | char l_name[OCFS2_LOCK_ID_MAX_LEN]; | 159 | char l_name[OCFS2_LOCK_ID_MAX_LEN]; |
| 161 | int l_level; | ||
| 162 | unsigned int l_ro_holders; | 160 | unsigned int l_ro_holders; |
| 163 | unsigned int l_ex_holders; | 161 | unsigned int l_ex_holders; |
| 164 | struct ocfs2_dlm_lksb l_lksb; | 162 | unsigned char l_level; |
| 163 | |||
| 164 | /* Data packed - type enum ocfs2_lock_type */ | ||
| 165 | unsigned char l_type; | ||
| 165 | 166 | ||
| 166 | /* used from AST/BAST funcs. */ | 167 | /* used from AST/BAST funcs. */ |
| 167 | enum ocfs2_ast_action l_action; | 168 | /* Data packed - enum type ocfs2_ast_action */ |
| 168 | enum ocfs2_unlock_action l_unlock_action; | 169 | unsigned char l_action; |
| 169 | int l_requested; | 170 | /* Data packed - enum type ocfs2_unlock_action */ |
| 170 | int l_blocking; | 171 | unsigned char l_unlock_action; |
| 172 | unsigned char l_requested; | ||
| 173 | unsigned char l_blocking; | ||
| 171 | unsigned int l_pending_gen; | 174 | unsigned int l_pending_gen; |
| 172 | 175 | ||
| 176 | spinlock_t l_lock; | ||
| 177 | |||
| 178 | struct ocfs2_dlm_lksb l_lksb; | ||
| 179 | |||
| 173 | wait_queue_head_t l_event; | 180 | wait_queue_head_t l_event; |
| 174 | 181 | ||
| 175 | struct list_head l_debug_list; | 182 | struct list_head l_debug_list; |
| @@ -243,7 +250,7 @@ enum ocfs2_local_alloc_state | |||
| 243 | 250 | ||
| 244 | enum ocfs2_mount_options | 251 | enum ocfs2_mount_options |
| 245 | { | 252 | { |
| 246 | OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Heartbeat started in local mode */ | 253 | OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Local heartbeat */ |
| 247 | OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */ | 254 | OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */ |
| 248 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ | 255 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ |
| 249 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ | 256 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ |
| @@ -256,6 +263,10 @@ enum ocfs2_mount_options | |||
| 256 | control lists */ | 263 | control lists */ |
| 257 | OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ | 264 | OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ |
| 258 | OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ | 265 | OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ |
| 266 | OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12, /* Allow concurrent O_DIRECT | ||
| 267 | writes */ | ||
| 268 | OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */ | ||
| 269 | OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ | ||
| 259 | }; | 270 | }; |
| 260 | 271 | ||
| 261 | #define OCFS2_OSB_SOFT_RO 0x0001 | 272 | #define OCFS2_OSB_SOFT_RO 0x0001 |
| @@ -277,7 +288,8 @@ struct ocfs2_super | |||
| 277 | struct super_block *sb; | 288 | struct super_block *sb; |
| 278 | struct inode *root_inode; | 289 | struct inode *root_inode; |
| 279 | struct inode *sys_root_inode; | 290 | struct inode *sys_root_inode; |
| 280 | struct inode *system_inodes[NUM_SYSTEM_INODES]; | 291 | struct inode *global_system_inodes[NUM_GLOBAL_SYSTEM_INODES]; |
| 292 | struct inode **local_system_inodes; | ||
| 281 | 293 | ||
| 282 | struct ocfs2_slot_info *slot_info; | 294 | struct ocfs2_slot_info *slot_info; |
| 283 | 295 | ||
| @@ -368,6 +380,8 @@ struct ocfs2_super | |||
| 368 | struct ocfs2_alloc_stats alloc_stats; | 380 | struct ocfs2_alloc_stats alloc_stats; |
| 369 | char dev_str[20]; /* "major,minor" of the device */ | 381 | char dev_str[20]; /* "major,minor" of the device */ |
| 370 | 382 | ||
| 383 | u8 osb_stackflags; | ||
| 384 | |||
| 371 | char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; | 385 | char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; |
| 372 | struct ocfs2_cluster_connection *cconn; | 386 | struct ocfs2_cluster_connection *cconn; |
| 373 | struct ocfs2_lock_res osb_super_lockres; | 387 | struct ocfs2_lock_res osb_super_lockres; |
| @@ -601,10 +615,35 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) | |||
| 601 | return ret; | 615 | return ret; |
| 602 | } | 616 | } |
| 603 | 617 | ||
| 604 | static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) | 618 | static inline int ocfs2_clusterinfo_valid(struct ocfs2_super *osb) |
| 605 | { | 619 | { |
| 606 | return (osb->s_feature_incompat & | 620 | return (osb->s_feature_incompat & |
| 607 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK); | 621 | (OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK | |
| 622 | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)); | ||
| 623 | } | ||
| 624 | |||
| 625 | static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) | ||
| 626 | { | ||
| 627 | if (ocfs2_clusterinfo_valid(osb) && | ||
| 628 | memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, | ||
| 629 | OCFS2_STACK_LABEL_LEN)) | ||
| 630 | return 1; | ||
| 631 | return 0; | ||
| 632 | } | ||
| 633 | |||
| 634 | static inline int ocfs2_o2cb_stack(struct ocfs2_super *osb) | ||
| 635 | { | ||
| 636 | if (ocfs2_clusterinfo_valid(osb) && | ||
| 637 | !memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, | ||
| 638 | OCFS2_STACK_LABEL_LEN)) | ||
| 639 | return 1; | ||
| 640 | return 0; | ||
| 641 | } | ||
| 642 | |||
| 643 | static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) | ||
| 644 | { | ||
| 645 | return ocfs2_o2cb_stack(osb) && | ||
| 646 | (osb->osb_stackflags & OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT); | ||
| 608 | } | 647 | } |
| 609 | 648 | ||
| 610 | static inline int ocfs2_mount_local(struct ocfs2_super *osb) | 649 | static inline int ocfs2_mount_local(struct ocfs2_super *osb) |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index fa31d05e41b7..c2e4f8222e2f 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
| @@ -101,7 +101,8 @@ | |||
| 101 | | OCFS2_FEATURE_INCOMPAT_META_ECC \ | 101 | | OCFS2_FEATURE_INCOMPAT_META_ECC \ |
| 102 | | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ | 102 | | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ |
| 103 | | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ | 103 | | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ |
| 104 | | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG) | 104 | | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \ |
| 105 | | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO) | ||
| 105 | #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ | 106 | #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ |
| 106 | | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ | 107 | | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ |
| 107 | | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) | 108 | | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) |
| @@ -170,6 +171,13 @@ | |||
| 170 | #define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000 | 171 | #define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000 |
| 171 | 172 | ||
| 172 | /* | 173 | /* |
| 174 | * Incompat bit to indicate useable clusterinfo with stackflags for all | ||
| 175 | * cluster stacks (userspace and o2cb). If this bit is set, | ||
| 176 | * INCOMPAT_USERSPACE_STACK becomes superfluous and thus should not be set. | ||
| 177 | */ | ||
| 178 | #define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x4000 | ||
| 179 | |||
| 180 | /* | ||
| 173 | * backup superblock flag is used to indicate that this volume | 181 | * backup superblock flag is used to indicate that this volume |
| 174 | * has backup superblocks. | 182 | * has backup superblocks. |
| 175 | */ | 183 | */ |
| @@ -292,10 +300,13 @@ | |||
| 292 | #define OCFS2_VOL_UUID_LEN 16 | 300 | #define OCFS2_VOL_UUID_LEN 16 |
| 293 | #define OCFS2_MAX_VOL_LABEL_LEN 64 | 301 | #define OCFS2_MAX_VOL_LABEL_LEN 64 |
| 294 | 302 | ||
| 295 | /* The alternate, userspace stack fields */ | 303 | /* The cluster stack fields */ |
| 296 | #define OCFS2_STACK_LABEL_LEN 4 | 304 | #define OCFS2_STACK_LABEL_LEN 4 |
| 297 | #define OCFS2_CLUSTER_NAME_LEN 16 | 305 | #define OCFS2_CLUSTER_NAME_LEN 16 |
| 298 | 306 | ||
| 307 | /* Classic (historically speaking) cluster stack */ | ||
| 308 | #define OCFS2_CLASSIC_CLUSTER_STACK "o2cb" | ||
| 309 | |||
| 299 | /* Journal limits (in bytes) */ | 310 | /* Journal limits (in bytes) */ |
| 300 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) | 311 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) |
| 301 | 312 | ||
| @@ -305,6 +316,11 @@ | |||
| 305 | */ | 316 | */ |
| 306 | #define OCFS2_MIN_XATTR_INLINE_SIZE 256 | 317 | #define OCFS2_MIN_XATTR_INLINE_SIZE 256 |
| 307 | 318 | ||
| 319 | /* | ||
| 320 | * Cluster info flags (ocfs2_cluster_info.ci_stackflags) | ||
| 321 | */ | ||
| 322 | #define OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT (0x01) | ||
| 323 | |||
| 308 | struct ocfs2_system_inode_info { | 324 | struct ocfs2_system_inode_info { |
| 309 | char *si_name; | 325 | char *si_name; |
| 310 | int si_iflags; | 326 | int si_iflags; |
| @@ -322,6 +338,7 @@ enum { | |||
| 322 | USER_QUOTA_SYSTEM_INODE, | 338 | USER_QUOTA_SYSTEM_INODE, |
| 323 | GROUP_QUOTA_SYSTEM_INODE, | 339 | GROUP_QUOTA_SYSTEM_INODE, |
| 324 | #define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE | 340 | #define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE |
| 341 | #define OCFS2_FIRST_LOCAL_SYSTEM_INODE ORPHAN_DIR_SYSTEM_INODE | ||
| 325 | ORPHAN_DIR_SYSTEM_INODE, | 342 | ORPHAN_DIR_SYSTEM_INODE, |
| 326 | EXTENT_ALLOC_SYSTEM_INODE, | 343 | EXTENT_ALLOC_SYSTEM_INODE, |
| 327 | INODE_ALLOC_SYSTEM_INODE, | 344 | INODE_ALLOC_SYSTEM_INODE, |
| @@ -330,8 +347,12 @@ enum { | |||
| 330 | TRUNCATE_LOG_SYSTEM_INODE, | 347 | TRUNCATE_LOG_SYSTEM_INODE, |
| 331 | LOCAL_USER_QUOTA_SYSTEM_INODE, | 348 | LOCAL_USER_QUOTA_SYSTEM_INODE, |
| 332 | LOCAL_GROUP_QUOTA_SYSTEM_INODE, | 349 | LOCAL_GROUP_QUOTA_SYSTEM_INODE, |
| 350 | #define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE | ||
| 333 | NUM_SYSTEM_INODES | 351 | NUM_SYSTEM_INODES |
| 334 | }; | 352 | }; |
| 353 | #define NUM_GLOBAL_SYSTEM_INODES OCFS2_LAST_GLOBAL_SYSTEM_INODE | ||
| 354 | #define NUM_LOCAL_SYSTEM_INODES \ | ||
| 355 | (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE) | ||
| 335 | 356 | ||
| 336 | static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { | 357 | static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { |
| 337 | /* Global system inodes (single copy) */ | 358 | /* Global system inodes (single copy) */ |
| @@ -360,6 +381,7 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { | |||
| 360 | /* Parameter passed from mount.ocfs2 to module */ | 381 | /* Parameter passed from mount.ocfs2 to module */ |
| 361 | #define OCFS2_HB_NONE "heartbeat=none" | 382 | #define OCFS2_HB_NONE "heartbeat=none" |
| 362 | #define OCFS2_HB_LOCAL "heartbeat=local" | 383 | #define OCFS2_HB_LOCAL "heartbeat=local" |
| 384 | #define OCFS2_HB_GLOBAL "heartbeat=global" | ||
| 363 | 385 | ||
| 364 | /* | 386 | /* |
| 365 | * OCFS2 directory file types. Only the low 3 bits are used. The | 387 | * OCFS2 directory file types. Only the low 3 bits are used. The |
| @@ -566,9 +588,21 @@ struct ocfs2_slot_map_extended { | |||
| 566 | */ | 588 | */ |
| 567 | }; | 589 | }; |
| 568 | 590 | ||
| 591 | /* | ||
| 592 | * ci_stackflags is only valid if the incompat bit | ||
| 593 | * OCFS2_FEATURE_INCOMPAT_CLUSTERINFO is set. | ||
| 594 | */ | ||
| 569 | struct ocfs2_cluster_info { | 595 | struct ocfs2_cluster_info { |
| 570 | /*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN]; | 596 | /*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN]; |
| 571 | __le32 ci_reserved; | 597 | union { |
| 598 | __le32 ci_reserved; | ||
| 599 | struct { | ||
| 600 | __u8 ci_stackflags; | ||
| 601 | __u8 ci_reserved1; | ||
| 602 | __u8 ci_reserved2; | ||
| 603 | __u8 ci_reserved3; | ||
| 604 | }; | ||
| 605 | }; | ||
| 572 | /*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN]; | 606 | /*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN]; |
| 573 | /*18*/ | 607 | /*18*/ |
| 574 | }; | 608 | }; |
| @@ -605,9 +639,9 @@ struct ocfs2_super_block { | |||
| 605 | * group header */ | 639 | * group header */ |
| 606 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ | 640 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ |
| 607 | /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ | 641 | /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ |
| 608 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace | 642 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Only valid if either |
| 609 | stack. Only valid | 643 | userspace or clusterinfo |
| 610 | with INCOMPAT flag. */ | 644 | INCOMPAT flag set. */ |
| 611 | /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size | 645 | /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size |
| 612 | for this fs*/ | 646 | for this fs*/ |
| 613 | __le16 s_reserved0; | 647 | __le16 s_reserved0; |
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index 5d241505690b..b46f39bf7438 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h | |||
| @@ -76,4 +76,99 @@ struct reflink_arguments { | |||
| 76 | }; | 76 | }; |
| 77 | #define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) | 77 | #define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) |
| 78 | 78 | ||
| 79 | /* Following definitions dedicated for ocfs2_info_request ioctls. */ | ||
| 80 | #define OCFS2_INFO_MAX_REQUEST (50) | ||
| 81 | #define OCFS2_TEXT_UUID_LEN (OCFS2_VOL_UUID_LEN * 2) | ||
| 82 | |||
| 83 | /* Magic number of all requests */ | ||
| 84 | #define OCFS2_INFO_MAGIC (0x4F32494E) | ||
| 85 | |||
| 86 | /* | ||
| 87 | * Always try to separate info request into small pieces to | ||
| 88 | * guarantee the backward&forward compatibility. | ||
| 89 | */ | ||
| 90 | struct ocfs2_info { | ||
| 91 | __u64 oi_requests; /* Array of __u64 pointers to requests */ | ||
| 92 | __u32 oi_count; /* Number of requests in oi_requests */ | ||
| 93 | __u32 oi_pad; | ||
| 94 | }; | ||
| 95 | |||
| 96 | struct ocfs2_info_request { | ||
| 97 | /*00*/ __u32 ir_magic; /* Magic number */ | ||
| 98 | __u32 ir_code; /* Info request code */ | ||
| 99 | __u32 ir_size; /* Size of request */ | ||
| 100 | __u32 ir_flags; /* Request flags */ | ||
| 101 | /*10*/ /* Request specific fields */ | ||
| 102 | }; | ||
| 103 | |||
| 104 | struct ocfs2_info_clustersize { | ||
| 105 | struct ocfs2_info_request ic_req; | ||
| 106 | __u32 ic_clustersize; | ||
| 107 | __u32 ic_pad; | ||
| 108 | }; | ||
| 109 | |||
| 110 | struct ocfs2_info_blocksize { | ||
| 111 | struct ocfs2_info_request ib_req; | ||
| 112 | __u32 ib_blocksize; | ||
| 113 | __u32 ib_pad; | ||
| 114 | }; | ||
| 115 | |||
| 116 | struct ocfs2_info_maxslots { | ||
| 117 | struct ocfs2_info_request im_req; | ||
| 118 | __u32 im_max_slots; | ||
| 119 | __u32 im_pad; | ||
| 120 | }; | ||
| 121 | |||
| 122 | struct ocfs2_info_label { | ||
| 123 | struct ocfs2_info_request il_req; | ||
| 124 | __u8 il_label[OCFS2_MAX_VOL_LABEL_LEN]; | ||
| 125 | } __attribute__ ((packed)); | ||
| 126 | |||
| 127 | struct ocfs2_info_uuid { | ||
| 128 | struct ocfs2_info_request iu_req; | ||
| 129 | __u8 iu_uuid_str[OCFS2_TEXT_UUID_LEN + 1]; | ||
| 130 | } __attribute__ ((packed)); | ||
| 131 | |||
| 132 | struct ocfs2_info_fs_features { | ||
| 133 | struct ocfs2_info_request if_req; | ||
| 134 | __u32 if_compat_features; | ||
| 135 | __u32 if_incompat_features; | ||
| 136 | __u32 if_ro_compat_features; | ||
| 137 | __u32 if_pad; | ||
| 138 | }; | ||
| 139 | |||
| 140 | struct ocfs2_info_journal_size { | ||
| 141 | struct ocfs2_info_request ij_req; | ||
| 142 | __u64 ij_journal_size; | ||
| 143 | }; | ||
| 144 | |||
| 145 | /* Codes for ocfs2_info_request */ | ||
| 146 | enum ocfs2_info_type { | ||
| 147 | OCFS2_INFO_CLUSTERSIZE = 1, | ||
| 148 | OCFS2_INFO_BLOCKSIZE, | ||
| 149 | OCFS2_INFO_MAXSLOTS, | ||
| 150 | OCFS2_INFO_LABEL, | ||
| 151 | OCFS2_INFO_UUID, | ||
| 152 | OCFS2_INFO_FS_FEATURES, | ||
| 153 | OCFS2_INFO_JOURNAL_SIZE, | ||
| 154 | OCFS2_INFO_NUM_TYPES | ||
| 155 | }; | ||
| 156 | |||
| 157 | /* Flags for struct ocfs2_info_request */ | ||
| 158 | /* Filled by the caller */ | ||
| 159 | #define OCFS2_INFO_FL_NON_COHERENT (0x00000001) /* Cluster coherency not | ||
| 160 | required. This is a hint. | ||
| 161 | It is up to ocfs2 whether | ||
| 162 | the request can be fulfilled | ||
| 163 | without locking. */ | ||
| 164 | /* Filled by ocfs2 */ | ||
| 165 | #define OCFS2_INFO_FL_FILLED (0x40000000) /* Filesystem understood | ||
| 166 | this request and | ||
| 167 | filled in the answer */ | ||
| 168 | |||
| 169 | #define OCFS2_INFO_FL_ERROR (0x80000000) /* Error happened during | ||
| 170 | request handling. */ | ||
| 171 | |||
| 172 | #define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info) | ||
| 173 | |||
| 79 | #endif /* OCFS2_IOCTL_H */ | 174 | #endif /* OCFS2_IOCTL_H */ |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index efdd75607406..b5f9160e93e9 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
| @@ -49,6 +49,7 @@ | |||
| 49 | 49 | ||
| 50 | struct ocfs2_cow_context { | 50 | struct ocfs2_cow_context { |
| 51 | struct inode *inode; | 51 | struct inode *inode; |
| 52 | struct file *file; | ||
| 52 | u32 cow_start; | 53 | u32 cow_start; |
| 53 | u32 cow_len; | 54 | u32 cow_len; |
| 54 | struct ocfs2_extent_tree data_et; | 55 | struct ocfs2_extent_tree data_et; |
| @@ -2932,13 +2933,16 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
| 2932 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); | 2933 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); |
| 2933 | struct page *page; | 2934 | struct page *page; |
| 2934 | pgoff_t page_index; | 2935 | pgoff_t page_index; |
| 2935 | unsigned int from, to; | 2936 | unsigned int from, to, readahead_pages; |
| 2936 | loff_t offset, end, map_end; | 2937 | loff_t offset, end, map_end; |
| 2937 | struct address_space *mapping = context->inode->i_mapping; | 2938 | struct address_space *mapping = context->inode->i_mapping; |
| 2938 | 2939 | ||
| 2939 | mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster, | 2940 | mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster, |
| 2940 | new_cluster, new_len, cpos); | 2941 | new_cluster, new_len, cpos); |
| 2941 | 2942 | ||
| 2943 | readahead_pages = | ||
| 2944 | (ocfs2_cow_contig_clusters(sb) << | ||
| 2945 | OCFS2_SB(sb)->s_clustersize_bits) >> PAGE_CACHE_SHIFT; | ||
| 2942 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; | 2946 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; |
| 2943 | end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); | 2947 | end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); |
| 2944 | /* | 2948 | /* |
| @@ -2969,6 +2973,14 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
| 2969 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) | 2973 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) |
| 2970 | BUG_ON(PageDirty(page)); | 2974 | BUG_ON(PageDirty(page)); |
| 2971 | 2975 | ||
| 2976 | if (PageReadahead(page) && context->file) { | ||
| 2977 | page_cache_async_readahead(mapping, | ||
| 2978 | &context->file->f_ra, | ||
| 2979 | context->file, | ||
| 2980 | page, page_index, | ||
| 2981 | readahead_pages); | ||
| 2982 | } | ||
| 2983 | |||
| 2972 | if (!PageUptodate(page)) { | 2984 | if (!PageUptodate(page)) { |
| 2973 | ret = block_read_full_page(page, ocfs2_get_block); | 2985 | ret = block_read_full_page(page, ocfs2_get_block); |
| 2974 | if (ret) { | 2986 | if (ret) { |
| @@ -3409,12 +3421,35 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context) | |||
| 3409 | return ret; | 3421 | return ret; |
| 3410 | } | 3422 | } |
| 3411 | 3423 | ||
| 3424 | static void ocfs2_readahead_for_cow(struct inode *inode, | ||
| 3425 | struct file *file, | ||
| 3426 | u32 start, u32 len) | ||
| 3427 | { | ||
| 3428 | struct address_space *mapping; | ||
| 3429 | pgoff_t index; | ||
| 3430 | unsigned long num_pages; | ||
| 3431 | int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; | ||
| 3432 | |||
| 3433 | if (!file) | ||
| 3434 | return; | ||
| 3435 | |||
| 3436 | mapping = file->f_mapping; | ||
| 3437 | num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT; | ||
| 3438 | if (!num_pages) | ||
| 3439 | num_pages = 1; | ||
| 3440 | |||
| 3441 | index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT; | ||
| 3442 | page_cache_sync_readahead(mapping, &file->f_ra, file, | ||
| 3443 | index, num_pages); | ||
| 3444 | } | ||
| 3445 | |||
| 3412 | /* | 3446 | /* |
| 3413 | * Starting at cpos, try to CoW write_len clusters. Don't CoW | 3447 | * Starting at cpos, try to CoW write_len clusters. Don't CoW |
| 3414 | * past max_cpos. This will stop when it runs into a hole or an | 3448 | * past max_cpos. This will stop when it runs into a hole or an |
| 3415 | * unrefcounted extent. | 3449 | * unrefcounted extent. |
| 3416 | */ | 3450 | */ |
| 3417 | static int ocfs2_refcount_cow_hunk(struct inode *inode, | 3451 | static int ocfs2_refcount_cow_hunk(struct inode *inode, |
| 3452 | struct file *file, | ||
| 3418 | struct buffer_head *di_bh, | 3453 | struct buffer_head *di_bh, |
| 3419 | u32 cpos, u32 write_len, u32 max_cpos) | 3454 | u32 cpos, u32 write_len, u32 max_cpos) |
| 3420 | { | 3455 | { |
| @@ -3443,6 +3478,8 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, | |||
| 3443 | 3478 | ||
| 3444 | BUG_ON(cow_len == 0); | 3479 | BUG_ON(cow_len == 0); |
| 3445 | 3480 | ||
| 3481 | ocfs2_readahead_for_cow(inode, file, cow_start, cow_len); | ||
| 3482 | |||
| 3446 | context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); | 3483 | context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); |
| 3447 | if (!context) { | 3484 | if (!context) { |
| 3448 | ret = -ENOMEM; | 3485 | ret = -ENOMEM; |
| @@ -3464,6 +3501,7 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, | |||
| 3464 | context->ref_root_bh = ref_root_bh; | 3501 | context->ref_root_bh = ref_root_bh; |
| 3465 | context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; | 3502 | context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; |
| 3466 | context->get_clusters = ocfs2_di_get_clusters; | 3503 | context->get_clusters = ocfs2_di_get_clusters; |
| 3504 | context->file = file; | ||
| 3467 | 3505 | ||
| 3468 | ocfs2_init_dinode_extent_tree(&context->data_et, | 3506 | ocfs2_init_dinode_extent_tree(&context->data_et, |
| 3469 | INODE_CACHE(inode), di_bh); | 3507 | INODE_CACHE(inode), di_bh); |
| @@ -3492,6 +3530,7 @@ out: | |||
| 3492 | * clusters between cpos and cpos+write_len are safe to modify. | 3530 | * clusters between cpos and cpos+write_len are safe to modify. |
| 3493 | */ | 3531 | */ |
| 3494 | int ocfs2_refcount_cow(struct inode *inode, | 3532 | int ocfs2_refcount_cow(struct inode *inode, |
| 3533 | struct file *file, | ||
| 3495 | struct buffer_head *di_bh, | 3534 | struct buffer_head *di_bh, |
| 3496 | u32 cpos, u32 write_len, u32 max_cpos) | 3535 | u32 cpos, u32 write_len, u32 max_cpos) |
| 3497 | { | 3536 | { |
| @@ -3511,7 +3550,7 @@ int ocfs2_refcount_cow(struct inode *inode, | |||
| 3511 | num_clusters = write_len; | 3550 | num_clusters = write_len; |
| 3512 | 3551 | ||
| 3513 | if (ext_flags & OCFS2_EXT_REFCOUNTED) { | 3552 | if (ext_flags & OCFS2_EXT_REFCOUNTED) { |
| 3514 | ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos, | 3553 | ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos, |
| 3515 | num_clusters, max_cpos); | 3554 | num_clusters, max_cpos); |
| 3516 | if (ret) { | 3555 | if (ret) { |
| 3517 | mlog_errno(ret); | 3556 | mlog_errno(ret); |
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index 9983ba1570e2..c8ce46f7d8e3 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h | |||
| @@ -21,14 +21,14 @@ struct ocfs2_refcount_tree { | |||
| 21 | struct rb_node rf_node; | 21 | struct rb_node rf_node; |
| 22 | u64 rf_blkno; | 22 | u64 rf_blkno; |
| 23 | u32 rf_generation; | 23 | u32 rf_generation; |
| 24 | struct kref rf_getcnt; | ||
| 24 | struct rw_semaphore rf_sem; | 25 | struct rw_semaphore rf_sem; |
| 25 | struct ocfs2_lock_res rf_lockres; | 26 | struct ocfs2_lock_res rf_lockres; |
| 26 | struct kref rf_getcnt; | ||
| 27 | int rf_removed; | 27 | int rf_removed; |
| 28 | 28 | ||
| 29 | /* the following 4 fields are used by caching_info. */ | 29 | /* the following 4 fields are used by caching_info. */ |
| 30 | struct ocfs2_caching_info rf_ci; | ||
| 31 | spinlock_t rf_lock; | 30 | spinlock_t rf_lock; |
| 31 | struct ocfs2_caching_info rf_ci; | ||
| 32 | struct mutex rf_io_mutex; | 32 | struct mutex rf_io_mutex; |
| 33 | struct super_block *rf_sb; | 33 | struct super_block *rf_sb; |
| 34 | }; | 34 | }; |
| @@ -52,7 +52,8 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, | |||
| 52 | u32 clusters, | 52 | u32 clusters, |
| 53 | int *credits, | 53 | int *credits, |
| 54 | int *ref_blocks); | 54 | int *ref_blocks); |
| 55 | int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh, | 55 | int ocfs2_refcount_cow(struct inode *inode, |
| 56 | struct file *filep, struct buffer_head *di_bh, | ||
| 56 | u32 cpos, u32 write_len, u32 max_cpos); | 57 | u32 cpos, u32 write_len, u32 max_cpos); |
| 57 | 58 | ||
| 58 | typedef int (ocfs2_post_refcount_func)(struct inode *inode, | 59 | typedef int (ocfs2_post_refcount_func)(struct inode *inode, |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index bfbd7e9e949f..ab4e0172cc1d 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
| @@ -357,7 +357,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, | |||
| 357 | { | 357 | { |
| 358 | int status = 0; | 358 | int status = 0; |
| 359 | u64 blkno; | 359 | u64 blkno; |
| 360 | unsigned long long blocks, bytes; | 360 | unsigned long long blocks, bytes = 0; |
| 361 | unsigned int i; | 361 | unsigned int i; |
| 362 | struct buffer_head *bh; | 362 | struct buffer_head *bh; |
| 363 | 363 | ||
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 0d3049f696c5..19965b00c43c 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c | |||
| @@ -283,6 +283,8 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) | |||
| 283 | /* for now we only have one cluster/node, make sure we see it | 283 | /* for now we only have one cluster/node, make sure we see it |
| 284 | * in the heartbeat universe */ | 284 | * in the heartbeat universe */ |
| 285 | if (!o2hb_check_local_node_heartbeating()) { | 285 | if (!o2hb_check_local_node_heartbeating()) { |
| 286 | if (o2hb_global_heartbeat_active()) | ||
| 287 | mlog(ML_ERROR, "Global heartbeat not started\n"); | ||
| 286 | rc = -EINVAL; | 288 | rc = -EINVAL; |
| 287 | goto out; | 289 | goto out; |
| 288 | } | 290 | } |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 849c2f0e0a0e..5fed60de7630 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
| @@ -1380,6 +1380,14 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, | |||
| 1380 | } | 1380 | } |
| 1381 | 1381 | ||
| 1382 | le16_add_cpu(&bg->bg_free_bits_count, -num_bits); | 1382 | le16_add_cpu(&bg->bg_free_bits_count, -num_bits); |
| 1383 | if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { | ||
| 1384 | ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" | ||
| 1385 | " count %u but claims %u are freed. num_bits %d", | ||
| 1386 | (unsigned long long)le64_to_cpu(bg->bg_blkno), | ||
| 1387 | le16_to_cpu(bg->bg_bits), | ||
| 1388 | le16_to_cpu(bg->bg_free_bits_count), num_bits); | ||
| 1389 | return -EROFS; | ||
| 1390 | } | ||
| 1383 | while(num_bits--) | 1391 | while(num_bits--) |
| 1384 | ocfs2_set_bit(bit_off++, bitmap); | 1392 | ocfs2_set_bit(bit_off++, bitmap); |
| 1385 | 1393 | ||
| @@ -2419,6 +2427,14 @@ static int ocfs2_block_group_clear_bits(handle_t *handle, | |||
| 2419 | (unsigned long *) undo_bg->bg_bitmap); | 2427 | (unsigned long *) undo_bg->bg_bitmap); |
| 2420 | } | 2428 | } |
| 2421 | le16_add_cpu(&bg->bg_free_bits_count, num_bits); | 2429 | le16_add_cpu(&bg->bg_free_bits_count, num_bits); |
| 2430 | if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { | ||
| 2431 | ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" | ||
| 2432 | " count %u but claims %u are freed. num_bits %d", | ||
| 2433 | (unsigned long long)le64_to_cpu(bg->bg_blkno), | ||
| 2434 | le16_to_cpu(bg->bg_bits), | ||
| 2435 | le16_to_cpu(bg->bg_free_bits_count), num_bits); | ||
| 2436 | return -EROFS; | ||
| 2437 | } | ||
| 2422 | 2438 | ||
| 2423 | if (undo_fn) | 2439 | if (undo_fn) |
| 2424 | jbd_unlock_bh_state(group_bh); | 2440 | jbd_unlock_bh_state(group_bh); |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index fa1be1b304d1..a8a0ca44f88f 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -162,6 +162,7 @@ enum { | |||
| 162 | Opt_nointr, | 162 | Opt_nointr, |
| 163 | Opt_hb_none, | 163 | Opt_hb_none, |
| 164 | Opt_hb_local, | 164 | Opt_hb_local, |
| 165 | Opt_hb_global, | ||
| 165 | Opt_data_ordered, | 166 | Opt_data_ordered, |
| 166 | Opt_data_writeback, | 167 | Opt_data_writeback, |
| 167 | Opt_atime_quantum, | 168 | Opt_atime_quantum, |
| @@ -177,6 +178,8 @@ enum { | |||
| 177 | Opt_noacl, | 178 | Opt_noacl, |
| 178 | Opt_usrquota, | 179 | Opt_usrquota, |
| 179 | Opt_grpquota, | 180 | Opt_grpquota, |
| 181 | Opt_coherency_buffered, | ||
| 182 | Opt_coherency_full, | ||
| 180 | Opt_resv_level, | 183 | Opt_resv_level, |
| 181 | Opt_dir_resv_level, | 184 | Opt_dir_resv_level, |
| 182 | Opt_err, | 185 | Opt_err, |
| @@ -190,6 +193,7 @@ static const match_table_t tokens = { | |||
| 190 | {Opt_nointr, "nointr"}, | 193 | {Opt_nointr, "nointr"}, |
| 191 | {Opt_hb_none, OCFS2_HB_NONE}, | 194 | {Opt_hb_none, OCFS2_HB_NONE}, |
| 192 | {Opt_hb_local, OCFS2_HB_LOCAL}, | 195 | {Opt_hb_local, OCFS2_HB_LOCAL}, |
| 196 | {Opt_hb_global, OCFS2_HB_GLOBAL}, | ||
| 193 | {Opt_data_ordered, "data=ordered"}, | 197 | {Opt_data_ordered, "data=ordered"}, |
| 194 | {Opt_data_writeback, "data=writeback"}, | 198 | {Opt_data_writeback, "data=writeback"}, |
| 195 | {Opt_atime_quantum, "atime_quantum=%u"}, | 199 | {Opt_atime_quantum, "atime_quantum=%u"}, |
| @@ -205,6 +209,8 @@ static const match_table_t tokens = { | |||
| 205 | {Opt_noacl, "noacl"}, | 209 | {Opt_noacl, "noacl"}, |
| 206 | {Opt_usrquota, "usrquota"}, | 210 | {Opt_usrquota, "usrquota"}, |
| 207 | {Opt_grpquota, "grpquota"}, | 211 | {Opt_grpquota, "grpquota"}, |
| 212 | {Opt_coherency_buffered, "coherency=buffered"}, | ||
| 213 | {Opt_coherency_full, "coherency=full"}, | ||
| 208 | {Opt_resv_level, "resv_level=%u"}, | 214 | {Opt_resv_level, "resv_level=%u"}, |
| 209 | {Opt_dir_resv_level, "dir_resv_level=%u"}, | 215 | {Opt_dir_resv_level, "dir_resv_level=%u"}, |
| 210 | {Opt_err, NULL} | 216 | {Opt_err, NULL} |
| @@ -514,11 +520,11 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb) | |||
| 514 | 520 | ||
| 515 | mlog_entry_void(); | 521 | mlog_entry_void(); |
| 516 | 522 | ||
| 517 | for (i = 0; i < NUM_SYSTEM_INODES; i++) { | 523 | for (i = 0; i < NUM_GLOBAL_SYSTEM_INODES; i++) { |
| 518 | inode = osb->system_inodes[i]; | 524 | inode = osb->global_system_inodes[i]; |
| 519 | if (inode) { | 525 | if (inode) { |
| 520 | iput(inode); | 526 | iput(inode); |
| 521 | osb->system_inodes[i] = NULL; | 527 | osb->global_system_inodes[i] = NULL; |
| 522 | } | 528 | } |
| 523 | } | 529 | } |
| 524 | 530 | ||
| @@ -534,6 +540,20 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb) | |||
| 534 | osb->root_inode = NULL; | 540 | osb->root_inode = NULL; |
| 535 | } | 541 | } |
| 536 | 542 | ||
| 543 | if (!osb->local_system_inodes) | ||
| 544 | goto out; | ||
| 545 | |||
| 546 | for (i = 0; i < NUM_LOCAL_SYSTEM_INODES * osb->max_slots; i++) { | ||
| 547 | if (osb->local_system_inodes[i]) { | ||
| 548 | iput(osb->local_system_inodes[i]); | ||
| 549 | osb->local_system_inodes[i] = NULL; | ||
| 550 | } | ||
| 551 | } | ||
| 552 | |||
| 553 | kfree(osb->local_system_inodes); | ||
| 554 | osb->local_system_inodes = NULL; | ||
| 555 | |||
| 556 | out: | ||
| 537 | mlog_exit(0); | 557 | mlog_exit(0); |
| 538 | } | 558 | } |
| 539 | 559 | ||
| @@ -608,6 +628,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
| 608 | int ret = 0; | 628 | int ret = 0; |
| 609 | struct mount_options parsed_options; | 629 | struct mount_options parsed_options; |
| 610 | struct ocfs2_super *osb = OCFS2_SB(sb); | 630 | struct ocfs2_super *osb = OCFS2_SB(sb); |
| 631 | u32 tmp; | ||
| 611 | 632 | ||
| 612 | lock_kernel(); | 633 | lock_kernel(); |
| 613 | 634 | ||
| @@ -617,8 +638,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
| 617 | goto out; | 638 | goto out; |
| 618 | } | 639 | } |
| 619 | 640 | ||
| 620 | if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) != | 641 | tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | |
| 621 | (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) { | 642 | OCFS2_MOUNT_HB_NONE; |
| 643 | if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { | ||
| 622 | ret = -EINVAL; | 644 | ret = -EINVAL; |
| 623 | mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); | 645 | mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); |
| 624 | goto out; | 646 | goto out; |
| @@ -809,23 +831,29 @@ bail: | |||
| 809 | 831 | ||
| 810 | static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) | 832 | static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) |
| 811 | { | 833 | { |
| 812 | if (ocfs2_mount_local(osb)) { | 834 | u32 hb_enabled = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL; |
| 813 | if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { | 835 | |
| 836 | if (osb->s_mount_opt & hb_enabled) { | ||
| 837 | if (ocfs2_mount_local(osb)) { | ||
| 814 | mlog(ML_ERROR, "Cannot heartbeat on a locally " | 838 | mlog(ML_ERROR, "Cannot heartbeat on a locally " |
| 815 | "mounted device.\n"); | 839 | "mounted device.\n"); |
| 816 | return -EINVAL; | 840 | return -EINVAL; |
| 817 | } | 841 | } |
| 818 | } | 842 | if (ocfs2_userspace_stack(osb)) { |
| 819 | |||
| 820 | if (ocfs2_userspace_stack(osb)) { | ||
| 821 | if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { | ||
| 822 | mlog(ML_ERROR, "Userspace stack expected, but " | 843 | mlog(ML_ERROR, "Userspace stack expected, but " |
| 823 | "o2cb heartbeat arguments passed to mount\n"); | 844 | "o2cb heartbeat arguments passed to mount\n"); |
| 824 | return -EINVAL; | 845 | return -EINVAL; |
| 825 | } | 846 | } |
| 847 | if (((osb->s_mount_opt & OCFS2_MOUNT_HB_GLOBAL) && | ||
| 848 | !ocfs2_cluster_o2cb_global_heartbeat(osb)) || | ||
| 849 | ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) && | ||
| 850 | ocfs2_cluster_o2cb_global_heartbeat(osb))) { | ||
| 851 | mlog(ML_ERROR, "Mismatching o2cb heartbeat modes\n"); | ||
| 852 | return -EINVAL; | ||
| 853 | } | ||
| 826 | } | 854 | } |
| 827 | 855 | ||
| 828 | if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { | 856 | if (!(osb->s_mount_opt & hb_enabled)) { |
| 829 | if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && | 857 | if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && |
| 830 | !ocfs2_userspace_stack(osb)) { | 858 | !ocfs2_userspace_stack(osb)) { |
| 831 | mlog(ML_ERROR, "Heartbeat has to be started to mount " | 859 | mlog(ML_ERROR, "Heartbeat has to be started to mount " |
| @@ -1291,6 +1319,7 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
| 1291 | { | 1319 | { |
| 1292 | int status; | 1320 | int status; |
| 1293 | char *p; | 1321 | char *p; |
| 1322 | u32 tmp; | ||
| 1294 | 1323 | ||
| 1295 | mlog_entry("remount: %d, options: \"%s\"\n", is_remount, | 1324 | mlog_entry("remount: %d, options: \"%s\"\n", is_remount, |
| 1296 | options ? options : "(none)"); | 1325 | options ? options : "(none)"); |
| @@ -1322,7 +1351,10 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
| 1322 | mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; | 1351 | mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; |
| 1323 | break; | 1352 | break; |
| 1324 | case Opt_hb_none: | 1353 | case Opt_hb_none: |
| 1325 | mopt->mount_opt &= ~OCFS2_MOUNT_HB_LOCAL; | 1354 | mopt->mount_opt |= OCFS2_MOUNT_HB_NONE; |
| 1355 | break; | ||
| 1356 | case Opt_hb_global: | ||
| 1357 | mopt->mount_opt |= OCFS2_MOUNT_HB_GLOBAL; | ||
| 1326 | break; | 1358 | break; |
| 1327 | case Opt_barrier: | 1359 | case Opt_barrier: |
| 1328 | if (match_int(&args[0], &option)) { | 1360 | if (match_int(&args[0], &option)) { |
| @@ -1438,6 +1470,12 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
| 1438 | case Opt_grpquota: | 1470 | case Opt_grpquota: |
| 1439 | mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; | 1471 | mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; |
| 1440 | break; | 1472 | break; |
| 1473 | case Opt_coherency_buffered: | ||
| 1474 | mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED; | ||
| 1475 | break; | ||
| 1476 | case Opt_coherency_full: | ||
| 1477 | mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED; | ||
| 1478 | break; | ||
| 1441 | case Opt_acl: | 1479 | case Opt_acl: |
| 1442 | mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; | 1480 | mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; |
| 1443 | mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; | 1481 | mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; |
| @@ -1477,6 +1515,15 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
| 1477 | } | 1515 | } |
| 1478 | } | 1516 | } |
| 1479 | 1517 | ||
| 1518 | /* Ensure only one heartbeat mode */ | ||
| 1519 | tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | | ||
| 1520 | OCFS2_MOUNT_HB_NONE); | ||
| 1521 | if (hweight32(tmp) != 1) { | ||
| 1522 | mlog(ML_ERROR, "Invalid heartbeat mount options\n"); | ||
| 1523 | status = 0; | ||
| 1524 | goto bail; | ||
| 1525 | } | ||
| 1526 | |||
| 1480 | status = 1; | 1527 | status = 1; |
| 1481 | 1528 | ||
| 1482 | bail: | 1529 | bail: |
| @@ -1490,10 +1537,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
| 1490 | unsigned long opts = osb->s_mount_opt; | 1537 | unsigned long opts = osb->s_mount_opt; |
| 1491 | unsigned int local_alloc_megs; | 1538 | unsigned int local_alloc_megs; |
| 1492 | 1539 | ||
| 1493 | if (opts & OCFS2_MOUNT_HB_LOCAL) | 1540 | if (opts & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL)) { |
| 1494 | seq_printf(s, ",_netdev,heartbeat=local"); | 1541 | seq_printf(s, ",_netdev"); |
| 1495 | else | 1542 | if (opts & OCFS2_MOUNT_HB_LOCAL) |
| 1496 | seq_printf(s, ",heartbeat=none"); | 1543 | seq_printf(s, ",%s", OCFS2_HB_LOCAL); |
| 1544 | else | ||
| 1545 | seq_printf(s, ",%s", OCFS2_HB_GLOBAL); | ||
| 1546 | } else | ||
| 1547 | seq_printf(s, ",%s", OCFS2_HB_NONE); | ||
| 1497 | 1548 | ||
| 1498 | if (opts & OCFS2_MOUNT_NOINTR) | 1549 | if (opts & OCFS2_MOUNT_NOINTR) |
| 1499 | seq_printf(s, ",nointr"); | 1550 | seq_printf(s, ",nointr"); |
| @@ -1536,6 +1587,11 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
| 1536 | if (opts & OCFS2_MOUNT_GRPQUOTA) | 1587 | if (opts & OCFS2_MOUNT_GRPQUOTA) |
| 1537 | seq_printf(s, ",grpquota"); | 1588 | seq_printf(s, ",grpquota"); |
| 1538 | 1589 | ||
| 1590 | if (opts & OCFS2_MOUNT_COHERENCY_BUFFERED) | ||
| 1591 | seq_printf(s, ",coherency=buffered"); | ||
| 1592 | else | ||
| 1593 | seq_printf(s, ",coherency=full"); | ||
| 1594 | |||
| 1539 | if (opts & OCFS2_MOUNT_NOUSERXATTR) | 1595 | if (opts & OCFS2_MOUNT_NOUSERXATTR) |
| 1540 | seq_printf(s, ",nouser_xattr"); | 1596 | seq_printf(s, ",nouser_xattr"); |
| 1541 | else | 1597 | else |
| @@ -1990,6 +2046,36 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu | |||
| 1990 | return 0; | 2046 | return 0; |
| 1991 | } | 2047 | } |
| 1992 | 2048 | ||
| 2049 | /* Make sure entire volume is addressable by our journal. Requires | ||
| 2050 | osb_clusters_at_boot to be valid and for the journal to have been | ||
| 2051 | initialized by ocfs2_journal_init(). */ | ||
| 2052 | static int ocfs2_journal_addressable(struct ocfs2_super *osb) | ||
| 2053 | { | ||
| 2054 | int status = 0; | ||
| 2055 | u64 max_block = | ||
| 2056 | ocfs2_clusters_to_blocks(osb->sb, | ||
| 2057 | osb->osb_clusters_at_boot) - 1; | ||
| 2058 | |||
| 2059 | /* 32-bit block number is always OK. */ | ||
| 2060 | if (max_block <= (u32)~0ULL) | ||
| 2061 | goto out; | ||
| 2062 | |||
| 2063 | /* Volume is "huge", so see if our journal is new enough to | ||
| 2064 | support it. */ | ||
| 2065 | if (!(OCFS2_HAS_COMPAT_FEATURE(osb->sb, | ||
| 2066 | OCFS2_FEATURE_COMPAT_JBD2_SB) && | ||
| 2067 | jbd2_journal_check_used_features(osb->journal->j_journal, 0, 0, | ||
| 2068 | JBD2_FEATURE_INCOMPAT_64BIT))) { | ||
| 2069 | mlog(ML_ERROR, "The journal cannot address the entire volume. " | ||
| 2070 | "Enable the 'block64' journal option with tunefs.ocfs2"); | ||
| 2071 | status = -EFBIG; | ||
| 2072 | goto out; | ||
| 2073 | } | ||
| 2074 | |||
| 2075 | out: | ||
| 2076 | return status; | ||
| 2077 | } | ||
| 2078 | |||
| 1993 | static int ocfs2_initialize_super(struct super_block *sb, | 2079 | static int ocfs2_initialize_super(struct super_block *sb, |
| 1994 | struct buffer_head *bh, | 2080 | struct buffer_head *bh, |
| 1995 | int sector_size, | 2081 | int sector_size, |
| @@ -2002,6 +2088,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 2002 | struct ocfs2_journal *journal; | 2088 | struct ocfs2_journal *journal; |
| 2003 | __le32 uuid_net_key; | 2089 | __le32 uuid_net_key; |
| 2004 | struct ocfs2_super *osb; | 2090 | struct ocfs2_super *osb; |
| 2091 | u64 total_blocks; | ||
| 2005 | 2092 | ||
| 2006 | mlog_entry_void(); | 2093 | mlog_entry_void(); |
| 2007 | 2094 | ||
| @@ -2060,6 +2147,15 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 2060 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", | 2147 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", |
| 2061 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 2148 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); |
| 2062 | 2149 | ||
| 2150 | osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); | ||
| 2151 | if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) { | ||
| 2152 | mlog(ML_ERROR, "Invalid number of node slots (%u)\n", | ||
| 2153 | osb->max_slots); | ||
| 2154 | status = -EINVAL; | ||
| 2155 | goto bail; | ||
| 2156 | } | ||
| 2157 | mlog(0, "max_slots for this device: %u\n", osb->max_slots); | ||
| 2158 | |||
| 2063 | ocfs2_orphan_scan_init(osb); | 2159 | ocfs2_orphan_scan_init(osb); |
| 2064 | 2160 | ||
| 2065 | status = ocfs2_recovery_init(osb); | 2161 | status = ocfs2_recovery_init(osb); |
| @@ -2098,15 +2194,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 2098 | goto bail; | 2194 | goto bail; |
| 2099 | } | 2195 | } |
| 2100 | 2196 | ||
| 2101 | osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); | ||
| 2102 | if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) { | ||
| 2103 | mlog(ML_ERROR, "Invalid number of node slots (%u)\n", | ||
| 2104 | osb->max_slots); | ||
| 2105 | status = -EINVAL; | ||
| 2106 | goto bail; | ||
| 2107 | } | ||
| 2108 | mlog(0, "max_slots for this device: %u\n", osb->max_slots); | ||
| 2109 | |||
| 2110 | osb->slot_recovery_generations = | 2197 | osb->slot_recovery_generations = |
| 2111 | kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations), | 2198 | kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations), |
| 2112 | GFP_KERNEL); | 2199 | GFP_KERNEL); |
| @@ -2149,7 +2236,9 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 2149 | goto bail; | 2236 | goto bail; |
| 2150 | } | 2237 | } |
| 2151 | 2238 | ||
| 2152 | if (ocfs2_userspace_stack(osb)) { | 2239 | if (ocfs2_clusterinfo_valid(osb)) { |
| 2240 | osb->osb_stackflags = | ||
| 2241 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags; | ||
| 2153 | memcpy(osb->osb_cluster_stack, | 2242 | memcpy(osb->osb_cluster_stack, |
| 2154 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, | 2243 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, |
| 2155 | OCFS2_STACK_LABEL_LEN); | 2244 | OCFS2_STACK_LABEL_LEN); |
| @@ -2214,11 +2303,15 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 2214 | goto bail; | 2303 | goto bail; |
| 2215 | } | 2304 | } |
| 2216 | 2305 | ||
| 2217 | if (ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(di->i_clusters) - 1) | 2306 | total_blocks = ocfs2_clusters_to_blocks(osb->sb, |
| 2218 | > (u32)~0UL) { | 2307 | le32_to_cpu(di->i_clusters)); |
| 2219 | mlog(ML_ERROR, "Volume might try to write to blocks beyond " | 2308 | |
| 2220 | "what jbd can address in 32 bits.\n"); | 2309 | status = generic_check_addressable(osb->sb->s_blocksize_bits, |
| 2221 | status = -EINVAL; | 2310 | total_blocks); |
| 2311 | if (status) { | ||
| 2312 | mlog(ML_ERROR, "Volume too large " | ||
| 2313 | "to mount safely on this system"); | ||
| 2314 | status = -EFBIG; | ||
| 2222 | goto bail; | 2315 | goto bail; |
| 2223 | } | 2316 | } |
| 2224 | 2317 | ||
| @@ -2380,6 +2473,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) | |||
| 2380 | goto finally; | 2473 | goto finally; |
| 2381 | } | 2474 | } |
| 2382 | 2475 | ||
| 2476 | /* Now that journal has been initialized, check to make sure | ||
| 2477 | entire volume is addressable. */ | ||
| 2478 | status = ocfs2_journal_addressable(osb); | ||
| 2479 | if (status) | ||
| 2480 | goto finally; | ||
| 2481 | |||
| 2383 | /* If the journal was unmounted cleanly then we don't want to | 2482 | /* If the journal was unmounted cleanly then we don't want to |
| 2384 | * recover anything. Otherwise, journal_load will do that | 2483 | * recover anything. Otherwise, journal_load will do that |
| 2385 | * dirty work for us :) */ | 2484 | * dirty work for us :) */ |
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index bfe7190cdbf1..902efb23b6a6 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c | |||
| @@ -44,11 +44,6 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
| 44 | int type, | 44 | int type, |
| 45 | u32 slot); | 45 | u32 slot); |
| 46 | 46 | ||
| 47 | static inline int is_global_system_inode(int type); | ||
| 48 | static inline int is_in_system_inode_array(struct ocfs2_super *osb, | ||
| 49 | int type, | ||
| 50 | u32 slot); | ||
| 51 | |||
| 52 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 47 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| 53 | static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES]; | 48 | static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES]; |
| 54 | #endif | 49 | #endif |
| @@ -59,11 +54,52 @@ static inline int is_global_system_inode(int type) | |||
| 59 | type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; | 54 | type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; |
| 60 | } | 55 | } |
| 61 | 56 | ||
| 62 | static inline int is_in_system_inode_array(struct ocfs2_super *osb, | 57 | static struct inode **get_local_system_inode(struct ocfs2_super *osb, |
| 63 | int type, | 58 | int type, |
| 64 | u32 slot) | 59 | u32 slot) |
| 65 | { | 60 | { |
| 66 | return slot == osb->slot_num || is_global_system_inode(type); | 61 | int index; |
| 62 | struct inode **local_system_inodes, **free = NULL; | ||
| 63 | |||
| 64 | BUG_ON(slot == OCFS2_INVALID_SLOT); | ||
| 65 | BUG_ON(type < OCFS2_FIRST_LOCAL_SYSTEM_INODE || | ||
| 66 | type > OCFS2_LAST_LOCAL_SYSTEM_INODE); | ||
| 67 | |||
| 68 | spin_lock(&osb->osb_lock); | ||
| 69 | local_system_inodes = osb->local_system_inodes; | ||
| 70 | spin_unlock(&osb->osb_lock); | ||
| 71 | |||
| 72 | if (unlikely(!local_system_inodes)) { | ||
| 73 | local_system_inodes = kzalloc(sizeof(struct inode *) * | ||
| 74 | NUM_LOCAL_SYSTEM_INODES * | ||
| 75 | osb->max_slots, | ||
| 76 | GFP_NOFS); | ||
| 77 | if (!local_system_inodes) { | ||
| 78 | mlog_errno(-ENOMEM); | ||
| 79 | /* | ||
| 80 | * return NULL here so that ocfs2_get_system_file_inode | ||
| 81 | * will try to create an inode and use it. We will try | ||
| 82 | * to initialize local_system_inodes next time. | ||
| 83 | */ | ||
| 84 | return NULL; | ||
| 85 | } | ||
| 86 | |||
| 87 | spin_lock(&osb->osb_lock); | ||
| 88 | if (osb->local_system_inodes) { | ||
| 89 | /* Someone has initialized it for us. */ | ||
| 90 | free = local_system_inodes; | ||
| 91 | local_system_inodes = osb->local_system_inodes; | ||
| 92 | } else | ||
| 93 | osb->local_system_inodes = local_system_inodes; | ||
| 94 | spin_unlock(&osb->osb_lock); | ||
| 95 | if (unlikely(free)) | ||
| 96 | kfree(free); | ||
| 97 | } | ||
| 98 | |||
| 99 | index = (slot * NUM_LOCAL_SYSTEM_INODES) + | ||
| 100 | (type - OCFS2_FIRST_LOCAL_SYSTEM_INODE); | ||
| 101 | |||
| 102 | return &local_system_inodes[index]; | ||
| 67 | } | 103 | } |
| 68 | 104 | ||
| 69 | struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, | 105 | struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, |
| @@ -74,8 +110,10 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
| 74 | struct inode **arr = NULL; | 110 | struct inode **arr = NULL; |
| 75 | 111 | ||
| 76 | /* avoid the lookup if cached in local system file array */ | 112 | /* avoid the lookup if cached in local system file array */ |
| 77 | if (is_in_system_inode_array(osb, type, slot)) | 113 | if (is_global_system_inode(type)) { |
| 78 | arr = &(osb->system_inodes[type]); | 114 | arr = &(osb->global_system_inodes[type]); |
| 115 | } else | ||
| 116 | arr = get_local_system_inode(osb, type, slot); | ||
| 79 | 117 | ||
| 80 | if (arr && ((inode = *arr) != NULL)) { | 118 | if (arr && ((inode = *arr) != NULL)) { |
| 81 | /* get a ref in addition to the array ref */ | 119 | /* get a ref in addition to the array ref */ |
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 06fa5e77c40e..67cd43914641 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
| @@ -7081,7 +7081,7 @@ static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, | |||
| 7081 | goto out; | 7081 | goto out; |
| 7082 | } | 7082 | } |
| 7083 | 7083 | ||
| 7084 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) | 7084 | if (!indexed) |
| 7085 | ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); | 7085 | ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); |
| 7086 | else | 7086 | else |
| 7087 | ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); | 7087 | ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 3168dcfb94f2..7d6f18fddfdb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -2378,6 +2378,8 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, | |||
| 2378 | 2378 | ||
| 2379 | extern int generic_file_fsync(struct file *, int); | 2379 | extern int generic_file_fsync(struct file *, int); |
| 2380 | 2380 | ||
| 2381 | extern int generic_check_addressable(unsigned, u64); | ||
| 2382 | |||
| 2381 | #ifdef CONFIG_MIGRATION | 2383 | #ifdef CONFIG_MIGRATION |
| 2382 | extern int buffer_migrate_page(struct address_space *, | 2384 | extern int buffer_migrate_page(struct address_space *, |
| 2383 | struct page *, struct page *); | 2385 | struct page *, struct page *); |
