diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-21 22:01:34 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-21 22:01:34 -0400 |
commit | f3270b16e00f0614fa418dcc50883da5949375b4 (patch) | |
tree | 2f76b9679cbc9e8da39c8a67cd2bc00084d441e7 | |
parent | 3044100e58c84e133791c8b60a2f5bef69d732e4 (diff) | |
parent | 2decd65a2630633cee04d0b83fdcee46ad2989a1 (diff) |
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2: (48 commits)
ocfs2: Avoid to evaluate xattr block flags again.
ocfs2/cluster: Release debugfs file elapsed_time_in_ms
ocfs2: Add a mount option "coherency=*" to handle cluster coherency for O_DIRECT writes.
Initialize max_slots early
When I tried to compile I got the following warning: fs/ocfs2/slot_map.c: In function ‘ocfs2_init_slot_info’: fs/ocfs2/slot_map.c:360: warning: ‘bytes’ may be used uninitialized in this function fs/ocfs2/slot_map.c:360: note: ‘bytes’ was declared here Compiler: gcc version 4.4.3 (GCC) on Mandriva. I'm not sure why this warning occurs; I think the compiler doesn't know that the variable "bytes" is initialized when it is passed by reference to ocfs2_slot_map_physical_size, so it throws that ugly warning. However, simply initializing the "bytes" variable to 0 will fix it.
ocfs2: validate bg_free_bits_count after update
ocfs2/cluster: Bump up dlm protocol to version 1.1
ocfs2/cluster: Show per region heartbeat elapsed time
ocfs2/cluster: Add mlogs for heartbeat up/down events
ocfs2/cluster: Create debugfs dir/files for each region
ocfs2/cluster: Create debugfs files for live, quorum and failed region bitmaps
ocfs2/cluster: Maintain bitmap of failed regions
ocfs2/cluster: Maintain bitmap of quorum regions
ocfs2/cluster: Track bitmap of live heartbeat regions
ocfs2/cluster: Track number of global heartbeat regions
ocfs2/cluster: Maintain live node bitmap per heartbeat region
ocfs2/cluster: Reorganize o2hb debugfs init
ocfs2/cluster: Check slots for unconfigured live nodes
ocfs2/cluster: Print messages when adding/removing nodes
ocfs2/cluster: Print messages when adding/removing heartbeat regions
...
39 files changed, 1894 insertions, 173 deletions
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index 1f7ae144f6d8..5393e6611691 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt | |||
@@ -87,3 +87,10 @@ dir_resv_level= (*) By default, directory reservations will scale with file | |||
87 | reservations - users should rarely need to change this | 87 | reservations - users should rarely need to change this |
88 | value. If allocation reservations are turned off, this | 88 | value. If allocation reservations are turned off, this |
89 | option will have no effect. | 89 | option will have no effect. |
90 | coherency=full (*) Disallow concurrent O_DIRECT writes, cluster inode | ||
91 | lock will be taken to force other nodes drop cache, | ||
92 | therefore full cluster coherency is guaranteed even | ||
93 | for O_DIRECT writes. | ||
94 | coherency=buffered Allow concurrent O_DIRECT writes without EX lock among | ||
95 | nodes, which gains high performance at risk of getting | ||
96 | stale data on other nodes. | ||
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 5dbf4dba03c4..a367dd044280 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -1849,8 +1849,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
1849 | goto failed_mount; | 1849 | goto failed_mount; |
1850 | } | 1850 | } |
1851 | 1851 | ||
1852 | if (le32_to_cpu(es->s_blocks_count) > | 1852 | if (generic_check_addressable(sb->s_blocksize_bits, |
1853 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { | 1853 | le32_to_cpu(es->s_blocks_count))) { |
1854 | ext3_msg(sb, KERN_ERR, | 1854 | ext3_msg(sb, KERN_ERR, |
1855 | "error: filesystem is too large to mount safely"); | 1855 | "error: filesystem is too large to mount safely"); |
1856 | if (sizeof(sector_t) < 8) | 1856 | if (sizeof(sector_t) < 8) |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 26147746c272..7f47c366bf15 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -2831,15 +2831,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2831 | * Test whether we have more sectors than will fit in sector_t, | 2831 | * Test whether we have more sectors than will fit in sector_t, |
2832 | * and whether the max offset is addressable by the page cache. | 2832 | * and whether the max offset is addressable by the page cache. |
2833 | */ | 2833 | */ |
2834 | if ((ext4_blocks_count(es) > | 2834 | ret = generic_check_addressable(sb->s_blocksize_bits, |
2835 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || | 2835 | ext4_blocks_count(es)); |
2836 | (ext4_blocks_count(es) > | 2836 | if (ret) { |
2837 | (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { | ||
2838 | ext4_msg(sb, KERN_ERR, "filesystem" | 2837 | ext4_msg(sb, KERN_ERR, "filesystem" |
2839 | " too large to mount safely on this system"); | 2838 | " too large to mount safely on this system"); |
2840 | if (sizeof(sector_t) < 8) | 2839 | if (sizeof(sector_t) < 8) |
2841 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); | 2840 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); |
2842 | ret = -EFBIG; | ||
2843 | goto failed_mount; | 2841 | goto failed_mount; |
2844 | } | 2842 | } |
2845 | 2843 | ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0e8014ea6b94..262419f83d80 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -1371,6 +1371,10 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, | |||
1371 | 1371 | ||
1372 | if (!compat && !ro && !incompat) | 1372 | if (!compat && !ro && !incompat) |
1373 | return 1; | 1373 | return 1; |
1374 | /* Load journal superblock if it is not loaded yet. */ | ||
1375 | if (journal->j_format_version == 0 && | ||
1376 | journal_get_superblock(journal) != 0) | ||
1377 | return 0; | ||
1374 | if (journal->j_format_version == 1) | 1378 | if (journal->j_format_version == 1) |
1375 | return 0; | 1379 | return 0; |
1376 | 1380 | ||
diff --git a/fs/libfs.c b/fs/libfs.c index 0a9da95317f7..62baa0387d6e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -913,6 +913,35 @@ int generic_file_fsync(struct file *file, int datasync) | |||
913 | } | 913 | } |
914 | EXPORT_SYMBOL(generic_file_fsync); | 914 | EXPORT_SYMBOL(generic_file_fsync); |
915 | 915 | ||
916 | /** | ||
917 | * generic_check_addressable - Check addressability of file system | ||
918 | * @blocksize_bits: log of file system block size | ||
919 | * @num_blocks: number of blocks in file system | ||
920 | * | ||
921 | * Determine whether a file system with @num_blocks blocks (and a | ||
922 | * block size of 2**@blocksize_bits) is addressable by the sector_t | ||
923 | * and page cache of the system. Return 0 if so and -EFBIG otherwise. | ||
924 | */ | ||
925 | int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) | ||
926 | { | ||
927 | u64 last_fs_block = num_blocks - 1; | ||
928 | u64 last_fs_page = | ||
929 | last_fs_block >> (PAGE_CACHE_SHIFT - blocksize_bits); | ||
930 | |||
931 | if (unlikely(num_blocks == 0)) | ||
932 | return 0; | ||
933 | |||
934 | if ((blocksize_bits < 9) || (blocksize_bits > PAGE_CACHE_SHIFT)) | ||
935 | return -EINVAL; | ||
936 | |||
937 | if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) || | ||
938 | (last_fs_page > (pgoff_t)(~0ULL))) { | ||
939 | return -EFBIG; | ||
940 | } | ||
941 | return 0; | ||
942 | } | ||
943 | EXPORT_SYMBOL(generic_check_addressable); | ||
944 | |||
916 | /* | 945 | /* |
917 | * No-op implementation of ->fsync for in-memory filesystems. | 946 | * No-op implementation of ->fsync for in-memory filesystems. |
918 | */ | 947 | */ |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 0de69c9a08be..5cfeee118158 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -883,8 +883,8 @@ struct ocfs2_write_ctxt { | |||
883 | * out in so that future reads from that region will get | 883 | * out in so that future reads from that region will get |
884 | * zero's. | 884 | * zero's. |
885 | */ | 885 | */ |
886 | struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; | ||
887 | unsigned int w_num_pages; | 886 | unsigned int w_num_pages; |
887 | struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; | ||
888 | struct page *w_target_page; | 888 | struct page *w_target_page; |
889 | 889 | ||
890 | /* | 890 | /* |
@@ -1642,7 +1642,8 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh, | |||
1642 | return ret; | 1642 | return ret; |
1643 | } | 1643 | } |
1644 | 1644 | ||
1645 | int ocfs2_write_begin_nolock(struct address_space *mapping, | 1645 | int ocfs2_write_begin_nolock(struct file *filp, |
1646 | struct address_space *mapping, | ||
1646 | loff_t pos, unsigned len, unsigned flags, | 1647 | loff_t pos, unsigned len, unsigned flags, |
1647 | struct page **pagep, void **fsdata, | 1648 | struct page **pagep, void **fsdata, |
1648 | struct buffer_head *di_bh, struct page *mmap_page) | 1649 | struct buffer_head *di_bh, struct page *mmap_page) |
@@ -1692,7 +1693,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1692 | mlog_errno(ret); | 1693 | mlog_errno(ret); |
1693 | goto out; | 1694 | goto out; |
1694 | } else if (ret == 1) { | 1695 | } else if (ret == 1) { |
1695 | ret = ocfs2_refcount_cow(inode, di_bh, | 1696 | ret = ocfs2_refcount_cow(inode, filp, di_bh, |
1696 | wc->w_cpos, wc->w_clen, UINT_MAX); | 1697 | wc->w_cpos, wc->w_clen, UINT_MAX); |
1697 | if (ret) { | 1698 | if (ret) { |
1698 | mlog_errno(ret); | 1699 | mlog_errno(ret); |
@@ -1854,7 +1855,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, | |||
1854 | */ | 1855 | */ |
1855 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 1856 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
1856 | 1857 | ||
1857 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, | 1858 | ret = ocfs2_write_begin_nolock(file, mapping, pos, len, flags, pagep, |
1858 | fsdata, di_bh, NULL); | 1859 | fsdata, di_bh, NULL); |
1859 | if (ret) { | 1860 | if (ret) { |
1860 | mlog_errno(ret); | 1861 | mlog_errno(ret); |
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index c48e93ffc513..7606f663da6d 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h | |||
@@ -48,7 +48,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping, | |||
48 | loff_t pos, unsigned len, unsigned copied, | 48 | loff_t pos, unsigned len, unsigned copied, |
49 | struct page *page, void *fsdata); | 49 | struct page *page, void *fsdata); |
50 | 50 | ||
51 | int ocfs2_write_begin_nolock(struct address_space *mapping, | 51 | int ocfs2_write_begin_nolock(struct file *filp, |
52 | struct address_space *mapping, | ||
52 | loff_t pos, unsigned len, unsigned flags, | 53 | loff_t pos, unsigned len, unsigned flags, |
53 | struct page **pagep, void **fsdata, | 54 | struct page **pagep, void **fsdata, |
54 | struct buffer_head *di_bh, struct page *mmap_page); | 55 | struct buffer_head *di_bh, struct page *mmap_page); |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 41d5f1f92d56..52c7557f3e25 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -62,10 +62,51 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | |||
62 | static LIST_HEAD(o2hb_node_events); | 62 | static LIST_HEAD(o2hb_node_events); |
63 | static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); | 63 | static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); |
64 | 64 | ||
65 | /* | ||
66 | * In global heartbeat, we maintain a series of region bitmaps. | ||
67 | * - o2hb_region_bitmap allows us to limit the region number to max region. | ||
68 | * - o2hb_live_region_bitmap tracks live regions (seen steady iterations). | ||
69 | * - o2hb_quorum_region_bitmap tracks live regions that have seen all nodes | ||
70 | * heartbeat on it. | ||
71 | * - o2hb_failed_region_bitmap tracks the regions that have seen io timeouts. | ||
72 | */ | ||
73 | static unsigned long o2hb_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
74 | static unsigned long o2hb_live_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
75 | static unsigned long o2hb_quorum_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
76 | static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
77 | |||
78 | #define O2HB_DB_TYPE_LIVENODES 0 | ||
79 | #define O2HB_DB_TYPE_LIVEREGIONS 1 | ||
80 | #define O2HB_DB_TYPE_QUORUMREGIONS 2 | ||
81 | #define O2HB_DB_TYPE_FAILEDREGIONS 3 | ||
82 | #define O2HB_DB_TYPE_REGION_LIVENODES 4 | ||
83 | #define O2HB_DB_TYPE_REGION_NUMBER 5 | ||
84 | #define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6 | ||
85 | struct o2hb_debug_buf { | ||
86 | int db_type; | ||
87 | int db_size; | ||
88 | int db_len; | ||
89 | void *db_data; | ||
90 | }; | ||
91 | |||
92 | static struct o2hb_debug_buf *o2hb_db_livenodes; | ||
93 | static struct o2hb_debug_buf *o2hb_db_liveregions; | ||
94 | static struct o2hb_debug_buf *o2hb_db_quorumregions; | ||
95 | static struct o2hb_debug_buf *o2hb_db_failedregions; | ||
96 | |||
65 | #define O2HB_DEBUG_DIR "o2hb" | 97 | #define O2HB_DEBUG_DIR "o2hb" |
66 | #define O2HB_DEBUG_LIVENODES "livenodes" | 98 | #define O2HB_DEBUG_LIVENODES "livenodes" |
99 | #define O2HB_DEBUG_LIVEREGIONS "live_regions" | ||
100 | #define O2HB_DEBUG_QUORUMREGIONS "quorum_regions" | ||
101 | #define O2HB_DEBUG_FAILEDREGIONS "failed_regions" | ||
102 | #define O2HB_DEBUG_REGION_NUMBER "num" | ||
103 | #define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms" | ||
104 | |||
67 | static struct dentry *o2hb_debug_dir; | 105 | static struct dentry *o2hb_debug_dir; |
68 | static struct dentry *o2hb_debug_livenodes; | 106 | static struct dentry *o2hb_debug_livenodes; |
107 | static struct dentry *o2hb_debug_liveregions; | ||
108 | static struct dentry *o2hb_debug_quorumregions; | ||
109 | static struct dentry *o2hb_debug_failedregions; | ||
69 | 110 | ||
70 | static LIST_HEAD(o2hb_all_regions); | 111 | static LIST_HEAD(o2hb_all_regions); |
71 | 112 | ||
@@ -77,7 +118,19 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type); | |||
77 | 118 | ||
78 | #define O2HB_DEFAULT_BLOCK_BITS 9 | 119 | #define O2HB_DEFAULT_BLOCK_BITS 9 |
79 | 120 | ||
121 | enum o2hb_heartbeat_modes { | ||
122 | O2HB_HEARTBEAT_LOCAL = 0, | ||
123 | O2HB_HEARTBEAT_GLOBAL, | ||
124 | O2HB_HEARTBEAT_NUM_MODES, | ||
125 | }; | ||
126 | |||
127 | char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = { | ||
128 | "local", /* O2HB_HEARTBEAT_LOCAL */ | ||
129 | "global", /* O2HB_HEARTBEAT_GLOBAL */ | ||
130 | }; | ||
131 | |||
80 | unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; | 132 | unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; |
133 | unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; | ||
81 | 134 | ||
82 | /* Only sets a new threshold if there are no active regions. | 135 | /* Only sets a new threshold if there are no active regions. |
83 | * | 136 | * |
@@ -94,6 +147,22 @@ static void o2hb_dead_threshold_set(unsigned int threshold) | |||
94 | } | 147 | } |
95 | } | 148 | } |
96 | 149 | ||
150 | static int o2hb_global_hearbeat_mode_set(unsigned int hb_mode) | ||
151 | { | ||
152 | int ret = -1; | ||
153 | |||
154 | if (hb_mode < O2HB_HEARTBEAT_NUM_MODES) { | ||
155 | spin_lock(&o2hb_live_lock); | ||
156 | if (list_empty(&o2hb_all_regions)) { | ||
157 | o2hb_heartbeat_mode = hb_mode; | ||
158 | ret = 0; | ||
159 | } | ||
160 | spin_unlock(&o2hb_live_lock); | ||
161 | } | ||
162 | |||
163 | return ret; | ||
164 | } | ||
165 | |||
97 | struct o2hb_node_event { | 166 | struct o2hb_node_event { |
98 | struct list_head hn_item; | 167 | struct list_head hn_item; |
99 | enum o2hb_callback_type hn_event_type; | 168 | enum o2hb_callback_type hn_event_type; |
@@ -135,6 +204,18 @@ struct o2hb_region { | |||
135 | struct block_device *hr_bdev; | 204 | struct block_device *hr_bdev; |
136 | struct o2hb_disk_slot *hr_slots; | 205 | struct o2hb_disk_slot *hr_slots; |
137 | 206 | ||
207 | /* live node map of this region */ | ||
208 | unsigned long hr_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
209 | unsigned int hr_region_num; | ||
210 | |||
211 | struct dentry *hr_debug_dir; | ||
212 | struct dentry *hr_debug_livenodes; | ||
213 | struct dentry *hr_debug_regnum; | ||
214 | struct dentry *hr_debug_elapsed_time; | ||
215 | struct o2hb_debug_buf *hr_db_livenodes; | ||
216 | struct o2hb_debug_buf *hr_db_regnum; | ||
217 | struct o2hb_debug_buf *hr_db_elapsed_time; | ||
218 | |||
138 | /* let the person setting up hb wait for it to return until it | 219 | /* let the person setting up hb wait for it to return until it |
139 | * has reached a 'steady' state. This will be fixed when we have | 220 | * has reached a 'steady' state. This will be fixed when we have |
140 | * a more complete api that doesn't lead to this sort of fragility. */ | 221 | * a more complete api that doesn't lead to this sort of fragility. */ |
@@ -163,8 +244,19 @@ struct o2hb_bio_wait_ctxt { | |||
163 | int wc_error; | 244 | int wc_error; |
164 | }; | 245 | }; |
165 | 246 | ||
247 | static int o2hb_pop_count(void *map, int count) | ||
248 | { | ||
249 | int i = -1, pop = 0; | ||
250 | |||
251 | while ((i = find_next_bit(map, count, i + 1)) < count) | ||
252 | pop++; | ||
253 | return pop; | ||
254 | } | ||
255 | |||
166 | static void o2hb_write_timeout(struct work_struct *work) | 256 | static void o2hb_write_timeout(struct work_struct *work) |
167 | { | 257 | { |
258 | int failed, quorum; | ||
259 | unsigned long flags; | ||
168 | struct o2hb_region *reg = | 260 | struct o2hb_region *reg = |
169 | container_of(work, struct o2hb_region, | 261 | container_of(work, struct o2hb_region, |
170 | hr_write_timeout_work.work); | 262 | hr_write_timeout_work.work); |
@@ -172,6 +264,28 @@ static void o2hb_write_timeout(struct work_struct *work) | |||
172 | mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " | 264 | mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " |
173 | "milliseconds\n", reg->hr_dev_name, | 265 | "milliseconds\n", reg->hr_dev_name, |
174 | jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); | 266 | jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); |
267 | |||
268 | if (o2hb_global_heartbeat_active()) { | ||
269 | spin_lock_irqsave(&o2hb_live_lock, flags); | ||
270 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
271 | set_bit(reg->hr_region_num, o2hb_failed_region_bitmap); | ||
272 | failed = o2hb_pop_count(&o2hb_failed_region_bitmap, | ||
273 | O2NM_MAX_REGIONS); | ||
274 | quorum = o2hb_pop_count(&o2hb_quorum_region_bitmap, | ||
275 | O2NM_MAX_REGIONS); | ||
276 | spin_unlock_irqrestore(&o2hb_live_lock, flags); | ||
277 | |||
278 | mlog(ML_HEARTBEAT, "Number of regions %d, failed regions %d\n", | ||
279 | quorum, failed); | ||
280 | |||
281 | /* | ||
282 | * Fence if the number of failed regions >= half the number | ||
283 | * of quorum regions | ||
284 | */ | ||
285 | if ((failed << 1) < quorum) | ||
286 | return; | ||
287 | } | ||
288 | |||
175 | o2quo_disk_timeout(); | 289 | o2quo_disk_timeout(); |
176 | } | 290 | } |
177 | 291 | ||
@@ -180,6 +294,11 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg) | |||
180 | mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", | 294 | mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", |
181 | O2HB_MAX_WRITE_TIMEOUT_MS); | 295 | O2HB_MAX_WRITE_TIMEOUT_MS); |
182 | 296 | ||
297 | if (o2hb_global_heartbeat_active()) { | ||
298 | spin_lock(&o2hb_live_lock); | ||
299 | clear_bit(reg->hr_region_num, o2hb_failed_region_bitmap); | ||
300 | spin_unlock(&o2hb_live_lock); | ||
301 | } | ||
183 | cancel_delayed_work(&reg->hr_write_timeout_work); | 302 | cancel_delayed_work(&reg->hr_write_timeout_work); |
184 | reg->hr_last_timeout_start = jiffies; | 303 | reg->hr_last_timeout_start = jiffies; |
185 | schedule_delayed_work(&reg->hr_write_timeout_work, | 304 | schedule_delayed_work(&reg->hr_write_timeout_work, |
@@ -513,6 +632,8 @@ static void o2hb_queue_node_event(struct o2hb_node_event *event, | |||
513 | { | 632 | { |
514 | assert_spin_locked(&o2hb_live_lock); | 633 | assert_spin_locked(&o2hb_live_lock); |
515 | 634 | ||
635 | BUG_ON((!node) && (type != O2HB_NODE_DOWN_CB)); | ||
636 | |||
516 | event->hn_event_type = type; | 637 | event->hn_event_type = type; |
517 | event->hn_node = node; | 638 | event->hn_node = node; |
518 | event->hn_node_num = node_num; | 639 | event->hn_node_num = node_num; |
@@ -554,6 +675,35 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) | |||
554 | o2nm_node_put(node); | 675 | o2nm_node_put(node); |
555 | } | 676 | } |
556 | 677 | ||
678 | static void o2hb_set_quorum_device(struct o2hb_region *reg, | ||
679 | struct o2hb_disk_slot *slot) | ||
680 | { | ||
681 | assert_spin_locked(&o2hb_live_lock); | ||
682 | |||
683 | if (!o2hb_global_heartbeat_active()) | ||
684 | return; | ||
685 | |||
686 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
687 | return; | ||
688 | |||
689 | /* | ||
690 | * A region can be added to the quorum only when it sees all | ||
691 | * live nodes heartbeat on it. In other words, the region has been | ||
692 | * added to all nodes. | ||
693 | */ | ||
694 | if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap, | ||
695 | sizeof(o2hb_live_node_bitmap))) | ||
696 | return; | ||
697 | |||
698 | if (slot->ds_changed_samples < O2HB_LIVE_THRESHOLD) | ||
699 | return; | ||
700 | |||
701 | printk(KERN_NOTICE "o2hb: Region %s is now a quorum device\n", | ||
702 | config_item_name(&reg->hr_item)); | ||
703 | |||
704 | set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); | ||
705 | } | ||
706 | |||
557 | static int o2hb_check_slot(struct o2hb_region *reg, | 707 | static int o2hb_check_slot(struct o2hb_region *reg, |
558 | struct o2hb_disk_slot *slot) | 708 | struct o2hb_disk_slot *slot) |
559 | { | 709 | { |
@@ -565,14 +715,22 @@ static int o2hb_check_slot(struct o2hb_region *reg, | |||
565 | u64 cputime; | 715 | u64 cputime; |
566 | unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; | 716 | unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; |
567 | unsigned int slot_dead_ms; | 717 | unsigned int slot_dead_ms; |
718 | int tmp; | ||
568 | 719 | ||
569 | memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); | 720 | memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); |
570 | 721 | ||
571 | /* Is this correct? Do we assume that the node doesn't exist | 722 | /* |
572 | * if we're not configured for him? */ | 723 | * If a node is no longer configured but is still in the livemap, we |
724 | * may need to clear that bit from the livemap. | ||
725 | */ | ||
573 | node = o2nm_get_node_by_num(slot->ds_node_num); | 726 | node = o2nm_get_node_by_num(slot->ds_node_num); |
574 | if (!node) | 727 | if (!node) { |
575 | return 0; | 728 | spin_lock(&o2hb_live_lock); |
729 | tmp = test_bit(slot->ds_node_num, o2hb_live_node_bitmap); | ||
730 | spin_unlock(&o2hb_live_lock); | ||
731 | if (!tmp) | ||
732 | return 0; | ||
733 | } | ||
576 | 734 | ||
577 | if (!o2hb_verify_crc(reg, hb_block)) { | 735 | if (!o2hb_verify_crc(reg, hb_block)) { |
578 | /* all paths from here will drop o2hb_live_lock for | 736 | /* all paths from here will drop o2hb_live_lock for |
@@ -639,8 +797,12 @@ fire_callbacks: | |||
639 | mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n", | 797 | mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n", |
640 | slot->ds_node_num, (long long)slot->ds_last_generation); | 798 | slot->ds_node_num, (long long)slot->ds_last_generation); |
641 | 799 | ||
800 | set_bit(slot->ds_node_num, reg->hr_live_node_bitmap); | ||
801 | |||
642 | /* first on the list generates a callback */ | 802 | /* first on the list generates a callback */ |
643 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { | 803 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { |
804 | mlog(ML_HEARTBEAT, "o2hb: Add node %d to live nodes " | ||
805 | "bitmap\n", slot->ds_node_num); | ||
644 | set_bit(slot->ds_node_num, o2hb_live_node_bitmap); | 806 | set_bit(slot->ds_node_num, o2hb_live_node_bitmap); |
645 | 807 | ||
646 | o2hb_queue_node_event(&event, O2HB_NODE_UP_CB, node, | 808 | o2hb_queue_node_event(&event, O2HB_NODE_UP_CB, node, |
@@ -684,13 +846,18 @@ fire_callbacks: | |||
684 | mlog(ML_HEARTBEAT, "Node %d left my region\n", | 846 | mlog(ML_HEARTBEAT, "Node %d left my region\n", |
685 | slot->ds_node_num); | 847 | slot->ds_node_num); |
686 | 848 | ||
849 | clear_bit(slot->ds_node_num, reg->hr_live_node_bitmap); | ||
850 | |||
687 | /* last off the live_slot generates a callback */ | 851 | /* last off the live_slot generates a callback */ |
688 | list_del_init(&slot->ds_live_item); | 852 | list_del_init(&slot->ds_live_item); |
689 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { | 853 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { |
854 | mlog(ML_HEARTBEAT, "o2hb: Remove node %d from live " | ||
855 | "nodes bitmap\n", slot->ds_node_num); | ||
690 | clear_bit(slot->ds_node_num, o2hb_live_node_bitmap); | 856 | clear_bit(slot->ds_node_num, o2hb_live_node_bitmap); |
691 | 857 | ||
692 | o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node, | 858 | /* node can be null */ |
693 | slot->ds_node_num); | 859 | o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, |
860 | node, slot->ds_node_num); | ||
694 | 861 | ||
695 | changed = 1; | 862 | changed = 1; |
696 | } | 863 | } |
@@ -706,11 +873,14 @@ fire_callbacks: | |||
706 | slot->ds_equal_samples = 0; | 873 | slot->ds_equal_samples = 0; |
707 | } | 874 | } |
708 | out: | 875 | out: |
876 | o2hb_set_quorum_device(reg, slot); | ||
877 | |||
709 | spin_unlock(&o2hb_live_lock); | 878 | spin_unlock(&o2hb_live_lock); |
710 | 879 | ||
711 | o2hb_run_event_list(&event); | 880 | o2hb_run_event_list(&event); |
712 | 881 | ||
713 | o2nm_node_put(node); | 882 | if (node) |
883 | o2nm_node_put(node); | ||
714 | return changed; | 884 | return changed; |
715 | } | 885 | } |
716 | 886 | ||
@@ -737,6 +907,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
737 | { | 907 | { |
738 | int i, ret, highest_node, change = 0; | 908 | int i, ret, highest_node, change = 0; |
739 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 909 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
910 | unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
740 | struct o2hb_bio_wait_ctxt write_wc; | 911 | struct o2hb_bio_wait_ctxt write_wc; |
741 | 912 | ||
742 | ret = o2nm_configured_node_map(configured_nodes, | 913 | ret = o2nm_configured_node_map(configured_nodes, |
@@ -746,6 +917,17 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
746 | return ret; | 917 | return ret; |
747 | } | 918 | } |
748 | 919 | ||
920 | /* | ||
921 | * If a node is not configured but is in the livemap, we still need | ||
922 | * to read the slot so as to be able to remove it from the livemap. | ||
923 | */ | ||
924 | o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap)); | ||
925 | i = -1; | ||
926 | while ((i = find_next_bit(live_node_bitmap, | ||
927 | O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { | ||
928 | set_bit(i, configured_nodes); | ||
929 | } | ||
930 | |||
749 | highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); | 931 | highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); |
750 | if (highest_node >= O2NM_MAX_NODES) { | 932 | if (highest_node >= O2NM_MAX_NODES) { |
751 | mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); | 933 | mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); |
@@ -917,21 +1099,59 @@ static int o2hb_thread(void *data) | |||
917 | #ifdef CONFIG_DEBUG_FS | 1099 | #ifdef CONFIG_DEBUG_FS |
918 | static int o2hb_debug_open(struct inode *inode, struct file *file) | 1100 | static int o2hb_debug_open(struct inode *inode, struct file *file) |
919 | { | 1101 | { |
1102 | struct o2hb_debug_buf *db = inode->i_private; | ||
1103 | struct o2hb_region *reg; | ||
920 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 1104 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
921 | char *buf = NULL; | 1105 | char *buf = NULL; |
922 | int i = -1; | 1106 | int i = -1; |
923 | int out = 0; | 1107 | int out = 0; |
924 | 1108 | ||
1109 | /* max_nodes should be the largest bitmap we pass here */ | ||
1110 | BUG_ON(sizeof(map) < db->db_size); | ||
1111 | |||
925 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | 1112 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); |
926 | if (!buf) | 1113 | if (!buf) |
927 | goto bail; | 1114 | goto bail; |
928 | 1115 | ||
929 | o2hb_fill_node_map(map, sizeof(map)); | 1116 | switch (db->db_type) { |
1117 | case O2HB_DB_TYPE_LIVENODES: | ||
1118 | case O2HB_DB_TYPE_LIVEREGIONS: | ||
1119 | case O2HB_DB_TYPE_QUORUMREGIONS: | ||
1120 | case O2HB_DB_TYPE_FAILEDREGIONS: | ||
1121 | spin_lock(&o2hb_live_lock); | ||
1122 | memcpy(map, db->db_data, db->db_size); | ||
1123 | spin_unlock(&o2hb_live_lock); | ||
1124 | break; | ||
1125 | |||
1126 | case O2HB_DB_TYPE_REGION_LIVENODES: | ||
1127 | spin_lock(&o2hb_live_lock); | ||
1128 | reg = (struct o2hb_region *)db->db_data; | ||
1129 | memcpy(map, reg->hr_live_node_bitmap, db->db_size); | ||
1130 | spin_unlock(&o2hb_live_lock); | ||
1131 | break; | ||
1132 | |||
1133 | case O2HB_DB_TYPE_REGION_NUMBER: | ||
1134 | reg = (struct o2hb_region *)db->db_data; | ||
1135 | out += snprintf(buf + out, PAGE_SIZE - out, "%d\n", | ||
1136 | reg->hr_region_num); | ||
1137 | goto done; | ||
1138 | |||
1139 | case O2HB_DB_TYPE_REGION_ELAPSED_TIME: | ||
1140 | reg = (struct o2hb_region *)db->db_data; | ||
1141 | out += snprintf(buf + out, PAGE_SIZE - out, "%u\n", | ||
1142 | jiffies_to_msecs(jiffies - | ||
1143 | reg->hr_last_timeout_start)); | ||
1144 | goto done; | ||
1145 | |||
1146 | default: | ||
1147 | goto done; | ||
1148 | } | ||
930 | 1149 | ||
931 | while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) | 1150 | while ((i = find_next_bit(map, db->db_len, i + 1)) < db->db_len) |
932 | out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); | 1151 | out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); |
933 | out += snprintf(buf + out, PAGE_SIZE - out, "\n"); | 1152 | out += snprintf(buf + out, PAGE_SIZE - out, "\n"); |
934 | 1153 | ||
1154 | done: | ||
935 | i_size_write(inode, out); | 1155 | i_size_write(inode, out); |
936 | 1156 | ||
937 | file->private_data = buf; | 1157 | file->private_data = buf; |
@@ -978,10 +1198,104 @@ static const struct file_operations o2hb_debug_fops = { | |||
978 | 1198 | ||
979 | void o2hb_exit(void) | 1199 | void o2hb_exit(void) |
980 | { | 1200 | { |
981 | if (o2hb_debug_livenodes) | 1201 | kfree(o2hb_db_livenodes); |
982 | debugfs_remove(o2hb_debug_livenodes); | 1202 | kfree(o2hb_db_liveregions); |
983 | if (o2hb_debug_dir) | 1203 | kfree(o2hb_db_quorumregions); |
984 | debugfs_remove(o2hb_debug_dir); | 1204 | kfree(o2hb_db_failedregions); |
1205 | debugfs_remove(o2hb_debug_failedregions); | ||
1206 | debugfs_remove(o2hb_debug_quorumregions); | ||
1207 | debugfs_remove(o2hb_debug_liveregions); | ||
1208 | debugfs_remove(o2hb_debug_livenodes); | ||
1209 | debugfs_remove(o2hb_debug_dir); | ||
1210 | } | ||
1211 | |||
1212 | static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir, | ||
1213 | struct o2hb_debug_buf **db, int db_len, | ||
1214 | int type, int size, int len, void *data) | ||
1215 | { | ||
1216 | *db = kmalloc(db_len, GFP_KERNEL); | ||
1217 | if (!*db) | ||
1218 | return NULL; | ||
1219 | |||
1220 | (*db)->db_type = type; | ||
1221 | (*db)->db_size = size; | ||
1222 | (*db)->db_len = len; | ||
1223 | (*db)->db_data = data; | ||
1224 | |||
1225 | return debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db, | ||
1226 | &o2hb_debug_fops); | ||
1227 | } | ||
1228 | |||
1229 | static int o2hb_debug_init(void) | ||
1230 | { | ||
1231 | int ret = -ENOMEM; | ||
1232 | |||
1233 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); | ||
1234 | if (!o2hb_debug_dir) { | ||
1235 | mlog_errno(ret); | ||
1236 | goto bail; | ||
1237 | } | ||
1238 | |||
1239 | o2hb_debug_livenodes = o2hb_debug_create(O2HB_DEBUG_LIVENODES, | ||
1240 | o2hb_debug_dir, | ||
1241 | &o2hb_db_livenodes, | ||
1242 | sizeof(*o2hb_db_livenodes), | ||
1243 | O2HB_DB_TYPE_LIVENODES, | ||
1244 | sizeof(o2hb_live_node_bitmap), | ||
1245 | O2NM_MAX_NODES, | ||
1246 | o2hb_live_node_bitmap); | ||
1247 | if (!o2hb_debug_livenodes) { | ||
1248 | mlog_errno(ret); | ||
1249 | goto bail; | ||
1250 | } | ||
1251 | |||
1252 | o2hb_debug_liveregions = o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS, | ||
1253 | o2hb_debug_dir, | ||
1254 | &o2hb_db_liveregions, | ||
1255 | sizeof(*o2hb_db_liveregions), | ||
1256 | O2HB_DB_TYPE_LIVEREGIONS, | ||
1257 | sizeof(o2hb_live_region_bitmap), | ||
1258 | O2NM_MAX_REGIONS, | ||
1259 | o2hb_live_region_bitmap); | ||
1260 | if (!o2hb_debug_liveregions) { | ||
1261 | mlog_errno(ret); | ||
1262 | goto bail; | ||
1263 | } | ||
1264 | |||
1265 | o2hb_debug_quorumregions = | ||
1266 | o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS, | ||
1267 | o2hb_debug_dir, | ||
1268 | &o2hb_db_quorumregions, | ||
1269 | sizeof(*o2hb_db_quorumregions), | ||
1270 | O2HB_DB_TYPE_QUORUMREGIONS, | ||
1271 | sizeof(o2hb_quorum_region_bitmap), | ||
1272 | O2NM_MAX_REGIONS, | ||
1273 | o2hb_quorum_region_bitmap); | ||
1274 | if (!o2hb_debug_quorumregions) { | ||
1275 | mlog_errno(ret); | ||
1276 | goto bail; | ||
1277 | } | ||
1278 | |||
1279 | o2hb_debug_failedregions = | ||
1280 | o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS, | ||
1281 | o2hb_debug_dir, | ||
1282 | &o2hb_db_failedregions, | ||
1283 | sizeof(*o2hb_db_failedregions), | ||
1284 | O2HB_DB_TYPE_FAILEDREGIONS, | ||
1285 | sizeof(o2hb_failed_region_bitmap), | ||
1286 | O2NM_MAX_REGIONS, | ||
1287 | o2hb_failed_region_bitmap); | ||
1288 | if (!o2hb_debug_failedregions) { | ||
1289 | mlog_errno(ret); | ||
1290 | goto bail; | ||
1291 | } | ||
1292 | |||
1293 | ret = 0; | ||
1294 | bail: | ||
1295 | if (ret) | ||
1296 | o2hb_exit(); | ||
1297 | |||
1298 | return ret; | ||
985 | } | 1299 | } |
986 | 1300 | ||
987 | int o2hb_init(void) | 1301 | int o2hb_init(void) |
@@ -997,24 +1311,12 @@ int o2hb_init(void) | |||
997 | INIT_LIST_HEAD(&o2hb_node_events); | 1311 | INIT_LIST_HEAD(&o2hb_node_events); |
998 | 1312 | ||
999 | memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); | 1313 | memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); |
1314 | memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap)); | ||
1315 | memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap)); | ||
1316 | memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap)); | ||
1317 | memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap)); | ||
1000 | 1318 | ||
1001 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); | 1319 | return o2hb_debug_init(); |
1002 | if (!o2hb_debug_dir) { | ||
1003 | mlog_errno(-ENOMEM); | ||
1004 | return -ENOMEM; | ||
1005 | } | ||
1006 | |||
1007 | o2hb_debug_livenodes = debugfs_create_file(O2HB_DEBUG_LIVENODES, | ||
1008 | S_IFREG|S_IRUSR, | ||
1009 | o2hb_debug_dir, NULL, | ||
1010 | &o2hb_debug_fops); | ||
1011 | if (!o2hb_debug_livenodes) { | ||
1012 | mlog_errno(-ENOMEM); | ||
1013 | debugfs_remove(o2hb_debug_dir); | ||
1014 | return -ENOMEM; | ||
1015 | } | ||
1016 | |||
1017 | return 0; | ||
1018 | } | 1320 | } |
1019 | 1321 | ||
1020 | /* if we're already in a callback then we're already serialized by the sem */ | 1322 | /* if we're already in a callback then we're already serialized by the sem */ |
@@ -1078,6 +1380,13 @@ static void o2hb_region_release(struct config_item *item) | |||
1078 | if (reg->hr_slots) | 1380 | if (reg->hr_slots) |
1079 | kfree(reg->hr_slots); | 1381 | kfree(reg->hr_slots); |
1080 | 1382 | ||
1383 | kfree(reg->hr_db_regnum); | ||
1384 | kfree(reg->hr_db_livenodes); | ||
1385 | debugfs_remove(reg->hr_debug_livenodes); | ||
1386 | debugfs_remove(reg->hr_debug_regnum); | ||
1387 | debugfs_remove(reg->hr_debug_elapsed_time); | ||
1388 | debugfs_remove(reg->hr_debug_dir); | ||
1389 | |||
1081 | spin_lock(&o2hb_live_lock); | 1390 | spin_lock(&o2hb_live_lock); |
1082 | list_del(&reg->hr_all_item); | 1391 | list_del(&reg->hr_all_item); |
1083 | spin_unlock(&o2hb_live_lock); | 1392 | spin_unlock(&o2hb_live_lock); |
@@ -1441,6 +1750,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1441 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ | 1750 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ |
1442 | spin_lock(&o2hb_live_lock); | 1751 | spin_lock(&o2hb_live_lock); |
1443 | hb_task = reg->hr_task; | 1752 | hb_task = reg->hr_task; |
1753 | if (o2hb_global_heartbeat_active()) | ||
1754 | set_bit(reg->hr_region_num, o2hb_live_region_bitmap); | ||
1444 | spin_unlock(&o2hb_live_lock); | 1755 | spin_unlock(&o2hb_live_lock); |
1445 | 1756 | ||
1446 | if (hb_task) | 1757 | if (hb_task) |
@@ -1448,6 +1759,10 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1448 | else | 1759 | else |
1449 | ret = -EIO; | 1760 | ret = -EIO; |
1450 | 1761 | ||
1762 | if (hb_task && o2hb_global_heartbeat_active()) | ||
1763 | printk(KERN_NOTICE "o2hb: Heartbeat started on region %s\n", | ||
1764 | config_item_name(&reg->hr_item)); | ||
1765 | |||
1451 | out: | 1766 | out: |
1452 | if (filp) | 1767 | if (filp) |
1453 | fput(filp); | 1768 | fput(filp); |
@@ -1586,21 +1901,94 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group | |||
1586 | : NULL; | 1901 | : NULL; |
1587 | } | 1902 | } |
1588 | 1903 | ||
1904 | static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | ||
1905 | { | ||
1906 | int ret = -ENOMEM; | ||
1907 | |||
1908 | reg->hr_debug_dir = | ||
1909 | debugfs_create_dir(config_item_name(&reg->hr_item), dir); | ||
1910 | if (!reg->hr_debug_dir) { | ||
1911 | mlog_errno(ret); | ||
1912 | goto bail; | ||
1913 | } | ||
1914 | |||
1915 | reg->hr_debug_livenodes = | ||
1916 | o2hb_debug_create(O2HB_DEBUG_LIVENODES, | ||
1917 | reg->hr_debug_dir, | ||
1918 | &(reg->hr_db_livenodes), | ||
1919 | sizeof(*(reg->hr_db_livenodes)), | ||
1920 | O2HB_DB_TYPE_REGION_LIVENODES, | ||
1921 | sizeof(reg->hr_live_node_bitmap), | ||
1922 | O2NM_MAX_NODES, reg); | ||
1923 | if (!reg->hr_debug_livenodes) { | ||
1924 | mlog_errno(ret); | ||
1925 | goto bail; | ||
1926 | } | ||
1927 | |||
1928 | reg->hr_debug_regnum = | ||
1929 | o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER, | ||
1930 | reg->hr_debug_dir, | ||
1931 | &(reg->hr_db_regnum), | ||
1932 | sizeof(*(reg->hr_db_regnum)), | ||
1933 | O2HB_DB_TYPE_REGION_NUMBER, | ||
1934 | 0, O2NM_MAX_NODES, reg); | ||
1935 | if (!reg->hr_debug_regnum) { | ||
1936 | mlog_errno(ret); | ||
1937 | goto bail; | ||
1938 | } | ||
1939 | |||
1940 | reg->hr_debug_elapsed_time = | ||
1941 | o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME, | ||
1942 | reg->hr_debug_dir, | ||
1943 | &(reg->hr_db_elapsed_time), | ||
1944 | sizeof(*(reg->hr_db_elapsed_time)), | ||
1945 | O2HB_DB_TYPE_REGION_ELAPSED_TIME, | ||
1946 | 0, 0, reg); | ||
1947 | if (!reg->hr_debug_elapsed_time) { | ||
1948 | mlog_errno(ret); | ||
1949 | goto bail; | ||
1950 | } | ||
1951 | |||
1952 | ret = 0; | ||
1953 | bail: | ||
1954 | return ret; | ||
1955 | } | ||
1956 | |||
1589 | static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group, | 1957 | static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group, |
1590 | const char *name) | 1958 | const char *name) |
1591 | { | 1959 | { |
1592 | struct o2hb_region *reg = NULL; | 1960 | struct o2hb_region *reg = NULL; |
1961 | int ret; | ||
1593 | 1962 | ||
1594 | reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL); | 1963 | reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL); |
1595 | if (reg == NULL) | 1964 | if (reg == NULL) |
1596 | return ERR_PTR(-ENOMEM); | 1965 | return ERR_PTR(-ENOMEM); |
1597 | 1966 | ||
1598 | config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type); | 1967 | if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) |
1968 | return ERR_PTR(-ENAMETOOLONG); | ||
1599 | 1969 | ||
1600 | spin_lock(&o2hb_live_lock); | 1970 | spin_lock(&o2hb_live_lock); |
1971 | reg->hr_region_num = 0; | ||
1972 | if (o2hb_global_heartbeat_active()) { | ||
1973 | reg->hr_region_num = find_first_zero_bit(o2hb_region_bitmap, | ||
1974 | O2NM_MAX_REGIONS); | ||
1975 | if (reg->hr_region_num >= O2NM_MAX_REGIONS) { | ||
1976 | spin_unlock(&o2hb_live_lock); | ||
1977 | return ERR_PTR(-EFBIG); | ||
1978 | } | ||
1979 | set_bit(reg->hr_region_num, o2hb_region_bitmap); | ||
1980 | } | ||
1601 | list_add_tail(&reg->hr_all_item, &o2hb_all_regions); | 1981 | list_add_tail(&reg->hr_all_item, &o2hb_all_regions); |
1602 | spin_unlock(&o2hb_live_lock); | 1982 | spin_unlock(&o2hb_live_lock); |
1603 | 1983 | ||
1984 | config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type); | ||
1985 | |||
1986 | ret = o2hb_debug_region_init(reg, o2hb_debug_dir); | ||
1987 | if (ret) { | ||
1988 | config_item_put(&reg->hr_item); | ||
1989 | return ERR_PTR(ret); | ||
1990 | } | ||
1991 | |||
1604 | return &reg->hr_item; | 1992 | return &reg->hr_item; |
1605 | } | 1993 | } |
1606 | 1994 | ||
@@ -1612,6 +2000,10 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, | |||
1612 | 2000 | ||
1613 | /* stop the thread when the user removes the region dir */ | 2001 | /* stop the thread when the user removes the region dir */ |
1614 | spin_lock(&o2hb_live_lock); | 2002 | spin_lock(&o2hb_live_lock); |
2003 | if (o2hb_global_heartbeat_active()) { | ||
2004 | clear_bit(reg->hr_region_num, o2hb_region_bitmap); | ||
2005 | clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); | ||
2006 | } | ||
1615 | hb_task = reg->hr_task; | 2007 | hb_task = reg->hr_task; |
1616 | reg->hr_task = NULL; | 2008 | reg->hr_task = NULL; |
1617 | spin_unlock(&o2hb_live_lock); | 2009 | spin_unlock(&o2hb_live_lock); |
@@ -1628,6 +2020,9 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, | |||
1628 | wake_up(&o2hb_steady_queue); | 2020 | wake_up(&o2hb_steady_queue); |
1629 | } | 2021 | } |
1630 | 2022 | ||
2023 | if (o2hb_global_heartbeat_active()) | ||
2024 | printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n", | ||
2025 | config_item_name(&reg->hr_item)); | ||
1631 | config_item_put(item); | 2026 | config_item_put(item); |
1632 | } | 2027 | } |
1633 | 2028 | ||
@@ -1688,6 +2083,41 @@ static ssize_t o2hb_heartbeat_group_threshold_store(struct o2hb_heartbeat_group | |||
1688 | return count; | 2083 | return count; |
1689 | } | 2084 | } |
1690 | 2085 | ||
2086 | static | ||
2087 | ssize_t o2hb_heartbeat_group_mode_show(struct o2hb_heartbeat_group *group, | ||
2088 | char *page) | ||
2089 | { | ||
2090 | return sprintf(page, "%s\n", | ||
2091 | o2hb_heartbeat_mode_desc[o2hb_heartbeat_mode]); | ||
2092 | } | ||
2093 | |||
2094 | static | ||
2095 | ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, | ||
2096 | const char *page, size_t count) | ||
2097 | { | ||
2098 | unsigned int i; | ||
2099 | int ret; | ||
2100 | size_t len; | ||
2101 | |||
2102 | len = (page[count - 1] == '\n') ? count - 1 : count; | ||
2103 | if (!len) | ||
2104 | return -EINVAL; | ||
2105 | |||
2106 | for (i = 0; i < O2HB_HEARTBEAT_NUM_MODES; ++i) { | ||
2107 | if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len)) | ||
2108 | continue; | ||
2109 | |||
2110 | ret = o2hb_global_hearbeat_mode_set(i); | ||
2111 | if (!ret) | ||
2112 | printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n", | ||
2113 | o2hb_heartbeat_mode_desc[i]); | ||
2114 | return count; | ||
2115 | } | ||
2116 | |||
2117 | return -EINVAL; | ||
2118 | |||
2119 | } | ||
2120 | |||
1691 | static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = { | 2121 | static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = { |
1692 | .attr = { .ca_owner = THIS_MODULE, | 2122 | .attr = { .ca_owner = THIS_MODULE, |
1693 | .ca_name = "dead_threshold", | 2123 | .ca_name = "dead_threshold", |
@@ -1696,8 +2126,17 @@ static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold | |||
1696 | .store = o2hb_heartbeat_group_threshold_store, | 2126 | .store = o2hb_heartbeat_group_threshold_store, |
1697 | }; | 2127 | }; |
1698 | 2128 | ||
2129 | static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_mode = { | ||
2130 | .attr = { .ca_owner = THIS_MODULE, | ||
2131 | .ca_name = "mode", | ||
2132 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
2133 | .show = o2hb_heartbeat_group_mode_show, | ||
2134 | .store = o2hb_heartbeat_group_mode_store, | ||
2135 | }; | ||
2136 | |||
1699 | static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { | 2137 | static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { |
1700 | &o2hb_heartbeat_group_attr_threshold.attr, | 2138 | &o2hb_heartbeat_group_attr_threshold.attr, |
2139 | &o2hb_heartbeat_group_attr_mode.attr, | ||
1701 | NULL, | 2140 | NULL, |
1702 | }; | 2141 | }; |
1703 | 2142 | ||
@@ -1963,3 +2402,34 @@ void o2hb_stop_all_regions(void) | |||
1963 | spin_unlock(&o2hb_live_lock); | 2402 | spin_unlock(&o2hb_live_lock); |
1964 | } | 2403 | } |
1965 | EXPORT_SYMBOL_GPL(o2hb_stop_all_regions); | 2404 | EXPORT_SYMBOL_GPL(o2hb_stop_all_regions); |
2405 | |||
2406 | int o2hb_get_all_regions(char *region_uuids, u8 max_regions) | ||
2407 | { | ||
2408 | struct o2hb_region *reg; | ||
2409 | int numregs = 0; | ||
2410 | char *p; | ||
2411 | |||
2412 | spin_lock(&o2hb_live_lock); | ||
2413 | |||
2414 | p = region_uuids; | ||
2415 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { | ||
2416 | mlog(0, "Region: %s\n", config_item_name(&reg->hr_item)); | ||
2417 | if (numregs < max_regions) { | ||
2418 | memcpy(p, config_item_name(&reg->hr_item), | ||
2419 | O2HB_MAX_REGION_NAME_LEN); | ||
2420 | p += O2HB_MAX_REGION_NAME_LEN; | ||
2421 | } | ||
2422 | numregs++; | ||
2423 | } | ||
2424 | |||
2425 | spin_unlock(&o2hb_live_lock); | ||
2426 | |||
2427 | return numregs; | ||
2428 | } | ||
2429 | EXPORT_SYMBOL_GPL(o2hb_get_all_regions); | ||
2430 | |||
2431 | int o2hb_global_heartbeat_active(void) | ||
2432 | { | ||
2433 | return (o2hb_heartbeat_mode == O2HB_HEARTBEAT_GLOBAL); | ||
2434 | } | ||
2435 | EXPORT_SYMBOL(o2hb_global_heartbeat_active); | ||
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index 2f1649253b49..00ad8e8fea51 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h | |||
@@ -31,6 +31,8 @@ | |||
31 | 31 | ||
32 | #define O2HB_REGION_TIMEOUT_MS 2000 | 32 | #define O2HB_REGION_TIMEOUT_MS 2000 |
33 | 33 | ||
34 | #define O2HB_MAX_REGION_NAME_LEN 32 | ||
35 | |||
34 | /* number of changes to be seen as live */ | 36 | /* number of changes to be seen as live */ |
35 | #define O2HB_LIVE_THRESHOLD 2 | 37 | #define O2HB_LIVE_THRESHOLD 2 |
36 | /* number of equal samples to be seen as dead */ | 38 | /* number of equal samples to be seen as dead */ |
@@ -81,5 +83,7 @@ int o2hb_check_node_heartbeating(u8 node_num); | |||
81 | int o2hb_check_node_heartbeating_from_callback(u8 node_num); | 83 | int o2hb_check_node_heartbeating_from_callback(u8 node_num); |
82 | int o2hb_check_local_node_heartbeating(void); | 84 | int o2hb_check_local_node_heartbeating(void); |
83 | void o2hb_stop_all_regions(void); | 85 | void o2hb_stop_all_regions(void); |
86 | int o2hb_get_all_regions(char *region_uuids, u8 numregions); | ||
87 | int o2hb_global_heartbeat_active(void); | ||
84 | 88 | ||
85 | #endif /* O2CLUSTER_HEARTBEAT_H */ | 89 | #endif /* O2CLUSTER_HEARTBEAT_H */ |
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index fd96e2a2fa56..ea2ed9f56c94 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h | |||
@@ -119,7 +119,8 @@ | |||
119 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ | 119 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ |
120 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ | 120 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ |
121 | #define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ | 121 | #define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ |
122 | #define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */ | 122 | #define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */ |
123 | #define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */ | ||
123 | 124 | ||
124 | #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) | 125 | #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) |
125 | #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) | 126 | #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) |
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index ed0c9f367fed..bb240647ca5f 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
@@ -711,6 +711,8 @@ static struct config_item *o2nm_node_group_make_item(struct config_group *group, | |||
711 | config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); | 711 | config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); |
712 | spin_lock_init(&node->nd_lock); | 712 | spin_lock_init(&node->nd_lock); |
713 | 713 | ||
714 | mlog(ML_CLUSTER, "o2nm: Registering node %s\n", name); | ||
715 | |||
714 | return &node->nd_item; | 716 | return &node->nd_item; |
715 | } | 717 | } |
716 | 718 | ||
@@ -744,6 +746,9 @@ static void o2nm_node_group_drop_item(struct config_group *group, | |||
744 | } | 746 | } |
745 | write_unlock(&cluster->cl_nodes_lock); | 747 | write_unlock(&cluster->cl_nodes_lock); |
746 | 748 | ||
749 | mlog(ML_CLUSTER, "o2nm: Unregistered node %s\n", | ||
750 | config_item_name(&node->nd_item)); | ||
751 | |||
747 | config_item_put(item); | 752 | config_item_put(item); |
748 | } | 753 | } |
749 | 754 | ||
diff --git a/fs/ocfs2/cluster/ocfs2_nodemanager.h b/fs/ocfs2/cluster/ocfs2_nodemanager.h index 5b9854bad571..49b594325bec 100644 --- a/fs/ocfs2/cluster/ocfs2_nodemanager.h +++ b/fs/ocfs2/cluster/ocfs2_nodemanager.h | |||
@@ -36,4 +36,10 @@ | |||
36 | /* host name, group name, cluster name all 64 bytes */ | 36 | /* host name, group name, cluster name all 64 bytes */ |
37 | #define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN | 37 | #define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN |
38 | 38 | ||
39 | /* | ||
40 | * Maximum number of global heartbeat regions allowed. | ||
41 | * **CAUTION** Changing this number will break dlm compatibility. | ||
42 | */ | ||
43 | #define O2NM_MAX_REGIONS 32 | ||
44 | |||
39 | #endif /* _OCFS2_NODEMANAGER_H */ | 45 | #endif /* _OCFS2_NODEMANAGER_H */ |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index cbe2f057cc28..9aa426e42123 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -1696,6 +1696,9 @@ static void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num, | |||
1696 | { | 1696 | { |
1697 | o2quo_hb_down(node_num); | 1697 | o2quo_hb_down(node_num); |
1698 | 1698 | ||
1699 | if (!node) | ||
1700 | return; | ||
1701 | |||
1699 | if (node_num != o2nm_this_node()) | 1702 | if (node_num != o2nm_this_node()) |
1700 | o2net_disconnect_node(node); | 1703 | o2net_disconnect_node(node); |
1701 | 1704 | ||
@@ -1709,6 +1712,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, | |||
1709 | 1712 | ||
1710 | o2quo_hb_up(node_num); | 1713 | o2quo_hb_up(node_num); |
1711 | 1714 | ||
1715 | BUG_ON(!node); | ||
1716 | |||
1712 | /* ensure an immediate connect attempt */ | 1717 | /* ensure an immediate connect attempt */ |
1713 | nn->nn_last_connect_attempt = jiffies - | 1718 | nn->nn_last_connect_attempt = jiffies - |
1714 | (msecs_to_jiffies(o2net_reconnect_delay()) + 1); | 1719 | (msecs_to_jiffies(o2net_reconnect_delay()) + 1); |
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index b4957c7d9fe2..edaded48e7e9 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c | |||
@@ -40,6 +40,14 @@ | |||
40 | #include "inode.h" | 40 | #include "inode.h" |
41 | #include "super.h" | 41 | #include "super.h" |
42 | 42 | ||
43 | void ocfs2_dentry_attach_gen(struct dentry *dentry) | ||
44 | { | ||
45 | unsigned long gen = | ||
46 | OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen; | ||
47 | BUG_ON(dentry->d_inode); | ||
48 | dentry->d_fsdata = (void *)gen; | ||
49 | } | ||
50 | |||
43 | 51 | ||
44 | static int ocfs2_dentry_revalidate(struct dentry *dentry, | 52 | static int ocfs2_dentry_revalidate(struct dentry *dentry, |
45 | struct nameidata *nd) | 53 | struct nameidata *nd) |
@@ -51,11 +59,20 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, | |||
51 | mlog_entry("(0x%p, '%.*s')\n", dentry, | 59 | mlog_entry("(0x%p, '%.*s')\n", dentry, |
52 | dentry->d_name.len, dentry->d_name.name); | 60 | dentry->d_name.len, dentry->d_name.name); |
53 | 61 | ||
54 | /* Never trust a negative dentry - force a new lookup. */ | 62 | /* For a negative dentry - |
63 | * check the generation number of the parent and compare with the | ||
64 | * one stored in the inode. | ||
65 | */ | ||
55 | if (inode == NULL) { | 66 | if (inode == NULL) { |
56 | mlog(0, "negative dentry: %.*s\n", dentry->d_name.len, | 67 | unsigned long gen = (unsigned long) dentry->d_fsdata; |
57 | dentry->d_name.name); | 68 | unsigned long pgen = |
58 | goto bail; | 69 | OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen; |
70 | mlog(0, "negative dentry: %.*s parent gen: %lu " | ||
71 | "dentry gen: %lu\n", | ||
72 | dentry->d_name.len, dentry->d_name.name, pgen, gen); | ||
73 | if (gen != pgen) | ||
74 | goto bail; | ||
75 | goto valid; | ||
59 | } | 76 | } |
60 | 77 | ||
61 | BUG_ON(!osb); | 78 | BUG_ON(!osb); |
@@ -96,6 +113,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, | |||
96 | goto bail; | 113 | goto bail; |
97 | } | 114 | } |
98 | 115 | ||
116 | valid: | ||
99 | ret = 1; | 117 | ret = 1; |
100 | 118 | ||
101 | bail: | 119 | bail: |
@@ -227,6 +245,12 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, | |||
227 | if (!inode) | 245 | if (!inode) |
228 | return 0; | 246 | return 0; |
229 | 247 | ||
248 | if (!dentry->d_inode && dentry->d_fsdata) { | ||
249 | /* Converting a negative dentry to positive | ||
250 | Clear dentry->d_fsdata */ | ||
251 | dentry->d_fsdata = dl = NULL; | ||
252 | } | ||
253 | |||
230 | if (dl) { | 254 | if (dl) { |
231 | mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, | 255 | mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, |
232 | " \"%.*s\": old parent: %llu, new: %llu\n", | 256 | " \"%.*s\": old parent: %llu, new: %llu\n", |
@@ -452,6 +476,7 @@ static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode) | |||
452 | 476 | ||
453 | out: | 477 | out: |
454 | iput(inode); | 478 | iput(inode); |
479 | ocfs2_dentry_attach_gen(dentry); | ||
455 | } | 480 | } |
456 | 481 | ||
457 | /* | 482 | /* |
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h index f5dd1789acf1..b79eff709958 100644 --- a/fs/ocfs2/dcache.h +++ b/fs/ocfs2/dcache.h | |||
@@ -64,5 +64,6 @@ void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target, | |||
64 | struct inode *old_dir, struct inode *new_dir); | 64 | struct inode *old_dir, struct inode *new_dir); |
65 | 65 | ||
66 | extern spinlock_t dentry_attach_lock; | 66 | extern spinlock_t dentry_attach_lock; |
67 | void ocfs2_dentry_attach_gen(struct dentry *dentry); | ||
67 | 68 | ||
68 | #endif /* OCFS2_DCACHE_H */ | 69 | #endif /* OCFS2_DCACHE_H */ |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 765298908f1d..b36d0bf77a5a 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
@@ -445,7 +445,9 @@ enum { | |||
445 | DLM_LOCK_REQUEST_MSG, /* 515 */ | 445 | DLM_LOCK_REQUEST_MSG, /* 515 */ |
446 | DLM_RECO_DATA_DONE_MSG, /* 516 */ | 446 | DLM_RECO_DATA_DONE_MSG, /* 516 */ |
447 | DLM_BEGIN_RECO_MSG, /* 517 */ | 447 | DLM_BEGIN_RECO_MSG, /* 517 */ |
448 | DLM_FINALIZE_RECO_MSG /* 518 */ | 448 | DLM_FINALIZE_RECO_MSG, /* 518 */ |
449 | DLM_QUERY_REGION, /* 519 */ | ||
450 | DLM_QUERY_NODEINFO, /* 520 */ | ||
449 | }; | 451 | }; |
450 | 452 | ||
451 | struct dlm_reco_node_data | 453 | struct dlm_reco_node_data |
@@ -727,6 +729,31 @@ struct dlm_cancel_join | |||
727 | u8 domain[O2NM_MAX_NAME_LEN]; | 729 | u8 domain[O2NM_MAX_NAME_LEN]; |
728 | }; | 730 | }; |
729 | 731 | ||
732 | struct dlm_query_region { | ||
733 | u8 qr_node; | ||
734 | u8 qr_numregions; | ||
735 | u8 qr_namelen; | ||
736 | u8 pad1; | ||
737 | u8 qr_domain[O2NM_MAX_NAME_LEN]; | ||
738 | u8 qr_regions[O2HB_MAX_REGION_NAME_LEN * O2NM_MAX_REGIONS]; | ||
739 | }; | ||
740 | |||
741 | struct dlm_node_info { | ||
742 | u8 ni_nodenum; | ||
743 | u8 pad1; | ||
744 | u16 ni_ipv4_port; | ||
745 | u32 ni_ipv4_address; | ||
746 | }; | ||
747 | |||
748 | struct dlm_query_nodeinfo { | ||
749 | u8 qn_nodenum; | ||
750 | u8 qn_numnodes; | ||
751 | u8 qn_namelen; | ||
752 | u8 pad1; | ||
753 | u8 qn_domain[O2NM_MAX_NAME_LEN]; | ||
754 | struct dlm_node_info qn_nodes[O2NM_MAX_NODES]; | ||
755 | }; | ||
756 | |||
730 | struct dlm_exit_domain | 757 | struct dlm_exit_domain |
731 | { | 758 | { |
732 | u8 node_idx; | 759 | u8 node_idx; |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 901ca52bf86b..272ec8631a51 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
@@ -493,7 +493,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | |||
493 | struct hlist_head *bucket; | 493 | struct hlist_head *bucket; |
494 | struct hlist_node *list; | 494 | struct hlist_node *list; |
495 | int i, out = 0; | 495 | int i, out = 0; |
496 | unsigned long total = 0, longest = 0, bktcnt; | 496 | unsigned long total = 0, longest = 0, bucket_count = 0; |
497 | 497 | ||
498 | out += snprintf(db->buf + out, db->len - out, | 498 | out += snprintf(db->buf + out, db->len - out, |
499 | "Dumping MLEs for Domain: %s\n", dlm->name); | 499 | "Dumping MLEs for Domain: %s\n", dlm->name); |
@@ -505,13 +505,13 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | |||
505 | mle = hlist_entry(list, struct dlm_master_list_entry, | 505 | mle = hlist_entry(list, struct dlm_master_list_entry, |
506 | master_hash_node); | 506 | master_hash_node); |
507 | ++total; | 507 | ++total; |
508 | ++bktcnt; | 508 | ++bucket_count; |
509 | if (db->len - out < 200) | 509 | if (db->len - out < 200) |
510 | continue; | 510 | continue; |
511 | out += dump_mle(mle, db->buf + out, db->len - out); | 511 | out += dump_mle(mle, db->buf + out, db->len - out); |
512 | } | 512 | } |
513 | longest = max(longest, bktcnt); | 513 | longest = max(longest, bucket_count); |
514 | bktcnt = 0; | 514 | bucket_count = 0; |
515 | } | 515 | } |
516 | spin_unlock(&dlm->master_lock); | 516 | spin_unlock(&dlm->master_lock); |
517 | 517 | ||
@@ -782,7 +782,9 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | |||
782 | 782 | ||
783 | /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ | 783 | /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ |
784 | out += snprintf(db->buf + out, db->len - out, | 784 | out += snprintf(db->buf + out, db->len - out, |
785 | "Domain: %s Key: 0x%08x\n", dlm->name, dlm->key); | 785 | "Domain: %s Key: 0x%08x Protocol: %d.%d\n", |
786 | dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major, | ||
787 | dlm->dlm_locking_proto.pv_minor); | ||
786 | 788 | ||
787 | /* Thread Pid: xxx Node: xxx State: xxxxx */ | 789 | /* Thread Pid: xxx Node: xxx State: xxxxx */ |
788 | out += snprintf(db->buf + out, db->len - out, | 790 | out += snprintf(db->buf + out, db->len - out, |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 11a5c87fd7f7..58a93b953735 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -128,10 +128,14 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); | |||
128 | * will have a negotiated version with the same major number and a minor | 128 | * will have a negotiated version with the same major number and a minor |
129 | * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should | 129 | * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should |
130 | * be used to determine what a running domain is actually using. | 130 | * be used to determine what a running domain is actually using. |
131 | * | ||
132 | * New in version 1.1: | ||
133 | * - Message DLM_QUERY_REGION added to support global heartbeat | ||
134 | * - Message DLM_QUERY_NODEINFO added to allow online node removes | ||
131 | */ | 135 | */ |
132 | static const struct dlm_protocol_version dlm_protocol = { | 136 | static const struct dlm_protocol_version dlm_protocol = { |
133 | .pv_major = 1, | 137 | .pv_major = 1, |
134 | .pv_minor = 0, | 138 | .pv_minor = 1, |
135 | }; | 139 | }; |
136 | 140 | ||
137 | #define DLM_DOMAIN_BACKOFF_MS 200 | 141 | #define DLM_DOMAIN_BACKOFF_MS 200 |
@@ -142,6 +146,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, | |||
142 | void **ret_data); | 146 | void **ret_data); |
143 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, | 147 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, |
144 | void **ret_data); | 148 | void **ret_data); |
149 | static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | ||
150 | void *data, void **ret_data); | ||
145 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, | 151 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, |
146 | void **ret_data); | 152 | void **ret_data); |
147 | static int dlm_protocol_compare(struct dlm_protocol_version *existing, | 153 | static int dlm_protocol_compare(struct dlm_protocol_version *existing, |
@@ -921,6 +927,370 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, | |||
921 | return 0; | 927 | return 0; |
922 | } | 928 | } |
923 | 929 | ||
930 | static int dlm_match_regions(struct dlm_ctxt *dlm, | ||
931 | struct dlm_query_region *qr) | ||
932 | { | ||
933 | char *local = NULL, *remote = qr->qr_regions; | ||
934 | char *l, *r; | ||
935 | int localnr, i, j, foundit; | ||
936 | int status = 0; | ||
937 | |||
938 | if (!o2hb_global_heartbeat_active()) { | ||
939 | if (qr->qr_numregions) { | ||
940 | mlog(ML_ERROR, "Domain %s: Joining node %d has global " | ||
941 | "heartbeat enabled but local node %d does not\n", | ||
942 | qr->qr_domain, qr->qr_node, dlm->node_num); | ||
943 | status = -EINVAL; | ||
944 | } | ||
945 | goto bail; | ||
946 | } | ||
947 | |||
948 | if (o2hb_global_heartbeat_active() && !qr->qr_numregions) { | ||
949 | mlog(ML_ERROR, "Domain %s: Local node %d has global " | ||
950 | "heartbeat enabled but joining node %d does not\n", | ||
951 | qr->qr_domain, dlm->node_num, qr->qr_node); | ||
952 | status = -EINVAL; | ||
953 | goto bail; | ||
954 | } | ||
955 | |||
956 | r = remote; | ||
957 | for (i = 0; i < qr->qr_numregions; ++i) { | ||
958 | mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, r); | ||
959 | r += O2HB_MAX_REGION_NAME_LEN; | ||
960 | } | ||
961 | |||
962 | local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); | ||
963 | if (!local) { | ||
964 | status = -ENOMEM; | ||
965 | goto bail; | ||
966 | } | ||
967 | |||
968 | localnr = o2hb_get_all_regions(local, O2NM_MAX_REGIONS); | ||
969 | |||
970 | /* compare local regions with remote */ | ||
971 | l = local; | ||
972 | for (i = 0; i < localnr; ++i) { | ||
973 | foundit = 0; | ||
974 | r = remote; | ||
975 | for (j = 0; j <= qr->qr_numregions; ++j) { | ||
976 | if (!memcmp(l, r, O2HB_MAX_REGION_NAME_LEN)) { | ||
977 | foundit = 1; | ||
978 | break; | ||
979 | } | ||
980 | r += O2HB_MAX_REGION_NAME_LEN; | ||
981 | } | ||
982 | if (!foundit) { | ||
983 | status = -EINVAL; | ||
984 | mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " | ||
985 | "in local node %d but not in joining node %d\n", | ||
986 | qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, l, | ||
987 | dlm->node_num, qr->qr_node); | ||
988 | goto bail; | ||
989 | } | ||
990 | l += O2HB_MAX_REGION_NAME_LEN; | ||
991 | } | ||
992 | |||
993 | /* compare remote with local regions */ | ||
994 | r = remote; | ||
995 | for (i = 0; i < qr->qr_numregions; ++i) { | ||
996 | foundit = 0; | ||
997 | l = local; | ||
998 | for (j = 0; j < localnr; ++j) { | ||
999 | if (!memcmp(r, l, O2HB_MAX_REGION_NAME_LEN)) { | ||
1000 | foundit = 1; | ||
1001 | break; | ||
1002 | } | ||
1003 | l += O2HB_MAX_REGION_NAME_LEN; | ||
1004 | } | ||
1005 | if (!foundit) { | ||
1006 | status = -EINVAL; | ||
1007 | mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " | ||
1008 | "in joining node %d but not in local node %d\n", | ||
1009 | qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, r, | ||
1010 | qr->qr_node, dlm->node_num); | ||
1011 | goto bail; | ||
1012 | } | ||
1013 | r += O2HB_MAX_REGION_NAME_LEN; | ||
1014 | } | ||
1015 | |||
1016 | bail: | ||
1017 | kfree(local); | ||
1018 | |||
1019 | return status; | ||
1020 | } | ||
1021 | |||
1022 | static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map) | ||
1023 | { | ||
1024 | struct dlm_query_region *qr = NULL; | ||
1025 | int status, ret = 0, i; | ||
1026 | char *p; | ||
1027 | |||
1028 | if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) | ||
1029 | goto bail; | ||
1030 | |||
1031 | qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL); | ||
1032 | if (!qr) { | ||
1033 | ret = -ENOMEM; | ||
1034 | mlog_errno(ret); | ||
1035 | goto bail; | ||
1036 | } | ||
1037 | |||
1038 | qr->qr_node = dlm->node_num; | ||
1039 | qr->qr_namelen = strlen(dlm->name); | ||
1040 | memcpy(qr->qr_domain, dlm->name, qr->qr_namelen); | ||
1041 | /* if local hb, the numregions will be zero */ | ||
1042 | if (o2hb_global_heartbeat_active()) | ||
1043 | qr->qr_numregions = o2hb_get_all_regions(qr->qr_regions, | ||
1044 | O2NM_MAX_REGIONS); | ||
1045 | |||
1046 | p = qr->qr_regions; | ||
1047 | for (i = 0; i < qr->qr_numregions; ++i, p += O2HB_MAX_REGION_NAME_LEN) | ||
1048 | mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, p); | ||
1049 | |||
1050 | i = -1; | ||
1051 | while ((i = find_next_bit(node_map, O2NM_MAX_NODES, | ||
1052 | i + 1)) < O2NM_MAX_NODES) { | ||
1053 | if (i == dlm->node_num) | ||
1054 | continue; | ||
1055 | |||
1056 | mlog(0, "Sending regions to node %d\n", i); | ||
1057 | |||
1058 | ret = o2net_send_message(DLM_QUERY_REGION, DLM_MOD_KEY, qr, | ||
1059 | sizeof(struct dlm_query_region), | ||
1060 | i, &status); | ||
1061 | if (ret >= 0) | ||
1062 | ret = status; | ||
1063 | if (ret) { | ||
1064 | mlog(ML_ERROR, "Region mismatch %d, node %d\n", | ||
1065 | ret, i); | ||
1066 | break; | ||
1067 | } | ||
1068 | } | ||
1069 | |||
1070 | bail: | ||
1071 | kfree(qr); | ||
1072 | return ret; | ||
1073 | } | ||
1074 | |||
1075 | static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | ||
1076 | void *data, void **ret_data) | ||
1077 | { | ||
1078 | struct dlm_query_region *qr; | ||
1079 | struct dlm_ctxt *dlm = NULL; | ||
1080 | int status = 0; | ||
1081 | int locked = 0; | ||
1082 | |||
1083 | qr = (struct dlm_query_region *) msg->buf; | ||
1084 | |||
1085 | mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node, | ||
1086 | qr->qr_domain); | ||
1087 | |||
1088 | status = -EINVAL; | ||
1089 | |||
1090 | spin_lock(&dlm_domain_lock); | ||
1091 | dlm = __dlm_lookup_domain_full(qr->qr_domain, qr->qr_namelen); | ||
1092 | if (!dlm) { | ||
1093 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | ||
1094 | "before join domain\n", qr->qr_node, qr->qr_domain); | ||
1095 | goto bail; | ||
1096 | } | ||
1097 | |||
1098 | spin_lock(&dlm->spinlock); | ||
1099 | locked = 1; | ||
1100 | if (dlm->joining_node != qr->qr_node) { | ||
1101 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | ||
1102 | "but joining node is %d\n", qr->qr_node, qr->qr_domain, | ||
1103 | dlm->joining_node); | ||
1104 | goto bail; | ||
1105 | } | ||
1106 | |||
1107 | /* Support for global heartbeat was added in 1.1 */ | ||
1108 | if (dlm->dlm_locking_proto.pv_major == 1 && | ||
1109 | dlm->dlm_locking_proto.pv_minor == 0) { | ||
1110 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | ||
1111 | "but active dlm protocol is %d.%d\n", qr->qr_node, | ||
1112 | qr->qr_domain, dlm->dlm_locking_proto.pv_major, | ||
1113 | dlm->dlm_locking_proto.pv_minor); | ||
1114 | goto bail; | ||
1115 | } | ||
1116 | |||
1117 | status = dlm_match_regions(dlm, qr); | ||
1118 | |||
1119 | bail: | ||
1120 | if (locked) | ||
1121 | spin_unlock(&dlm->spinlock); | ||
1122 | spin_unlock(&dlm_domain_lock); | ||
1123 | |||
1124 | return status; | ||
1125 | } | ||
1126 | |||
1127 | static int dlm_match_nodes(struct dlm_ctxt *dlm, struct dlm_query_nodeinfo *qn) | ||
1128 | { | ||
1129 | struct o2nm_node *local; | ||
1130 | struct dlm_node_info *remote; | ||
1131 | int i, j; | ||
1132 | int status = 0; | ||
1133 | |||
1134 | for (j = 0; j < qn->qn_numnodes; ++j) | ||
1135 | mlog(0, "Node %3d, %pI4:%u\n", qn->qn_nodes[j].ni_nodenum, | ||
1136 | &(qn->qn_nodes[j].ni_ipv4_address), | ||
1137 | ntohs(qn->qn_nodes[j].ni_ipv4_port)); | ||
1138 | |||
1139 | for (i = 0; i < O2NM_MAX_NODES && !status; ++i) { | ||
1140 | local = o2nm_get_node_by_num(i); | ||
1141 | remote = NULL; | ||
1142 | for (j = 0; j < qn->qn_numnodes; ++j) { | ||
1143 | if (qn->qn_nodes[j].ni_nodenum == i) { | ||
1144 | remote = &(qn->qn_nodes[j]); | ||
1145 | break; | ||
1146 | } | ||
1147 | } | ||
1148 | |||
1149 | if (!local && !remote) | ||
1150 | continue; | ||
1151 | |||
1152 | if ((local && !remote) || (!local && remote)) | ||
1153 | status = -EINVAL; | ||
1154 | |||
1155 | if (!status && | ||
1156 | ((remote->ni_nodenum != local->nd_num) || | ||
1157 | (remote->ni_ipv4_port != local->nd_ipv4_port) || | ||
1158 | (remote->ni_ipv4_address != local->nd_ipv4_address))) | ||
1159 | status = -EINVAL; | ||
1160 | |||
1161 | if (status) { | ||
1162 | if (remote && !local) | ||
1163 | mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " | ||
1164 | "registered in joining node %d but not in " | ||
1165 | "local node %d\n", qn->qn_domain, | ||
1166 | remote->ni_nodenum, | ||
1167 | &(remote->ni_ipv4_address), | ||
1168 | ntohs(remote->ni_ipv4_port), | ||
1169 | qn->qn_nodenum, dlm->node_num); | ||
1170 | if (local && !remote) | ||
1171 | mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " | ||
1172 | "registered in local node %d but not in " | ||
1173 | "joining node %d\n", qn->qn_domain, | ||
1174 | local->nd_num, &(local->nd_ipv4_address), | ||
1175 | ntohs(local->nd_ipv4_port), | ||
1176 | dlm->node_num, qn->qn_nodenum); | ||
1177 | BUG_ON((!local && !remote)); | ||
1178 | } | ||
1179 | |||
1180 | if (local) | ||
1181 | o2nm_node_put(local); | ||
1182 | } | ||
1183 | |||
1184 | return status; | ||
1185 | } | ||
1186 | |||
1187 | static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map) | ||
1188 | { | ||
1189 | struct dlm_query_nodeinfo *qn = NULL; | ||
1190 | struct o2nm_node *node; | ||
1191 | int ret = 0, status, count, i; | ||
1192 | |||
1193 | if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) | ||
1194 | goto bail; | ||
1195 | |||
1196 | qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL); | ||
1197 | if (!qn) { | ||
1198 | ret = -ENOMEM; | ||
1199 | mlog_errno(ret); | ||
1200 | goto bail; | ||
1201 | } | ||
1202 | |||
1203 | for (i = 0, count = 0; i < O2NM_MAX_NODES; ++i) { | ||
1204 | node = o2nm_get_node_by_num(i); | ||
1205 | if (!node) | ||
1206 | continue; | ||
1207 | qn->qn_nodes[count].ni_nodenum = node->nd_num; | ||
1208 | qn->qn_nodes[count].ni_ipv4_port = node->nd_ipv4_port; | ||
1209 | qn->qn_nodes[count].ni_ipv4_address = node->nd_ipv4_address; | ||
1210 | mlog(0, "Node %3d, %pI4:%u\n", node->nd_num, | ||
1211 | &(node->nd_ipv4_address), ntohs(node->nd_ipv4_port)); | ||
1212 | ++count; | ||
1213 | o2nm_node_put(node); | ||
1214 | } | ||
1215 | |||
1216 | qn->qn_nodenum = dlm->node_num; | ||
1217 | qn->qn_numnodes = count; | ||
1218 | qn->qn_namelen = strlen(dlm->name); | ||
1219 | memcpy(qn->qn_domain, dlm->name, qn->qn_namelen); | ||
1220 | |||
1221 | i = -1; | ||
1222 | while ((i = find_next_bit(node_map, O2NM_MAX_NODES, | ||
1223 | i + 1)) < O2NM_MAX_NODES) { | ||
1224 | if (i == dlm->node_num) | ||
1225 | continue; | ||
1226 | |||
1227 | mlog(0, "Sending nodeinfo to node %d\n", i); | ||
1228 | |||
1229 | ret = o2net_send_message(DLM_QUERY_NODEINFO, DLM_MOD_KEY, | ||
1230 | qn, sizeof(struct dlm_query_nodeinfo), | ||
1231 | i, &status); | ||
1232 | if (ret >= 0) | ||
1233 | ret = status; | ||
1234 | if (ret) { | ||
1235 | mlog(ML_ERROR, "node mismatch %d, node %d\n", ret, i); | ||
1236 | break; | ||
1237 | } | ||
1238 | } | ||
1239 | |||
1240 | bail: | ||
1241 | kfree(qn); | ||
1242 | return ret; | ||
1243 | } | ||
1244 | |||
1245 | static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len, | ||
1246 | void *data, void **ret_data) | ||
1247 | { | ||
1248 | struct dlm_query_nodeinfo *qn; | ||
1249 | struct dlm_ctxt *dlm = NULL; | ||
1250 | int locked = 0, status = -EINVAL; | ||
1251 | |||
1252 | qn = (struct dlm_query_nodeinfo *) msg->buf; | ||
1253 | |||
1254 | mlog(0, "Node %u queries nodes on domain %s\n", qn->qn_nodenum, | ||
1255 | qn->qn_domain); | ||
1256 | |||
1257 | spin_lock(&dlm_domain_lock); | ||
1258 | dlm = __dlm_lookup_domain_full(qn->qn_domain, qn->qn_namelen); | ||
1259 | if (!dlm) { | ||
1260 | mlog(ML_ERROR, "Node %d queried nodes on domain %s before " | ||
1261 | "join domain\n", qn->qn_nodenum, qn->qn_domain); | ||
1262 | goto bail; | ||
1263 | } | ||
1264 | |||
1265 | spin_lock(&dlm->spinlock); | ||
1266 | locked = 1; | ||
1267 | if (dlm->joining_node != qn->qn_nodenum) { | ||
1268 | mlog(ML_ERROR, "Node %d queried nodes on domain %s but " | ||
1269 | "joining node is %d\n", qn->qn_nodenum, qn->qn_domain, | ||
1270 | dlm->joining_node); | ||
1271 | goto bail; | ||
1272 | } | ||
1273 | |||
1274 | /* Support for node query was added in 1.1 */ | ||
1275 | if (dlm->dlm_locking_proto.pv_major == 1 && | ||
1276 | dlm->dlm_locking_proto.pv_minor == 0) { | ||
1277 | mlog(ML_ERROR, "Node %d queried nodes on domain %s " | ||
1278 | "but active dlm protocol is %d.%d\n", qn->qn_nodenum, | ||
1279 | qn->qn_domain, dlm->dlm_locking_proto.pv_major, | ||
1280 | dlm->dlm_locking_proto.pv_minor); | ||
1281 | goto bail; | ||
1282 | } | ||
1283 | |||
1284 | status = dlm_match_nodes(dlm, qn); | ||
1285 | |||
1286 | bail: | ||
1287 | if (locked) | ||
1288 | spin_unlock(&dlm->spinlock); | ||
1289 | spin_unlock(&dlm_domain_lock); | ||
1290 | |||
1291 | return status; | ||
1292 | } | ||
1293 | |||
924 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, | 1294 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, |
925 | void **ret_data) | 1295 | void **ret_data) |
926 | { | 1296 | { |
@@ -1241,6 +1611,20 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) | |||
1241 | set_bit(dlm->node_num, dlm->domain_map); | 1611 | set_bit(dlm->node_num, dlm->domain_map); |
1242 | spin_unlock(&dlm->spinlock); | 1612 | spin_unlock(&dlm->spinlock); |
1243 | 1613 | ||
1614 | /* Support for global heartbeat and node info was added in 1.1 */ | ||
1615 | if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) { | ||
1616 | status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map); | ||
1617 | if (status) { | ||
1618 | mlog_errno(status); | ||
1619 | goto bail; | ||
1620 | } | ||
1621 | status = dlm_send_regions(dlm, ctxt->yes_resp_map); | ||
1622 | if (status) { | ||
1623 | mlog_errno(status); | ||
1624 | goto bail; | ||
1625 | } | ||
1626 | } | ||
1627 | |||
1244 | dlm_send_join_asserts(dlm, ctxt->yes_resp_map); | 1628 | dlm_send_join_asserts(dlm, ctxt->yes_resp_map); |
1245 | 1629 | ||
1246 | /* Joined state *must* be set before the joining node | 1630 | /* Joined state *must* be set before the joining node |
@@ -1807,7 +2191,21 @@ static int dlm_register_net_handlers(void) | |||
1807 | sizeof(struct dlm_cancel_join), | 2191 | sizeof(struct dlm_cancel_join), |
1808 | dlm_cancel_join_handler, | 2192 | dlm_cancel_join_handler, |
1809 | NULL, NULL, &dlm_join_handlers); | 2193 | NULL, NULL, &dlm_join_handlers); |
2194 | if (status) | ||
2195 | goto bail; | ||
2196 | |||
2197 | status = o2net_register_handler(DLM_QUERY_REGION, DLM_MOD_KEY, | ||
2198 | sizeof(struct dlm_query_region), | ||
2199 | dlm_query_region_handler, | ||
2200 | NULL, NULL, &dlm_join_handlers); | ||
1810 | 2201 | ||
2202 | if (status) | ||
2203 | goto bail; | ||
2204 | |||
2205 | status = o2net_register_handler(DLM_QUERY_NODEINFO, DLM_MOD_KEY, | ||
2206 | sizeof(struct dlm_query_nodeinfo), | ||
2207 | dlm_query_nodeinfo_handler, | ||
2208 | NULL, NULL, &dlm_join_handlers); | ||
1811 | bail: | 2209 | bail: |
1812 | if (status < 0) | 2210 | if (status < 0) |
1813 | dlm_unregister_net_handlers(); | 2211 | dlm_unregister_net_handlers(); |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 5e02a893f46e..e8d94d722ecb 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -3635,10 +3635,18 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
3635 | { | 3635 | { |
3636 | struct inode *inode; | 3636 | struct inode *inode; |
3637 | struct address_space *mapping; | 3637 | struct address_space *mapping; |
3638 | struct ocfs2_inode_info *oi; | ||
3638 | 3639 | ||
3639 | inode = ocfs2_lock_res_inode(lockres); | 3640 | inode = ocfs2_lock_res_inode(lockres); |
3640 | mapping = inode->i_mapping; | 3641 | mapping = inode->i_mapping; |
3641 | 3642 | ||
3643 | if (S_ISDIR(inode->i_mode)) { | ||
3644 | oi = OCFS2_I(inode); | ||
3645 | oi->ip_dir_lock_gen++; | ||
3646 | mlog(0, "generation: %u\n", oi->ip_dir_lock_gen); | ||
3647 | goto out; | ||
3648 | } | ||
3649 | |||
3642 | if (!S_ISREG(inode->i_mode)) | 3650 | if (!S_ISREG(inode->i_mode)) |
3643 | goto out; | 3651 | goto out; |
3644 | 3652 | ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 9a03c151b5ce..9e8cc4346b76 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -64,12 +64,6 @@ | |||
64 | 64 | ||
65 | #include "buffer_head_io.h" | 65 | #include "buffer_head_io.h" |
66 | 66 | ||
67 | static int ocfs2_sync_inode(struct inode *inode) | ||
68 | { | ||
69 | filemap_fdatawrite(inode->i_mapping); | ||
70 | return sync_mapping_buffers(inode->i_mapping); | ||
71 | } | ||
72 | |||
73 | static int ocfs2_init_file_private(struct inode *inode, struct file *file) | 67 | static int ocfs2_init_file_private(struct inode *inode, struct file *file) |
74 | { | 68 | { |
75 | struct ocfs2_file_private *fp; | 69 | struct ocfs2_file_private *fp; |
@@ -180,16 +174,12 @@ static int ocfs2_sync_file(struct file *file, int datasync) | |||
180 | { | 174 | { |
181 | int err = 0; | 175 | int err = 0; |
182 | journal_t *journal; | 176 | journal_t *journal; |
183 | struct dentry *dentry = file->f_path.dentry; | ||
184 | struct inode *inode = file->f_mapping->host; | 177 | struct inode *inode = file->f_mapping->host; |
185 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 178 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
186 | 179 | ||
187 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, | 180 | mlog_entry("(0x%p, %d, 0x%p, '%.*s')\n", file, datasync, |
188 | dentry->d_name.len, dentry->d_name.name); | 181 | file->f_path.dentry, file->f_path.dentry->d_name.len, |
189 | 182 | file->f_path.dentry->d_name.name); | |
190 | err = ocfs2_sync_inode(dentry->d_inode); | ||
191 | if (err) | ||
192 | goto bail; | ||
193 | 183 | ||
194 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { | 184 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { |
195 | /* | 185 | /* |
@@ -370,7 +360,7 @@ static int ocfs2_cow_file_pos(struct inode *inode, | |||
370 | if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) | 360 | if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) |
371 | goto out; | 361 | goto out; |
372 | 362 | ||
373 | return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1); | 363 | return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1); |
374 | 364 | ||
375 | out: | 365 | out: |
376 | return status; | 366 | return status; |
@@ -913,8 +903,8 @@ static int ocfs2_zero_extend_get_range(struct inode *inode, | |||
913 | zero_clusters = last_cpos - zero_cpos; | 903 | zero_clusters = last_cpos - zero_cpos; |
914 | 904 | ||
915 | if (needs_cow) { | 905 | if (needs_cow) { |
916 | rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters, | 906 | rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos, |
917 | UINT_MAX); | 907 | zero_clusters, UINT_MAX); |
918 | if (rc) { | 908 | if (rc) { |
919 | mlog_errno(rc); | 909 | mlog_errno(rc); |
920 | goto out; | 910 | goto out; |
@@ -2062,6 +2052,7 @@ out: | |||
2062 | } | 2052 | } |
2063 | 2053 | ||
2064 | static int ocfs2_prepare_inode_for_refcount(struct inode *inode, | 2054 | static int ocfs2_prepare_inode_for_refcount(struct inode *inode, |
2055 | struct file *file, | ||
2065 | loff_t pos, size_t count, | 2056 | loff_t pos, size_t count, |
2066 | int *meta_level) | 2057 | int *meta_level) |
2067 | { | 2058 | { |
@@ -2079,7 +2070,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode, | |||
2079 | 2070 | ||
2080 | *meta_level = 1; | 2071 | *meta_level = 1; |
2081 | 2072 | ||
2082 | ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX); | 2073 | ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX); |
2083 | if (ret) | 2074 | if (ret) |
2084 | mlog_errno(ret); | 2075 | mlog_errno(ret); |
2085 | out: | 2076 | out: |
@@ -2087,7 +2078,7 @@ out: | |||
2087 | return ret; | 2078 | return ret; |
2088 | } | 2079 | } |
2089 | 2080 | ||
2090 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | 2081 | static int ocfs2_prepare_inode_for_write(struct file *file, |
2091 | loff_t *ppos, | 2082 | loff_t *ppos, |
2092 | size_t count, | 2083 | size_t count, |
2093 | int appending, | 2084 | int appending, |
@@ -2095,6 +2086,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
2095 | int *has_refcount) | 2086 | int *has_refcount) |
2096 | { | 2087 | { |
2097 | int ret = 0, meta_level = 0; | 2088 | int ret = 0, meta_level = 0; |
2089 | struct dentry *dentry = file->f_path.dentry; | ||
2098 | struct inode *inode = dentry->d_inode; | 2090 | struct inode *inode = dentry->d_inode; |
2099 | loff_t saved_pos, end; | 2091 | loff_t saved_pos, end; |
2100 | 2092 | ||
@@ -2150,6 +2142,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
2150 | meta_level = -1; | 2142 | meta_level = -1; |
2151 | 2143 | ||
2152 | ret = ocfs2_prepare_inode_for_refcount(inode, | 2144 | ret = ocfs2_prepare_inode_for_refcount(inode, |
2145 | file, | ||
2153 | saved_pos, | 2146 | saved_pos, |
2154 | count, | 2147 | count, |
2155 | &meta_level); | 2148 | &meta_level); |
@@ -2232,6 +2225,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
2232 | struct file *file = iocb->ki_filp; | 2225 | struct file *file = iocb->ki_filp; |
2233 | struct inode *inode = file->f_path.dentry->d_inode; | 2226 | struct inode *inode = file->f_path.dentry->d_inode; |
2234 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2227 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2228 | int full_coherency = !(osb->s_mount_opt & | ||
2229 | OCFS2_MOUNT_COHERENCY_BUFFERED); | ||
2235 | 2230 | ||
2236 | mlog_entry("(0x%p, %u, '%.*s')\n", file, | 2231 | mlog_entry("(0x%p, %u, '%.*s')\n", file, |
2237 | (unsigned int)nr_segs, | 2232 | (unsigned int)nr_segs, |
@@ -2255,16 +2250,39 @@ relock: | |||
2255 | have_alloc_sem = 1; | 2250 | have_alloc_sem = 1; |
2256 | } | 2251 | } |
2257 | 2252 | ||
2258 | /* concurrent O_DIRECT writes are allowed */ | 2253 | /* |
2259 | rw_level = !direct_io; | 2254 | * Concurrent O_DIRECT writes are allowed with |
2255 | * mount_option "coherency=buffered". | ||
2256 | */ | ||
2257 | rw_level = (!direct_io || full_coherency); | ||
2258 | |||
2260 | ret = ocfs2_rw_lock(inode, rw_level); | 2259 | ret = ocfs2_rw_lock(inode, rw_level); |
2261 | if (ret < 0) { | 2260 | if (ret < 0) { |
2262 | mlog_errno(ret); | 2261 | mlog_errno(ret); |
2263 | goto out_sems; | 2262 | goto out_sems; |
2264 | } | 2263 | } |
2265 | 2264 | ||
2265 | /* | ||
2266 | * O_DIRECT writes with "coherency=full" need to take EX cluster | ||
2267 | * inode_lock to guarantee coherency. | ||
2268 | */ | ||
2269 | if (direct_io && full_coherency) { | ||
2270 | /* | ||
2271 | * We need to take and drop the inode lock to force | ||
2272 | * other nodes to drop their caches. Buffered I/O | ||
2273 | * already does this in write_begin(). | ||
2274 | */ | ||
2275 | ret = ocfs2_inode_lock(inode, NULL, 1); | ||
2276 | if (ret < 0) { | ||
2277 | mlog_errno(ret); | ||
2278 | goto out_sems; | ||
2279 | } | ||
2280 | |||
2281 | ocfs2_inode_unlock(inode, 1); | ||
2282 | } | ||
2283 | |||
2266 | can_do_direct = direct_io; | 2284 | can_do_direct = direct_io; |
2267 | ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos, | 2285 | ret = ocfs2_prepare_inode_for_write(file, ppos, |
2268 | iocb->ki_left, appending, | 2286 | iocb->ki_left, appending, |
2269 | &can_do_direct, &has_refcount); | 2287 | &can_do_direct, &has_refcount); |
2270 | if (ret < 0) { | 2288 | if (ret < 0) { |
@@ -2312,17 +2330,6 @@ relock: | |||
2312 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, | 2330 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, |
2313 | ppos, count, ocount); | 2331 | ppos, count, ocount); |
2314 | if (written < 0) { | 2332 | if (written < 0) { |
2315 | /* | ||
2316 | * direct write may have instantiated a few | ||
2317 | * blocks outside i_size. Trim these off again. | ||
2318 | * Don't need i_size_read because we hold i_mutex. | ||
2319 | * | ||
2320 | * XXX(truncate): this looks buggy because ocfs2 did not | ||
2321 | * actually implement ->truncate. Take a look at | ||
2322 | * the new truncate sequence and update this accordingly | ||
2323 | */ | ||
2324 | if (*ppos + count > inode->i_size) | ||
2325 | truncate_setsize(inode, inode->i_size); | ||
2326 | ret = written; | 2333 | ret = written; |
2327 | goto out_dio; | 2334 | goto out_dio; |
2328 | } | 2335 | } |
@@ -2394,7 +2401,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, | |||
2394 | { | 2401 | { |
2395 | int ret; | 2402 | int ret; |
2396 | 2403 | ||
2397 | ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, | 2404 | ret = ocfs2_prepare_inode_for_write(out, &sd->pos, |
2398 | sd->total_len, 0, NULL, NULL); | 2405 | sd->total_len, 0, NULL, NULL); |
2399 | if (ret < 0) { | 2406 | if (ret < 0) { |
2400 | mlog_errno(ret); | 2407 | mlog_errno(ret); |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index eece3e05d9d0..f935fd6600dd 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -335,6 +335,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
335 | else | 335 | else |
336 | inode->i_fop = &ocfs2_dops_no_plocks; | 336 | inode->i_fop = &ocfs2_dops_no_plocks; |
337 | i_size_write(inode, le64_to_cpu(fe->i_size)); | 337 | i_size_write(inode, le64_to_cpu(fe->i_size)); |
338 | OCFS2_I(inode)->ip_dir_lock_gen = 1; | ||
338 | break; | 339 | break; |
339 | case S_IFLNK: | 340 | case S_IFLNK: |
340 | if (ocfs2_inode_is_fast_symlink(inode)) | 341 | if (ocfs2_inode_is_fast_symlink(inode)) |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 6de5a869db30..1c508b149b3a 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -46,30 +46,28 @@ struct ocfs2_inode_info | |||
46 | /* These fields are protected by ip_lock */ | 46 | /* These fields are protected by ip_lock */ |
47 | spinlock_t ip_lock; | 47 | spinlock_t ip_lock; |
48 | u32 ip_open_count; | 48 | u32 ip_open_count; |
49 | u32 ip_clusters; | ||
50 | struct list_head ip_io_markers; | 49 | struct list_head ip_io_markers; |
50 | u32 ip_clusters; | ||
51 | 51 | ||
52 | u16 ip_dyn_features; | ||
52 | struct mutex ip_io_mutex; | 53 | struct mutex ip_io_mutex; |
53 | |||
54 | u32 ip_flags; /* see below */ | 54 | u32 ip_flags; /* see below */ |
55 | u32 ip_attr; /* inode attributes */ | 55 | u32 ip_attr; /* inode attributes */ |
56 | u16 ip_dyn_features; | ||
57 | 56 | ||
58 | /* protected by recovery_lock. */ | 57 | /* protected by recovery_lock. */ |
59 | struct inode *ip_next_orphan; | 58 | struct inode *ip_next_orphan; |
60 | 59 | ||
61 | u32 ip_dir_start_lookup; | ||
62 | |||
63 | struct ocfs2_caching_info ip_metadata_cache; | 60 | struct ocfs2_caching_info ip_metadata_cache; |
64 | |||
65 | struct ocfs2_extent_map ip_extent_map; | 61 | struct ocfs2_extent_map ip_extent_map; |
66 | |||
67 | struct inode vfs_inode; | 62 | struct inode vfs_inode; |
68 | struct jbd2_inode ip_jinode; | 63 | struct jbd2_inode ip_jinode; |
69 | 64 | ||
65 | u32 ip_dir_start_lookup; | ||
66 | |||
70 | /* Only valid if the inode is the dir. */ | 67 | /* Only valid if the inode is the dir. */ |
71 | u32 ip_last_used_slot; | 68 | u32 ip_last_used_slot; |
72 | u64 ip_last_used_group; | 69 | u64 ip_last_used_group; |
70 | u32 ip_dir_lock_gen; | ||
73 | 71 | ||
74 | struct ocfs2_alloc_reservation ip_la_data_resv; | 72 | struct ocfs2_alloc_reservation ip_la_data_resv; |
75 | }; | 73 | }; |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 7d9d9c132cef..7a4868196152 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -26,6 +26,26 @@ | |||
26 | 26 | ||
27 | #include <linux/ext2_fs.h> | 27 | #include <linux/ext2_fs.h> |
28 | 28 | ||
/*
 * Copy a whole request structure between kernel and user space.
 * @a is the kernel-side structure (an lvalue, its size sets the copy
 * length) and @b is the user pointer.  Both expand to the underlying
 * copy_from_user()/copy_to_user() call, so a non-zero result means the
 * copy did not complete.
 */
#define o2info_from_user(a, b)	\
		copy_from_user(&(a), (b), sizeof(a))
/* (b) is parenthesized so the cast applies to the whole argument */
#define o2info_to_user(a, b)	\
		copy_to_user((typeof(a) __user *)(b), &(a), sizeof(a))
33 | |||
34 | /* | ||
35 | * This call is void because we are already reporting an error that may | ||
36 | * be -EFAULT. The error will be returned from the ioctl(2) call. It's | ||
37 | * just a best-effort to tell userspace that this request caused the error. | ||
38 | */ | ||
39 | static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq, | ||
40 | struct ocfs2_info_request __user *req) | ||
41 | { | ||
42 | kreq->ir_flags |= OCFS2_INFO_FL_ERROR; | ||
43 | (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags)); | ||
44 | } | ||
45 | |||
46 | #define o2info_set_request_error(a, b) \ | ||
47 | __o2info_set_request_error((struct ocfs2_info_request *)&(a), b) | ||
48 | |||
29 | static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) | 49 | static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) |
30 | { | 50 | { |
31 | int status; | 51 | int status; |
@@ -109,6 +129,328 @@ bail: | |||
109 | return status; | 129 | return status; |
110 | } | 130 | } |
111 | 131 | ||
132 | int ocfs2_info_handle_blocksize(struct inode *inode, | ||
133 | struct ocfs2_info_request __user *req) | ||
134 | { | ||
135 | int status = -EFAULT; | ||
136 | struct ocfs2_info_blocksize oib; | ||
137 | |||
138 | if (o2info_from_user(oib, req)) | ||
139 | goto bail; | ||
140 | |||
141 | oib.ib_blocksize = inode->i_sb->s_blocksize; | ||
142 | oib.ib_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
143 | |||
144 | if (o2info_to_user(oib, req)) | ||
145 | goto bail; | ||
146 | |||
147 | status = 0; | ||
148 | bail: | ||
149 | if (status) | ||
150 | o2info_set_request_error(oib, req); | ||
151 | |||
152 | return status; | ||
153 | } | ||
154 | |||
155 | int ocfs2_info_handle_clustersize(struct inode *inode, | ||
156 | struct ocfs2_info_request __user *req) | ||
157 | { | ||
158 | int status = -EFAULT; | ||
159 | struct ocfs2_info_clustersize oic; | ||
160 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
161 | |||
162 | if (o2info_from_user(oic, req)) | ||
163 | goto bail; | ||
164 | |||
165 | oic.ic_clustersize = osb->s_clustersize; | ||
166 | oic.ic_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
167 | |||
168 | if (o2info_to_user(oic, req)) | ||
169 | goto bail; | ||
170 | |||
171 | status = 0; | ||
172 | bail: | ||
173 | if (status) | ||
174 | o2info_set_request_error(oic, req); | ||
175 | |||
176 | return status; | ||
177 | } | ||
178 | |||
179 | int ocfs2_info_handle_maxslots(struct inode *inode, | ||
180 | struct ocfs2_info_request __user *req) | ||
181 | { | ||
182 | int status = -EFAULT; | ||
183 | struct ocfs2_info_maxslots oim; | ||
184 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
185 | |||
186 | if (o2info_from_user(oim, req)) | ||
187 | goto bail; | ||
188 | |||
189 | oim.im_max_slots = osb->max_slots; | ||
190 | oim.im_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
191 | |||
192 | if (o2info_to_user(oim, req)) | ||
193 | goto bail; | ||
194 | |||
195 | status = 0; | ||
196 | bail: | ||
197 | if (status) | ||
198 | o2info_set_request_error(oim, req); | ||
199 | |||
200 | return status; | ||
201 | } | ||
202 | |||
203 | int ocfs2_info_handle_label(struct inode *inode, | ||
204 | struct ocfs2_info_request __user *req) | ||
205 | { | ||
206 | int status = -EFAULT; | ||
207 | struct ocfs2_info_label oil; | ||
208 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
209 | |||
210 | if (o2info_from_user(oil, req)) | ||
211 | goto bail; | ||
212 | |||
213 | memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); | ||
214 | oil.il_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
215 | |||
216 | if (o2info_to_user(oil, req)) | ||
217 | goto bail; | ||
218 | |||
219 | status = 0; | ||
220 | bail: | ||
221 | if (status) | ||
222 | o2info_set_request_error(oil, req); | ||
223 | |||
224 | return status; | ||
225 | } | ||
226 | |||
227 | int ocfs2_info_handle_uuid(struct inode *inode, | ||
228 | struct ocfs2_info_request __user *req) | ||
229 | { | ||
230 | int status = -EFAULT; | ||
231 | struct ocfs2_info_uuid oiu; | ||
232 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
233 | |||
234 | if (o2info_from_user(oiu, req)) | ||
235 | goto bail; | ||
236 | |||
237 | memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); | ||
238 | oiu.iu_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
239 | |||
240 | if (o2info_to_user(oiu, req)) | ||
241 | goto bail; | ||
242 | |||
243 | status = 0; | ||
244 | bail: | ||
245 | if (status) | ||
246 | o2info_set_request_error(oiu, req); | ||
247 | |||
248 | return status; | ||
249 | } | ||
250 | |||
251 | int ocfs2_info_handle_fs_features(struct inode *inode, | ||
252 | struct ocfs2_info_request __user *req) | ||
253 | { | ||
254 | int status = -EFAULT; | ||
255 | struct ocfs2_info_fs_features oif; | ||
256 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
257 | |||
258 | if (o2info_from_user(oif, req)) | ||
259 | goto bail; | ||
260 | |||
261 | oif.if_compat_features = osb->s_feature_compat; | ||
262 | oif.if_incompat_features = osb->s_feature_incompat; | ||
263 | oif.if_ro_compat_features = osb->s_feature_ro_compat; | ||
264 | oif.if_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
265 | |||
266 | if (o2info_to_user(oif, req)) | ||
267 | goto bail; | ||
268 | |||
269 | status = 0; | ||
270 | bail: | ||
271 | if (status) | ||
272 | o2info_set_request_error(oif, req); | ||
273 | |||
274 | return status; | ||
275 | } | ||
276 | |||
277 | int ocfs2_info_handle_journal_size(struct inode *inode, | ||
278 | struct ocfs2_info_request __user *req) | ||
279 | { | ||
280 | int status = -EFAULT; | ||
281 | struct ocfs2_info_journal_size oij; | ||
282 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
283 | |||
284 | if (o2info_from_user(oij, req)) | ||
285 | goto bail; | ||
286 | |||
287 | oij.ij_journal_size = osb->journal->j_inode->i_size; | ||
288 | |||
289 | oij.ij_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
290 | |||
291 | if (o2info_to_user(oij, req)) | ||
292 | goto bail; | ||
293 | |||
294 | status = 0; | ||
295 | bail: | ||
296 | if (status) | ||
297 | o2info_set_request_error(oij, req); | ||
298 | |||
299 | return status; | ||
300 | } | ||
301 | |||
302 | int ocfs2_info_handle_unknown(struct inode *inode, | ||
303 | struct ocfs2_info_request __user *req) | ||
304 | { | ||
305 | int status = -EFAULT; | ||
306 | struct ocfs2_info_request oir; | ||
307 | |||
308 | if (o2info_from_user(oir, req)) | ||
309 | goto bail; | ||
310 | |||
311 | oir.ir_flags &= ~OCFS2_INFO_FL_FILLED; | ||
312 | |||
313 | if (o2info_to_user(oir, req)) | ||
314 | goto bail; | ||
315 | |||
316 | status = 0; | ||
317 | bail: | ||
318 | if (status) | ||
319 | o2info_set_request_error(oir, req); | ||
320 | |||
321 | return status; | ||
322 | } | ||
323 | |||
324 | /* | ||
325 | * Validate and distinguish OCFS2_IOC_INFO requests. | ||
326 | * | ||
327 | * - validate the magic number. | ||
328 | * - distinguish different requests. | ||
329 | * - validate size of different requests. | ||
330 | */ | ||
331 | int ocfs2_info_handle_request(struct inode *inode, | ||
332 | struct ocfs2_info_request __user *req) | ||
333 | { | ||
334 | int status = -EFAULT; | ||
335 | struct ocfs2_info_request oir; | ||
336 | |||
337 | if (o2info_from_user(oir, req)) | ||
338 | goto bail; | ||
339 | |||
340 | status = -EINVAL; | ||
341 | if (oir.ir_magic != OCFS2_INFO_MAGIC) | ||
342 | goto bail; | ||
343 | |||
344 | switch (oir.ir_code) { | ||
345 | case OCFS2_INFO_BLOCKSIZE: | ||
346 | if (oir.ir_size == sizeof(struct ocfs2_info_blocksize)) | ||
347 | status = ocfs2_info_handle_blocksize(inode, req); | ||
348 | break; | ||
349 | case OCFS2_INFO_CLUSTERSIZE: | ||
350 | if (oir.ir_size == sizeof(struct ocfs2_info_clustersize)) | ||
351 | status = ocfs2_info_handle_clustersize(inode, req); | ||
352 | break; | ||
353 | case OCFS2_INFO_MAXSLOTS: | ||
354 | if (oir.ir_size == sizeof(struct ocfs2_info_maxslots)) | ||
355 | status = ocfs2_info_handle_maxslots(inode, req); | ||
356 | break; | ||
357 | case OCFS2_INFO_LABEL: | ||
358 | if (oir.ir_size == sizeof(struct ocfs2_info_label)) | ||
359 | status = ocfs2_info_handle_label(inode, req); | ||
360 | break; | ||
361 | case OCFS2_INFO_UUID: | ||
362 | if (oir.ir_size == sizeof(struct ocfs2_info_uuid)) | ||
363 | status = ocfs2_info_handle_uuid(inode, req); | ||
364 | break; | ||
365 | case OCFS2_INFO_FS_FEATURES: | ||
366 | if (oir.ir_size == sizeof(struct ocfs2_info_fs_features)) | ||
367 | status = ocfs2_info_handle_fs_features(inode, req); | ||
368 | break; | ||
369 | case OCFS2_INFO_JOURNAL_SIZE: | ||
370 | if (oir.ir_size == sizeof(struct ocfs2_info_journal_size)) | ||
371 | status = ocfs2_info_handle_journal_size(inode, req); | ||
372 | break; | ||
373 | default: | ||
374 | status = ocfs2_info_handle_unknown(inode, req); | ||
375 | break; | ||
376 | } | ||
377 | |||
378 | bail: | ||
379 | return status; | ||
380 | } | ||
381 | |||
382 | int ocfs2_get_request_ptr(struct ocfs2_info *info, int idx, | ||
383 | u64 *req_addr, int compat_flag) | ||
384 | { | ||
385 | int status = -EFAULT; | ||
386 | u64 __user *bp = NULL; | ||
387 | |||
388 | if (compat_flag) { | ||
389 | #ifdef CONFIG_COMPAT | ||
390 | /* | ||
391 | * pointer bp stores the base address of a pointer array, | ||
392 | * which collects the addresses of all the separate requests. | ||
393 | */ | ||
394 | bp = (u64 __user *)(unsigned long)compat_ptr(info->oi_requests); | ||
395 | #else | ||
396 | BUG(); | ||
397 | #endif | ||
398 | } else | ||
399 | bp = (u64 __user *)(unsigned long)(info->oi_requests); | ||
400 | |||
401 | if (o2info_from_user(*req_addr, bp + idx)) | ||
402 | goto bail; | ||
403 | |||
404 | status = 0; | ||
405 | bail: | ||
406 | return status; | ||
407 | } | ||
408 | |||
409 | /* | ||
410 | * OCFS2_IOC_INFO handles an array of requests passed from userspace. | ||
411 | * | ||
412 | * ocfs2_info_handle() receives a large info aggregation, grabs and | ||
413 | * validates the request count from the header, then breaks it into small | ||
414 | * pieces; later, specific handlers can handle them one by one. | ||
415 | * | ||
416 | * The idea here is to make each separate request small enough to ensure | ||
417 | * better backward & forward compatibility, since a small piece of a | ||
418 | * request is less likely to be broken if the disk layout gets changed. | ||
419 | */ | ||
420 | int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info, | ||
421 | int compat_flag) | ||
422 | { | ||
423 | int i, status = 0; | ||
424 | u64 req_addr; | ||
425 | struct ocfs2_info_request __user *reqp; | ||
426 | |||
427 | if ((info->oi_count > OCFS2_INFO_MAX_REQUEST) || | ||
428 | (!info->oi_requests)) { | ||
429 | status = -EINVAL; | ||
430 | goto bail; | ||
431 | } | ||
432 | |||
433 | for (i = 0; i < info->oi_count; i++) { | ||
434 | |||
435 | status = ocfs2_get_request_ptr(info, i, &req_addr, compat_flag); | ||
436 | if (status) | ||
437 | break; | ||
438 | |||
439 | reqp = (struct ocfs2_info_request *)(unsigned long)req_addr; | ||
440 | if (!reqp) { | ||
441 | status = -EINVAL; | ||
442 | goto bail; | ||
443 | } | ||
444 | |||
445 | status = ocfs2_info_handle_request(inode, reqp); | ||
446 | if (status) | ||
447 | break; | ||
448 | } | ||
449 | |||
450 | bail: | ||
451 | return status; | ||
452 | } | ||
453 | |||
112 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 454 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
113 | { | 455 | { |
114 | struct inode *inode = filp->f_path.dentry->d_inode; | 456 | struct inode *inode = filp->f_path.dentry->d_inode; |
@@ -120,6 +462,7 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
120 | struct reflink_arguments args; | 462 | struct reflink_arguments args; |
121 | const char *old_path, *new_path; | 463 | const char *old_path, *new_path; |
122 | bool preserve; | 464 | bool preserve; |
465 | struct ocfs2_info info; | ||
123 | 466 | ||
124 | switch (cmd) { | 467 | switch (cmd) { |
125 | case OCFS2_IOC_GETFLAGS: | 468 | case OCFS2_IOC_GETFLAGS: |
@@ -174,6 +517,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
174 | preserve = (args.preserve != 0); | 517 | preserve = (args.preserve != 0); |
175 | 518 | ||
176 | return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); | 519 | return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); |
520 | case OCFS2_IOC_INFO: | ||
521 | if (copy_from_user(&info, (struct ocfs2_info __user *)arg, | ||
522 | sizeof(struct ocfs2_info))) | ||
523 | return -EFAULT; | ||
524 | |||
525 | return ocfs2_info_handle(inode, &info, 0); | ||
177 | default: | 526 | default: |
178 | return -ENOTTY; | 527 | return -ENOTTY; |
179 | } | 528 | } |
@@ -185,6 +534,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
185 | bool preserve; | 534 | bool preserve; |
186 | struct reflink_arguments args; | 535 | struct reflink_arguments args; |
187 | struct inode *inode = file->f_path.dentry->d_inode; | 536 | struct inode *inode = file->f_path.dentry->d_inode; |
537 | struct ocfs2_info info; | ||
188 | 538 | ||
189 | switch (cmd) { | 539 | switch (cmd) { |
190 | case OCFS2_IOC32_GETFLAGS: | 540 | case OCFS2_IOC32_GETFLAGS: |
@@ -209,6 +559,12 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
209 | 559 | ||
210 | return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), | 560 | return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), |
211 | compat_ptr(args.new_path), preserve); | 561 | compat_ptr(args.new_path), preserve); |
562 | case OCFS2_IOC_INFO: | ||
563 | if (copy_from_user(&info, (struct ocfs2_info __user *)arg, | ||
564 | sizeof(struct ocfs2_info))) | ||
565 | return -EFAULT; | ||
566 | |||
567 | return ocfs2_info_handle(inode, &info, 1); | ||
212 | default: | 568 | default: |
213 | return -ENOIOCTLCMD; | 569 | return -ENOIOCTLCMD; |
214 | } | 570 | } |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 9b57c0350ff9..faa2303dbf0a 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -301,7 +301,6 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
301 | { | 301 | { |
302 | int status = 0; | 302 | int status = 0; |
303 | unsigned int flushed; | 303 | unsigned int flushed; |
304 | unsigned long old_id; | ||
305 | struct ocfs2_journal *journal = NULL; | 304 | struct ocfs2_journal *journal = NULL; |
306 | 305 | ||
307 | mlog_entry_void(); | 306 | mlog_entry_void(); |
@@ -326,7 +325,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
326 | goto finally; | 325 | goto finally; |
327 | } | 326 | } |
328 | 327 | ||
329 | old_id = ocfs2_inc_trans_id(journal); | 328 | ocfs2_inc_trans_id(journal); |
330 | 329 | ||
331 | flushed = atomic_read(&journal->j_num_trans); | 330 | flushed = atomic_read(&journal->j_num_trans); |
332 | atomic_set(&journal->j_num_trans, 0); | 331 | atomic_set(&journal->j_num_trans, 0); |
@@ -342,9 +341,6 @@ finally: | |||
342 | return status; | 341 | return status; |
343 | } | 342 | } |
344 | 343 | ||
345 | /* pass it NULL and it will allocate a new handle object for you. If | ||
346 | * you pass it a handle however, it may still return error, in which | ||
347 | * case it has free'd the passed handle for you. */ | ||
348 | handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) | 344 | handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) |
349 | { | 345 | { |
350 | journal_t *journal = osb->journal->j_journal; | 346 | journal_t *journal = osb->journal->j_journal; |
@@ -1888,6 +1884,8 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) | |||
1888 | 1884 | ||
1889 | os = &osb->osb_orphan_scan; | 1885 | os = &osb->osb_orphan_scan; |
1890 | 1886 | ||
1887 | mlog(0, "Begin orphan scan\n"); | ||
1888 | |||
1891 | if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) | 1889 | if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) |
1892 | goto out; | 1890 | goto out; |
1893 | 1891 | ||
@@ -1920,6 +1918,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) | |||
1920 | unlock: | 1918 | unlock: |
1921 | ocfs2_orphan_scan_unlock(osb, seqno); | 1919 | ocfs2_orphan_scan_unlock(osb, seqno); |
1922 | out: | 1920 | out: |
1921 | mlog(0, "Orphan scan completed\n"); | ||
1923 | return; | 1922 | return; |
1924 | } | 1923 | } |
1925 | 1924 | ||
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index b5baaa8e710f..43e56b97f9c0 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -67,11 +67,12 @@ struct ocfs2_journal { | |||
67 | struct buffer_head *j_bh; /* Journal disk inode block */ | 67 | struct buffer_head *j_bh; /* Journal disk inode block */ |
68 | atomic_t j_num_trans; /* Number of transactions | 68 | atomic_t j_num_trans; /* Number of transactions |
69 | * currently in the system. */ | 69 | * currently in the system. */ |
70 | spinlock_t j_lock; | ||
70 | unsigned long j_trans_id; | 71 | unsigned long j_trans_id; |
71 | struct rw_semaphore j_trans_barrier; | 72 | struct rw_semaphore j_trans_barrier; |
72 | wait_queue_head_t j_checkpointed; | 73 | wait_queue_head_t j_checkpointed; |
73 | 74 | ||
74 | spinlock_t j_lock; | 75 | /* both fields protected by j_lock*/ |
75 | struct list_head j_la_cleanups; | 76 | struct list_head j_la_cleanups; |
76 | struct work_struct j_recovery_work; | 77 | struct work_struct j_recovery_work; |
77 | }; | 78 | }; |
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 4c18f4ad93b4..7e32db9c2c99 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
@@ -59,10 +59,11 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) | |||
59 | return ret; | 59 | return ret; |
60 | } | 60 | } |
61 | 61 | ||
62 | static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, | 62 | static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, |
63 | struct page *page) | 63 | struct page *page) |
64 | { | 64 | { |
65 | int ret; | 65 | int ret; |
66 | struct inode *inode = file->f_path.dentry->d_inode; | ||
66 | struct address_space *mapping = inode->i_mapping; | 67 | struct address_space *mapping = inode->i_mapping; |
67 | loff_t pos = page_offset(page); | 68 | loff_t pos = page_offset(page); |
68 | unsigned int len = PAGE_CACHE_SIZE; | 69 | unsigned int len = PAGE_CACHE_SIZE; |
@@ -111,7 +112,7 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, | |||
111 | if (page->index == last_index) | 112 | if (page->index == last_index) |
112 | len = ((size - 1) & ~PAGE_CACHE_MASK) + 1; | 113 | len = ((size - 1) & ~PAGE_CACHE_MASK) + 1; |
113 | 114 | ||
114 | ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, | 115 | ret = ocfs2_write_begin_nolock(file, mapping, pos, len, 0, &locked_page, |
115 | &fsdata, di_bh, page); | 116 | &fsdata, di_bh, page); |
116 | if (ret) { | 117 | if (ret) { |
117 | if (ret != -ENOSPC) | 118 | if (ret != -ENOSPC) |
@@ -159,7 +160,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
159 | */ | 160 | */ |
160 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 161 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
161 | 162 | ||
162 | ret = __ocfs2_page_mkwrite(inode, di_bh, page); | 163 | ret = __ocfs2_page_mkwrite(vma->vm_file, di_bh, page); |
163 | 164 | ||
164 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 165 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
165 | 166 | ||
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index a00dda2e4f16..e7bde21149ae 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -171,7 +171,8 @@ bail_add: | |||
171 | ret = ERR_PTR(status); | 171 | ret = ERR_PTR(status); |
172 | goto bail_unlock; | 172 | goto bail_unlock; |
173 | } | 173 | } |
174 | } | 174 | } else |
175 | ocfs2_dentry_attach_gen(dentry); | ||
175 | 176 | ||
176 | bail_unlock: | 177 | bail_unlock: |
177 | /* Don't drop the cluster lock until *after* the d_add -- | 178 | /* Don't drop the cluster lock until *after* the d_add -- |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index c67003b6b5a2..d8408217e3bd 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -150,26 +150,33 @@ typedef void (*ocfs2_lock_callback)(int status, unsigned long data); | |||
150 | struct ocfs2_lock_res { | 150 | struct ocfs2_lock_res { |
151 | void *l_priv; | 151 | void *l_priv; |
152 | struct ocfs2_lock_res_ops *l_ops; | 152 | struct ocfs2_lock_res_ops *l_ops; |
153 | spinlock_t l_lock; | 153 | |
154 | 154 | ||
155 | struct list_head l_blocked_list; | 155 | struct list_head l_blocked_list; |
156 | struct list_head l_mask_waiters; | 156 | struct list_head l_mask_waiters; |
157 | 157 | ||
158 | enum ocfs2_lock_type l_type; | ||
159 | unsigned long l_flags; | 158 | unsigned long l_flags; |
160 | char l_name[OCFS2_LOCK_ID_MAX_LEN]; | 159 | char l_name[OCFS2_LOCK_ID_MAX_LEN]; |
161 | int l_level; | ||
162 | unsigned int l_ro_holders; | 160 | unsigned int l_ro_holders; |
163 | unsigned int l_ex_holders; | 161 | unsigned int l_ex_holders; |
164 | struct ocfs2_dlm_lksb l_lksb; | 162 | unsigned char l_level; |
163 | |||
164 | /* Data packed - type enum ocfs2_lock_type */ | ||
165 | unsigned char l_type; | ||
165 | 166 | ||
166 | /* used from AST/BAST funcs. */ | 167 | /* used from AST/BAST funcs. */ |
167 | enum ocfs2_ast_action l_action; | 168 | /* Data packed - enum type ocfs2_ast_action */ |
168 | enum ocfs2_unlock_action l_unlock_action; | 169 | unsigned char l_action; |
169 | int l_requested; | 170 | /* Data packed - enum type ocfs2_unlock_action */ |
170 | int l_blocking; | 171 | unsigned char l_unlock_action; |
172 | unsigned char l_requested; | ||
173 | unsigned char l_blocking; | ||
171 | unsigned int l_pending_gen; | 174 | unsigned int l_pending_gen; |
172 | 175 | ||
176 | spinlock_t l_lock; | ||
177 | |||
178 | struct ocfs2_dlm_lksb l_lksb; | ||
179 | |||
173 | wait_queue_head_t l_event; | 180 | wait_queue_head_t l_event; |
174 | 181 | ||
175 | struct list_head l_debug_list; | 182 | struct list_head l_debug_list; |
@@ -243,7 +250,7 @@ enum ocfs2_local_alloc_state | |||
243 | 250 | ||
244 | enum ocfs2_mount_options | 251 | enum ocfs2_mount_options |
245 | { | 252 | { |
246 | OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Heartbeat started in local mode */ | 253 | OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Local heartbeat */ |
247 | OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */ | 254 | OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */ |
248 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ | 255 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ |
249 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ | 256 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ |
@@ -256,6 +263,10 @@ enum ocfs2_mount_options | |||
256 | control lists */ | 263 | control lists */ |
257 | OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ | 264 | OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ |
258 | OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ | 265 | OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ |
266 | OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12, /* Allow concurrent O_DIRECT | ||
267 | writes */ | ||
268 | OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */ | ||
269 | OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ | ||
259 | }; | 270 | }; |
260 | 271 | ||
261 | #define OCFS2_OSB_SOFT_RO 0x0001 | 272 | #define OCFS2_OSB_SOFT_RO 0x0001 |
@@ -277,7 +288,8 @@ struct ocfs2_super | |||
277 | struct super_block *sb; | 288 | struct super_block *sb; |
278 | struct inode *root_inode; | 289 | struct inode *root_inode; |
279 | struct inode *sys_root_inode; | 290 | struct inode *sys_root_inode; |
280 | struct inode *system_inodes[NUM_SYSTEM_INODES]; | 291 | struct inode *global_system_inodes[NUM_GLOBAL_SYSTEM_INODES]; |
292 | struct inode **local_system_inodes; | ||
281 | 293 | ||
282 | struct ocfs2_slot_info *slot_info; | 294 | struct ocfs2_slot_info *slot_info; |
283 | 295 | ||
@@ -368,6 +380,8 @@ struct ocfs2_super | |||
368 | struct ocfs2_alloc_stats alloc_stats; | 380 | struct ocfs2_alloc_stats alloc_stats; |
369 | char dev_str[20]; /* "major,minor" of the device */ | 381 | char dev_str[20]; /* "major,minor" of the device */ |
370 | 382 | ||
383 | u8 osb_stackflags; | ||
384 | |||
371 | char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; | 385 | char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; |
372 | struct ocfs2_cluster_connection *cconn; | 386 | struct ocfs2_cluster_connection *cconn; |
373 | struct ocfs2_lock_res osb_super_lockres; | 387 | struct ocfs2_lock_res osb_super_lockres; |
@@ -601,10 +615,35 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) | |||
601 | return ret; | 615 | return ret; |
602 | } | 616 | } |
603 | 617 | ||
604 | static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) | 618 | static inline int ocfs2_clusterinfo_valid(struct ocfs2_super *osb) |
605 | { | 619 | { |
606 | return (osb->s_feature_incompat & | 620 | return (osb->s_feature_incompat & |
607 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK); | 621 | (OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK | |
622 | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)); | ||
623 | } | ||
624 | |||
625 | static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) | ||
626 | { | ||
627 | if (ocfs2_clusterinfo_valid(osb) && | ||
628 | memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, | ||
629 | OCFS2_STACK_LABEL_LEN)) | ||
630 | return 1; | ||
631 | return 0; | ||
632 | } | ||
633 | |||
634 | static inline int ocfs2_o2cb_stack(struct ocfs2_super *osb) | ||
635 | { | ||
636 | if (ocfs2_clusterinfo_valid(osb) && | ||
637 | !memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, | ||
638 | OCFS2_STACK_LABEL_LEN)) | ||
639 | return 1; | ||
640 | return 0; | ||
641 | } | ||
642 | |||
643 | static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) | ||
644 | { | ||
645 | return ocfs2_o2cb_stack(osb) && | ||
646 | (osb->osb_stackflags & OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT); | ||
608 | } | 647 | } |
609 | 648 | ||
610 | static inline int ocfs2_mount_local(struct ocfs2_super *osb) | 649 | static inline int ocfs2_mount_local(struct ocfs2_super *osb) |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index fa31d05e41b7..c2e4f8222e2f 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -101,7 +101,8 @@ | |||
101 | | OCFS2_FEATURE_INCOMPAT_META_ECC \ | 101 | | OCFS2_FEATURE_INCOMPAT_META_ECC \ |
102 | | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ | 102 | | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ |
103 | | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ | 103 | | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ |
104 | | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG) | 104 | | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \ |
105 | | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO) | ||
105 | #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ | 106 | #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ |
106 | | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ | 107 | | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ |
107 | | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) | 108 | | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) |
@@ -170,6 +171,13 @@ | |||
170 | #define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000 | 171 | #define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000 |
171 | 172 | ||
172 | /* | 173 | /* |
174 | * Incompat bit to indicate useable clusterinfo with stackflags for all | ||
175 | * cluster stacks (userspace and o2cb). If this bit is set, | ||
176 | * INCOMPAT_USERSPACE_STACK becomes superfluous and thus should not be set. | ||
177 | */ | ||
178 | #define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x4000 | ||
179 | |||
180 | /* | ||
173 | * backup superblock flag is used to indicate that this volume | 181 | * backup superblock flag is used to indicate that this volume |
174 | * has backup superblocks. | 182 | * has backup superblocks. |
175 | */ | 183 | */ |
@@ -292,10 +300,13 @@ | |||
292 | #define OCFS2_VOL_UUID_LEN 16 | 300 | #define OCFS2_VOL_UUID_LEN 16 |
293 | #define OCFS2_MAX_VOL_LABEL_LEN 64 | 301 | #define OCFS2_MAX_VOL_LABEL_LEN 64 |
294 | 302 | ||
295 | /* The alternate, userspace stack fields */ | 303 | /* The cluster stack fields */ |
296 | #define OCFS2_STACK_LABEL_LEN 4 | 304 | #define OCFS2_STACK_LABEL_LEN 4 |
297 | #define OCFS2_CLUSTER_NAME_LEN 16 | 305 | #define OCFS2_CLUSTER_NAME_LEN 16 |
298 | 306 | ||
307 | /* Classic (historically speaking) cluster stack */ | ||
308 | #define OCFS2_CLASSIC_CLUSTER_STACK "o2cb" | ||
309 | |||
299 | /* Journal limits (in bytes) */ | 310 | /* Journal limits (in bytes) */ |
300 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) | 311 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) |
301 | 312 | ||
@@ -305,6 +316,11 @@ | |||
305 | */ | 316 | */ |
306 | #define OCFS2_MIN_XATTR_INLINE_SIZE 256 | 317 | #define OCFS2_MIN_XATTR_INLINE_SIZE 256 |
307 | 318 | ||
319 | /* | ||
320 | * Cluster info flags (ocfs2_cluster_info.ci_stackflags) | ||
321 | */ | ||
322 | #define OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT (0x01) | ||
323 | |||
308 | struct ocfs2_system_inode_info { | 324 | struct ocfs2_system_inode_info { |
309 | char *si_name; | 325 | char *si_name; |
310 | int si_iflags; | 326 | int si_iflags; |
@@ -322,6 +338,7 @@ enum { | |||
322 | USER_QUOTA_SYSTEM_INODE, | 338 | USER_QUOTA_SYSTEM_INODE, |
323 | GROUP_QUOTA_SYSTEM_INODE, | 339 | GROUP_QUOTA_SYSTEM_INODE, |
324 | #define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE | 340 | #define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE |
341 | #define OCFS2_FIRST_LOCAL_SYSTEM_INODE ORPHAN_DIR_SYSTEM_INODE | ||
325 | ORPHAN_DIR_SYSTEM_INODE, | 342 | ORPHAN_DIR_SYSTEM_INODE, |
326 | EXTENT_ALLOC_SYSTEM_INODE, | 343 | EXTENT_ALLOC_SYSTEM_INODE, |
327 | INODE_ALLOC_SYSTEM_INODE, | 344 | INODE_ALLOC_SYSTEM_INODE, |
@@ -330,8 +347,12 @@ enum { | |||
330 | TRUNCATE_LOG_SYSTEM_INODE, | 347 | TRUNCATE_LOG_SYSTEM_INODE, |
331 | LOCAL_USER_QUOTA_SYSTEM_INODE, | 348 | LOCAL_USER_QUOTA_SYSTEM_INODE, |
332 | LOCAL_GROUP_QUOTA_SYSTEM_INODE, | 349 | LOCAL_GROUP_QUOTA_SYSTEM_INODE, |
350 | #define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE | ||
333 | NUM_SYSTEM_INODES | 351 | NUM_SYSTEM_INODES |
334 | }; | 352 | }; |
353 | #define NUM_GLOBAL_SYSTEM_INODES OCFS2_LAST_GLOBAL_SYSTEM_INODE | ||
354 | #define NUM_LOCAL_SYSTEM_INODES \ | ||
355 | (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE) | ||
335 | 356 | ||
336 | static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { | 357 | static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { |
337 | /* Global system inodes (single copy) */ | 358 | /* Global system inodes (single copy) */ |
@@ -360,6 +381,7 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { | |||
360 | /* Parameter passed from mount.ocfs2 to module */ | 381 | /* Parameter passed from mount.ocfs2 to module */ |
361 | #define OCFS2_HB_NONE "heartbeat=none" | 382 | #define OCFS2_HB_NONE "heartbeat=none" |
362 | #define OCFS2_HB_LOCAL "heartbeat=local" | 383 | #define OCFS2_HB_LOCAL "heartbeat=local" |
384 | #define OCFS2_HB_GLOBAL "heartbeat=global" | ||
363 | 385 | ||
364 | /* | 386 | /* |
365 | * OCFS2 directory file types. Only the low 3 bits are used. The | 387 | * OCFS2 directory file types. Only the low 3 bits are used. The |
@@ -566,9 +588,21 @@ struct ocfs2_slot_map_extended { | |||
566 | */ | 588 | */ |
567 | }; | 589 | }; |
568 | 590 | ||
591 | /* | ||
592 | * ci_stackflags is only valid if the incompat bit | ||
593 | * OCFS2_FEATURE_INCOMPAT_CLUSTERINFO is set. | ||
594 | */ | ||
569 | struct ocfs2_cluster_info { | 595 | struct ocfs2_cluster_info { |
570 | /*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN]; | 596 | /*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN]; |
571 | __le32 ci_reserved; | 597 | union { |
598 | __le32 ci_reserved; | ||
599 | struct { | ||
600 | __u8 ci_stackflags; | ||
601 | __u8 ci_reserved1; | ||
602 | __u8 ci_reserved2; | ||
603 | __u8 ci_reserved3; | ||
604 | }; | ||
605 | }; | ||
572 | /*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN]; | 606 | /*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN]; |
573 | /*18*/ | 607 | /*18*/ |
574 | }; | 608 | }; |
@@ -605,9 +639,9 @@ struct ocfs2_super_block { | |||
605 | * group header */ | 639 | * group header */ |
606 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ | 640 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ |
607 | /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ | 641 | /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ |
608 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace | 642 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Only valid if either |
609 | stack. Only valid | 643 | userspace or clusterinfo |
610 | with INCOMPAT flag. */ | 644 | INCOMPAT flag set. */ |
611 | /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size | 645 | /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size |
612 | for this fs*/ | 646 | for this fs*/ |
613 | __le16 s_reserved0; | 647 | __le16 s_reserved0; |
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index 5d241505690b..b46f39bf7438 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h | |||
@@ -76,4 +76,99 @@ struct reflink_arguments { | |||
76 | }; | 76 | }; |
77 | #define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) | 77 | #define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) |
78 | 78 | ||
79 | /* Following definitions dedicated for ocfs2_info_request ioctls. */ | ||
80 | #define OCFS2_INFO_MAX_REQUEST (50) | ||
81 | #define OCFS2_TEXT_UUID_LEN (OCFS2_VOL_UUID_LEN * 2) | ||
82 | |||
83 | /* Magic number of all requests */ | ||
84 | #define OCFS2_INFO_MAGIC (0x4F32494E) | ||
85 | |||
86 | /* | ||
87 | * Always try to separate info request into small pieces to | ||
88 | * guarantee backward and forward compatibility. | ||
89 | */ | ||
90 | struct ocfs2_info { | ||
91 | __u64 oi_requests; /* Array of __u64 pointers to requests */ | ||
92 | __u32 oi_count; /* Number of requests in oi_requests */ | ||
93 | __u32 oi_pad; | ||
94 | }; | ||
95 | |||
96 | struct ocfs2_info_request { | ||
97 | /*00*/ __u32 ir_magic; /* Magic number */ | ||
98 | __u32 ir_code; /* Info request code */ | ||
99 | __u32 ir_size; /* Size of request */ | ||
100 | __u32 ir_flags; /* Request flags */ | ||
101 | /*10*/ /* Request specific fields */ | ||
102 | }; | ||
103 | |||
104 | struct ocfs2_info_clustersize { | ||
105 | struct ocfs2_info_request ic_req; | ||
106 | __u32 ic_clustersize; | ||
107 | __u32 ic_pad; | ||
108 | }; | ||
109 | |||
110 | struct ocfs2_info_blocksize { | ||
111 | struct ocfs2_info_request ib_req; | ||
112 | __u32 ib_blocksize; | ||
113 | __u32 ib_pad; | ||
114 | }; | ||
115 | |||
116 | struct ocfs2_info_maxslots { | ||
117 | struct ocfs2_info_request im_req; | ||
118 | __u32 im_max_slots; | ||
119 | __u32 im_pad; | ||
120 | }; | ||
121 | |||
122 | struct ocfs2_info_label { | ||
123 | struct ocfs2_info_request il_req; | ||
124 | __u8 il_label[OCFS2_MAX_VOL_LABEL_LEN]; | ||
125 | } __attribute__ ((packed)); | ||
126 | |||
127 | struct ocfs2_info_uuid { | ||
128 | struct ocfs2_info_request iu_req; | ||
129 | __u8 iu_uuid_str[OCFS2_TEXT_UUID_LEN + 1]; | ||
130 | } __attribute__ ((packed)); | ||
131 | |||
132 | struct ocfs2_info_fs_features { | ||
133 | struct ocfs2_info_request if_req; | ||
134 | __u32 if_compat_features; | ||
135 | __u32 if_incompat_features; | ||
136 | __u32 if_ro_compat_features; | ||
137 | __u32 if_pad; | ||
138 | }; | ||
139 | |||
140 | struct ocfs2_info_journal_size { | ||
141 | struct ocfs2_info_request ij_req; | ||
142 | __u64 ij_journal_size; | ||
143 | }; | ||
144 | |||
145 | /* Codes for ocfs2_info_request */ | ||
146 | enum ocfs2_info_type { | ||
147 | OCFS2_INFO_CLUSTERSIZE = 1, | ||
148 | OCFS2_INFO_BLOCKSIZE, | ||
149 | OCFS2_INFO_MAXSLOTS, | ||
150 | OCFS2_INFO_LABEL, | ||
151 | OCFS2_INFO_UUID, | ||
152 | OCFS2_INFO_FS_FEATURES, | ||
153 | OCFS2_INFO_JOURNAL_SIZE, | ||
154 | OCFS2_INFO_NUM_TYPES | ||
155 | }; | ||
156 | |||
157 | /* Flags for struct ocfs2_info_request */ | ||
158 | /* Filled by the caller */ | ||
159 | #define OCFS2_INFO_FL_NON_COHERENT (0x00000001) /* Cluster coherency not | ||
160 | required. This is a hint. | ||
161 | It is up to ocfs2 whether | ||
162 | the request can be fulfilled | ||
163 | without locking. */ | ||
164 | /* Filled by ocfs2 */ | ||
165 | #define OCFS2_INFO_FL_FILLED (0x40000000) /* Filesystem understood | ||
166 | this request and | ||
167 | filled in the answer */ | ||
168 | |||
169 | #define OCFS2_INFO_FL_ERROR (0x80000000) /* Error happened during | ||
170 | request handling. */ | ||
171 | |||
172 | #define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info) | ||
173 | |||
79 | #endif /* OCFS2_IOCTL_H */ | 174 | #endif /* OCFS2_IOCTL_H */ |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index efdd75607406..b5f9160e93e9 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -49,6 +49,7 @@ | |||
49 | 49 | ||
50 | struct ocfs2_cow_context { | 50 | struct ocfs2_cow_context { |
51 | struct inode *inode; | 51 | struct inode *inode; |
52 | struct file *file; | ||
52 | u32 cow_start; | 53 | u32 cow_start; |
53 | u32 cow_len; | 54 | u32 cow_len; |
54 | struct ocfs2_extent_tree data_et; | 55 | struct ocfs2_extent_tree data_et; |
@@ -2932,13 +2933,16 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2932 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); | 2933 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); |
2933 | struct page *page; | 2934 | struct page *page; |
2934 | pgoff_t page_index; | 2935 | pgoff_t page_index; |
2935 | unsigned int from, to; | 2936 | unsigned int from, to, readahead_pages; |
2936 | loff_t offset, end, map_end; | 2937 | loff_t offset, end, map_end; |
2937 | struct address_space *mapping = context->inode->i_mapping; | 2938 | struct address_space *mapping = context->inode->i_mapping; |
2938 | 2939 | ||
2939 | mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster, | 2940 | mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster, |
2940 | new_cluster, new_len, cpos); | 2941 | new_cluster, new_len, cpos); |
2941 | 2942 | ||
2943 | readahead_pages = | ||
2944 | (ocfs2_cow_contig_clusters(sb) << | ||
2945 | OCFS2_SB(sb)->s_clustersize_bits) >> PAGE_CACHE_SHIFT; | ||
2942 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; | 2946 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; |
2943 | end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); | 2947 | end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); |
2944 | /* | 2948 | /* |
@@ -2969,6 +2973,14 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2969 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) | 2973 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) |
2970 | BUG_ON(PageDirty(page)); | 2974 | BUG_ON(PageDirty(page)); |
2971 | 2975 | ||
2976 | if (PageReadahead(page) && context->file) { | ||
2977 | page_cache_async_readahead(mapping, | ||
2978 | &context->file->f_ra, | ||
2979 | context->file, | ||
2980 | page, page_index, | ||
2981 | readahead_pages); | ||
2982 | } | ||
2983 | |||
2972 | if (!PageUptodate(page)) { | 2984 | if (!PageUptodate(page)) { |
2973 | ret = block_read_full_page(page, ocfs2_get_block); | 2985 | ret = block_read_full_page(page, ocfs2_get_block); |
2974 | if (ret) { | 2986 | if (ret) { |
@@ -3409,12 +3421,35 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context) | |||
3409 | return ret; | 3421 | return ret; |
3410 | } | 3422 | } |
3411 | 3423 | ||
3424 | static void ocfs2_readahead_for_cow(struct inode *inode, | ||
3425 | struct file *file, | ||
3426 | u32 start, u32 len) | ||
3427 | { | ||
3428 | struct address_space *mapping; | ||
3429 | pgoff_t index; | ||
3430 | unsigned long num_pages; | ||
3431 | int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; | ||
3432 | |||
3433 | if (!file) | ||
3434 | return; | ||
3435 | |||
3436 | mapping = file->f_mapping; | ||
3437 | num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT; | ||
3438 | if (!num_pages) | ||
3439 | num_pages = 1; | ||
3440 | |||
3441 | index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT; | ||
3442 | page_cache_sync_readahead(mapping, &file->f_ra, file, | ||
3443 | index, num_pages); | ||
3444 | } | ||
3445 | |||
3412 | /* | 3446 | /* |
3413 | * Starting at cpos, try to CoW write_len clusters. Don't CoW | 3447 | * Starting at cpos, try to CoW write_len clusters. Don't CoW |
3414 | * past max_cpos. This will stop when it runs into a hole or an | 3448 | * past max_cpos. This will stop when it runs into a hole or an |
3415 | * unrefcounted extent. | 3449 | * unrefcounted extent. |
3416 | */ | 3450 | */ |
3417 | static int ocfs2_refcount_cow_hunk(struct inode *inode, | 3451 | static int ocfs2_refcount_cow_hunk(struct inode *inode, |
3452 | struct file *file, | ||
3418 | struct buffer_head *di_bh, | 3453 | struct buffer_head *di_bh, |
3419 | u32 cpos, u32 write_len, u32 max_cpos) | 3454 | u32 cpos, u32 write_len, u32 max_cpos) |
3420 | { | 3455 | { |
@@ -3443,6 +3478,8 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, | |||
3443 | 3478 | ||
3444 | BUG_ON(cow_len == 0); | 3479 | BUG_ON(cow_len == 0); |
3445 | 3480 | ||
3481 | ocfs2_readahead_for_cow(inode, file, cow_start, cow_len); | ||
3482 | |||
3446 | context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); | 3483 | context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); |
3447 | if (!context) { | 3484 | if (!context) { |
3448 | ret = -ENOMEM; | 3485 | ret = -ENOMEM; |
@@ -3464,6 +3501,7 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, | |||
3464 | context->ref_root_bh = ref_root_bh; | 3501 | context->ref_root_bh = ref_root_bh; |
3465 | context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; | 3502 | context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; |
3466 | context->get_clusters = ocfs2_di_get_clusters; | 3503 | context->get_clusters = ocfs2_di_get_clusters; |
3504 | context->file = file; | ||
3467 | 3505 | ||
3468 | ocfs2_init_dinode_extent_tree(&context->data_et, | 3506 | ocfs2_init_dinode_extent_tree(&context->data_et, |
3469 | INODE_CACHE(inode), di_bh); | 3507 | INODE_CACHE(inode), di_bh); |
@@ -3492,6 +3530,7 @@ out: | |||
3492 | * clusters between cpos and cpos+write_len are safe to modify. | 3530 | * clusters between cpos and cpos+write_len are safe to modify. |
3493 | */ | 3531 | */ |
3494 | int ocfs2_refcount_cow(struct inode *inode, | 3532 | int ocfs2_refcount_cow(struct inode *inode, |
3533 | struct file *file, | ||
3495 | struct buffer_head *di_bh, | 3534 | struct buffer_head *di_bh, |
3496 | u32 cpos, u32 write_len, u32 max_cpos) | 3535 | u32 cpos, u32 write_len, u32 max_cpos) |
3497 | { | 3536 | { |
@@ -3511,7 +3550,7 @@ int ocfs2_refcount_cow(struct inode *inode, | |||
3511 | num_clusters = write_len; | 3550 | num_clusters = write_len; |
3512 | 3551 | ||
3513 | if (ext_flags & OCFS2_EXT_REFCOUNTED) { | 3552 | if (ext_flags & OCFS2_EXT_REFCOUNTED) { |
3514 | ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos, | 3553 | ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos, |
3515 | num_clusters, max_cpos); | 3554 | num_clusters, max_cpos); |
3516 | if (ret) { | 3555 | if (ret) { |
3517 | mlog_errno(ret); | 3556 | mlog_errno(ret); |
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index 9983ba1570e2..c8ce46f7d8e3 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h | |||
@@ -21,14 +21,14 @@ struct ocfs2_refcount_tree { | |||
21 | struct rb_node rf_node; | 21 | struct rb_node rf_node; |
22 | u64 rf_blkno; | 22 | u64 rf_blkno; |
23 | u32 rf_generation; | 23 | u32 rf_generation; |
24 | struct kref rf_getcnt; | ||
24 | struct rw_semaphore rf_sem; | 25 | struct rw_semaphore rf_sem; |
25 | struct ocfs2_lock_res rf_lockres; | 26 | struct ocfs2_lock_res rf_lockres; |
26 | struct kref rf_getcnt; | ||
27 | int rf_removed; | 27 | int rf_removed; |
28 | 28 | ||
29 | /* the following 4 fields are used by caching_info. */ | 29 | /* the following 4 fields are used by caching_info. */ |
30 | struct ocfs2_caching_info rf_ci; | ||
31 | spinlock_t rf_lock; | 30 | spinlock_t rf_lock; |
31 | struct ocfs2_caching_info rf_ci; | ||
32 | struct mutex rf_io_mutex; | 32 | struct mutex rf_io_mutex; |
33 | struct super_block *rf_sb; | 33 | struct super_block *rf_sb; |
34 | }; | 34 | }; |
@@ -52,7 +52,8 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, | |||
52 | u32 clusters, | 52 | u32 clusters, |
53 | int *credits, | 53 | int *credits, |
54 | int *ref_blocks); | 54 | int *ref_blocks); |
55 | int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh, | 55 | int ocfs2_refcount_cow(struct inode *inode, |
56 | struct file *filep, struct buffer_head *di_bh, | ||
56 | u32 cpos, u32 write_len, u32 max_cpos); | 57 | u32 cpos, u32 write_len, u32 max_cpos); |
57 | 58 | ||
58 | typedef int (ocfs2_post_refcount_func)(struct inode *inode, | 59 | typedef int (ocfs2_post_refcount_func)(struct inode *inode, |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index bfbd7e9e949f..ab4e0172cc1d 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
@@ -357,7 +357,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, | |||
357 | { | 357 | { |
358 | int status = 0; | 358 | int status = 0; |
359 | u64 blkno; | 359 | u64 blkno; |
360 | unsigned long long blocks, bytes; | 360 | unsigned long long blocks, bytes = 0; |
361 | unsigned int i; | 361 | unsigned int i; |
362 | struct buffer_head *bh; | 362 | struct buffer_head *bh; |
363 | 363 | ||
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 0d3049f696c5..19965b00c43c 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c | |||
@@ -283,6 +283,8 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) | |||
283 | /* for now we only have one cluster/node, make sure we see it | 283 | /* for now we only have one cluster/node, make sure we see it |
284 | * in the heartbeat universe */ | 284 | * in the heartbeat universe */ |
285 | if (!o2hb_check_local_node_heartbeating()) { | 285 | if (!o2hb_check_local_node_heartbeating()) { |
286 | if (o2hb_global_heartbeat_active()) | ||
287 | mlog(ML_ERROR, "Global heartbeat not started\n"); | ||
286 | rc = -EINVAL; | 288 | rc = -EINVAL; |
287 | goto out; | 289 | goto out; |
288 | } | 290 | } |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 849c2f0e0a0e..5fed60de7630 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -1380,6 +1380,14 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, | |||
1380 | } | 1380 | } |
1381 | 1381 | ||
1382 | le16_add_cpu(&bg->bg_free_bits_count, -num_bits); | 1382 | le16_add_cpu(&bg->bg_free_bits_count, -num_bits); |
1383 | if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { | ||
1384 | ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" | ||
1385 | " count %u but claims %u are freed. num_bits %d", | ||
1386 | (unsigned long long)le64_to_cpu(bg->bg_blkno), | ||
1387 | le16_to_cpu(bg->bg_bits), | ||
1388 | le16_to_cpu(bg->bg_free_bits_count), num_bits); | ||
1389 | return -EROFS; | ||
1390 | } | ||
1383 | while(num_bits--) | 1391 | while(num_bits--) |
1384 | ocfs2_set_bit(bit_off++, bitmap); | 1392 | ocfs2_set_bit(bit_off++, bitmap); |
1385 | 1393 | ||
@@ -2419,6 +2427,14 @@ static int ocfs2_block_group_clear_bits(handle_t *handle, | |||
2419 | (unsigned long *) undo_bg->bg_bitmap); | 2427 | (unsigned long *) undo_bg->bg_bitmap); |
2420 | } | 2428 | } |
2421 | le16_add_cpu(&bg->bg_free_bits_count, num_bits); | 2429 | le16_add_cpu(&bg->bg_free_bits_count, num_bits); |
2430 | if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { | ||
2431 | ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" | ||
2432 | " count %u but claims %u are freed. num_bits %d", | ||
2433 | (unsigned long long)le64_to_cpu(bg->bg_blkno), | ||
2434 | le16_to_cpu(bg->bg_bits), | ||
2435 | le16_to_cpu(bg->bg_free_bits_count), num_bits); | ||
2436 | return -EROFS; | ||
2437 | } | ||
2422 | 2438 | ||
2423 | if (undo_fn) | 2439 | if (undo_fn) |
2424 | jbd_unlock_bh_state(group_bh); | 2440 | jbd_unlock_bh_state(group_bh); |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index fa1be1b304d1..a8a0ca44f88f 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -162,6 +162,7 @@ enum { | |||
162 | Opt_nointr, | 162 | Opt_nointr, |
163 | Opt_hb_none, | 163 | Opt_hb_none, |
164 | Opt_hb_local, | 164 | Opt_hb_local, |
165 | Opt_hb_global, | ||
165 | Opt_data_ordered, | 166 | Opt_data_ordered, |
166 | Opt_data_writeback, | 167 | Opt_data_writeback, |
167 | Opt_atime_quantum, | 168 | Opt_atime_quantum, |
@@ -177,6 +178,8 @@ enum { | |||
177 | Opt_noacl, | 178 | Opt_noacl, |
178 | Opt_usrquota, | 179 | Opt_usrquota, |
179 | Opt_grpquota, | 180 | Opt_grpquota, |
181 | Opt_coherency_buffered, | ||
182 | Opt_coherency_full, | ||
180 | Opt_resv_level, | 183 | Opt_resv_level, |
181 | Opt_dir_resv_level, | 184 | Opt_dir_resv_level, |
182 | Opt_err, | 185 | Opt_err, |
@@ -190,6 +193,7 @@ static const match_table_t tokens = { | |||
190 | {Opt_nointr, "nointr"}, | 193 | {Opt_nointr, "nointr"}, |
191 | {Opt_hb_none, OCFS2_HB_NONE}, | 194 | {Opt_hb_none, OCFS2_HB_NONE}, |
192 | {Opt_hb_local, OCFS2_HB_LOCAL}, | 195 | {Opt_hb_local, OCFS2_HB_LOCAL}, |
196 | {Opt_hb_global, OCFS2_HB_GLOBAL}, | ||
193 | {Opt_data_ordered, "data=ordered"}, | 197 | {Opt_data_ordered, "data=ordered"}, |
194 | {Opt_data_writeback, "data=writeback"}, | 198 | {Opt_data_writeback, "data=writeback"}, |
195 | {Opt_atime_quantum, "atime_quantum=%u"}, | 199 | {Opt_atime_quantum, "atime_quantum=%u"}, |
@@ -205,6 +209,8 @@ static const match_table_t tokens = { | |||
205 | {Opt_noacl, "noacl"}, | 209 | {Opt_noacl, "noacl"}, |
206 | {Opt_usrquota, "usrquota"}, | 210 | {Opt_usrquota, "usrquota"}, |
207 | {Opt_grpquota, "grpquota"}, | 211 | {Opt_grpquota, "grpquota"}, |
212 | {Opt_coherency_buffered, "coherency=buffered"}, | ||
213 | {Opt_coherency_full, "coherency=full"}, | ||
208 | {Opt_resv_level, "resv_level=%u"}, | 214 | {Opt_resv_level, "resv_level=%u"}, |
209 | {Opt_dir_resv_level, "dir_resv_level=%u"}, | 215 | {Opt_dir_resv_level, "dir_resv_level=%u"}, |
210 | {Opt_err, NULL} | 216 | {Opt_err, NULL} |
@@ -514,11 +520,11 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb) | |||
514 | 520 | ||
515 | mlog_entry_void(); | 521 | mlog_entry_void(); |
516 | 522 | ||
517 | for (i = 0; i < NUM_SYSTEM_INODES; i++) { | 523 | for (i = 0; i < NUM_GLOBAL_SYSTEM_INODES; i++) { |
518 | inode = osb->system_inodes[i]; | 524 | inode = osb->global_system_inodes[i]; |
519 | if (inode) { | 525 | if (inode) { |
520 | iput(inode); | 526 | iput(inode); |
521 | osb->system_inodes[i] = NULL; | 527 | osb->global_system_inodes[i] = NULL; |
522 | } | 528 | } |
523 | } | 529 | } |
524 | 530 | ||
@@ -534,6 +540,20 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb) | |||
534 | osb->root_inode = NULL; | 540 | osb->root_inode = NULL; |
535 | } | 541 | } |
536 | 542 | ||
543 | if (!osb->local_system_inodes) | ||
544 | goto out; | ||
545 | |||
546 | for (i = 0; i < NUM_LOCAL_SYSTEM_INODES * osb->max_slots; i++) { | ||
547 | if (osb->local_system_inodes[i]) { | ||
548 | iput(osb->local_system_inodes[i]); | ||
549 | osb->local_system_inodes[i] = NULL; | ||
550 | } | ||
551 | } | ||
552 | |||
553 | kfree(osb->local_system_inodes); | ||
554 | osb->local_system_inodes = NULL; | ||
555 | |||
556 | out: | ||
537 | mlog_exit(0); | 557 | mlog_exit(0); |
538 | } | 558 | } |
539 | 559 | ||
@@ -608,6 +628,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
608 | int ret = 0; | 628 | int ret = 0; |
609 | struct mount_options parsed_options; | 629 | struct mount_options parsed_options; |
610 | struct ocfs2_super *osb = OCFS2_SB(sb); | 630 | struct ocfs2_super *osb = OCFS2_SB(sb); |
631 | u32 tmp; | ||
611 | 632 | ||
612 | lock_kernel(); | 633 | lock_kernel(); |
613 | 634 | ||
@@ -617,8 +638,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
617 | goto out; | 638 | goto out; |
618 | } | 639 | } |
619 | 640 | ||
620 | if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) != | 641 | tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | |
621 | (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) { | 642 | OCFS2_MOUNT_HB_NONE; |
643 | if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { | ||
622 | ret = -EINVAL; | 644 | ret = -EINVAL; |
623 | mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); | 645 | mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); |
624 | goto out; | 646 | goto out; |
@@ -809,23 +831,29 @@ bail: | |||
809 | 831 | ||
810 | static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) | 832 | static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) |
811 | { | 833 | { |
812 | if (ocfs2_mount_local(osb)) { | 834 | u32 hb_enabled = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL; |
813 | if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { | 835 | |
836 | if (osb->s_mount_opt & hb_enabled) { | ||
837 | if (ocfs2_mount_local(osb)) { | ||
814 | mlog(ML_ERROR, "Cannot heartbeat on a locally " | 838 | mlog(ML_ERROR, "Cannot heartbeat on a locally " |
815 | "mounted device.\n"); | 839 | "mounted device.\n"); |
816 | return -EINVAL; | 840 | return -EINVAL; |
817 | } | 841 | } |
818 | } | 842 | if (ocfs2_userspace_stack(osb)) { |
819 | |||
820 | if (ocfs2_userspace_stack(osb)) { | ||
821 | if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { | ||
822 | mlog(ML_ERROR, "Userspace stack expected, but " | 843 | mlog(ML_ERROR, "Userspace stack expected, but " |
823 | "o2cb heartbeat arguments passed to mount\n"); | 844 | "o2cb heartbeat arguments passed to mount\n"); |
824 | return -EINVAL; | 845 | return -EINVAL; |
825 | } | 846 | } |
847 | if (((osb->s_mount_opt & OCFS2_MOUNT_HB_GLOBAL) && | ||
848 | !ocfs2_cluster_o2cb_global_heartbeat(osb)) || | ||
849 | ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) && | ||
850 | ocfs2_cluster_o2cb_global_heartbeat(osb))) { | ||
851 | mlog(ML_ERROR, "Mismatching o2cb heartbeat modes\n"); | ||
852 | return -EINVAL; | ||
853 | } | ||
826 | } | 854 | } |
827 | 855 | ||
828 | if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { | 856 | if (!(osb->s_mount_opt & hb_enabled)) { |
829 | if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && | 857 | if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && |
830 | !ocfs2_userspace_stack(osb)) { | 858 | !ocfs2_userspace_stack(osb)) { |
831 | mlog(ML_ERROR, "Heartbeat has to be started to mount " | 859 | mlog(ML_ERROR, "Heartbeat has to be started to mount " |
@@ -1291,6 +1319,7 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1291 | { | 1319 | { |
1292 | int status; | 1320 | int status; |
1293 | char *p; | 1321 | char *p; |
1322 | u32 tmp; | ||
1294 | 1323 | ||
1295 | mlog_entry("remount: %d, options: \"%s\"\n", is_remount, | 1324 | mlog_entry("remount: %d, options: \"%s\"\n", is_remount, |
1296 | options ? options : "(none)"); | 1325 | options ? options : "(none)"); |
@@ -1322,7 +1351,10 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1322 | mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; | 1351 | mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; |
1323 | break; | 1352 | break; |
1324 | case Opt_hb_none: | 1353 | case Opt_hb_none: |
1325 | mopt->mount_opt &= ~OCFS2_MOUNT_HB_LOCAL; | 1354 | mopt->mount_opt |= OCFS2_MOUNT_HB_NONE; |
1355 | break; | ||
1356 | case Opt_hb_global: | ||
1357 | mopt->mount_opt |= OCFS2_MOUNT_HB_GLOBAL; | ||
1326 | break; | 1358 | break; |
1327 | case Opt_barrier: | 1359 | case Opt_barrier: |
1328 | if (match_int(&args[0], &option)) { | 1360 | if (match_int(&args[0], &option)) { |
@@ -1438,6 +1470,12 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1438 | case Opt_grpquota: | 1470 | case Opt_grpquota: |
1439 | mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; | 1471 | mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; |
1440 | break; | 1472 | break; |
1473 | case Opt_coherency_buffered: | ||
1474 | mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED; | ||
1475 | break; | ||
1476 | case Opt_coherency_full: | ||
1477 | mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED; | ||
1478 | break; | ||
1441 | case Opt_acl: | 1479 | case Opt_acl: |
1442 | mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; | 1480 | mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; |
1443 | mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; | 1481 | mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; |
@@ -1477,6 +1515,15 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1477 | } | 1515 | } |
1478 | } | 1516 | } |
1479 | 1517 | ||
1518 | /* Ensure only one heartbeat mode */ | ||
1519 | tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | | ||
1520 | OCFS2_MOUNT_HB_NONE); | ||
1521 | if (hweight32(tmp) != 1) { | ||
1522 | mlog(ML_ERROR, "Invalid heartbeat mount options\n"); | ||
1523 | status = 0; | ||
1524 | goto bail; | ||
1525 | } | ||
1526 | |||
1480 | status = 1; | 1527 | status = 1; |
1481 | 1528 | ||
1482 | bail: | 1529 | bail: |
@@ -1490,10 +1537,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
1490 | unsigned long opts = osb->s_mount_opt; | 1537 | unsigned long opts = osb->s_mount_opt; |
1491 | unsigned int local_alloc_megs; | 1538 | unsigned int local_alloc_megs; |
1492 | 1539 | ||
1493 | if (opts & OCFS2_MOUNT_HB_LOCAL) | 1540 | if (opts & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL)) { |
1494 | seq_printf(s, ",_netdev,heartbeat=local"); | 1541 | seq_printf(s, ",_netdev"); |
1495 | else | 1542 | if (opts & OCFS2_MOUNT_HB_LOCAL) |
1496 | seq_printf(s, ",heartbeat=none"); | 1543 | seq_printf(s, ",%s", OCFS2_HB_LOCAL); |
1544 | else | ||
1545 | seq_printf(s, ",%s", OCFS2_HB_GLOBAL); | ||
1546 | } else | ||
1547 | seq_printf(s, ",%s", OCFS2_HB_NONE); | ||
1497 | 1548 | ||
1498 | if (opts & OCFS2_MOUNT_NOINTR) | 1549 | if (opts & OCFS2_MOUNT_NOINTR) |
1499 | seq_printf(s, ",nointr"); | 1550 | seq_printf(s, ",nointr"); |
@@ -1536,6 +1587,11 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
1536 | if (opts & OCFS2_MOUNT_GRPQUOTA) | 1587 | if (opts & OCFS2_MOUNT_GRPQUOTA) |
1537 | seq_printf(s, ",grpquota"); | 1588 | seq_printf(s, ",grpquota"); |
1538 | 1589 | ||
1590 | if (opts & OCFS2_MOUNT_COHERENCY_BUFFERED) | ||
1591 | seq_printf(s, ",coherency=buffered"); | ||
1592 | else | ||
1593 | seq_printf(s, ",coherency=full"); | ||
1594 | |||
1539 | if (opts & OCFS2_MOUNT_NOUSERXATTR) | 1595 | if (opts & OCFS2_MOUNT_NOUSERXATTR) |
1540 | seq_printf(s, ",nouser_xattr"); | 1596 | seq_printf(s, ",nouser_xattr"); |
1541 | else | 1597 | else |
@@ -1990,6 +2046,36 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu | |||
1990 | return 0; | 2046 | return 0; |
1991 | } | 2047 | } |
1992 | 2048 | ||
2049 | /* Make sure entire volume is addressable by our journal. Requires | ||
2050 | osb_clusters_at_boot to be valid and for the journal to have been | ||
2051 | initialized by ocfs2_journal_init(). */ | ||
2052 | static int ocfs2_journal_addressable(struct ocfs2_super *osb) | ||
2053 | { | ||
2054 | int status = 0; | ||
2055 | u64 max_block = | ||
2056 | ocfs2_clusters_to_blocks(osb->sb, | ||
2057 | osb->osb_clusters_at_boot) - 1; | ||
2058 | |||
2059 | /* 32-bit block number is always OK. */ | ||
2060 | if (max_block <= (u32)~0ULL) | ||
2061 | goto out; | ||
2062 | |||
2063 | /* Volume is "huge", so see if our journal is new enough to | ||
2064 | support it. */ | ||
2065 | if (!(OCFS2_HAS_COMPAT_FEATURE(osb->sb, | ||
2066 | OCFS2_FEATURE_COMPAT_JBD2_SB) && | ||
2067 | jbd2_journal_check_used_features(osb->journal->j_journal, 0, 0, | ||
2068 | JBD2_FEATURE_INCOMPAT_64BIT))) { | ||
2069 | mlog(ML_ERROR, "The journal cannot address the entire volume. " | ||
2070 | "Enable the 'block64' journal option with tunefs.ocfs2"); | ||
2071 | status = -EFBIG; | ||
2072 | goto out; | ||
2073 | } | ||
2074 | |||
2075 | out: | ||
2076 | return status; | ||
2077 | } | ||
2078 | |||
1993 | static int ocfs2_initialize_super(struct super_block *sb, | 2079 | static int ocfs2_initialize_super(struct super_block *sb, |
1994 | struct buffer_head *bh, | 2080 | struct buffer_head *bh, |
1995 | int sector_size, | 2081 | int sector_size, |
@@ -2002,6 +2088,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2002 | struct ocfs2_journal *journal; | 2088 | struct ocfs2_journal *journal; |
2003 | __le32 uuid_net_key; | 2089 | __le32 uuid_net_key; |
2004 | struct ocfs2_super *osb; | 2090 | struct ocfs2_super *osb; |
2091 | u64 total_blocks; | ||
2005 | 2092 | ||
2006 | mlog_entry_void(); | 2093 | mlog_entry_void(); |
2007 | 2094 | ||
@@ -2060,6 +2147,15 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2060 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", | 2147 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", |
2061 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 2148 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); |
2062 | 2149 | ||
2150 | osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); | ||
2151 | if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) { | ||
2152 | mlog(ML_ERROR, "Invalid number of node slots (%u)\n", | ||
2153 | osb->max_slots); | ||
2154 | status = -EINVAL; | ||
2155 | goto bail; | ||
2156 | } | ||
2157 | mlog(0, "max_slots for this device: %u\n", osb->max_slots); | ||
2158 | |||
2063 | ocfs2_orphan_scan_init(osb); | 2159 | ocfs2_orphan_scan_init(osb); |
2064 | 2160 | ||
2065 | status = ocfs2_recovery_init(osb); | 2161 | status = ocfs2_recovery_init(osb); |
@@ -2098,15 +2194,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2098 | goto bail; | 2194 | goto bail; |
2099 | } | 2195 | } |
2100 | 2196 | ||
2101 | osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); | ||
2102 | if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) { | ||
2103 | mlog(ML_ERROR, "Invalid number of node slots (%u)\n", | ||
2104 | osb->max_slots); | ||
2105 | status = -EINVAL; | ||
2106 | goto bail; | ||
2107 | } | ||
2108 | mlog(0, "max_slots for this device: %u\n", osb->max_slots); | ||
2109 | |||
2110 | osb->slot_recovery_generations = | 2197 | osb->slot_recovery_generations = |
2111 | kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations), | 2198 | kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations), |
2112 | GFP_KERNEL); | 2199 | GFP_KERNEL); |
@@ -2149,7 +2236,9 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2149 | goto bail; | 2236 | goto bail; |
2150 | } | 2237 | } |
2151 | 2238 | ||
2152 | if (ocfs2_userspace_stack(osb)) { | 2239 | if (ocfs2_clusterinfo_valid(osb)) { |
2240 | osb->osb_stackflags = | ||
2241 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags; | ||
2153 | memcpy(osb->osb_cluster_stack, | 2242 | memcpy(osb->osb_cluster_stack, |
2154 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, | 2243 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, |
2155 | OCFS2_STACK_LABEL_LEN); | 2244 | OCFS2_STACK_LABEL_LEN); |
@@ -2214,11 +2303,15 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2214 | goto bail; | 2303 | goto bail; |
2215 | } | 2304 | } |
2216 | 2305 | ||
2217 | if (ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(di->i_clusters) - 1) | 2306 | total_blocks = ocfs2_clusters_to_blocks(osb->sb, |
2218 | > (u32)~0UL) { | 2307 | le32_to_cpu(di->i_clusters)); |
2219 | mlog(ML_ERROR, "Volume might try to write to blocks beyond " | 2308 | |
2220 | "what jbd can address in 32 bits.\n"); | 2309 | status = generic_check_addressable(osb->sb->s_blocksize_bits, |
2221 | status = -EINVAL; | 2310 | total_blocks); |
2311 | if (status) { | ||
2312 | mlog(ML_ERROR, "Volume too large " | ||
2313 | "to mount safely on this system"); | ||
2314 | status = -EFBIG; | ||
2222 | goto bail; | 2315 | goto bail; |
2223 | } | 2316 | } |
2224 | 2317 | ||
@@ -2380,6 +2473,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) | |||
2380 | goto finally; | 2473 | goto finally; |
2381 | } | 2474 | } |
2382 | 2475 | ||
2476 | /* Now that journal has been initialized, check to make sure | ||
2477 | entire volume is addressable. */ | ||
2478 | status = ocfs2_journal_addressable(osb); | ||
2479 | if (status) | ||
2480 | goto finally; | ||
2481 | |||
2383 | /* If the journal was unmounted cleanly then we don't want to | 2482 | /* If the journal was unmounted cleanly then we don't want to |
2384 | * recover anything. Otherwise, journal_load will do that | 2483 | * recover anything. Otherwise, journal_load will do that |
2385 | * dirty work for us :) */ | 2484 | * dirty work for us :) */ |
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index bfe7190cdbf1..902efb23b6a6 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c | |||
@@ -44,11 +44,6 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
44 | int type, | 44 | int type, |
45 | u32 slot); | 45 | u32 slot); |
46 | 46 | ||
47 | static inline int is_global_system_inode(int type); | ||
48 | static inline int is_in_system_inode_array(struct ocfs2_super *osb, | ||
49 | int type, | ||
50 | u32 slot); | ||
51 | |||
52 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 47 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
53 | static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES]; | 48 | static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES]; |
54 | #endif | 49 | #endif |
@@ -59,11 +54,52 @@ static inline int is_global_system_inode(int type) | |||
59 | type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; | 54 | type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; |
60 | } | 55 | } |
61 | 56 | ||
62 | static inline int is_in_system_inode_array(struct ocfs2_super *osb, | 57 | static struct inode **get_local_system_inode(struct ocfs2_super *osb, |
63 | int type, | 58 | int type, |
64 | u32 slot) | 59 | u32 slot) |
65 | { | 60 | { |
66 | return slot == osb->slot_num || is_global_system_inode(type); | 61 | int index; |
62 | struct inode **local_system_inodes, **free = NULL; | ||
63 | |||
64 | BUG_ON(slot == OCFS2_INVALID_SLOT); | ||
65 | BUG_ON(type < OCFS2_FIRST_LOCAL_SYSTEM_INODE || | ||
66 | type > OCFS2_LAST_LOCAL_SYSTEM_INODE); | ||
67 | |||
68 | spin_lock(&osb->osb_lock); | ||
69 | local_system_inodes = osb->local_system_inodes; | ||
70 | spin_unlock(&osb->osb_lock); | ||
71 | |||
72 | if (unlikely(!local_system_inodes)) { | ||
73 | local_system_inodes = kzalloc(sizeof(struct inode *) * | ||
74 | NUM_LOCAL_SYSTEM_INODES * | ||
75 | osb->max_slots, | ||
76 | GFP_NOFS); | ||
77 | if (!local_system_inodes) { | ||
78 | mlog_errno(-ENOMEM); | ||
79 | /* | ||
80 | * return NULL here so that ocfs2_get_sytem_file_inodes | ||
81 | * will try to create an inode and use it. We will try | ||
82 | * to initialize local_system_inodes next time. | ||
83 | */ | ||
84 | return NULL; | ||
85 | } | ||
86 | |||
87 | spin_lock(&osb->osb_lock); | ||
88 | if (osb->local_system_inodes) { | ||
89 | /* Someone has initialized it for us. */ | ||
90 | free = local_system_inodes; | ||
91 | local_system_inodes = osb->local_system_inodes; | ||
92 | } else | ||
93 | osb->local_system_inodes = local_system_inodes; | ||
94 | spin_unlock(&osb->osb_lock); | ||
95 | if (unlikely(free)) | ||
96 | kfree(free); | ||
97 | } | ||
98 | |||
99 | index = (slot * NUM_LOCAL_SYSTEM_INODES) + | ||
100 | (type - OCFS2_FIRST_LOCAL_SYSTEM_INODE); | ||
101 | |||
102 | return &local_system_inodes[index]; | ||
67 | } | 103 | } |
68 | 104 | ||
69 | struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, | 105 | struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, |
@@ -74,8 +110,10 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
74 | struct inode **arr = NULL; | 110 | struct inode **arr = NULL; |
75 | 111 | ||
76 | /* avoid the lookup if cached in local system file array */ | 112 | /* avoid the lookup if cached in local system file array */ |
77 | if (is_in_system_inode_array(osb, type, slot)) | 113 | if (is_global_system_inode(type)) { |
78 | arr = &(osb->system_inodes[type]); | 114 | arr = &(osb->global_system_inodes[type]); |
115 | } else | ||
116 | arr = get_local_system_inode(osb, type, slot); | ||
79 | 117 | ||
80 | if (arr && ((inode = *arr) != NULL)) { | 118 | if (arr && ((inode = *arr) != NULL)) { |
81 | /* get a ref in addition to the array ref */ | 119 | /* get a ref in addition to the array ref */ |
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 06fa5e77c40e..67cd43914641 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -7081,7 +7081,7 @@ static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, | |||
7081 | goto out; | 7081 | goto out; |
7082 | } | 7082 | } |
7083 | 7083 | ||
7084 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) | 7084 | if (!indexed) |
7085 | ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); | 7085 | ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); |
7086 | else | 7086 | else |
7087 | ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); | 7087 | ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 3168dcfb94f2..7d6f18fddfdb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -2378,6 +2378,8 @@ extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, | |||
2378 | 2378 | ||
2379 | extern int generic_file_fsync(struct file *, int); | 2379 | extern int generic_file_fsync(struct file *, int); |
2380 | 2380 | ||
2381 | extern int generic_check_addressable(unsigned, u64); | ||
2382 | |||
2381 | #ifdef CONFIG_MIGRATION | 2383 | #ifdef CONFIG_MIGRATION |
2382 | extern int buffer_migrate_page(struct address_space *, | 2384 | extern int buffer_migrate_page(struct address_space *, |
2383 | struct page *, struct page *); | 2385 | struct page *, struct page *); |