diff options
author | Mark Fasheh <mark.fasheh@oracle.com> | 2005-12-15 17:31:24 -0500 |
---|---|---|
committer | Joel Becker <joel.becker@oracle.com> | 2006-01-03 14:45:47 -0500 |
commit | ccd979bdbce9fba8412beb3f1de68a9d0171b12c (patch) | |
tree | c50ed941849ce06ccadd4ce27599b3ef9fdbe2ae /fs/ocfs2/ocfs2.h | |
parent | 8df08c89c668e1bd922a053fdb5ba1fadbecbb38 (diff) |
[PATCH] OCFS2: The Second Oracle Cluster Filesystem
The OCFS2 file system module.
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Diffstat (limited to 'fs/ocfs2/ocfs2.h')
-rw-r--r-- | fs/ocfs2/ocfs2.h | 464 |
1 files changed, 464 insertions, 0 deletions
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h new file mode 100644 index 000000000000..f468c600cf92 --- /dev/null +++ b/fs/ocfs2/ocfs2.h | |||
@@ -0,0 +1,464 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * ocfs2.h | ||
5 | * | ||
6 | * Defines macros and structures used in OCFS2 | ||
7 | * | ||
8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 02111-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #ifndef OCFS2_H | ||
27 | #define OCFS2_H | ||
28 | |||
29 | #include <linux/spinlock.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/wait.h> | ||
32 | #include <linux/list.h> | ||
33 | #include <linux/rbtree.h> | ||
34 | #include <linux/workqueue.h> | ||
35 | #include <linux/kref.h> | ||
36 | |||
37 | #include "cluster/nodemanager.h" | ||
38 | #include "cluster/heartbeat.h" | ||
39 | #include "cluster/tcp.h" | ||
40 | |||
41 | #include "dlm/dlmapi.h" | ||
42 | |||
43 | #include "ocfs2_fs.h" | ||
44 | #include "endian.h" | ||
45 | #include "ocfs2_lockid.h" | ||
46 | |||
47 | struct ocfs2_extent_map { | ||
48 | u32 em_clusters; | ||
49 | struct rb_root em_extents; | ||
50 | }; | ||
51 | |||
/*
 * Most user-visible OCFS2 inodes carry very little metadata, but larger
 * files (bitmaps and the like) still have to be taken into account by
 * the access scheme.  A small number of blocks is therefore kept inline
 * in an array, and the structure grows into an rb tree when necessary.
 */
#define OCFS2_INODE_MAX_CACHE_ARRAY	2
58 | |||
59 | struct ocfs2_caching_info { | ||
60 | unsigned int ci_num_cached; | ||
61 | union { | ||
62 | sector_t ci_array[OCFS2_INODE_MAX_CACHE_ARRAY]; | ||
63 | struct rb_root ci_tree; | ||
64 | } ci_cache; | ||
65 | }; | ||
66 | |||
67 | /* this limits us to 256 nodes | ||
68 | * if we need more, we can do a kmalloc for the map */ | ||
69 | #define OCFS2_NODE_MAP_MAX_NODES 256 | ||
70 | struct ocfs2_node_map { | ||
71 | u16 num_nodes; | ||
72 | unsigned long map[BITS_TO_LONGS(OCFS2_NODE_MAP_MAX_NODES)]; | ||
73 | }; | ||
74 | |||
/* Values for ocfs2_lock_res->l_action; zero is reserved as "invalid". */
enum ocfs2_ast_action
{
	OCFS2_AST_INVALID	= 0,
	OCFS2_AST_ATTACH	= 1,
	OCFS2_AST_CONVERT	= 2,
	OCFS2_AST_DOWNCONVERT	= 3,
};
81 | |||
/*
 * What an unlockast function should do (kept in
 * ocfs2_lock_res->l_unlock_action); zero is reserved as "invalid".
 */
enum ocfs2_unlock_action
{
	OCFS2_UNLOCK_INVALID		= 0,
	OCFS2_UNLOCK_CANCEL_CONVERT	= 1,
	OCFS2_UNLOCK_DROP_LOCK		= 2,
};
88 | |||
/*
 * Flag bits for ocfs2_lock_res->l_flags.
 */

/* have we initialized the lvb */
#define OCFS2_LOCK_ATTACHED		(0x00000001)

/* we are currently in dlm_lock */
#define OCFS2_LOCK_BUSY			(0x00000002)

/* blocked waiting to downconvert */
#define OCFS2_LOCK_BLOCKED		(0x00000004)

/* newly created inode */
#define OCFS2_LOCK_LOCAL		(0x00000008)

#define OCFS2_LOCK_NEEDS_REFRESH	(0x00000010)
#define OCFS2_LOCK_REFRESHING		(0x00000020)

/* track initialization for shutdown paths */
#define OCFS2_LOCK_INITIALIZED		(0x00000040)

/*
 * help dlmglue track when to skip queueing a lock because it's about
 * to be dropped
 */
#define OCFS2_LOCK_FREEING		(0x00000080)

/* queued for downconvert */
#define OCFS2_LOCK_QUEUED		(0x00000100)
107 | |||
/* Only pointers to the ops table are needed in this header. */
struct ocfs2_lock_res_ops;

/* Callback signature: an integer status plus an opaque data word. */
typedef void (*ocfs2_lock_callback)(int status,
				    unsigned long data);
111 | |||
112 | struct ocfs2_lock_res { | ||
113 | void *l_priv; | ||
114 | struct ocfs2_lock_res_ops *l_ops; | ||
115 | spinlock_t l_lock; | ||
116 | |||
117 | struct list_head l_blocked_list; | ||
118 | struct list_head l_mask_waiters; | ||
119 | |||
120 | enum ocfs2_lock_type l_type; | ||
121 | unsigned long l_flags; | ||
122 | char l_name[OCFS2_LOCK_ID_MAX_LEN]; | ||
123 | int l_level; | ||
124 | unsigned int l_ro_holders; | ||
125 | unsigned int l_ex_holders; | ||
126 | struct dlm_lockstatus l_lksb; | ||
127 | |||
128 | /* used from AST/BAST funcs. */ | ||
129 | enum ocfs2_ast_action l_action; | ||
130 | enum ocfs2_unlock_action l_unlock_action; | ||
131 | int l_requested; | ||
132 | int l_blocking; | ||
133 | |||
134 | wait_queue_head_t l_event; | ||
135 | |||
136 | struct list_head l_debug_list; | ||
137 | }; | ||
138 | |||
139 | struct ocfs2_dlm_debug { | ||
140 | struct kref d_refcnt; | ||
141 | struct dentry *d_locking_state; | ||
142 | struct list_head d_lockres_tracking; | ||
143 | }; | ||
144 | |||
/* Volume life-cycle states; VOLUME_INIT is the zero value. */
enum ocfs2_vol_state {
	VOLUME_INIT		= 0,
	VOLUME_MOUNTED		= 1,
	VOLUME_DISMOUNTED	= 2,
	VOLUME_DISABLED		= 3
};
152 | |||
153 | struct ocfs2_alloc_stats | ||
154 | { | ||
155 | atomic_t moves; | ||
156 | atomic_t local_data; | ||
157 | atomic_t bitmap_data; | ||
158 | atomic_t bg_allocs; | ||
159 | atomic_t bg_extends; | ||
160 | }; | ||
161 | |||
/* State of the local allocator; OCFS2_LA_UNUSED is the zero value. */
enum ocfs2_local_alloc_state {
	OCFS2_LA_UNUSED		= 0,
	OCFS2_LA_ENABLED	= 1,
	OCFS2_LA_DISABLED	= 2
};
168 | |||
/*
 * Mount option bits.  ocfs2_should_order_data() tests these against
 * ocfs2_super.s_mount_opt.
 */
enum ocfs2_mount_options {
	/* Heartbeat started in local mode */
	OCFS2_MOUNT_HB_LOCAL		= 0x00000001,
	/* Use block barriers */
	OCFS2_MOUNT_BARRIER		= 0x00000002,
	/* Don't catch signals */
	OCFS2_MOUNT_NOINTR		= 0x00000004,
	/* Panic on errors */
	OCFS2_MOUNT_ERRORS_PANIC	= 0x00000008,
	/* No data ordering */
	OCFS2_MOUNT_DATA_WRITEBACK	= 0x00000010,
#ifdef OCFS2_ORACORE_WORKAROUNDS
	/* ocfs1 compatibility mode */
	OCFS2_MOUNT_COMPAT_OCFS		= 0x40000000,
#endif
};
180 | |||
/*
 * Bits kept in ocfs2_super.osb_flags.  ocfs2_set_ro_flag() clears both
 * RO bits before setting one, so at most one of them is set by it.
 */
#define OCFS2_OSB_SOFT_RO	0x0001
#define OCFS2_OSB_HARD_RO	0x0002
#define OCFS2_OSB_ERROR_FS	0x0004
184 | |||
185 | struct ocfs2_journal; | ||
186 | struct ocfs2_journal_handle; | ||
187 | struct ocfs2_super | ||
188 | { | ||
189 | u32 osb_id; /* id used by the proc interface */ | ||
190 | struct task_struct *commit_task; | ||
191 | struct super_block *sb; | ||
192 | struct inode *root_inode; | ||
193 | struct inode *sys_root_inode; | ||
194 | struct inode *system_inodes[NUM_SYSTEM_INODES]; | ||
195 | |||
196 | struct ocfs2_slot_info *slot_info; | ||
197 | |||
198 | spinlock_t node_map_lock; | ||
199 | struct ocfs2_node_map mounted_map; | ||
200 | struct ocfs2_node_map recovery_map; | ||
201 | struct ocfs2_node_map umount_map; | ||
202 | |||
203 | u32 num_clusters; | ||
204 | u64 root_blkno; | ||
205 | u64 system_dir_blkno; | ||
206 | u64 bitmap_blkno; | ||
207 | u32 bitmap_cpg; | ||
208 | u8 *uuid; | ||
209 | char *uuid_str; | ||
210 | u8 *vol_label; | ||
211 | u64 first_cluster_group_blkno; | ||
212 | u32 fs_generation; | ||
213 | |||
214 | u32 s_feature_compat; | ||
215 | u32 s_feature_incompat; | ||
216 | u32 s_feature_ro_compat; | ||
217 | |||
218 | /* Protects s_next_generaion, osb_flags. Could protect more on | ||
219 | * osb as it's very short lived. */ | ||
220 | spinlock_t osb_lock; | ||
221 | u32 s_next_generation; | ||
222 | unsigned long osb_flags; | ||
223 | |||
224 | unsigned long s_mount_opt; | ||
225 | |||
226 | u16 max_slots; | ||
227 | u16 num_nodes; | ||
228 | s16 node_num; | ||
229 | s16 slot_num; | ||
230 | int s_sectsize_bits; | ||
231 | int s_clustersize; | ||
232 | int s_clustersize_bits; | ||
233 | struct proc_dir_entry *proc_sub_dir; /* points to /proc/fs/ocfs2/<maj_min> */ | ||
234 | |||
235 | atomic_t vol_state; | ||
236 | struct semaphore recovery_lock; | ||
237 | struct task_struct *recovery_thread_task; | ||
238 | int disable_recovery; | ||
239 | wait_queue_head_t checkpoint_event; | ||
240 | atomic_t needs_checkpoint; | ||
241 | struct ocfs2_journal *journal; | ||
242 | |||
243 | enum ocfs2_local_alloc_state local_alloc_state; | ||
244 | struct buffer_head *local_alloc_bh; | ||
245 | |||
246 | /* Next two fields are for local node slot recovery during | ||
247 | * mount. */ | ||
248 | int dirty; | ||
249 | struct ocfs2_dinode *local_alloc_copy; | ||
250 | |||
251 | struct ocfs2_alloc_stats alloc_stats; | ||
252 | char dev_str[20]; /* "major,minor" of the device */ | ||
253 | |||
254 | struct dlm_ctxt *dlm; | ||
255 | struct ocfs2_lock_res osb_super_lockres; | ||
256 | struct ocfs2_lock_res osb_rename_lockres; | ||
257 | struct dlm_eviction_cb osb_eviction_cb; | ||
258 | struct ocfs2_dlm_debug *osb_dlm_debug; | ||
259 | |||
260 | struct dentry *osb_debug_root; | ||
261 | |||
262 | wait_queue_head_t recovery_event; | ||
263 | |||
264 | spinlock_t vote_task_lock; | ||
265 | struct task_struct *vote_task; | ||
266 | wait_queue_head_t vote_event; | ||
267 | unsigned long vote_wake_sequence; | ||
268 | unsigned long vote_work_sequence; | ||
269 | |||
270 | struct list_head blocked_lock_list; | ||
271 | unsigned long blocked_lock_count; | ||
272 | |||
273 | struct list_head vote_list; | ||
274 | int vote_count; | ||
275 | |||
276 | u32 net_key; | ||
277 | spinlock_t net_response_lock; | ||
278 | unsigned int net_response_ids; | ||
279 | struct list_head net_response_list; | ||
280 | |||
281 | struct o2hb_callback_func osb_hb_up; | ||
282 | struct o2hb_callback_func osb_hb_down; | ||
283 | |||
284 | struct list_head osb_net_handlers; | ||
285 | |||
286 | wait_queue_head_t osb_mount_event; | ||
287 | |||
288 | /* Truncate log info */ | ||
289 | struct inode *osb_tl_inode; | ||
290 | struct buffer_head *osb_tl_bh; | ||
291 | struct work_struct osb_truncate_log_wq; | ||
292 | }; | ||
293 | |||
/* Fetch the struct ocfs2_super hanging off a VFS super block. */
#define OCFS2_SB(sb)		((struct ocfs2_super *)(sb)->s_fs_info)

#define OCFS2_MAX_OSB_ID	65536
296 | |||
297 | static inline int ocfs2_should_order_data(struct inode *inode) | ||
298 | { | ||
299 | if (!S_ISREG(inode->i_mode)) | ||
300 | return 0; | ||
301 | if (OCFS2_SB(inode->i_sb)->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) | ||
302 | return 0; | ||
303 | return 1; | ||
304 | } | ||
305 | |||
306 | /* set / clear functions because cluster events can make these happen | ||
307 | * in parallel so we want the transitions to be atomic. this also | ||
308 | * means that any future flags osb_flags must be protected by spinlock | ||
309 | * too! */ | ||
310 | static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb, | ||
311 | unsigned long flag) | ||
312 | { | ||
313 | spin_lock(&osb->osb_lock); | ||
314 | osb->osb_flags |= flag; | ||
315 | spin_unlock(&osb->osb_lock); | ||
316 | } | ||
317 | |||
318 | static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, | ||
319 | int hard) | ||
320 | { | ||
321 | spin_lock(&osb->osb_lock); | ||
322 | osb->osb_flags &= ~(OCFS2_OSB_SOFT_RO|OCFS2_OSB_HARD_RO); | ||
323 | if (hard) | ||
324 | osb->osb_flags |= OCFS2_OSB_HARD_RO; | ||
325 | else | ||
326 | osb->osb_flags |= OCFS2_OSB_SOFT_RO; | ||
327 | spin_unlock(&osb->osb_lock); | ||
328 | } | ||
329 | |||
330 | static inline int ocfs2_is_hard_readonly(struct ocfs2_super *osb) | ||
331 | { | ||
332 | int ret; | ||
333 | |||
334 | spin_lock(&osb->osb_lock); | ||
335 | ret = osb->osb_flags & OCFS2_OSB_HARD_RO; | ||
336 | spin_unlock(&osb->osb_lock); | ||
337 | |||
338 | return ret; | ||
339 | } | ||
340 | |||
341 | static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) | ||
342 | { | ||
343 | int ret; | ||
344 | |||
345 | spin_lock(&osb->osb_lock); | ||
346 | ret = osb->osb_flags & OCFS2_OSB_SOFT_RO; | ||
347 | spin_unlock(&osb->osb_lock); | ||
348 | |||
349 | return ret; | ||
350 | } | ||
351 | |||
/*
 * True when the i_signature field of the dinode at @ptr compares equal
 * (strcmp) to OCFS2_INODE_SIGNATURE.
 */
#define OCFS2_IS_VALID_DINODE(ptr)					\
	(!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))

/*
 * Report a dinode with a bad signature through ocfs2_error(), printing
 * its block number and at most 7 characters of the signature.  @__di is
 * evaluated exactly once.
 *
 * The expansion intentionally does NOT end in a semicolon: the caller's
 * own ';' completes the statement, so the macro can be used as the
 * unbraced body of an if that is followed by an else.
 */
#define OCFS2_RO_ON_INVALID_DINODE(__sb, __di)	do {			\
	typeof(__di) ____di = (__di);					\
	ocfs2_error((__sb),						\
		"Dinode # %"MLFu64" has bad signature %.*s",		\
		(____di)->i_blkno, 7,					\
		(____di)->i_signature);					\
} while (0)
362 | |||
/*
 * True when the h_signature field of the extent block at @ptr compares
 * equal (strcmp) to OCFS2_EXTENT_BLOCK_SIGNATURE.
 */
#define OCFS2_IS_VALID_EXTENT_BLOCK(ptr)				\
	(!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE))

/*
 * Report an extent block with a bad signature through ocfs2_error(),
 * printing its block number and at most 7 characters of the signature.
 * @__eb is evaluated exactly once.
 *
 * The expansion intentionally does NOT end in a semicolon: the caller's
 * own ';' completes the statement, so the macro can be used as the
 * unbraced body of an if that is followed by an else.
 */
#define OCFS2_RO_ON_INVALID_EXTENT_BLOCK(__sb, __eb)	do {		\
	typeof(__eb) ____eb = (__eb);					\
	ocfs2_error((__sb),						\
		"Extent Block # %"MLFu64" has bad signature %.*s",	\
		(____eb)->h_blkno, 7,					\
		(____eb)->h_signature);					\
} while (0)
373 | |||
/*
 * True when the bg_signature field of the group descriptor at @ptr
 * compares equal (strcmp) to OCFS2_GROUP_DESC_SIGNATURE.
 */
#define OCFS2_IS_VALID_GROUP_DESC(ptr)					\
	(!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE))

/*
 * Report a group descriptor with a bad signature through ocfs2_error(),
 * printing its block number and at most 7 characters of the signature.
 * @__gd is evaluated exactly once.
 *
 * The expansion intentionally does NOT end in a semicolon: the caller's
 * own ';' completes the statement, so the macro can be used as the
 * unbraced body of an if that is followed by an else.
 */
#define OCFS2_RO_ON_INVALID_GROUP_DESC(__sb, __gd)	do {		\
	typeof(__gd) ____gd = (__gd);					\
	ocfs2_error((__sb),						\
		"Group Descriptor # %"MLFu64" has bad signature %.*s",	\
		(____gd)->bg_blkno, 7,					\
		(____gd)->bg_signature);				\
} while (0)
384 | |||
385 | static inline unsigned long ino_from_blkno(struct super_block *sb, | ||
386 | u64 blkno) | ||
387 | { | ||
388 | return (unsigned long)(blkno & (u64)ULONG_MAX); | ||
389 | } | ||
390 | |||
391 | static inline u64 ocfs2_clusters_to_blocks(struct super_block *sb, | ||
392 | u32 clusters) | ||
393 | { | ||
394 | int c_to_b_bits = OCFS2_SB(sb)->s_clustersize_bits - | ||
395 | sb->s_blocksize_bits; | ||
396 | |||
397 | return (u64)clusters << c_to_b_bits; | ||
398 | } | ||
399 | |||
400 | static inline u32 ocfs2_blocks_to_clusters(struct super_block *sb, | ||
401 | u64 blocks) | ||
402 | { | ||
403 | int b_to_c_bits = OCFS2_SB(sb)->s_clustersize_bits - | ||
404 | sb->s_blocksize_bits; | ||
405 | |||
406 | return (u32)(blocks >> b_to_c_bits); | ||
407 | } | ||
408 | |||
409 | static inline unsigned int ocfs2_clusters_for_bytes(struct super_block *sb, | ||
410 | u64 bytes) | ||
411 | { | ||
412 | int cl_bits = OCFS2_SB(sb)->s_clustersize_bits; | ||
413 | unsigned int clusters; | ||
414 | |||
415 | bytes += OCFS2_SB(sb)->s_clustersize - 1; | ||
416 | /* OCFS2 just cannot have enough clusters to overflow this */ | ||
417 | clusters = (unsigned int)(bytes >> cl_bits); | ||
418 | |||
419 | return clusters; | ||
420 | } | ||
421 | |||
422 | static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb, | ||
423 | u64 bytes) | ||
424 | { | ||
425 | bytes += sb->s_blocksize - 1; | ||
426 | return bytes >> sb->s_blocksize_bits; | ||
427 | } | ||
428 | |||
429 | static inline u64 ocfs2_clusters_to_bytes(struct super_block *sb, | ||
430 | u32 clusters) | ||
431 | { | ||
432 | return (u64)clusters << OCFS2_SB(sb)->s_clustersize_bits; | ||
433 | } | ||
434 | |||
435 | static inline u64 ocfs2_align_bytes_to_clusters(struct super_block *sb, | ||
436 | u64 bytes) | ||
437 | { | ||
438 | int cl_bits = OCFS2_SB(sb)->s_clustersize_bits; | ||
439 | unsigned int clusters; | ||
440 | |||
441 | clusters = ocfs2_clusters_for_bytes(sb, bytes); | ||
442 | return (u64)clusters << cl_bits; | ||
443 | } | ||
444 | |||
445 | static inline u64 ocfs2_align_bytes_to_blocks(struct super_block *sb, | ||
446 | u64 bytes) | ||
447 | { | ||
448 | u64 blocks; | ||
449 | |||
450 | blocks = ocfs2_blocks_for_bytes(sb, bytes); | ||
451 | return blocks << sb->s_blocksize_bits; | ||
452 | } | ||
453 | |||
454 | static inline unsigned long ocfs2_align_bytes_to_sectors(u64 bytes) | ||
455 | { | ||
456 | return (unsigned long)((bytes + 511) >> 9); | ||
457 | } | ||
458 | |||
/* OCFS2 bitmap helpers are plain aliases for the ext2 bit operations. */
#define ocfs2_set_bit			ext2_set_bit
#define ocfs2_clear_bit			ext2_clear_bit
#define ocfs2_test_bit			ext2_test_bit
#define ocfs2_find_next_zero_bit	ext2_find_next_zero_bit
463 | #endif /* OCFS2_H */ | ||
464 | |||