diff options
Diffstat (limited to 'fs/ceph/super.h')
-rw-r--r-- | fs/ceph/super.h | 448 |
1 files changed, 182 insertions, 266 deletions
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b87638e84c4b..f5cabefa98dc 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef _FS_CEPH_SUPER_H | 1 | #ifndef _FS_CEPH_SUPER_H |
2 | #define _FS_CEPH_SUPER_H | 2 | #define _FS_CEPH_SUPER_H |
3 | 3 | ||
4 | #include "ceph_debug.h" | 4 | #include <linux/ceph/ceph_debug.h> |
5 | 5 | ||
6 | #include <asm/unaligned.h> | 6 | #include <asm/unaligned.h> |
7 | #include <linux/backing-dev.h> | 7 | #include <linux/backing-dev.h> |
@@ -14,13 +14,7 @@ | |||
14 | #include <linux/writeback.h> | 14 | #include <linux/writeback.h> |
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | 16 | ||
17 | #include "types.h" | 17 | #include <linux/ceph/libceph.h> |
18 | #include "messenger.h" | ||
19 | #include "msgpool.h" | ||
20 | #include "mon_client.h" | ||
21 | #include "mds_client.h" | ||
22 | #include "osd_client.h" | ||
23 | #include "ceph_fs.h" | ||
24 | 18 | ||
25 | /* f_type in struct statfs */ | 19 | /* f_type in struct statfs */ |
26 | #define CEPH_SUPER_MAGIC 0x00c36400 | 20 | #define CEPH_SUPER_MAGIC 0x00c36400 |
@@ -30,42 +24,27 @@ | |||
30 | #define CEPH_BLOCK_SHIFT 20 /* 1 MB */ | 24 | #define CEPH_BLOCK_SHIFT 20 /* 1 MB */ |
31 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) | 25 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) |
32 | 26 | ||
33 | /* | 27 | #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ |
34 | * Supported features | 28 | #define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ |
35 | */ | 29 | #define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ |
36 | #define CEPH_FEATURE_SUPPORTED CEPH_FEATURE_NOSRCADDR | CEPH_FEATURE_FLOCK | 30 | #define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */ |
37 | #define CEPH_FEATURE_REQUIRED CEPH_FEATURE_NOSRCADDR | ||
38 | 31 | ||
39 | /* | 32 | #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) |
40 | * mount options | ||
41 | */ | ||
42 | #define CEPH_OPT_FSID (1<<0) | ||
43 | #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ | ||
44 | #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ | ||
45 | #define CEPH_OPT_DIRSTAT (1<<4) /* funky `cat dirname` for stats */ | ||
46 | #define CEPH_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ | ||
47 | #define CEPH_OPT_NOCRC (1<<6) /* no data crc on writes */ | ||
48 | #define CEPH_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ | ||
49 | 33 | ||
50 | #define CEPH_OPT_DEFAULT (CEPH_OPT_RBYTES) | 34 | #define ceph_set_mount_opt(fsc, opt) \ |
35 | (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt; | ||
36 | #define ceph_test_mount_opt(fsc, opt) \ | ||
37 | (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt)) | ||
51 | 38 | ||
52 | #define ceph_set_opt(client, opt) \ | 39 | #define CEPH_RSIZE_DEFAULT (512*1024) /* readahead */ |
53 | (client)->mount_args->flags |= CEPH_OPT_##opt; | 40 | #define CEPH_MAX_READDIR_DEFAULT 1024 |
54 | #define ceph_test_opt(client, opt) \ | 41 | #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) |
55 | (!!((client)->mount_args->flags & CEPH_OPT_##opt)) | 42 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" |
56 | 43 | ||
57 | 44 | struct ceph_mount_options { | |
58 | struct ceph_mount_args { | ||
59 | int sb_flags; | ||
60 | int flags; | 45 | int flags; |
61 | struct ceph_fsid fsid; | 46 | int sb_flags; |
62 | struct ceph_entity_addr my_addr; | 47 | |
63 | int num_mon; | ||
64 | struct ceph_entity_addr *mon_addr; | ||
65 | int mount_timeout; | ||
66 | int osd_idle_ttl; | ||
67 | int osd_timeout; | ||
68 | int osd_keepalive_timeout; | ||
69 | int wsize; | 48 | int wsize; |
70 | int rsize; /* max readahead */ | 49 | int rsize; /* max readahead */ |
71 | int congestion_kb; /* max writeback in flight */ | 50 | int congestion_kb; /* max writeback in flight */ |
@@ -73,82 +52,25 @@ struct ceph_mount_args { | |||
73 | int cap_release_safety; | 52 | int cap_release_safety; |
74 | int max_readdir; /* max readdir result (entires) */ | 53 | int max_readdir; /* max readdir result (entires) */ |
75 | int max_readdir_bytes; /* max readdir result (bytes) */ | 54 | int max_readdir_bytes; /* max readdir result (bytes) */ |
76 | char *snapdir_name; /* default ".snap" */ | ||
77 | char *name; | ||
78 | char *secret; | ||
79 | }; | ||
80 | |||
81 | /* | ||
82 | * defaults | ||
83 | */ | ||
84 | #define CEPH_MOUNT_TIMEOUT_DEFAULT 60 | ||
85 | #define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ | ||
86 | #define CEPH_OSD_KEEPALIVE_DEFAULT 5 | ||
87 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 | ||
88 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ | ||
89 | #define CEPH_MAX_READDIR_DEFAULT 1024 | ||
90 | #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) | ||
91 | |||
92 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | ||
93 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) | ||
94 | |||
95 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" | ||
96 | #define CEPH_AUTH_NAME_DEFAULT "guest" | ||
97 | /* | ||
98 | * Delay telling the MDS we no longer want caps, in case we reopen | ||
99 | * the file. Delay a minimum amount of time, even if we send a cap | ||
100 | * message for some other reason. Otherwise, take the oppotunity to | ||
101 | * update the mds to avoid sending another message later. | ||
102 | */ | ||
103 | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ | ||
104 | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ | ||
105 | |||
106 | #define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) | ||
107 | |||
108 | /* mount state */ | ||
109 | enum { | ||
110 | CEPH_MOUNT_MOUNTING, | ||
111 | CEPH_MOUNT_MOUNTED, | ||
112 | CEPH_MOUNT_UNMOUNTING, | ||
113 | CEPH_MOUNT_UNMOUNTED, | ||
114 | CEPH_MOUNT_SHUTDOWN, | ||
115 | }; | ||
116 | |||
117 | /* | ||
118 | * subtract jiffies | ||
119 | */ | ||
120 | static inline unsigned long time_sub(unsigned long a, unsigned long b) | ||
121 | { | ||
122 | BUG_ON(time_after(b, a)); | ||
123 | return (long)a - (long)b; | ||
124 | } | ||
125 | 55 | ||
126 | /* | 56 | /* |
127 | * per-filesystem client state | 57 | * everything above this point can be memcmp'd; everything below |
128 | * | 58 | * is handled in compare_mount_options() |
129 | * possibly shared by multiple mount points, if they are | 59 | */ |
130 | * mounting the same ceph filesystem/cluster. | ||
131 | */ | ||
132 | struct ceph_client { | ||
133 | struct ceph_fsid fsid; | ||
134 | bool have_fsid; | ||
135 | 60 | ||
136 | struct mutex mount_mutex; /* serialize mount attempts */ | 61 | char *snapdir_name; /* default ".snap" */ |
137 | struct ceph_mount_args *mount_args; | 62 | }; |
138 | 63 | ||
64 | struct ceph_fs_client { | ||
139 | struct super_block *sb; | 65 | struct super_block *sb; |
140 | 66 | ||
141 | unsigned long mount_state; | 67 | struct ceph_mount_options *mount_options; |
142 | wait_queue_head_t auth_wq; | 68 | struct ceph_client *client; |
143 | |||
144 | int auth_err; | ||
145 | 69 | ||
70 | unsigned long mount_state; | ||
146 | int min_caps; /* min caps i added */ | 71 | int min_caps; /* min caps i added */ |
147 | 72 | ||
148 | struct ceph_messenger *msgr; /* messenger instance */ | 73 | struct ceph_mds_client *mdsc; |
149 | struct ceph_mon_client monc; | ||
150 | struct ceph_mds_client mdsc; | ||
151 | struct ceph_osd_client osdc; | ||
152 | 74 | ||
153 | /* writeback */ | 75 | /* writeback */ |
154 | mempool_t *wb_pagevec_pool; | 76 | mempool_t *wb_pagevec_pool; |
@@ -160,14 +82,14 @@ struct ceph_client { | |||
160 | struct backing_dev_info backing_dev_info; | 82 | struct backing_dev_info backing_dev_info; |
161 | 83 | ||
162 | #ifdef CONFIG_DEBUG_FS | 84 | #ifdef CONFIG_DEBUG_FS |
163 | struct dentry *debugfs_monmap; | 85 | struct dentry *debugfs_dentry_lru, *debugfs_caps; |
164 | struct dentry *debugfs_mdsmap, *debugfs_osdmap; | ||
165 | struct dentry *debugfs_dir, *debugfs_dentry_lru, *debugfs_caps; | ||
166 | struct dentry *debugfs_congestion_kb; | 86 | struct dentry *debugfs_congestion_kb; |
167 | struct dentry *debugfs_bdi; | 87 | struct dentry *debugfs_bdi; |
88 | struct dentry *debugfs_mdsc, *debugfs_mdsmap; | ||
168 | #endif | 89 | #endif |
169 | }; | 90 | }; |
170 | 91 | ||
92 | |||
171 | /* | 93 | /* |
172 | * File i/o capability. This tracks shared state with the metadata | 94 | * File i/o capability. This tracks shared state with the metadata |
173 | * server that allows us to cache or writeback attributes or to read | 95 | * server that allows us to cache or writeback attributes or to read |
@@ -275,6 +197,20 @@ struct ceph_inode_xattr { | |||
275 | int should_free_val; | 197 | int should_free_val; |
276 | }; | 198 | }; |
277 | 199 | ||
200 | /* | ||
201 | * Ceph dentry state | ||
202 | */ | ||
203 | struct ceph_dentry_info { | ||
204 | struct ceph_mds_session *lease_session; | ||
205 | u32 lease_gen, lease_shared_gen; | ||
206 | u32 lease_seq; | ||
207 | unsigned long lease_renew_after, lease_renew_from; | ||
208 | struct list_head lru; | ||
209 | struct dentry *dentry; | ||
210 | u64 time; | ||
211 | u64 offset; | ||
212 | }; | ||
213 | |||
278 | struct ceph_inode_xattrs_info { | 214 | struct ceph_inode_xattrs_info { |
279 | /* | 215 | /* |
280 | * (still encoded) xattr blob. we avoid the overhead of parsing | 216 | * (still encoded) xattr blob. we avoid the overhead of parsing |
@@ -296,11 +232,6 @@ struct ceph_inode_xattrs_info { | |||
296 | /* | 232 | /* |
297 | * Ceph inode. | 233 | * Ceph inode. |
298 | */ | 234 | */ |
299 | #define CEPH_I_COMPLETE 1 /* we have complete directory cached */ | ||
300 | #define CEPH_I_NODELAY 4 /* do not delay cap release */ | ||
301 | #define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ | ||
302 | #define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ | ||
303 | |||
304 | struct ceph_inode_info { | 235 | struct ceph_inode_info { |
305 | struct ceph_vino i_vino; /* ceph ino + snap */ | 236 | struct ceph_vino i_vino; /* ceph ino + snap */ |
306 | 237 | ||
@@ -310,6 +241,7 @@ struct ceph_inode_info { | |||
310 | unsigned i_ceph_flags; | 241 | unsigned i_ceph_flags; |
311 | unsigned long i_release_count; | 242 | unsigned long i_release_count; |
312 | 243 | ||
244 | struct ceph_dir_layout i_dir_layout; | ||
313 | struct ceph_file_layout i_layout; | 245 | struct ceph_file_layout i_layout; |
314 | char *i_symlink; | 246 | char *i_symlink; |
315 | 247 | ||
@@ -361,12 +293,10 @@ struct ceph_inode_info { | |||
361 | 293 | ||
362 | /* held references to caps */ | 294 | /* held references to caps */ |
363 | int i_pin_ref; | 295 | int i_pin_ref; |
364 | int i_rd_ref, i_rdcache_ref, i_wr_ref; | 296 | int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref; |
365 | int i_wrbuffer_ref, i_wrbuffer_ref_head; | 297 | int i_wrbuffer_ref, i_wrbuffer_ref_head; |
366 | u32 i_shared_gen; /* increment each time we get FILE_SHARED */ | 298 | u32 i_shared_gen; /* increment each time we get FILE_SHARED */ |
367 | u32 i_rdcache_gen; /* we increment this each time we get | 299 | u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ |
368 | FILE_CACHE. If it's non-zero, we | ||
369 | _may_ have cached pages. */ | ||
370 | u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ | 300 | u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ |
371 | 301 | ||
372 | struct list_head i_unsafe_writes; /* uncommitted sync writes */ | 302 | struct list_head i_unsafe_writes; /* uncommitted sync writes */ |
@@ -391,98 +321,66 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode) | |||
391 | return container_of(inode, struct ceph_inode_info, vfs_inode); | 321 | return container_of(inode, struct ceph_inode_info, vfs_inode); |
392 | } | 322 | } |
393 | 323 | ||
394 | static inline void ceph_i_clear(struct inode *inode, unsigned mask) | 324 | static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode) |
395 | { | 325 | { |
396 | struct ceph_inode_info *ci = ceph_inode(inode); | 326 | return (struct ceph_fs_client *)inode->i_sb->s_fs_info; |
397 | |||
398 | spin_lock(&inode->i_lock); | ||
399 | ci->i_ceph_flags &= ~mask; | ||
400 | spin_unlock(&inode->i_lock); | ||
401 | } | 327 | } |
402 | 328 | ||
403 | static inline void ceph_i_set(struct inode *inode, unsigned mask) | 329 | static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb) |
404 | { | 330 | { |
405 | struct ceph_inode_info *ci = ceph_inode(inode); | 331 | return (struct ceph_fs_client *)sb->s_fs_info; |
406 | |||
407 | spin_lock(&inode->i_lock); | ||
408 | ci->i_ceph_flags |= mask; | ||
409 | spin_unlock(&inode->i_lock); | ||
410 | } | 332 | } |
411 | 333 | ||
412 | static inline bool ceph_i_test(struct inode *inode, unsigned mask) | 334 | static inline struct ceph_vino ceph_vino(struct inode *inode) |
413 | { | 335 | { |
414 | struct ceph_inode_info *ci = ceph_inode(inode); | 336 | return ceph_inode(inode)->i_vino; |
415 | bool r; | ||
416 | |||
417 | smp_mb(); | ||
418 | r = (ci->i_ceph_flags & mask) == mask; | ||
419 | return r; | ||
420 | } | 337 | } |
421 | 338 | ||
422 | |||
423 | /* find a specific frag @f */ | ||
424 | extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, | ||
425 | u32 f); | ||
426 | |||
427 | /* | ||
428 | * choose fragment for value @v. copy frag content to pfrag, if leaf | ||
429 | * exists | ||
430 | */ | ||
431 | extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, | ||
432 | struct ceph_inode_frag *pfrag, | ||
433 | int *found); | ||
434 | |||
435 | /* | 339 | /* |
436 | * Ceph dentry state | 340 | * ino_t is <64 bits on many architectures, blech. |
341 | * | ||
342 | * i_ino (kernel inode) st_ino (userspace) | ||
343 | * i386 32 32 | ||
344 | * x86_64+ino32 64 32 | ||
345 | * x86_64 64 64 | ||
437 | */ | 346 | */ |
438 | struct ceph_dentry_info { | 347 | static inline u32 ceph_ino_to_ino32(ino_t ino) |
439 | struct ceph_mds_session *lease_session; | ||
440 | u32 lease_gen, lease_shared_gen; | ||
441 | u32 lease_seq; | ||
442 | unsigned long lease_renew_after, lease_renew_from; | ||
443 | struct list_head lru; | ||
444 | struct dentry *dentry; | ||
445 | u64 time; | ||
446 | u64 offset; | ||
447 | }; | ||
448 | |||
449 | static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry) | ||
450 | { | 348 | { |
451 | return (struct ceph_dentry_info *)dentry->d_fsdata; | 349 | ino ^= ino >> (sizeof(ino) * 8 - 32); |
452 | } | 350 | if (!ino) |
453 | 351 | ino = 1; | |
454 | static inline loff_t ceph_make_fpos(unsigned frag, unsigned off) | 352 | return ino; |
455 | { | ||
456 | return ((loff_t)frag << 32) | (loff_t)off; | ||
457 | } | 353 | } |
458 | 354 | ||
459 | /* | 355 | /* |
460 | * ino_t is <64 bits on many architectures, blech. | 356 | * kernel i_ino value |
461 | * | ||
462 | * don't include snap in ino hash, at least for now. | ||
463 | */ | 357 | */ |
464 | static inline ino_t ceph_vino_to_ino(struct ceph_vino vino) | 358 | static inline ino_t ceph_vino_to_ino(struct ceph_vino vino) |
465 | { | 359 | { |
466 | ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */ | 360 | ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */ |
467 | #if BITS_PER_LONG == 32 | 361 | #if BITS_PER_LONG == 32 |
468 | ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8; | 362 | ino = ceph_ino_to_ino32(ino); |
469 | if (!ino) | ||
470 | ino = 1; | ||
471 | #endif | 363 | #endif |
472 | return ino; | 364 | return ino; |
473 | } | 365 | } |
474 | 366 | ||
475 | static inline int ceph_set_ino_cb(struct inode *inode, void *data) | 367 | /* |
368 | * user-visible ino (stat, filldir) | ||
369 | */ | ||
370 | #if BITS_PER_LONG == 32 | ||
371 | static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino) | ||
476 | { | 372 | { |
477 | ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; | 373 | return ino; |
478 | inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data); | ||
479 | return 0; | ||
480 | } | 374 | } |
481 | 375 | #else | |
482 | static inline struct ceph_vino ceph_vino(struct inode *inode) | 376 | static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino) |
483 | { | 377 | { |
484 | return ceph_inode(inode)->i_vino; | 378 | if (ceph_test_mount_opt(ceph_sb_to_client(sb), INO32)) |
379 | ino = ceph_ino_to_ino32(ino); | ||
380 | return ino; | ||
485 | } | 381 | } |
382 | #endif | ||
383 | |||
486 | 384 | ||
487 | /* for printf-style formatting */ | 385 | /* for printf-style formatting */ |
488 | #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap | 386 | #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap |
@@ -513,6 +411,66 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, | |||
513 | 411 | ||
514 | 412 | ||
515 | /* | 413 | /* |
414 | * Ceph inode. | ||
415 | */ | ||
416 | #define CEPH_I_COMPLETE 1 /* we have complete directory cached */ | ||
417 | #define CEPH_I_NODELAY 4 /* do not delay cap release */ | ||
418 | #define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ | ||
419 | #define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ | ||
420 | |||
421 | static inline void ceph_i_clear(struct inode *inode, unsigned mask) | ||
422 | { | ||
423 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
424 | |||
425 | spin_lock(&inode->i_lock); | ||
426 | ci->i_ceph_flags &= ~mask; | ||
427 | spin_unlock(&inode->i_lock); | ||
428 | } | ||
429 | |||
430 | static inline void ceph_i_set(struct inode *inode, unsigned mask) | ||
431 | { | ||
432 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
433 | |||
434 | spin_lock(&inode->i_lock); | ||
435 | ci->i_ceph_flags |= mask; | ||
436 | spin_unlock(&inode->i_lock); | ||
437 | } | ||
438 | |||
439 | static inline bool ceph_i_test(struct inode *inode, unsigned mask) | ||
440 | { | ||
441 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
442 | bool r; | ||
443 | |||
444 | spin_lock(&inode->i_lock); | ||
445 | r = (ci->i_ceph_flags & mask) == mask; | ||
446 | spin_unlock(&inode->i_lock); | ||
447 | return r; | ||
448 | } | ||
449 | |||
450 | |||
451 | /* find a specific frag @f */ | ||
452 | extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, | ||
453 | u32 f); | ||
454 | |||
455 | /* | ||
456 | * choose fragment for value @v. copy frag content to pfrag, if leaf | ||
457 | * exists | ||
458 | */ | ||
459 | extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, | ||
460 | struct ceph_inode_frag *pfrag, | ||
461 | int *found); | ||
462 | |||
463 | static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry) | ||
464 | { | ||
465 | return (struct ceph_dentry_info *)dentry->d_fsdata; | ||
466 | } | ||
467 | |||
468 | static inline loff_t ceph_make_fpos(unsigned frag, unsigned off) | ||
469 | { | ||
470 | return ((loff_t)frag << 32) | (loff_t)off; | ||
471 | } | ||
472 | |||
473 | /* | ||
516 | * caps helpers | 474 | * caps helpers |
517 | */ | 475 | */ |
518 | static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci) | 476 | static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci) |
@@ -548,7 +506,7 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci) | |||
548 | { | 506 | { |
549 | return ci->i_dirty_caps | ci->i_flushing_caps; | 507 | return ci->i_dirty_caps | ci->i_flushing_caps; |
550 | } | 508 | } |
551 | extern void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask); | 509 | extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask); |
552 | 510 | ||
553 | extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask); | 511 | extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask); |
554 | extern int __ceph_caps_used(struct ceph_inode_info *ci); | 512 | extern int __ceph_caps_used(struct ceph_inode_info *ci); |
@@ -576,19 +534,10 @@ extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, | |||
576 | struct ceph_cap_reservation *ctx, int need); | 534 | struct ceph_cap_reservation *ctx, int need); |
577 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | 535 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, |
578 | struct ceph_cap_reservation *ctx); | 536 | struct ceph_cap_reservation *ctx); |
579 | extern void ceph_reservation_status(struct ceph_client *client, | 537 | extern void ceph_reservation_status(struct ceph_fs_client *client, |
580 | int *total, int *avail, int *used, | 538 | int *total, int *avail, int *used, |
581 | int *reserved, int *min); | 539 | int *reserved, int *min); |
582 | 540 | ||
583 | static inline struct ceph_client *ceph_inode_to_client(struct inode *inode) | ||
584 | { | ||
585 | return (struct ceph_client *)inode->i_sb->s_fs_info; | ||
586 | } | ||
587 | |||
588 | static inline struct ceph_client *ceph_sb_to_client(struct super_block *sb) | ||
589 | { | ||
590 | return (struct ceph_client *)sb->s_fs_info; | ||
591 | } | ||
592 | 541 | ||
593 | 542 | ||
594 | /* | 543 | /* |
@@ -617,51 +566,6 @@ struct ceph_file_info { | |||
617 | 566 | ||
618 | 567 | ||
619 | /* | 568 | /* |
620 | * snapshots | ||
621 | */ | ||
622 | |||
623 | /* | ||
624 | * A "snap context" is the set of existing snapshots when we | ||
625 | * write data. It is used by the OSD to guide its COW behavior. | ||
626 | * | ||
627 | * The ceph_snap_context is refcounted, and attached to each dirty | ||
628 | * page, indicating which context the dirty data belonged when it was | ||
629 | * dirtied. | ||
630 | */ | ||
631 | struct ceph_snap_context { | ||
632 | atomic_t nref; | ||
633 | u64 seq; | ||
634 | int num_snaps; | ||
635 | u64 snaps[]; | ||
636 | }; | ||
637 | |||
638 | static inline struct ceph_snap_context * | ||
639 | ceph_get_snap_context(struct ceph_snap_context *sc) | ||
640 | { | ||
641 | /* | ||
642 | printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
643 | atomic_read(&sc->nref)+1); | ||
644 | */ | ||
645 | if (sc) | ||
646 | atomic_inc(&sc->nref); | ||
647 | return sc; | ||
648 | } | ||
649 | |||
650 | static inline void ceph_put_snap_context(struct ceph_snap_context *sc) | ||
651 | { | ||
652 | if (!sc) | ||
653 | return; | ||
654 | /* | ||
655 | printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
656 | atomic_read(&sc->nref)-1); | ||
657 | */ | ||
658 | if (atomic_dec_and_test(&sc->nref)) { | ||
659 | /*printk(" deleting snap_context %p\n", sc);*/ | ||
660 | kfree(sc); | ||
661 | } | ||
662 | } | ||
663 | |||
664 | /* | ||
665 | * A "snap realm" describes a subset of the file hierarchy sharing | 569 | * A "snap realm" describes a subset of the file hierarchy sharing |
666 | * the same set of snapshots that apply to it. The realms themselves | 570 | * the same set of snapshots that apply to it. The realms themselves |
667 | * are organized into a hierarchy, such that children inherit (some of) | 571 | * are organized into a hierarchy, such that children inherit (some of) |
@@ -699,16 +603,33 @@ struct ceph_snap_realm { | |||
699 | spinlock_t inodes_with_caps_lock; | 603 | spinlock_t inodes_with_caps_lock; |
700 | }; | 604 | }; |
701 | 605 | ||
702 | 606 | static inline int default_congestion_kb(void) | |
703 | |||
704 | /* | ||
705 | * calculate the number of pages a given length and offset map onto, | ||
706 | * if we align the data. | ||
707 | */ | ||
708 | static inline int calc_pages_for(u64 off, u64 len) | ||
709 | { | 607 | { |
710 | return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - | 608 | int congestion_kb; |
711 | (off >> PAGE_CACHE_SHIFT); | 609 | |
610 | /* | ||
611 | * Copied from NFS | ||
612 | * | ||
613 | * congestion size, scale with available memory. | ||
614 | * | ||
615 | * 64MB: 8192k | ||
616 | * 128MB: 11585k | ||
617 | * 256MB: 16384k | ||
618 | * 512MB: 23170k | ||
619 | * 1GB: 32768k | ||
620 | * 2GB: 46340k | ||
621 | * 4GB: 65536k | ||
622 | * 8GB: 92681k | ||
623 | * 16GB: 131072k | ||
624 | * | ||
625 | * This allows larger machines to have larger/more transfers. | ||
626 | * Limit the default to 256M | ||
627 | */ | ||
628 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
629 | if (congestion_kb > 256*1024) | ||
630 | congestion_kb = 256*1024; | ||
631 | |||
632 | return congestion_kb; | ||
712 | } | 633 | } |
713 | 634 | ||
714 | 635 | ||
@@ -741,16 +662,6 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci) | |||
741 | ci_item)->writing; | 662 | ci_item)->writing; |
742 | } | 663 | } |
743 | 664 | ||
744 | |||
745 | /* super.c */ | ||
746 | extern struct kmem_cache *ceph_inode_cachep; | ||
747 | extern struct kmem_cache *ceph_cap_cachep; | ||
748 | extern struct kmem_cache *ceph_dentry_cachep; | ||
749 | extern struct kmem_cache *ceph_file_cachep; | ||
750 | |||
751 | extern const char *ceph_msg_type_name(int type); | ||
752 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); | ||
753 | |||
754 | /* inode.c */ | 665 | /* inode.c */ |
755 | extern const struct inode_operations ceph_file_iops; | 666 | extern const struct inode_operations ceph_file_iops; |
756 | 667 | ||
@@ -781,7 +692,7 @@ extern void ceph_queue_invalidate(struct inode *inode); | |||
781 | extern void ceph_queue_writeback(struct inode *inode); | 692 | extern void ceph_queue_writeback(struct inode *inode); |
782 | 693 | ||
783 | extern int ceph_do_getattr(struct inode *inode, int mask); | 694 | extern int ceph_do_getattr(struct inode *inode, int mask); |
784 | extern int ceph_permission(struct inode *inode, int mask); | 695 | extern int ceph_permission(struct inode *inode, int mask, unsigned int flags); |
785 | extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); | 696 | extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); |
786 | extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, | 697 | extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, |
787 | struct kstat *stat); | 698 | struct kstat *stat); |
@@ -857,12 +768,18 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); | |||
857 | /* file.c */ | 768 | /* file.c */ |
858 | extern const struct file_operations ceph_file_fops; | 769 | extern const struct file_operations ceph_file_fops; |
859 | extern const struct address_space_operations ceph_aops; | 770 | extern const struct address_space_operations ceph_aops; |
771 | extern int ceph_copy_to_page_vector(struct page **pages, | ||
772 | const char *data, | ||
773 | loff_t off, size_t len); | ||
774 | extern int ceph_copy_from_page_vector(struct page **pages, | ||
775 | char *data, | ||
776 | loff_t off, size_t len); | ||
777 | extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); | ||
860 | extern int ceph_open(struct inode *inode, struct file *file); | 778 | extern int ceph_open(struct inode *inode, struct file *file); |
861 | extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | 779 | extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, |
862 | struct nameidata *nd, int mode, | 780 | struct nameidata *nd, int mode, |
863 | int locked_dir); | 781 | int locked_dir); |
864 | extern int ceph_release(struct inode *inode, struct file *filp); | 782 | extern int ceph_release(struct inode *inode, struct file *filp); |
865 | extern void ceph_release_page_vector(struct page **pages, int num_pages); | ||
866 | 783 | ||
867 | /* dir.c */ | 784 | /* dir.c */ |
868 | extern const struct file_operations ceph_dir_fops; | 785 | extern const struct file_operations ceph_dir_fops; |
@@ -878,6 +795,7 @@ extern void ceph_dentry_lru_add(struct dentry *dn); | |||
878 | extern void ceph_dentry_lru_touch(struct dentry *dn); | 795 | extern void ceph_dentry_lru_touch(struct dentry *dn); |
879 | extern void ceph_dentry_lru_del(struct dentry *dn); | 796 | extern void ceph_dentry_lru_del(struct dentry *dn); |
880 | extern void ceph_invalidate_dentry_lease(struct dentry *dentry); | 797 | extern void ceph_invalidate_dentry_lease(struct dentry *dentry); |
798 | extern unsigned ceph_dentry_hash(struct dentry *dn); | ||
881 | 799 | ||
882 | /* | 800 | /* |
883 | * our d_ops vary depending on whether the inode is live, | 801 | * our d_ops vary depending on whether the inode is live, |
@@ -892,12 +810,6 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | |||
892 | /* export.c */ | 810 | /* export.c */ |
893 | extern const struct export_operations ceph_export_ops; | 811 | extern const struct export_operations ceph_export_ops; |
894 | 812 | ||
895 | /* debugfs.c */ | ||
896 | extern int ceph_debugfs_init(void); | ||
897 | extern void ceph_debugfs_cleanup(void); | ||
898 | extern int ceph_debugfs_client_init(struct ceph_client *client); | ||
899 | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); | ||
900 | |||
901 | /* locks.c */ | 813 | /* locks.c */ |
902 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); | 814 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); |
903 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); | 815 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); |
@@ -914,4 +826,8 @@ static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) | |||
914 | return NULL; | 826 | return NULL; |
915 | } | 827 | } |
916 | 828 | ||
829 | /* debugfs.c */ | ||
830 | extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); | ||
831 | extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); | ||
832 | |||
917 | #endif /* _FS_CEPH_SUPER_H */ | 833 | #endif /* _FS_CEPH_SUPER_H */ |