diff options
author | Sage Weil <sage@newdream.net> | 2009-10-06 14:31:10 -0400 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2009-10-06 14:31:10 -0400 |
commit | f24e9980eb860d8600cbe5ef3d2fd9295320d229 (patch) | |
tree | 10f43450ad2cd4d799dd02d33c02d4ed8bef39d6 /fs/ceph/osdmap.h | |
parent | 2f2dc053404febedc9c273452d9d518fb31fde72 (diff) |
ceph: OSD client
The OSD client is responsible for reading and writing data from/to the
object storage pool. This includes determining where objects are
stored in the cluster, and ensuring that requests are retried or
redirected in the event of a node failure or data migration.
If an OSD does not respond before a timeout expires, keepalive
messages are sent across the lossless, ordered communications channel
to ensure that any break in the TCP connection is discovered. If the session
does reset, a reconnection is attempted and affected requests are
resent (by the message transport layer).
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph/osdmap.h')
-rw-r--r-- | fs/ceph/osdmap.h | 123 |
1 files changed, 123 insertions, 0 deletions
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h new file mode 100644 index 000000000000..07127c6fb134 --- /dev/null +++ b/fs/ceph/osdmap.h | |||
@@ -0,0 +1,123 @@ | |||
1 | #ifndef _FS_CEPH_OSDMAP_H | ||
2 | #define _FS_CEPH_OSDMAP_H | ||
3 | |||
4 | #include <linux/rbtree.h> | ||
5 | #include "types.h" | ||
6 | #include "ceph_fs.h" | ||
7 | #include "crush/crush.h" | ||
8 | |||
9 | /* | ||
10 | * The osd map describes the current membership of the osd cluster and | ||
11 | * specifies the mapping of objects to placement groups and placement | ||
12 | * groups to (sets of) osds. That is, it completely specifies the | ||
13 | * (desired) distribution of all data objects in the system at some | ||
14 | * point in time. | ||
15 | * | ||
16 | * Each map version is identified by an epoch, which increases monotonically. | ||
17 | * | ||
18 | * The map can be updated either via an incremental map (diff) describing | ||
19 | * the change between two successive epochs, or as a fully encoded map. | ||
20 | */ | ||
21 | struct ceph_pg_pool_info { | ||
22 | struct ceph_pg_pool v; | ||
23 | int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; | ||
24 | }; | ||
25 | |||
26 | struct ceph_pg_mapping { | ||
27 | struct rb_node node; | ||
28 | u64 pgid; | ||
29 | int len; | ||
30 | int osds[]; | ||
31 | }; | ||
32 | |||
33 | struct ceph_osdmap { | ||
34 | struct ceph_fsid fsid; | ||
35 | u32 epoch; | ||
36 | u32 mkfs_epoch; | ||
37 | struct ceph_timespec created, modified; | ||
38 | |||
39 | u32 flags; /* CEPH_OSDMAP_* */ | ||
40 | |||
41 | u32 max_osd; /* size of osd_state, _offload, _addr arrays */ | ||
42 | u8 *osd_state; /* CEPH_OSD_* */ | ||
43 | u32 *osd_weight; /* 0 = failed, 0x10000 = 100% normal */ | ||
44 | struct ceph_entity_addr *osd_addr; | ||
45 | |||
46 | struct rb_root pg_temp; | ||
47 | |||
48 | u32 num_pools; | ||
49 | struct ceph_pg_pool_info *pg_pool; | ||
50 | |||
51 | /* the CRUSH map specifies the mapping of placement groups to | ||
52 | * the list of osds that store+replicate them. */ | ||
53 | struct crush_map *crush; | ||
54 | }; | ||
55 | |||
56 | /* | ||
57 | * file layout helpers | ||
58 | */ | ||
/*
 * Field accessors for a file layout.  Each one takes the layout struct
 * itself (not a pointer), passes a single 32-bit field through
 * le32_to_cpu() and yields the result as an __s32.  The argument is
 * evaluated exactly once.
 */
#define ceph_file_layout_su(l) \
	((__s32)le32_to_cpu((l).fl_stripe_unit))
#define ceph_file_layout_stripe_count(l) \
	((__s32)le32_to_cpu((l).fl_stripe_count))
#define ceph_file_layout_object_size(l) \
	((__s32)le32_to_cpu((l).fl_object_size))
#define ceph_file_layout_cas_hash(l) \
	((__s32)le32_to_cpu((l).fl_cas_hash))
#define ceph_file_layout_object_su(l) \
	((__s32)le32_to_cpu((l).fl_object_stripe_unit))
#define ceph_file_layout_pg_preferred(l) \
	((__s32)le32_to_cpu((l).fl_pg_preferred))
#define ceph_file_layout_pg_pool(l) \
	((__s32)le32_to_cpu((l).fl_pg_pool))
70 | |||
71 | static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l) | ||
72 | { | ||
73 | return le32_to_cpu(l->fl_stripe_unit) * | ||
74 | le32_to_cpu(l->fl_stripe_count); | ||
75 | } | ||
76 | |||
77 | /* "period" == bytes before i start on a new set of objects */ | ||
78 | static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l) | ||
79 | { | ||
80 | return le32_to_cpu(l->fl_object_size) * | ||
81 | le32_to_cpu(l->fl_stripe_count); | ||
82 | } | ||
83 | |||
84 | |||
85 | static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd) | ||
86 | { | ||
87 | return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP); | ||
88 | } | ||
89 | |||
90 | static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag) | ||
91 | { | ||
92 | return map && (map->flags & flag); | ||
93 | } | ||
94 | |||
/*
 * NOTE(review): judging by the name and signature, this renders the
 * CEPH_OSD_* bits of @state as text into @str (capacity @len) and
 * returns a string pointer -- confirm against the definition.
 */
extern char *ceph_osdmap_state_str(char *str, int len,
				   int state);
96 | |||
97 | static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map, | ||
98 | int osd) | ||
99 | { | ||
100 | if (osd >= map->max_osd) | ||
101 | return NULL; | ||
102 | return &map->osd_addr[osd]; | ||
103 | } | ||
104 | |||
/*
 * osd map construction, update and teardown.
 *
 * NOTE(review): judging by the names and signatures, osdmap_decode()
 * builds a map from the encoded bytes between *p and end, and
 * osdmap_apply_incremental() applies an incremental to @map.  Error
 * reporting and ownership of the returned pointers are not visible in
 * this header -- confirm against the definitions.
 */
extern struct ceph_osdmap *osdmap_decode(void **p, void *end);
extern struct ceph_osdmap *
osdmap_apply_incremental(void **p, void *end, struct ceph_osdmap *map,
			 struct ceph_messenger *msgr);
extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
110 | |||
111 | /* calculate mapping of a file extent to an object */ | ||
112 | extern void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, | ||
113 | u64 off, u64 *plen, | ||
114 | u64 *bno, u64 *oxoff, u64 *oxlen); | ||
115 | |||
116 | /* calculate mapping of object to a placement group */ | ||
117 | extern int ceph_calc_object_layout(struct ceph_object_layout *ol, | ||
118 | const char *oid, | ||
119 | struct ceph_file_layout *fl, | ||
120 | struct ceph_osdmap *osdmap); | ||
121 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, union ceph_pg pgid); | ||
122 | |||
123 | #endif | ||