Diffstat (limited to 'include')
-rw-r--r--  include/drm/ttm/ttm_bo_api.h                   |    4
-rw-r--r--  include/linux/Kbuild                           |    1
-rw-r--r--  include/linux/ceph/auth.h                      |   92
-rw-r--r--  include/linux/ceph/buffer.h                    |   39
-rw-r--r--  include/linux/ceph/ceph_debug.h                |   38
-rw-r--r--  include/linux/ceph/ceph_frag.h                 |  109
-rw-r--r--  include/linux/ceph/ceph_fs.h                   |  729
-rw-r--r--  include/linux/ceph/ceph_hash.h                 |   13
-rw-r--r--  include/linux/ceph/debugfs.h                   |   33
-rw-r--r--  include/linux/ceph/decode.h                    |  201
-rw-r--r--  include/linux/ceph/libceph.h                   |  249
-rw-r--r--  include/linux/ceph/mdsmap.h                    |   62
-rw-r--r--  include/linux/ceph/messenger.h                 |  261
-rw-r--r--  include/linux/ceph/mon_client.h                |  122
-rw-r--r--  include/linux/ceph/msgpool.h                   |   25
-rw-r--r--  include/linux/ceph/msgr.h                      |  175
-rw-r--r--  include/linux/ceph/osd_client.h                |  234
-rw-r--r--  include/linux/ceph/osdmap.h                    |  130
-rw-r--r--  include/linux/ceph/pagelist.h                  |   75
-rw-r--r--  include/linux/ceph/rados.h                     |  405
-rw-r--r--  include/linux/ceph/types.h                     |   29
-rw-r--r--  include/linux/coredump.h                       |   34
-rw-r--r--  include/linux/crush/crush.h                    |  180
-rw-r--r--  include/linux/crush/hash.h                     |   17
-rw-r--r--  include/linux/crush/mapper.h                   |   20
-rw-r--r--  include/linux/debug_locks.h                    |    5
-rw-r--r--  include/linux/elevator.h                       |    1
-rw-r--r--  include/linux/kernel.h                         |   13
-rw-r--r--  include/linux/lockdep.h                        |   13
-rw-r--r--  include/linux/netfilter/nfnetlink_conntrack.h  |   10
-rw-r--r--  include/linux/netfilter/xt_SECMARK.h           |   12
-rw-r--r--  include/linux/security.h                       |   45
-rw-r--r--  include/linux/selinux.h                        |   63
-rw-r--r--  include/linux/types.h                          |   15
-rw-r--r--  include/media/videobuf-dma-sg.h                |    1
-rw-r--r--  include/net/bluetooth/bluetooth.h              |   18
36 files changed, 3348 insertions, 125 deletions
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 267a86c74e2..2040e6c4f17 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -246,9 +246,11 @@ struct ttm_buffer_object {
 
 	atomic_t reserved;
 
-
 	/**
 	 * Members protected by the bo::lock
+	 * In addition, setting sync_obj to anything else
+	 * than NULL requires bo::reserved to be held. This allows for
+	 * checking NULL while reserved but not holding bo::lock.
 	 */
 
 	void *sync_obj_arg;
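The new comment encodes a locking rule: sync_obj may only be made non-NULL while bo::reserved is held, so a holder of the reservation can test it for NULL without taking bo::lock. A minimal sketch of the pattern this enables (the helper name and calling context are assumptions, not part of the patch):

/* sketch only: caller is assumed to hold bo->reserved */
static bool bo_fence_is_clear(struct ttm_buffer_object *bo)
{
	/*
	 * Safe without bo::lock: nobody may set sync_obj non-NULL
	 * behind our back while we hold the reservation.
	 */
	if (bo->sync_obj == NULL)
		return true;

	/* inspecting a live fence still requires bo::lock */
	return false;
}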
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 626b629429f..4e8ea8c8ec1 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -118,7 +118,6 @@ header-y += eventpoll.h
 header-y += ext2_fs.h
 header-y += fadvise.h
 header-y += falloc.h
-header-y += fanotify.h
 header-y += fb.h
 header-y += fcntl.h
 header-y += fd.h
diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
new file mode 100644
index 00000000000..7fff521d7eb
--- /dev/null
+++ b/include/linux/ceph/auth.h
@@ -0,0 +1,92 @@
1#ifndef _FS_CEPH_AUTH_H
2#define _FS_CEPH_AUTH_H
3
4#include <linux/ceph/types.h>
5#include <linux/ceph/buffer.h>
6
7/*
 8 * Abstract interface for communicating with the authentication module.
9 * There is some handshake that takes place between us and the monitor
10 * to acquire the necessary keys. These are used to generate an
11 * 'authorizer' that we use when connecting to a service (mds, osd).
12 */
13
14struct ceph_auth_client;
15struct ceph_authorizer;
16
17struct ceph_auth_client_ops {
18 const char *name;
19
20 /*
21 * true if we are authenticated and can connect to
22 * services.
23 */
24 int (*is_authenticated)(struct ceph_auth_client *ac);
25
26 /*
27 * true if we should (re)authenticate, e.g., when our tickets
28 * are getting old and crusty.
29 */
30 int (*should_authenticate)(struct ceph_auth_client *ac);
31
32 /*
33 * build requests and process replies during monitor
34 * handshake. if handle_reply returns -EAGAIN, we build
35 * another request.
36 */
37 int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end);
38 int (*handle_reply)(struct ceph_auth_client *ac, int result,
39 void *buf, void *end);
40
41 /*
42 * Create authorizer for connecting to a service, and verify
43 * the response to authenticate the service.
44 */
45 int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type,
46 struct ceph_authorizer **a,
47 void **buf, size_t *len,
48 void **reply_buf, size_t *reply_len);
49 int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
50 struct ceph_authorizer *a, size_t len);
51 void (*destroy_authorizer)(struct ceph_auth_client *ac,
52 struct ceph_authorizer *a);
53 void (*invalidate_authorizer)(struct ceph_auth_client *ac,
54 int peer_type);
55
56 /* reset when we (re)connect to a monitor */
57 void (*reset)(struct ceph_auth_client *ac);
58
59 void (*destroy)(struct ceph_auth_client *ac);
60};
61
62struct ceph_auth_client {
63 u32 protocol; /* CEPH_AUTH_* */
64 void *private; /* for use by protocol implementation */
65 const struct ceph_auth_client_ops *ops; /* null iff protocol==0 */
66
67 bool negotiating; /* true if negotiating protocol */
68 const char *name; /* entity name */
69 u64 global_id; /* our unique id in system */
70 const char *secret; /* our secret key */
71 unsigned want_keys; /* which services we want */
72};
73
74extern struct ceph_auth_client *ceph_auth_init(const char *name,
75 const char *secret);
76extern void ceph_auth_destroy(struct ceph_auth_client *ac);
77
78extern void ceph_auth_reset(struct ceph_auth_client *ac);
79
80extern int ceph_auth_build_hello(struct ceph_auth_client *ac,
81 void *buf, size_t len);
82extern int ceph_handle_auth_reply(struct ceph_auth_client *ac,
83 void *buf, size_t len,
84 void *reply_buf, size_t reply_len);
85extern int ceph_entity_name_encode(const char *name, void **p, void *end);
86
87extern int ceph_build_auth(struct ceph_auth_client *ac,
88 void *msg_buf, size_t msg_len);
89
90extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac);
91
92#endif
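A rough sketch of the monitor handshake loop that the ops comment describes, written directly against the ops table. The transport calls (send_to_mon()/recv_from_mon()) and the result value passed to handle_reply are assumptions made only for illustration:

static int example_auth_handshake(struct ceph_auth_client *ac,
				  void *buf, void *end)
{
	int ret;

	do {
		int len = ac->ops->build_request(ac, buf, end);

		if (len < 0)
			return len;
		send_to_mon(buf, len);			/* assumed transport */
		len = recv_from_mon(buf, end - buf);	/* assumed transport */

		/* -EAGAIN from handle_reply means "build another request" */
		ret = ac->ops->handle_reply(ac, 0 /* mon result, assumed */,
					    buf, buf + len);
	} while (ret == -EAGAIN);

	return ret;	/* 0 on success, -errno on failure */
}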
diff --git a/include/linux/ceph/buffer.h b/include/linux/ceph/buffer.h
new file mode 100644
index 00000000000..58d19014068
--- /dev/null
+++ b/include/linux/ceph/buffer.h
@@ -0,0 +1,39 @@
1#ifndef __FS_CEPH_BUFFER_H
2#define __FS_CEPH_BUFFER_H
3
4#include <linux/kref.h>
5#include <linux/mm.h>
6#include <linux/vmalloc.h>
7#include <linux/types.h>
8#include <linux/uio.h>
9
10/*
11 * a simple reference counted buffer.
12 *
13 * use kmalloc for small sizes (<= one page), vmalloc for larger
14 * sizes.
15 */
16struct ceph_buffer {
17 struct kref kref;
18 struct kvec vec;
19 size_t alloc_len;
20 bool is_vmalloc;
21};
22
23extern struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp);
24extern void ceph_buffer_release(struct kref *kref);
25
26static inline struct ceph_buffer *ceph_buffer_get(struct ceph_buffer *b)
27{
28 kref_get(&b->kref);
29 return b;
30}
31
32static inline void ceph_buffer_put(struct ceph_buffer *b)
33{
34 kref_put(&b->kref, ceph_buffer_release);
35}
36
37extern int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end);
38
39#endif
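A minimal sketch of an allocator that follows the size policy stated in the comment above (kmalloc up to a page, vmalloc beyond). The real ceph_buffer_new() may differ in details, e.g. by trying kmalloc first and falling back to vmalloc:

static struct ceph_buffer *example_buffer_new(size_t len, gfp_t gfp)
{
	struct ceph_buffer *b = kmalloc(sizeof(*b), gfp);

	if (!b)
		return NULL;
	b->is_vmalloc = len > PAGE_SIZE;
	b->vec.iov_base = b->is_vmalloc ? vmalloc(len) : kmalloc(len, gfp);
	if (!b->vec.iov_base) {
		kfree(b);
		return NULL;
	}
	kref_init(&b->kref);
	b->vec.iov_len = len;
	b->alloc_len = len;
	return b;
}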
diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h
new file mode 100644
index 00000000000..aa2e19182d9
--- /dev/null
+++ b/include/linux/ceph/ceph_debug.h
@@ -0,0 +1,38 @@
1#ifndef _FS_CEPH_DEBUG_H
2#define _FS_CEPH_DEBUG_H
3
4#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
5
6#ifdef CONFIG_CEPH_LIB_PRETTYDEBUG
7
8/*
9 * wrap pr_debug to include a filename:lineno prefix on each line.
10 * this incurs some overhead (kernel size and execution time) due to
11 * the extra function call at each call site.
12 */
13
14# if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
15extern const char *ceph_file_part(const char *s, int len);
16# define dout(fmt, ...) \
17 pr_debug("%.*s %12.12s:%-4d : " fmt, \
18 8 - (int)sizeof(KBUILD_MODNAME), " ", \
19 ceph_file_part(__FILE__, sizeof(__FILE__)), \
20 __LINE__, ##__VA_ARGS__)
21# else
22/* faux printk call just to see any compiler warnings. */
23# define dout(fmt, ...) do { \
24 if (0) \
25 printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
26 } while (0)
27# endif
28
29#else
30
31/*
32 * or, just wrap pr_debug
33 */
34# define dout(fmt, ...) pr_debug(" " fmt, ##__VA_ARGS__)
35
36#endif
37
38#endif
diff --git a/include/linux/ceph/ceph_frag.h b/include/linux/ceph/ceph_frag.h
new file mode 100644
index 00000000000..5babb8e9535
--- /dev/null
+++ b/include/linux/ceph/ceph_frag.h
@@ -0,0 +1,109 @@
1#ifndef FS_CEPH_FRAG_H
2#define FS_CEPH_FRAG_H
3
4/*
5 * "Frags" are a way to describe a subset of a 32-bit number space,
6 * using a mask and a value to match against that mask. Any given frag
7 * (subset of the number space) can be partitioned into 2^n sub-frags.
8 *
9 * Frags are encoded into a 32-bit word:
10 * 8 upper bits = "bits"
11 * 24 lower bits = "value"
12 * (We could go to 5+27 bits, but who cares.)
13 *
14 * We use the _most_ significant bits of the 24 bit value. This makes
15 * values logically sort.
16 *
17 * Unfortunately, because the "bits" field is still in the high bits, we
18 * can't sort encoded frags numerically. However, it does allow you
19 * to feed encoded frags as values into frag_contains_value.
20 */
21static inline __u32 ceph_frag_make(__u32 b, __u32 v)
22{
23 return (b << 24) |
24 (v & (0xffffffu << (24-b)) & 0xffffffu);
25}
26static inline __u32 ceph_frag_bits(__u32 f)
27{
28 return f >> 24;
29}
30static inline __u32 ceph_frag_value(__u32 f)
31{
32 return f & 0xffffffu;
33}
34static inline __u32 ceph_frag_mask(__u32 f)
35{
36 return (0xffffffu << (24-ceph_frag_bits(f))) & 0xffffffu;
37}
38static inline __u32 ceph_frag_mask_shift(__u32 f)
39{
40 return 24 - ceph_frag_bits(f);
41}
42
43static inline int ceph_frag_contains_value(__u32 f, __u32 v)
44{
45 return (v & ceph_frag_mask(f)) == ceph_frag_value(f);
46}
47static inline int ceph_frag_contains_frag(__u32 f, __u32 sub)
48{
49 /* is sub as specific as us, and contained by us? */
50 return ceph_frag_bits(sub) >= ceph_frag_bits(f) &&
51 (ceph_frag_value(sub) & ceph_frag_mask(f)) == ceph_frag_value(f);
52}
53
54static inline __u32 ceph_frag_parent(__u32 f)
55{
56 return ceph_frag_make(ceph_frag_bits(f) - 1,
57 ceph_frag_value(f) & (ceph_frag_mask(f) << 1));
58}
59static inline int ceph_frag_is_left_child(__u32 f)
60{
61 return ceph_frag_bits(f) > 0 &&
62 (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 0;
63}
64static inline int ceph_frag_is_right_child(__u32 f)
65{
66 return ceph_frag_bits(f) > 0 &&
67 (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 1;
68}
69static inline __u32 ceph_frag_sibling(__u32 f)
70{
71 return ceph_frag_make(ceph_frag_bits(f),
72 ceph_frag_value(f) ^ (0x1000000 >> ceph_frag_bits(f)));
73}
74static inline __u32 ceph_frag_left_child(__u32 f)
75{
76 return ceph_frag_make(ceph_frag_bits(f)+1, ceph_frag_value(f));
77}
78static inline __u32 ceph_frag_right_child(__u32 f)
79{
80 return ceph_frag_make(ceph_frag_bits(f)+1,
81 ceph_frag_value(f) | (0x1000000 >> (1+ceph_frag_bits(f))));
82}
83static inline __u32 ceph_frag_make_child(__u32 f, int by, int i)
84{
85 int newbits = ceph_frag_bits(f) + by;
86 return ceph_frag_make(newbits,
87 ceph_frag_value(f) | (i << (24 - newbits)));
88}
89static inline int ceph_frag_is_leftmost(__u32 f)
90{
91 return ceph_frag_value(f) == 0;
92}
93static inline int ceph_frag_is_rightmost(__u32 f)
94{
95 return ceph_frag_value(f) == ceph_frag_mask(f);
96}
97static inline __u32 ceph_frag_next(__u32 f)
98{
99 return ceph_frag_make(ceph_frag_bits(f),
100 ceph_frag_value(f) + (0x1000000 >> ceph_frag_bits(f)));
101}
102
103/*
104 * comparator to sort frags logically, as when traversing the
105 * number space in ascending order...
106 */
107int ceph_frag_compare(__u32 a, __u32 b);
108
109#endif
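A worked example of the encoding above, with values chosen purely for illustration: a 2-bit frag whose value bits begin with 01 covers one quarter of the 24-bit space.

static void frag_example(void)
{
	__u32 f = ceph_frag_make(2, 0x400000);	/* bits=2, value=01xxxx.. */

	/* mask is 0xc00000, so membership checks only the top two value bits */
	WARN_ON(!ceph_frag_contains_value(f, 0x412345));  /* 01... inside  */
	WARN_ON(ceph_frag_contains_value(f, 0x812345));   /* 10... outside */

	/* children refine by one bit: 010... and 011... */
	WARN_ON(ceph_frag_left_child(f)  != ceph_frag_make(3, 0x400000));
	WARN_ON(ceph_frag_right_child(f) != ceph_frag_make(3, 0x600000));
}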
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
new file mode 100644
index 00000000000..c3c74aef289
--- /dev/null
+++ b/include/linux/ceph/ceph_fs.h
@@ -0,0 +1,729 @@
1/*
2 * ceph_fs.h - Ceph constants and data types to share between kernel and
3 * user space.
4 *
5 * Most types in this file are defined as little-endian, and are
6 * primarily intended to describe data structures that pass over the
7 * wire or that are stored on disk.
8 *
9 * LGPL2
10 */
11
12#ifndef CEPH_FS_H
13#define CEPH_FS_H
14
15#include "msgr.h"
16#include "rados.h"
17
18/*
19 * subprotocol versions. when specific messages types or high-level
20 * protocols change, bump the affected components. we keep rev
21 * internal cluster protocols separately from the public,
22 * client-facing protocol.
23 */
24#define CEPH_OSD_PROTOCOL 8 /* cluster internal */
25#define CEPH_MDS_PROTOCOL 12 /* cluster internal */
26#define CEPH_MON_PROTOCOL 5 /* cluster internal */
27#define CEPH_OSDC_PROTOCOL 24 /* server/client */
28#define CEPH_MDSC_PROTOCOL 32 /* server/client */
29#define CEPH_MONC_PROTOCOL 15 /* server/client */
30
31
32#define CEPH_INO_ROOT 1
33#define CEPH_INO_CEPH 2 /* hidden .ceph dir */
34
35/* arbitrary limit on max # of monitors (cluster of 3 is typical) */
36#define CEPH_MAX_MON 31
37
38
39/*
40 * feature bits
41 */
42#define CEPH_FEATURE_UID (1<<0)
43#define CEPH_FEATURE_NOSRCADDR (1<<1)
44#define CEPH_FEATURE_MONCLOCKCHECK (1<<2)
45#define CEPH_FEATURE_FLOCK (1<<3)
46
47
48/*
49 * ceph_file_layout - describe data layout for a file/inode
50 */
51struct ceph_file_layout {
52 /* file -> object mapping */
53 __le32 fl_stripe_unit; /* stripe unit, in bytes. must be multiple
54 of page size. */
55 __le32 fl_stripe_count; /* over this many objects */
56 __le32 fl_object_size; /* until objects are this big, then move to
57 new objects */
58 __le32 fl_cas_hash; /* 0 = none; 1 = sha256 */
59
60 /* pg -> disk layout */
61 __le32 fl_object_stripe_unit; /* for per-object parity, if any */
62
63 /* object -> pg layout */
64 __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */
65 __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */
66} __attribute__ ((packed));
67
68#define CEPH_MIN_STRIPE_UNIT 65536
69
70int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
71
72
73/* crypto algorithms */
74#define CEPH_CRYPTO_NONE 0x0
75#define CEPH_CRYPTO_AES 0x1
76
77#define CEPH_AES_IV "cephsageyudagreg"
78
79/* security/authentication protocols */
80#define CEPH_AUTH_UNKNOWN 0x0
81#define CEPH_AUTH_NONE 0x1
82#define CEPH_AUTH_CEPHX 0x2
83
84#define CEPH_AUTH_UID_DEFAULT ((__u64) -1)
85
86
87/*********************************************
88 * message layer
89 */
90
91/*
92 * message types
93 */
94
95/* misc */
96#define CEPH_MSG_SHUTDOWN 1
97#define CEPH_MSG_PING 2
98
99/* client <-> monitor */
100#define CEPH_MSG_MON_MAP 4
101#define CEPH_MSG_MON_GET_MAP 5
102#define CEPH_MSG_STATFS 13
103#define CEPH_MSG_STATFS_REPLY 14
104#define CEPH_MSG_MON_SUBSCRIBE 15
105#define CEPH_MSG_MON_SUBSCRIBE_ACK 16
106#define CEPH_MSG_AUTH 17
107#define CEPH_MSG_AUTH_REPLY 18
108
109/* client <-> mds */
110#define CEPH_MSG_MDS_MAP 21
111
112#define CEPH_MSG_CLIENT_SESSION 22
113#define CEPH_MSG_CLIENT_RECONNECT 23
114
115#define CEPH_MSG_CLIENT_REQUEST 24
116#define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
117#define CEPH_MSG_CLIENT_REPLY 26
118#define CEPH_MSG_CLIENT_CAPS 0x310
119#define CEPH_MSG_CLIENT_LEASE 0x311
120#define CEPH_MSG_CLIENT_SNAP 0x312
121#define CEPH_MSG_CLIENT_CAPRELEASE 0x313
122
123/* pool ops */
124#define CEPH_MSG_POOLOP_REPLY 48
125#define CEPH_MSG_POOLOP 49
126
127
128/* osd */
129#define CEPH_MSG_OSD_MAP 41
130#define CEPH_MSG_OSD_OP 42
131#define CEPH_MSG_OSD_OPREPLY 43
132
133/* pool operations */
134enum {
135 POOL_OP_CREATE = 0x01,
136 POOL_OP_DELETE = 0x02,
137 POOL_OP_AUID_CHANGE = 0x03,
138 POOL_OP_CREATE_SNAP = 0x11,
139 POOL_OP_DELETE_SNAP = 0x12,
140 POOL_OP_CREATE_UNMANAGED_SNAP = 0x21,
141 POOL_OP_DELETE_UNMANAGED_SNAP = 0x22,
142};
143
144struct ceph_mon_request_header {
145 __le64 have_version;
146 __le16 session_mon;
147 __le64 session_mon_tid;
148} __attribute__ ((packed));
149
150struct ceph_mon_statfs {
151 struct ceph_mon_request_header monhdr;
152 struct ceph_fsid fsid;
153} __attribute__ ((packed));
154
155struct ceph_statfs {
156 __le64 kb, kb_used, kb_avail;
157 __le64 num_objects;
158} __attribute__ ((packed));
159
160struct ceph_mon_statfs_reply {
161 struct ceph_fsid fsid;
162 __le64 version;
163 struct ceph_statfs st;
164} __attribute__ ((packed));
165
166const char *ceph_pool_op_name(int op);
167
168struct ceph_mon_poolop {
169 struct ceph_mon_request_header monhdr;
170 struct ceph_fsid fsid;
171 __le32 pool;
172 __le32 op;
173 __le64 auid;
174 __le64 snapid;
175 __le32 name_len;
176} __attribute__ ((packed));
177
178struct ceph_mon_poolop_reply {
179 struct ceph_mon_request_header monhdr;
180 struct ceph_fsid fsid;
181 __le32 reply_code;
182 __le32 epoch;
183 char has_data;
184 char data[0];
185} __attribute__ ((packed));
186
187struct ceph_mon_unmanaged_snap {
188 __le64 snapid;
189} __attribute__ ((packed));
190
191struct ceph_osd_getmap {
192 struct ceph_mon_request_header monhdr;
193 struct ceph_fsid fsid;
194 __le32 start;
195} __attribute__ ((packed));
196
197struct ceph_mds_getmap {
198 struct ceph_mon_request_header monhdr;
199 struct ceph_fsid fsid;
200} __attribute__ ((packed));
201
202struct ceph_client_mount {
203 struct ceph_mon_request_header monhdr;
204} __attribute__ ((packed));
205
206struct ceph_mon_subscribe_item {
207 __le64 have_version; __le64 have;
208 __u8 onetime;
209} __attribute__ ((packed));
210
211struct ceph_mon_subscribe_ack {
212 __le32 duration; /* seconds */
213 struct ceph_fsid fsid;
214} __attribute__ ((packed));
215
216/*
217 * mds states
218 * > 0 -> in
219 * <= 0 -> out
220 */
221#define CEPH_MDS_STATE_DNE 0 /* down, does not exist. */
222#define CEPH_MDS_STATE_STOPPED -1 /* down, once existed, but no subtrees.
223 empty log. */
224#define CEPH_MDS_STATE_BOOT -4 /* up, boot announcement. */
225#define CEPH_MDS_STATE_STANDBY -5 /* up, idle. waiting for assignment. */
226#define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */
227#define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */
228#define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */
229
230#define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */
231#define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed
232 operations (import, rename, etc.) */
233#define CEPH_MDS_STATE_RECONNECT 10 /* up, reconnect to clients */
234#define CEPH_MDS_STATE_REJOIN 11 /* up, rejoining distributed cache */
235#define CEPH_MDS_STATE_CLIENTREPLAY 12 /* up, replaying client operations */
236#define CEPH_MDS_STATE_ACTIVE 13 /* up, active */
237#define CEPH_MDS_STATE_STOPPING 14 /* up, but exporting metadata */
238
239extern const char *ceph_mds_state_name(int s);
240
241
242/*
243 * metadata lock types.
244 * - these are bitmasks.. we can compose them
245 * - they also define the lock ordering by the MDS
246 * - a few of these are internal to the mds
247 */
248#define CEPH_LOCK_DVERSION 1
249#define CEPH_LOCK_DN 2
250#define CEPH_LOCK_ISNAP 16
251#define CEPH_LOCK_IVERSION 32 /* mds internal */
252#define CEPH_LOCK_IFILE 64
253#define CEPH_LOCK_IAUTH 128
254#define CEPH_LOCK_ILINK 256
255#define CEPH_LOCK_IDFT 512 /* dir frag tree */
256#define CEPH_LOCK_INEST 1024 /* mds internal */
257#define CEPH_LOCK_IXATTR 2048
258#define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */
259#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */
260
261/* client_session ops */
262enum {
263 CEPH_SESSION_REQUEST_OPEN,
264 CEPH_SESSION_OPEN,
265 CEPH_SESSION_REQUEST_CLOSE,
266 CEPH_SESSION_CLOSE,
267 CEPH_SESSION_REQUEST_RENEWCAPS,
268 CEPH_SESSION_RENEWCAPS,
269 CEPH_SESSION_STALE,
270 CEPH_SESSION_RECALL_STATE,
271};
272
273extern const char *ceph_session_op_name(int op);
274
275struct ceph_mds_session_head {
276 __le32 op;
277 __le64 seq;
278 struct ceph_timespec stamp;
279 __le32 max_caps, max_leases;
280} __attribute__ ((packed));
281
282/* client_request */
283/*
284 * metadata ops.
285 * & 0x001000 -> write op
286 * & 0x010000 -> follow symlink (e.g. stat(), not lstat()).
 287 * & 0x100000 -> use weird ino/path trace
288 */
289#define CEPH_MDS_OP_WRITE 0x001000
290enum {
291 CEPH_MDS_OP_LOOKUP = 0x00100,
292 CEPH_MDS_OP_GETATTR = 0x00101,
293 CEPH_MDS_OP_LOOKUPHASH = 0x00102,
294 CEPH_MDS_OP_LOOKUPPARENT = 0x00103,
295
296 CEPH_MDS_OP_SETXATTR = 0x01105,
297 CEPH_MDS_OP_RMXATTR = 0x01106,
298 CEPH_MDS_OP_SETLAYOUT = 0x01107,
299 CEPH_MDS_OP_SETATTR = 0x01108,
300 CEPH_MDS_OP_SETFILELOCK= 0x01109,
301 CEPH_MDS_OP_GETFILELOCK= 0x00110,
302 CEPH_MDS_OP_SETDIRLAYOUT=0x0110a,
303
304 CEPH_MDS_OP_MKNOD = 0x01201,
305 CEPH_MDS_OP_LINK = 0x01202,
306 CEPH_MDS_OP_UNLINK = 0x01203,
307 CEPH_MDS_OP_RENAME = 0x01204,
308 CEPH_MDS_OP_MKDIR = 0x01220,
309 CEPH_MDS_OP_RMDIR = 0x01221,
310 CEPH_MDS_OP_SYMLINK = 0x01222,
311
312 CEPH_MDS_OP_CREATE = 0x01301,
313 CEPH_MDS_OP_OPEN = 0x00302,
314 CEPH_MDS_OP_READDIR = 0x00305,
315
316 CEPH_MDS_OP_LOOKUPSNAP = 0x00400,
317 CEPH_MDS_OP_MKSNAP = 0x01400,
318 CEPH_MDS_OP_RMSNAP = 0x01401,
319 CEPH_MDS_OP_LSSNAP = 0x00402,
320};
321
322extern const char *ceph_mds_op_name(int op);
323
324
325#define CEPH_SETATTR_MODE 1
326#define CEPH_SETATTR_UID 2
327#define CEPH_SETATTR_GID 4
328#define CEPH_SETATTR_MTIME 8
329#define CEPH_SETATTR_ATIME 16
330#define CEPH_SETATTR_SIZE 32
331#define CEPH_SETATTR_CTIME 64
332
333union ceph_mds_request_args {
334 struct {
335 __le32 mask; /* CEPH_CAP_* */
336 } __attribute__ ((packed)) getattr;
337 struct {
338 __le32 mode;
339 __le32 uid;
340 __le32 gid;
341 struct ceph_timespec mtime;
342 struct ceph_timespec atime;
343 __le64 size, old_size; /* old_size needed by truncate */
344 __le32 mask; /* CEPH_SETATTR_* */
345 } __attribute__ ((packed)) setattr;
346 struct {
347 __le32 frag; /* which dir fragment */
348 __le32 max_entries; /* how many dentries to grab */
349 __le32 max_bytes;
350 } __attribute__ ((packed)) readdir;
351 struct {
352 __le32 mode;
353 __le32 rdev;
354 } __attribute__ ((packed)) mknod;
355 struct {
356 __le32 mode;
357 } __attribute__ ((packed)) mkdir;
358 struct {
359 __le32 flags;
360 __le32 mode;
361 __le32 stripe_unit; /* layout for newly created file */
362 __le32 stripe_count; /* ... */
363 __le32 object_size;
364 __le32 file_replication;
365 __le32 preferred;
366 } __attribute__ ((packed)) open;
367 struct {
368 __le32 flags;
369 } __attribute__ ((packed)) setxattr;
370 struct {
371 struct ceph_file_layout layout;
372 } __attribute__ ((packed)) setlayout;
373 struct {
374 __u8 rule; /* currently fcntl or flock */
375 __u8 type; /* shared, exclusive, remove*/
376 __le64 pid; /* process id requesting the lock */
377 __le64 pid_namespace;
378 __le64 start; /* initial location to lock */
379 __le64 length; /* num bytes to lock from start */
380 __u8 wait; /* will caller wait for lock to become available? */
381 } __attribute__ ((packed)) filelock_change;
382} __attribute__ ((packed));
383
384#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
385#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */
386
387struct ceph_mds_request_head {
388 __le64 oldest_client_tid;
389 __le32 mdsmap_epoch; /* on client */
390 __le32 flags; /* CEPH_MDS_FLAG_* */
391 __u8 num_retry, num_fwd; /* count retry, fwd attempts */
392 __le16 num_releases; /* # include cap/lease release records */
393 __le32 op; /* mds op code */
394 __le32 caller_uid, caller_gid;
395 __le64 ino; /* use this ino for openc, mkdir, mknod,
396 etc. (if replaying) */
397 union ceph_mds_request_args args;
398} __attribute__ ((packed));
399
400/* cap/lease release record */
401struct ceph_mds_request_release {
402 __le64 ino, cap_id; /* ino and unique cap id */
403 __le32 caps, wanted; /* new issued, wanted */
404 __le32 seq, issue_seq, mseq;
405 __le32 dname_seq; /* if releasing a dentry lease, a */
406 __le32 dname_len; /* string follows. */
407} __attribute__ ((packed));
408
409/* client reply */
410struct ceph_mds_reply_head {
411 __le32 op;
412 __le32 result;
413 __le32 mdsmap_epoch;
414 __u8 safe; /* true if committed to disk */
415 __u8 is_dentry, is_target; /* true if dentry, target inode records
416 are included with reply */
417} __attribute__ ((packed));
418
419/* one for each node split */
420struct ceph_frag_tree_split {
421 __le32 frag; /* this frag splits... */
422 __le32 by; /* ...by this many bits */
423} __attribute__ ((packed));
424
425struct ceph_frag_tree_head {
426 __le32 nsplits; /* num ceph_frag_tree_split records */
427 struct ceph_frag_tree_split splits[];
428} __attribute__ ((packed));
429
430/* capability issue, for bundling with mds reply */
431struct ceph_mds_reply_cap {
432 __le32 caps, wanted; /* caps issued, wanted */
433 __le64 cap_id;
434 __le32 seq, mseq;
435 __le64 realm; /* snap realm */
436 __u8 flags; /* CEPH_CAP_FLAG_* */
437} __attribute__ ((packed));
438
439#define CEPH_CAP_FLAG_AUTH 1 /* cap is issued by auth mds */
440
441/* inode record, for bundling with mds reply */
442struct ceph_mds_reply_inode {
443 __le64 ino;
444 __le64 snapid;
445 __le32 rdev;
446 __le64 version; /* inode version */
447 __le64 xattr_version; /* version for xattr blob */
448 struct ceph_mds_reply_cap cap; /* caps issued for this inode */
449 struct ceph_file_layout layout;
450 struct ceph_timespec ctime, mtime, atime;
451 __le32 time_warp_seq;
452 __le64 size, max_size, truncate_size;
453 __le32 truncate_seq;
454 __le32 mode, uid, gid;
455 __le32 nlink;
456 __le64 files, subdirs, rbytes, rfiles, rsubdirs; /* dir stats */
457 struct ceph_timespec rctime;
458 struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */
459} __attribute__ ((packed));
460/* followed by frag array, then symlink string, then xattr blob */
461
462/* reply_lease follows dname, and reply_inode */
463struct ceph_mds_reply_lease {
464 __le16 mask; /* lease type(s) */
465 __le32 duration_ms; /* lease duration */
466 __le32 seq;
467} __attribute__ ((packed));
468
469struct ceph_mds_reply_dirfrag {
470 __le32 frag; /* fragment */
471 __le32 auth; /* auth mds, if this is a delegation point */
472 __le32 ndist; /* number of mds' this is replicated on */
473 __le32 dist[];
474} __attribute__ ((packed));
475
476#define CEPH_LOCK_FCNTL 1
477#define CEPH_LOCK_FLOCK 2
478
479#define CEPH_LOCK_SHARED 1
480#define CEPH_LOCK_EXCL 2
481#define CEPH_LOCK_UNLOCK 4
482
483struct ceph_filelock {
484 __le64 start;/* file offset to start lock at */
485 __le64 length; /* num bytes to lock; 0 for all following start */
486 __le64 client; /* which client holds the lock */
487 __le64 pid; /* process id holding the lock on the client */
488 __le64 pid_namespace;
489 __u8 type; /* shared lock, exclusive lock, or unlock */
490} __attribute__ ((packed));
491
492
493/* file access modes */
494#define CEPH_FILE_MODE_PIN 0
495#define CEPH_FILE_MODE_RD 1
496#define CEPH_FILE_MODE_WR 2
497#define CEPH_FILE_MODE_RDWR 3 /* RD | WR */
498#define CEPH_FILE_MODE_LAZY 4 /* lazy io */
499#define CEPH_FILE_MODE_NUM 8 /* bc these are bit fields.. mostly */
500
501int ceph_flags_to_mode(int flags);
502
503
504/* capability bits */
505#define CEPH_CAP_PIN 1 /* no specific capabilities beyond the pin */
506
507/* generic cap bits */
 508#define CEPH_CAP_GSHARED 1 /* client can read */
509#define CEPH_CAP_GEXCL 2 /* client can read and update */
510#define CEPH_CAP_GCACHE 4 /* (file) client can cache reads */
511#define CEPH_CAP_GRD 8 /* (file) client can read */
512#define CEPH_CAP_GWR 16 /* (file) client can write */
513#define CEPH_CAP_GBUFFER 32 /* (file) client can buffer writes */
514#define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */
515#define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */
516
517/* per-lock shift */
518#define CEPH_CAP_SAUTH 2
519#define CEPH_CAP_SLINK 4
520#define CEPH_CAP_SXATTR 6
521#define CEPH_CAP_SFILE 8
522#define CEPH_CAP_SFLOCK 20
523
524#define CEPH_CAP_BITS 22
525
526/* composed values */
527#define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH)
528#define CEPH_CAP_AUTH_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SAUTH)
529#define CEPH_CAP_LINK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SLINK)
530#define CEPH_CAP_LINK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SLINK)
531#define CEPH_CAP_XATTR_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SXATTR)
532#define CEPH_CAP_XATTR_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SXATTR)
533#define CEPH_CAP_FILE(x) (x << CEPH_CAP_SFILE)
534#define CEPH_CAP_FILE_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFILE)
535#define CEPH_CAP_FILE_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFILE)
536#define CEPH_CAP_FILE_CACHE (CEPH_CAP_GCACHE << CEPH_CAP_SFILE)
537#define CEPH_CAP_FILE_RD (CEPH_CAP_GRD << CEPH_CAP_SFILE)
538#define CEPH_CAP_FILE_WR (CEPH_CAP_GWR << CEPH_CAP_SFILE)
539#define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE)
540#define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE)
541#define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE)
542#define CEPH_CAP_FLOCK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFLOCK)
543#define CEPH_CAP_FLOCK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFLOCK)
544
545
546/* cap masks (for getattr) */
547#define CEPH_STAT_CAP_INODE CEPH_CAP_PIN
548#define CEPH_STAT_CAP_TYPE CEPH_CAP_PIN /* mode >> 12 */
549#define CEPH_STAT_CAP_SYMLINK CEPH_CAP_PIN
550#define CEPH_STAT_CAP_UID CEPH_CAP_AUTH_SHARED
551#define CEPH_STAT_CAP_GID CEPH_CAP_AUTH_SHARED
552#define CEPH_STAT_CAP_MODE CEPH_CAP_AUTH_SHARED
553#define CEPH_STAT_CAP_NLINK CEPH_CAP_LINK_SHARED
554#define CEPH_STAT_CAP_LAYOUT CEPH_CAP_FILE_SHARED
555#define CEPH_STAT_CAP_MTIME CEPH_CAP_FILE_SHARED
556#define CEPH_STAT_CAP_SIZE CEPH_CAP_FILE_SHARED
557#define CEPH_STAT_CAP_ATIME CEPH_CAP_FILE_SHARED /* fixme */
558#define CEPH_STAT_CAP_XATTR CEPH_CAP_XATTR_SHARED
559#define CEPH_STAT_CAP_INODE_ALL (CEPH_CAP_PIN | \
560 CEPH_CAP_AUTH_SHARED | \
561 CEPH_CAP_LINK_SHARED | \
562 CEPH_CAP_FILE_SHARED | \
563 CEPH_CAP_XATTR_SHARED)
564
565#define CEPH_CAP_ANY_SHARED (CEPH_CAP_AUTH_SHARED | \
566 CEPH_CAP_LINK_SHARED | \
567 CEPH_CAP_XATTR_SHARED | \
568 CEPH_CAP_FILE_SHARED)
569#define CEPH_CAP_ANY_RD (CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_RD | \
570 CEPH_CAP_FILE_CACHE)
571
572#define CEPH_CAP_ANY_EXCL (CEPH_CAP_AUTH_EXCL | \
573 CEPH_CAP_LINK_EXCL | \
574 CEPH_CAP_XATTR_EXCL | \
575 CEPH_CAP_FILE_EXCL)
576#define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | \
577 CEPH_CAP_FILE_EXCL)
578#define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR)
579#define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \
580 CEPH_CAP_ANY_FILE_WR | CEPH_CAP_FILE_LAZYIO | \
581 CEPH_CAP_PIN)
582
583#define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \
584 CEPH_LOCK_IXATTR)
585
586int ceph_caps_for_mode(int mode);
587
588enum {
589 CEPH_CAP_OP_GRANT, /* mds->client grant */
590 CEPH_CAP_OP_REVOKE, /* mds->client revoke */
591 CEPH_CAP_OP_TRUNC, /* mds->client trunc notify */
592 CEPH_CAP_OP_EXPORT, /* mds has exported the cap */
593 CEPH_CAP_OP_IMPORT, /* mds has imported the cap */
594 CEPH_CAP_OP_UPDATE, /* client->mds update */
595 CEPH_CAP_OP_DROP, /* client->mds drop cap bits */
596 CEPH_CAP_OP_FLUSH, /* client->mds cap writeback */
597 CEPH_CAP_OP_FLUSH_ACK, /* mds->client flushed */
598 CEPH_CAP_OP_FLUSHSNAP, /* client->mds flush snapped metadata */
599 CEPH_CAP_OP_FLUSHSNAP_ACK, /* mds->client flushed snapped metadata */
600 CEPH_CAP_OP_RELEASE, /* client->mds release (clean) cap */
601 CEPH_CAP_OP_RENEW, /* client->mds renewal request */
602};
603
604extern const char *ceph_cap_op_name(int op);
605
606/*
607 * caps message, used for capability callbacks, acks, requests, etc.
608 */
609struct ceph_mds_caps {
610 __le32 op; /* CEPH_CAP_OP_* */
611 __le64 ino, realm;
612 __le64 cap_id;
613 __le32 seq, issue_seq;
614 __le32 caps, wanted, dirty; /* latest issued/wanted/dirty */
615 __le32 migrate_seq;
616 __le64 snap_follows;
617 __le32 snap_trace_len;
618
619 /* authlock */
620 __le32 uid, gid, mode;
621
622 /* linklock */
623 __le32 nlink;
624
625 /* xattrlock */
626 __le32 xattr_len;
627 __le64 xattr_version;
628
629 /* filelock */
630 __le64 size, max_size, truncate_size;
631 __le32 truncate_seq;
632 struct ceph_timespec mtime, atime, ctime;
633 struct ceph_file_layout layout;
634 __le32 time_warp_seq;
635} __attribute__ ((packed));
636
637/* cap release msg head */
638struct ceph_mds_cap_release {
639 __le32 num; /* number of cap_items that follow */
640} __attribute__ ((packed));
641
642struct ceph_mds_cap_item {
643 __le64 ino;
644 __le64 cap_id;
645 __le32 migrate_seq, seq;
646} __attribute__ ((packed));
647
648#define CEPH_MDS_LEASE_REVOKE 1 /* mds -> client */
649#define CEPH_MDS_LEASE_RELEASE 2 /* client -> mds */
650#define CEPH_MDS_LEASE_RENEW 3 /* client <-> mds */
651#define CEPH_MDS_LEASE_REVOKE_ACK 4 /* client -> mds */
652
653extern const char *ceph_lease_op_name(int o);
654
655/* lease msg header */
656struct ceph_mds_lease {
657 __u8 action; /* CEPH_MDS_LEASE_* */
658 __le16 mask; /* which lease */
659 __le64 ino;
660 __le64 first, last; /* snap range */
661 __le32 seq;
662 __le32 duration_ms; /* duration of renewal */
663} __attribute__ ((packed));
664/* followed by a __le32+string for dname */
665
666/* client reconnect */
667struct ceph_mds_cap_reconnect {
668 __le64 cap_id;
669 __le32 wanted;
670 __le32 issued;
671 __le64 snaprealm;
672 __le64 pathbase; /* base ino for our path to this ino */
673 __le32 flock_len; /* size of flock state blob, if any */
674} __attribute__ ((packed));
675/* followed by flock blob */
676
677struct ceph_mds_cap_reconnect_v1 {
678 __le64 cap_id;
679 __le32 wanted;
680 __le32 issued;
681 __le64 size;
682 struct ceph_timespec mtime, atime;
683 __le64 snaprealm;
684 __le64 pathbase; /* base ino for our path to this ino */
685} __attribute__ ((packed));
686
687struct ceph_mds_snaprealm_reconnect {
688 __le64 ino; /* snap realm base */
689 __le64 seq; /* snap seq for this snap realm */
690 __le64 parent; /* parent realm */
691} __attribute__ ((packed));
692
693/*
694 * snaps
695 */
696enum {
697 CEPH_SNAP_OP_UPDATE, /* CREATE or DESTROY */
698 CEPH_SNAP_OP_CREATE,
699 CEPH_SNAP_OP_DESTROY,
700 CEPH_SNAP_OP_SPLIT,
701};
702
703extern const char *ceph_snap_op_name(int o);
704
705/* snap msg header */
706struct ceph_mds_snap_head {
707 __le32 op; /* CEPH_SNAP_OP_* */
708 __le64 split; /* ino to split off, if any */
709 __le32 num_split_inos; /* # inos belonging to new child realm */
 710 __le32 num_split_realms; /* # child realms under new child realm */
711 __le32 trace_len; /* size of snap trace blob */
712} __attribute__ ((packed));
713/* followed by split ino list, then split realms, then the trace blob */
714
715/*
716 * encode info about a snaprealm, as viewed by a client
717 */
718struct ceph_mds_snap_realm {
719 __le64 ino; /* ino */
720 __le64 created; /* snap: when created */
721 __le64 parent; /* ino: parent realm */
722 __le64 parent_since; /* snap: same parent since */
723 __le64 seq; /* snap: version */
724 __le32 num_snaps;
725 __le32 num_prior_parent_snaps;
726} __attribute__ ((packed));
727/* followed by my snap list, then prior parent snap list */
728
729#endif
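The capability constants combine a generic bit with a per-lock shift; a few compile-time checks spell out the arithmetic (values follow directly from the definitions above):

static void cap_bits_example(void)
{
	/* GWR (16) shifted into the file-lock slot (shift 8) gives 0x1000 */
	BUILD_BUG_ON(CEPH_CAP_FILE_WR != (CEPH_CAP_GWR << CEPH_CAP_SFILE));
	BUILD_BUG_ON(CEPH_CAP_FILE_WR != 0x1000);

	/* GEXCL (2) shifted into the auth slot (shift 2) gives 0x8 */
	BUILD_BUG_ON(CEPH_CAP_AUTH_EXCL != 0x8);

	/* a client buffering writes wants both of these file caps */
	BUILD_BUG_ON((CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER) != 0x3000);
}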
diff --git a/include/linux/ceph/ceph_hash.h b/include/linux/ceph/ceph_hash.h
new file mode 100644
index 00000000000..d099c3f9023
--- /dev/null
+++ b/include/linux/ceph/ceph_hash.h
@@ -0,0 +1,13 @@
1#ifndef FS_CEPH_HASH_H
2#define FS_CEPH_HASH_H
3
4#define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */
5#define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */
6
7extern unsigned ceph_str_hash_linux(const char *s, unsigned len);
8extern unsigned ceph_str_hash_rjenkins(const char *s, unsigned len);
9
10extern unsigned ceph_str_hash(int type, const char *s, unsigned len);
11extern const char *ceph_str_hash_name(int type);
12
13#endif
diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h
new file mode 100644
index 00000000000..2a79702e092
--- /dev/null
+++ b/include/linux/ceph/debugfs.h
@@ -0,0 +1,33 @@
1#ifndef _FS_CEPH_DEBUGFS_H
2#define _FS_CEPH_DEBUGFS_H
3
4#include "ceph_debug.h"
5#include "types.h"
6
7#define CEPH_DEFINE_SHOW_FUNC(name) \
8static int name##_open(struct inode *inode, struct file *file) \
9{ \
10 struct seq_file *sf; \
11 int ret; \
12 \
13 ret = single_open(file, name, NULL); \
14 sf = file->private_data; \
15 sf->private = inode->i_private; \
16 return ret; \
17} \
18 \
19static const struct file_operations name##_fops = { \
20 .open = name##_open, \
21 .read = seq_read, \
22 .llseek = seq_lseek, \
23 .release = single_release, \
24};
25
26/* debugfs.c */
27extern int ceph_debugfs_init(void);
28extern void ceph_debugfs_cleanup(void);
29extern int ceph_debugfs_client_init(struct ceph_client *client);
30extern void ceph_debugfs_client_cleanup(struct ceph_client *client);
31
32#endif
33
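Illustrative use of CEPH_DEFINE_SHOW_FUNC: the show function below and the debugfs file name are assumptions for the example; the data pointer handed to debugfs_create_file() surfaces as inode->i_private and therefore as seq_file->private.

/* seq_file show callback; its private pointer comes from i_private */
static int monmap_show(struct seq_file *s, void *p)
{
	struct ceph_client *client = s->private;

	seq_printf(s, "epoch %u\n", client->monc.monmap->epoch);
	return 0;
}

CEPH_DEFINE_SHOW_FUNC(monmap_show)	/* generates monmap_show_fops */

/* e.g. from ceph_debugfs_client_init(): */
static void example_register(struct ceph_client *client)
{
	client->debugfs_monmap =
		debugfs_create_file("monmap", 0400, client->debugfs_dir,
				    client, &monmap_show_fops);
}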
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
new file mode 100644
index 00000000000..c5b6939fb32
--- /dev/null
+++ b/include/linux/ceph/decode.h
@@ -0,0 +1,201 @@
1#ifndef __CEPH_DECODE_H
2#define __CEPH_DECODE_H
3
4#include <asm/unaligned.h>
5#include <linux/time.h>
6
7#include "types.h"
8
9/*
10 * in all cases,
11 * void **p pointer to position pointer
12 * void *end pointer to end of buffer (last byte + 1)
13 */
14
15static inline u64 ceph_decode_64(void **p)
16{
17 u64 v = get_unaligned_le64(*p);
18 *p += sizeof(u64);
19 return v;
20}
21static inline u32 ceph_decode_32(void **p)
22{
23 u32 v = get_unaligned_le32(*p);
24 *p += sizeof(u32);
25 return v;
26}
27static inline u16 ceph_decode_16(void **p)
28{
29 u16 v = get_unaligned_le16(*p);
30 *p += sizeof(u16);
31 return v;
32}
33static inline u8 ceph_decode_8(void **p)
34{
35 u8 v = *(u8 *)*p;
36 (*p)++;
37 return v;
38}
39static inline void ceph_decode_copy(void **p, void *pv, size_t n)
40{
41 memcpy(pv, *p, n);
42 *p += n;
43}
44
45/*
46 * bounds check input.
47 */
48#define ceph_decode_need(p, end, n, bad) \
49 do { \
50 if (unlikely(*(p) + (n) > (end))) \
51 goto bad; \
52 } while (0)
53
54#define ceph_decode_64_safe(p, end, v, bad) \
55 do { \
56 ceph_decode_need(p, end, sizeof(u64), bad); \
57 v = ceph_decode_64(p); \
58 } while (0)
59#define ceph_decode_32_safe(p, end, v, bad) \
60 do { \
61 ceph_decode_need(p, end, sizeof(u32), bad); \
62 v = ceph_decode_32(p); \
63 } while (0)
64#define ceph_decode_16_safe(p, end, v, bad) \
65 do { \
66 ceph_decode_need(p, end, sizeof(u16), bad); \
67 v = ceph_decode_16(p); \
68 } while (0)
69#define ceph_decode_8_safe(p, end, v, bad) \
70 do { \
71 ceph_decode_need(p, end, sizeof(u8), bad); \
72 v = ceph_decode_8(p); \
73 } while (0)
74
75#define ceph_decode_copy_safe(p, end, pv, n, bad) \
76 do { \
77 ceph_decode_need(p, end, n, bad); \
78 ceph_decode_copy(p, pv, n); \
79 } while (0)
80
81/*
82 * struct ceph_timespec <-> struct timespec
83 */
84static inline void ceph_decode_timespec(struct timespec *ts,
85 const struct ceph_timespec *tv)
86{
87 ts->tv_sec = le32_to_cpu(tv->tv_sec);
88 ts->tv_nsec = le32_to_cpu(tv->tv_nsec);
89}
90static inline void ceph_encode_timespec(struct ceph_timespec *tv,
91 const struct timespec *ts)
92{
93 tv->tv_sec = cpu_to_le32(ts->tv_sec);
94 tv->tv_nsec = cpu_to_le32(ts->tv_nsec);
95}
96
97/*
98 * sockaddr_storage <-> ceph_sockaddr
99 */
100static inline void ceph_encode_addr(struct ceph_entity_addr *a)
101{
102 __be16 ss_family = htons(a->in_addr.ss_family);
103 a->in_addr.ss_family = *(__u16 *)&ss_family;
104}
105static inline void ceph_decode_addr(struct ceph_entity_addr *a)
106{
107 __be16 ss_family = *(__be16 *)&a->in_addr.ss_family;
108 a->in_addr.ss_family = ntohs(ss_family);
109 WARN_ON(a->in_addr.ss_family == 512);
110}
111
112/*
113 * encoders
114 */
115static inline void ceph_encode_64(void **p, u64 v)
116{
117 put_unaligned_le64(v, (__le64 *)*p);
118 *p += sizeof(u64);
119}
120static inline void ceph_encode_32(void **p, u32 v)
121{
122 put_unaligned_le32(v, (__le32 *)*p);
123 *p += sizeof(u32);
124}
125static inline void ceph_encode_16(void **p, u16 v)
126{
127 put_unaligned_le16(v, (__le16 *)*p);
128 *p += sizeof(u16);
129}
130static inline void ceph_encode_8(void **p, u8 v)
131{
132 *(u8 *)*p = v;
133 (*p)++;
134}
135static inline void ceph_encode_copy(void **p, const void *s, int len)
136{
137 memcpy(*p, s, len);
138 *p += len;
139}
140
141/*
142 * filepath, string encoders
143 */
144static inline void ceph_encode_filepath(void **p, void *end,
145 u64 ino, const char *path)
146{
147 u32 len = path ? strlen(path) : 0;
148 BUG_ON(*p + sizeof(ino) + sizeof(len) + len > end);
149 ceph_encode_8(p, 1);
150 ceph_encode_64(p, ino);
151 ceph_encode_32(p, len);
152 if (len)
153 memcpy(*p, path, len);
154 *p += len;
155}
156
157static inline void ceph_encode_string(void **p, void *end,
158 const char *s, u32 len)
159{
160 BUG_ON(*p + sizeof(len) + len > end);
161 ceph_encode_32(p, len);
162 if (len)
163 memcpy(*p, s, len);
164 *p += len;
165}
166
167#define ceph_encode_need(p, end, n, bad) \
168 do { \
169 if (unlikely(*(p) + (n) > (end))) \
170 goto bad; \
171 } while (0)
172
173#define ceph_encode_64_safe(p, end, v, bad) \
174 do { \
175 ceph_encode_need(p, end, sizeof(u64), bad); \
176 ceph_encode_64(p, v); \
177 } while (0)
178#define ceph_encode_32_safe(p, end, v, bad) \
179 do { \
180 ceph_encode_need(p, end, sizeof(u32), bad); \
181 ceph_encode_32(p, v); \
182 } while (0)
183#define ceph_encode_16_safe(p, end, v, bad) \
184 do { \
185 ceph_encode_need(p, end, sizeof(u16), bad); \
186 ceph_encode_16(p, v); \
187 } while (0)
188
189#define ceph_encode_copy_safe(p, end, pv, n, bad) \
190 do { \
191 ceph_encode_need(p, end, n, bad); \
192 ceph_encode_copy(p, pv, n); \
193 } while (0)
194#define ceph_encode_string_safe(p, end, s, n, bad) \
195 do { \
196 ceph_encode_need(p, end, n, bad); \
197 ceph_encode_string(p, end, s, n); \
198 } while (0)
199
200
201#endif
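Typical use of the bounds-checked helpers above: decode a (hypothetical) count followed by that many 64-bit values, jumping to a 'bad' label whenever the buffer is too short.

static int decode_example(void **p, void *end, u64 *out, u32 max)
{
	u32 num, i;

	ceph_decode_32_safe(p, end, num, bad);
	if (num > max)
		goto bad;
	/* one bounds check for the whole array, then the fast decoders */
	ceph_decode_need(p, end, (size_t)num * sizeof(u64), bad);
	for (i = 0; i < num; i++)
		out[i] = ceph_decode_64(p);
	return num;

bad:
	return -ERANGE;
}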
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
new file mode 100644
index 00000000000..f22b2e94168
--- /dev/null
+++ b/include/linux/ceph/libceph.h
@@ -0,0 +1,249 @@
1#ifndef _FS_CEPH_LIBCEPH_H
2#define _FS_CEPH_LIBCEPH_H
3
4#include "ceph_debug.h"
5
6#include <asm/unaligned.h>
7#include <linux/backing-dev.h>
8#include <linux/completion.h>
9#include <linux/exportfs.h>
10#include <linux/fs.h>
11#include <linux/mempool.h>
12#include <linux/pagemap.h>
13#include <linux/wait.h>
14#include <linux/writeback.h>
15#include <linux/slab.h>
16
17#include "types.h"
18#include "messenger.h"
19#include "msgpool.h"
20#include "mon_client.h"
21#include "osd_client.h"
22#include "ceph_fs.h"
23
24/*
25 * Supported features
26 */
27#define CEPH_FEATURE_SUPPORTED_DEFAULT CEPH_FEATURE_NOSRCADDR
28#define CEPH_FEATURE_REQUIRED_DEFAULT CEPH_FEATURE_NOSRCADDR
29
30/*
31 * mount options
32 */
33#define CEPH_OPT_FSID (1<<0)
34#define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */
35#define CEPH_OPT_MYIP (1<<2) /* specified my ip */
36#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */
37
38#define CEPH_OPT_DEFAULT (0);
39
40#define ceph_set_opt(client, opt) \
41 (client)->options->flags |= CEPH_OPT_##opt;
42#define ceph_test_opt(client, opt) \
43 (!!((client)->options->flags & CEPH_OPT_##opt))
44
45struct ceph_options {
46 int flags;
47 struct ceph_fsid fsid;
48 struct ceph_entity_addr my_addr;
49 int mount_timeout;
50 int osd_idle_ttl;
51 int osd_timeout;
52 int osd_keepalive_timeout;
53
54 /*
 55 * any type that can't be simply compared or doesn't need
56 * to be compared should go beyond this point,
57 * ceph_compare_options() should be updated accordingly
58 */
59
60 struct ceph_entity_addr *mon_addr; /* should be the first
61 pointer type of args */
62 int num_mon;
63 char *name;
64 char *secret;
65};
66
67/*
68 * defaults
69 */
70#define CEPH_MOUNT_TIMEOUT_DEFAULT 60
71#define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */
72#define CEPH_OSD_KEEPALIVE_DEFAULT 5
73#define CEPH_OSD_IDLE_TTL_DEFAULT 60
74#define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */
75
76#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
77#define CEPH_MSG_MAX_DATA_LEN (16*1024*1024)
78
79#define CEPH_AUTH_NAME_DEFAULT "guest"
80
81/*
82 * Delay telling the MDS we no longer want caps, in case we reopen
83 * the file. Delay a minimum amount of time, even if we send a cap
 84 * message for some other reason. Otherwise, take the opportunity to
85 * update the mds to avoid sending another message later.
86 */
87#define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */
88#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */
89
90#define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4)
91
92/* mount state */
93enum {
94 CEPH_MOUNT_MOUNTING,
95 CEPH_MOUNT_MOUNTED,
96 CEPH_MOUNT_UNMOUNTING,
97 CEPH_MOUNT_UNMOUNTED,
98 CEPH_MOUNT_SHUTDOWN,
99};
100
101/*
102 * subtract jiffies
103 */
104static inline unsigned long time_sub(unsigned long a, unsigned long b)
105{
106 BUG_ON(time_after(b, a));
107 return (long)a - (long)b;
108}
109
110struct ceph_mds_client;
111
112/*
113 * per client state
114 *
115 * possibly shared by multiple mount points, if they are
116 * mounting the same ceph filesystem/cluster.
117 */
118struct ceph_client {
119 struct ceph_fsid fsid;
120 bool have_fsid;
121
122 void *private;
123
124 struct ceph_options *options;
125
126 struct mutex mount_mutex; /* serialize mount attempts */
127 wait_queue_head_t auth_wq;
128 int auth_err;
129
130 int (*extra_mon_dispatch)(struct ceph_client *, struct ceph_msg *);
131
132 u32 supported_features;
133 u32 required_features;
134
135 struct ceph_messenger *msgr; /* messenger instance */
136 struct ceph_mon_client monc;
137 struct ceph_osd_client osdc;
138
139#ifdef CONFIG_DEBUG_FS
140 struct dentry *debugfs_dir;
141 struct dentry *debugfs_monmap;
142 struct dentry *debugfs_osdmap;
143#endif
144};
145
146
147
148/*
149 * snapshots
150 */
151
152/*
153 * A "snap context" is the set of existing snapshots when we
154 * write data. It is used by the OSD to guide its COW behavior.
155 *
156 * The ceph_snap_context is refcounted, and attached to each dirty
157 * page, indicating which context the dirty data belonged when it was
158 * dirtied.
159 */
160struct ceph_snap_context {
161 atomic_t nref;
162 u64 seq;
163 int num_snaps;
164 u64 snaps[];
165};
166
167static inline struct ceph_snap_context *
168ceph_get_snap_context(struct ceph_snap_context *sc)
169{
170 /*
171 printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
172 atomic_read(&sc->nref)+1);
173 */
174 if (sc)
175 atomic_inc(&sc->nref);
176 return sc;
177}
178
179static inline void ceph_put_snap_context(struct ceph_snap_context *sc)
180{
181 if (!sc)
182 return;
183 /*
184 printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref),
185 atomic_read(&sc->nref)-1);
186 */
187 if (atomic_dec_and_test(&sc->nref)) {
188 /*printk(" deleting snap_context %p\n", sc);*/
189 kfree(sc);
190 }
191}
192
193/*
194 * calculate the number of pages a given length and offset map onto,
195 * if we align the data.
196 */
197static inline int calc_pages_for(u64 off, u64 len)
198{
199 return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) -
200 (off >> PAGE_CACHE_SHIFT);
201}
202
203/* ceph_common.c */
204extern const char *ceph_msg_type_name(int type);
205extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid);
206extern struct kmem_cache *ceph_inode_cachep;
207extern struct kmem_cache *ceph_cap_cachep;
208extern struct kmem_cache *ceph_dentry_cachep;
209extern struct kmem_cache *ceph_file_cachep;
210
211extern int ceph_parse_options(struct ceph_options **popt, char *options,
212 const char *dev_name, const char *dev_name_end,
213 int (*parse_extra_token)(char *c, void *private),
214 void *private);
215extern void ceph_destroy_options(struct ceph_options *opt);
216extern int ceph_compare_options(struct ceph_options *new_opt,
217 struct ceph_client *client);
218extern struct ceph_client *ceph_create_client(struct ceph_options *opt,
219 void *private);
220extern u64 ceph_client_id(struct ceph_client *client);
221extern void ceph_destroy_client(struct ceph_client *client);
222extern int __ceph_open_session(struct ceph_client *client,
223 unsigned long started);
224extern int ceph_open_session(struct ceph_client *client);
225
226/* pagevec.c */
227extern void ceph_release_page_vector(struct page **pages, int num_pages);
228
229extern struct page **ceph_get_direct_page_vector(const char __user *data,
230 int num_pages,
231 loff_t off, size_t len);
232extern void ceph_put_page_vector(struct page **pages, int num_pages);
233extern void ceph_release_page_vector(struct page **pages, int num_pages);
234extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
235extern int ceph_copy_user_to_page_vector(struct page **pages,
236 const char __user *data,
237 loff_t off, size_t len);
238extern int ceph_copy_to_page_vector(struct page **pages,
239 const char *data,
240 loff_t off, size_t len);
241extern int ceph_copy_from_page_vector(struct page **pages,
242 char *data,
243 loff_t off, size_t len);
244extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data,
245 loff_t off, size_t len);
246extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
247
248
 249#endif /* _FS_CEPH_LIBCEPH_H */
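A worked example for calc_pages_for() above, assuming 4 KB pages (PAGE_CACHE_SHIFT == 12):

/*
 *   calc_pages_for(1000, 5000)
 *     = ((1000 + 5000 + 4095) >> 12) - (1000 >> 12)
 *     =  2 - 0
 *     =  2
 *
 * i.e. bytes 1000..5999 fall on pages 0 and 1, so two pages are needed
 * to hold the page-aligned data.
 */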
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
new file mode 100644
index 00000000000..4c5cb0880bb
--- /dev/null
+++ b/include/linux/ceph/mdsmap.h
@@ -0,0 +1,62 @@
1#ifndef _FS_CEPH_MDSMAP_H
2#define _FS_CEPH_MDSMAP_H
3
4#include "types.h"
5
6/*
7 * mds map - describe servers in the mds cluster.
8 *
 9 * we limit fields to those the client actually cares about
10 */
11struct ceph_mds_info {
12 u64 global_id;
13 struct ceph_entity_addr addr;
14 s32 state;
15 int num_export_targets;
16 bool laggy;
17 u32 *export_targets;
18};
19
20struct ceph_mdsmap {
21 u32 m_epoch, m_client_epoch, m_last_failure;
22 u32 m_root;
23 u32 m_session_timeout; /* seconds */
24 u32 m_session_autoclose; /* seconds */
25 u64 m_max_file_size;
26 u32 m_max_mds; /* size of m_addr, m_state arrays */
27 struct ceph_mds_info *m_info;
28
29 /* which object pools file data can be stored in */
30 int m_num_data_pg_pools;
31 u32 *m_data_pg_pools;
32 u32 m_cas_pg_pool;
33};
34
35static inline struct ceph_entity_addr *
36ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w)
37{
38 if (w >= m->m_max_mds)
39 return NULL;
40 return &m->m_info[w].addr;
41}
42
43static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w)
44{
45 BUG_ON(w < 0);
46 if (w >= m->m_max_mds)
47 return CEPH_MDS_STATE_DNE;
48 return m->m_info[w].state;
49}
50
51static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
52{
53 if (w >= 0 && w < m->m_max_mds)
54 return m->m_info[w].laggy;
55 return false;
56}
57
58extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
59extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
60extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
61
62#endif
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
new file mode 100644
index 00000000000..5956d62c305
--- /dev/null
+++ b/include/linux/ceph/messenger.h
@@ -0,0 +1,261 @@
1#ifndef __FS_CEPH_MESSENGER_H
2#define __FS_CEPH_MESSENGER_H
3
4#include <linux/kref.h>
5#include <linux/mutex.h>
6#include <linux/net.h>
7#include <linux/radix-tree.h>
8#include <linux/uio.h>
9#include <linux/version.h>
10#include <linux/workqueue.h>
11
12#include "types.h"
13#include "buffer.h"
14
15struct ceph_msg;
16struct ceph_connection;
17
18extern struct workqueue_struct *ceph_msgr_wq; /* receive work queue */
19
20/*
21 * Ceph defines these callbacks for handling connection events.
22 */
23struct ceph_connection_operations {
24 struct ceph_connection *(*get)(struct ceph_connection *);
25 void (*put)(struct ceph_connection *);
26
27 /* handle an incoming message. */
28 void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m);
29
30 /* authorize an outgoing connection */
31 int (*get_authorizer) (struct ceph_connection *con,
32 void **buf, int *len, int *proto,
33 void **reply_buf, int *reply_len, int force_new);
34 int (*verify_authorizer_reply) (struct ceph_connection *con, int len);
35 int (*invalidate_authorizer)(struct ceph_connection *con);
36
37 /* protocol version mismatch */
38 void (*bad_proto) (struct ceph_connection *con);
39
40 /* there was some error on the socket (disconnect, whatever) */
41 void (*fault) (struct ceph_connection *con);
42
 43 /* a remote host has terminated a message exchange session, and messages
44 * we sent (or they tried to send us) may be lost. */
45 void (*peer_reset) (struct ceph_connection *con);
46
47 struct ceph_msg * (*alloc_msg) (struct ceph_connection *con,
48 struct ceph_msg_header *hdr,
49 int *skip);
50};
51
52/* use format string %s%d */
53#define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num)
54
55struct ceph_messenger {
56 struct ceph_entity_inst inst; /* my name+address */
57 struct ceph_entity_addr my_enc_addr;
58 struct page *zero_page; /* used in certain error cases */
59
60 bool nocrc;
61
62 /*
 63 * the global_seq counts connections I (attempt to) initiate
64 * in order to disambiguate certain connect race conditions.
65 */
66 u32 global_seq;
67 spinlock_t global_seq_lock;
68
69 u32 supported_features;
70 u32 required_features;
71};
72
73/*
74 * a single message. it contains a header (src, dest, message type, etc.),
75 * footer (crc values, mainly), a "front" message body, and possibly a
76 * data payload (stored in some number of pages).
77 */
78struct ceph_msg {
79 struct ceph_msg_header hdr; /* header */
80 struct ceph_msg_footer footer; /* footer */
81 struct kvec front; /* unaligned blobs of message */
82 struct ceph_buffer *middle;
83 struct page **pages; /* data payload. NOT OWNER. */
84 unsigned nr_pages; /* size of page array */
85 struct ceph_pagelist *pagelist; /* instead of pages */
86 struct list_head list_head;
87 struct kref kref;
88 struct bio *bio; /* instead of pages/pagelist */
89 struct bio *bio_iter; /* bio iterator */
90 int bio_seg; /* current bio segment */
91 struct ceph_pagelist *trail; /* the trailing part of the data */
92 bool front_is_vmalloc;
93 bool more_to_follow;
94 bool needs_out_seq;
95 int front_max;
96
97 struct ceph_msgpool *pool;
98};
99
100struct ceph_msg_pos {
101 int page, page_pos; /* which page; offset in page */
102 int data_pos; /* offset in data payload */
103 int did_page_crc; /* true if we've calculated crc for current page */
104};
105
106/* ceph connection fault delay defaults, for exponential backoff */
107#define BASE_DELAY_INTERVAL (HZ/2)
108#define MAX_DELAY_INTERVAL (5 * 60 * HZ)
109
110/*
111 * ceph_connection state bit flags
112 *
113 * QUEUED and BUSY are used together to ensure that only a single
114 * thread is currently opening, reading or writing data to the socket.
115 */
116#define LOSSYTX 0 /* we can close channel or drop messages on errors */
117#define CONNECTING 1
118#define NEGOTIATING 2
119#define KEEPALIVE_PENDING 3
120#define WRITE_PENDING 4 /* we have data ready to send */
121#define QUEUED 5 /* there is work queued on this connection */
122#define BUSY 6 /* work is being done */
123#define STANDBY 8 /* no outgoing messages, socket closed. we keep
124 * the ceph_connection around to maintain shared
125 * state with the peer. */
126#define CLOSED 10 /* we've closed the connection */
127#define SOCK_CLOSED 11 /* socket state changed to closed */
128#define OPENING 13 /* open connection w/ (possibly new) peer */
129#define DEAD 14 /* dead, about to kfree */
130
131/*
132 * A single connection with another host.
133 *
134 * We maintain a queue of outgoing messages, and some session state to
135 * ensure that we can preserve the lossless, ordered delivery of
136 * messages in the case of a TCP disconnect.
137 */
138struct ceph_connection {
139 void *private;
140 atomic_t nref;
141
142 const struct ceph_connection_operations *ops;
143
144 struct ceph_messenger *msgr;
145 struct socket *sock;
146 unsigned long state; /* connection state (see flags above) */
147 const char *error_msg; /* error message, if any */
148
149 struct ceph_entity_addr peer_addr; /* peer address */
150 struct ceph_entity_name peer_name; /* peer name */
151 struct ceph_entity_addr peer_addr_for_me;
152 unsigned peer_features;
153 u32 connect_seq; /* identify the most recent connection
154 attempt for this connection, client */
155 u32 peer_global_seq; /* peer's global seq for this connection */
156
157 int auth_retry; /* true if we need a newer authorizer */
158 void *auth_reply_buf; /* where to put the authorizer reply */
159 int auth_reply_buf_len;
160
161 struct mutex mutex;
162
163 /* out queue */
164 struct list_head out_queue;
165 struct list_head out_sent; /* sending or sent but unacked */
166 u64 out_seq; /* last message queued for send */
167 bool out_keepalive_pending;
168
169 u64 in_seq, in_seq_acked; /* last message received, acked */
170
171 /* connection negotiation temps */
172 char in_banner[CEPH_BANNER_MAX_LEN];
173 union {
174 struct { /* outgoing connection */
175 struct ceph_msg_connect out_connect;
176 struct ceph_msg_connect_reply in_reply;
177 };
178 struct { /* incoming */
179 struct ceph_msg_connect in_connect;
180 struct ceph_msg_connect_reply out_reply;
181 };
182 };
183 struct ceph_entity_addr actual_peer_addr;
184
185 /* message out temps */
186 struct ceph_msg *out_msg; /* sending message (== tail of
187 out_sent) */
188 bool out_msg_done;
189 struct ceph_msg_pos out_msg_pos;
190
191 struct kvec out_kvec[8], /* sending header/footer data */
192 *out_kvec_cur;
193 int out_kvec_left; /* kvec's left in out_kvec */
194 int out_skip; /* skip this many bytes */
195 int out_kvec_bytes; /* total bytes left */
196 bool out_kvec_is_msg; /* kvec refers to out_msg */
197 int out_more; /* there is more data after the kvecs */
198 __le64 out_temp_ack; /* for writing an ack */
199
200 /* message in temps */
201 struct ceph_msg_header in_hdr;
202 struct ceph_msg *in_msg;
203 struct ceph_msg_pos in_msg_pos;
204 u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */
205
206 char in_tag; /* protocol control byte */
207 int in_base_pos; /* bytes read */
208 __le64 in_temp_ack; /* for reading an ack */
209
210 struct delayed_work work; /* send|recv work */
211 unsigned long delay; /* current delay interval */
212};
213
214
215extern const char *ceph_pr_addr(const struct sockaddr_storage *ss);
216extern int ceph_parse_ips(const char *c, const char *end,
217 struct ceph_entity_addr *addr,
218 int max_count, int *count);
219
220
221extern int ceph_msgr_init(void);
222extern void ceph_msgr_exit(void);
223extern void ceph_msgr_flush(void);
224
225extern struct ceph_messenger *ceph_messenger_create(
226 struct ceph_entity_addr *myaddr,
227 u32 features, u32 required);
228extern void ceph_messenger_destroy(struct ceph_messenger *);
229
230extern void ceph_con_init(struct ceph_messenger *msgr,
231 struct ceph_connection *con);
232extern void ceph_con_open(struct ceph_connection *con,
233 struct ceph_entity_addr *addr);
234extern bool ceph_con_opened(struct ceph_connection *con);
235extern void ceph_con_close(struct ceph_connection *con);
236extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
237extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
238extern void ceph_con_revoke_message(struct ceph_connection *con,
239 struct ceph_msg *msg);
240extern void ceph_con_keepalive(struct ceph_connection *con);
241extern struct ceph_connection *ceph_con_get(struct ceph_connection *con);
242extern void ceph_con_put(struct ceph_connection *con);
243
244extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags);
245extern void ceph_msg_kfree(struct ceph_msg *m);
246
247
248static inline struct ceph_msg *ceph_msg_get(struct ceph_msg *msg)
249{
250 kref_get(&msg->kref);
251 return msg;
252}
253extern void ceph_msg_last_put(struct kref *kref);
254static inline void ceph_msg_put(struct ceph_msg *msg)
255{
256 kref_put(&msg->kref, ceph_msg_last_put);
257}
258
259extern void ceph_msg_dump(struct ceph_msg *msg);
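Taken together, the connection and message calls above suggest a simple send path. A minimal sketch assuming kernel context; the function name, message type, and front size are illustrative, not part of the API:

static void example_send(struct ceph_messenger *msgr,
                         struct ceph_connection *con,
                         struct ceph_entity_addr *peer, int msg_type)
{
        struct ceph_msg *msg;

        ceph_con_init(msgr, con);       /* bind the connection to the messenger */
        ceph_con_open(con, peer);       /* start connecting to the peer address */

        msg = ceph_msg_new(msg_type, 128, GFP_NOFS);    /* 128-byte front payload */
        if (msg)
                ceph_con_send(con, msg);        /* queue the message for sending */
}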
260
261#endif
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
new file mode 100644
index 00000000000..545f8591778
--- /dev/null
+++ b/include/linux/ceph/mon_client.h
@@ -0,0 +1,122 @@
1#ifndef _FS_CEPH_MON_CLIENT_H
2#define _FS_CEPH_MON_CLIENT_H
3
4#include <linux/completion.h>
5#include <linux/kref.h>
6#include <linux/rbtree.h>
7
8#include "messenger.h"
9
10struct ceph_client;
11struct ceph_mount_args;
12struct ceph_auth_client;
13
14/*
15 * The monitor map enumerates the set of all monitors.
16 */
17struct ceph_monmap {
18 struct ceph_fsid fsid;
19 u32 epoch;
20 u32 num_mon;
21 struct ceph_entity_inst mon_inst[0];
22};
23
24struct ceph_mon_client;
25struct ceph_mon_generic_request;
26
27
28/*
29 * Generic mechanism for resending monitor requests.
30 */
31typedef void (*ceph_monc_request_func_t)(struct ceph_mon_client *monc,
32 int newmon);
33
34/* a pending monitor request */
35struct ceph_mon_request {
36 struct ceph_mon_client *monc;
37 struct delayed_work delayed_work;
38 unsigned long delay;
39 ceph_monc_request_func_t do_request;
40};
41
42/*
43 * ceph_mon_generic_request is used for the statfs and poolop requests,
44 * which are handled a bit differently because we need to get data back
45 * to the caller.
46 */
47struct ceph_mon_generic_request {
48 struct kref kref;
49 u64 tid;
50 struct rb_node node;
51 int result;
52 void *buf;
53 int buf_len;
54 struct completion completion;
55 struct ceph_msg *request; /* original request */
56 struct ceph_msg *reply; /* and reply */
57};
58
59struct ceph_mon_client {
60 struct ceph_client *client;
61 struct ceph_monmap *monmap;
62
63 struct mutex mutex;
64 struct delayed_work delayed_work;
65
66 struct ceph_auth_client *auth;
67 struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe, *m_subscribe_ack;
68 int pending_auth;
69
70 bool hunting;
71 int cur_mon; /* last monitor i contacted */
72 unsigned long sub_sent, sub_renew_after;
73 struct ceph_connection *con;
74 bool have_fsid;
75
76 /* pending generic requests */
77 struct rb_root generic_request_tree;
78 int num_generic_requests;
79 u64 last_tid;
80
81 /* mds/osd map */
82 int want_mdsmap;
83 int want_next_osdmap; /* 1 = want, 2 = want+asked */
84 u32 have_osdmap, have_mdsmap;
85
86#ifdef CONFIG_DEBUG_FS
87 struct dentry *debugfs_file;
88#endif
89};
90
91extern struct ceph_monmap *ceph_monmap_decode(void *p, void *end);
92extern int ceph_monmap_contains(struct ceph_monmap *m,
93 struct ceph_entity_addr *addr);
94
95extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
96extern void ceph_monc_stop(struct ceph_mon_client *monc);
97
98/*
99 * The model here is to indicate that we need a new map of at least
100 * epoch @want, and also call in when we receive a map. We will
101 * periodically rerequest the map from the monitor cluster until we
102 * get what we want.
103 */
104extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have);
105extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have);
106
107extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
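A hedged sketch of the model described above (the helper name is made up; epoch bookkeeping in the real client is more involved): a caller that has just decoded a new osdmap might record it and, if it still wants a newer epoch, ask for the next one.

static void note_new_osdmap(struct ceph_mon_client *monc, u32 epoch,
                            bool want_newer)
{
        ceph_monc_got_osdmap(monc, epoch);              /* record the epoch we now have */
        if (want_newer)
                ceph_monc_request_next_osdmap(monc);    /* ask for the next epoch */
}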
108
109extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
110 struct ceph_statfs *buf);
111
112extern int ceph_monc_open_session(struct ceph_mon_client *monc);
113
114extern int ceph_monc_validate_auth(struct ceph_mon_client *monc);
115
116extern int ceph_monc_create_snapid(struct ceph_mon_client *monc,
117 u32 pool, u64 *snapid);
118
119extern int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
120 u32 pool, u64 snapid);
121
122#endif
diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h
new file mode 100644
index 00000000000..a362605f936
--- /dev/null
+++ b/include/linux/ceph/msgpool.h
@@ -0,0 +1,25 @@
1#ifndef _FS_CEPH_MSGPOOL
2#define _FS_CEPH_MSGPOOL
3
4#include <linux/mempool.h>
5#include "messenger.h"
6
7/*
8 * we use memory pools for preallocating messages we may receive, to
9 * avoid unexpected OOM conditions.
10 */
11struct ceph_msgpool {
12 const char *name;
13 mempool_t *pool;
14 int front_len; /* preallocated payload size */
15};
16
17extern int ceph_msgpool_init(struct ceph_msgpool *pool,
18 int front_len, int size, bool blocking,
19 const char *name);
20extern void ceph_msgpool_destroy(struct ceph_msgpool *pool);
21extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *,
22 int front_len);
23extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *);
24
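A minimal usage sketch of the pool API above, assuming kernel context; the sizes, pool name, and function name are illustrative and error handling is abbreviated:

static int msgpool_demo(void)
{
        struct ceph_msgpool pool;
        struct ceph_msg *msg;
        int ret;

        /* 8 preallocated messages with 512-byte front payloads (sizes illustrative) */
        ret = ceph_msgpool_init(&pool, 512, 8, true, "example-pool");
        if (ret)
                return ret;
        msg = ceph_msgpool_get(&pool, 512);
        if (msg)
                ceph_msgpool_put(&pool, msg);   /* hand the message back to the pool */
        ceph_msgpool_destroy(&pool);
        return 0;
}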
25#endif
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h
new file mode 100644
index 00000000000..680d3d648ca
--- /dev/null
+++ b/include/linux/ceph/msgr.h
@@ -0,0 +1,175 @@
1#ifndef CEPH_MSGR_H
2#define CEPH_MSGR_H
3
4/*
5 * Data types for message passing layer used by Ceph.
6 */
7
8#define CEPH_MON_PORT 6789 /* default monitor port */
9
10/*
11 * client-side processes will try to bind to ports in this
12 * range, simply for the benefit of tools like nmap or wireshark
13 * that would like to identify the protocol.
14 */
15#define CEPH_PORT_FIRST 6789
16#define CEPH_PORT_START 6800 /* non-monitors start here */
17#define CEPH_PORT_LAST 6900
18
19/*
20 * tcp connection banner. includes a protocol version, and is adjusted
21 * whenever the wire protocol changes. try to keep this string length
22 * constant.
23 */
24#define CEPH_BANNER "ceph v027"
25#define CEPH_BANNER_MAX_LEN 30
26
27
28/*
29 * Rollover-safe type and comparator for 32-bit sequence numbers.
30 * Comparator returns a negative, zero, or positive value.
31 */
32typedef __u32 ceph_seq_t;
33
34static inline __s32 ceph_seq_cmp(__u32 a, __u32 b)
35{
36 return (__s32)a - (__s32)b;
37}
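A quick worked example of the rollover behavior, with values chosen purely for illustration:

/*
 * ceph_seq_cmp(0x00000002, 0xfffffffd) = (__s32)(0x00000002 - 0xfffffffd)
 *                                      = (__s32)0x00000005 =  5  (a is newer)
 * ceph_seq_cmp(0xfffffffd, 0x00000002) = (__s32)0xfffffffb = -5  (a is older)
 */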
38
39
40/*
41 * entity_name -- logical name for a process participating in the
42 * network, e.g. 'mds0' or 'osd3'.
43 */
44struct ceph_entity_name {
45 __u8 type; /* CEPH_ENTITY_TYPE_* */
46 __le64 num;
47} __attribute__ ((packed));
48
49#define CEPH_ENTITY_TYPE_MON 0x01
50#define CEPH_ENTITY_TYPE_MDS 0x02
51#define CEPH_ENTITY_TYPE_OSD 0x04
52#define CEPH_ENTITY_TYPE_CLIENT 0x08
53#define CEPH_ENTITY_TYPE_AUTH 0x20
54
55#define CEPH_ENTITY_TYPE_ANY 0xFF
56
57extern const char *ceph_entity_type_name(int type);
58
59/*
60 * entity_addr -- network address
61 */
62struct ceph_entity_addr {
63 __le32 type;
64 __le32 nonce; /* unique id for process (e.g. pid) */
65 struct sockaddr_storage in_addr;
66} __attribute__ ((packed));
67
68struct ceph_entity_inst {
69 struct ceph_entity_name name;
70 struct ceph_entity_addr addr;
71} __attribute__ ((packed));
72
73
74/* used by message exchange protocol */
75#define CEPH_MSGR_TAG_READY 1 /* server->client: ready for messages */
76#define CEPH_MSGR_TAG_RESETSESSION 2 /* server->client: reset, try again */
77#define CEPH_MSGR_TAG_WAIT 3 /* server->client: wait for racing
78 incoming connection */
79#define CEPH_MSGR_TAG_RETRY_SESSION 4 /* server->client + cseq: try again
80 with higher cseq */
81#define CEPH_MSGR_TAG_RETRY_GLOBAL 5 /* server->client + gseq: try again
82 with higher gseq */
83#define CEPH_MSGR_TAG_CLOSE 6 /* closing pipe */
84#define CEPH_MSGR_TAG_MSG 7 /* message */
85#define CEPH_MSGR_TAG_ACK 8 /* message ack */
86#define CEPH_MSGR_TAG_KEEPALIVE 9 /* just a keepalive byte! */
87#define CEPH_MSGR_TAG_BADPROTOVER 10 /* bad protocol version */
88#define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */
89#define CEPH_MSGR_TAG_FEATURES 12 /* insufficient features */
90
91
92/*
93 * connection negotiation
94 */
95struct ceph_msg_connect {
96 __le64 features; /* supported feature bits */
97 __le32 host_type; /* CEPH_ENTITY_TYPE_* */
98 __le32 global_seq; /* count connections initiated by this host */
99 __le32 connect_seq; /* count connections initiated in this session */
100 __le32 protocol_version;
101 __le32 authorizer_protocol;
102 __le32 authorizer_len;
103 __u8 flags; /* CEPH_MSG_CONNECT_* */
104} __attribute__ ((packed));
105
106struct ceph_msg_connect_reply {
107 __u8 tag;
108 __le64 features; /* feature bits for this session */
109 __le32 global_seq;
110 __le32 connect_seq;
111 __le32 protocol_version;
112 __le32 authorizer_len;
113 __u8 flags;
114} __attribute__ ((packed));
115
116#define CEPH_MSG_CONNECT_LOSSY 1 /* messages i send may be safely dropped */
117
118
119/*
120 * message header
121 */
122struct ceph_msg_header_old {
123 __le64 seq; /* message seq# for this session */
124 __le64 tid; /* transaction id */
125 __le16 type; /* message type */
126 __le16 priority; /* priority. higher value == higher priority */
127 __le16 version; /* version of message encoding */
128
129 __le32 front_len; /* bytes in main payload */
130 __le32 middle_len;/* bytes in middle payload */
131 __le32 data_len; /* bytes of data payload */
132 __le16 data_off; /* sender: include full offset;
133 receiver: mask against ~PAGE_MASK */
134
135 struct ceph_entity_inst src, orig_src;
136 __le32 reserved;
137 __le32 crc; /* header crc32c */
138} __attribute__ ((packed));
139
140struct ceph_msg_header {
141 __le64 seq; /* message seq# for this session */
142 __le64 tid; /* transaction id */
143 __le16 type; /* message type */
144 __le16 priority; /* priority. higher value == higher priority */
145 __le16 version; /* version of message encoding */
146
147 __le32 front_len; /* bytes in main payload */
148 __le32 middle_len;/* bytes in middle payload */
149 __le32 data_len; /* bytes of data payload */
150 __le16 data_off; /* sender: include full offset;
151 receiver: mask against ~PAGE_MASK */
152
153 struct ceph_entity_name src;
154 __le32 reserved;
155 __le32 crc; /* header crc32c */
156} __attribute__ ((packed));
157
158#define CEPH_MSG_PRIO_LOW 64
159#define CEPH_MSG_PRIO_DEFAULT 127
160#define CEPH_MSG_PRIO_HIGH 196
161#define CEPH_MSG_PRIO_HIGHEST 255
162
163/*
164 * follows data payload
165 */
166struct ceph_msg_footer {
167 __le32 front_crc, middle_crc, data_crc;
168 __u8 flags;
169} __attribute__ ((packed));
170
171#define CEPH_MSG_FOOTER_COMPLETE (1<<0) /* msg wasn't aborted */
172#define CEPH_MSG_FOOTER_NOCRC (1<<1) /* no data crc */
173
174
175#endif
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
new file mode 100644
index 00000000000..6c91fb032c3
--- /dev/null
+++ b/include/linux/ceph/osd_client.h
@@ -0,0 +1,234 @@
1#ifndef _FS_CEPH_OSD_CLIENT_H
2#define _FS_CEPH_OSD_CLIENT_H
3
4#include <linux/completion.h>
5#include <linux/kref.h>
6#include <linux/mempool.h>
7#include <linux/rbtree.h>
8
9#include "types.h"
10#include "osdmap.h"
11#include "messenger.h"
12
13struct ceph_msg;
14struct ceph_snap_context;
15struct ceph_osd_request;
16struct ceph_osd_client;
17struct ceph_authorizer;
18struct ceph_pagelist;
19
20/*
21 * completion callback for async writepages
22 */
23typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *,
24 struct ceph_msg *);
25
26/* a given osd we're communicating with */
27struct ceph_osd {
28 atomic_t o_ref;
29 struct ceph_osd_client *o_osdc;
30 int o_osd;
31 int o_incarnation;
32 struct rb_node o_node;
33 struct ceph_connection o_con;
34 struct list_head o_requests;
35 struct list_head o_osd_lru;
36 struct ceph_authorizer *o_authorizer;
37 void *o_authorizer_buf, *o_authorizer_reply_buf;
38 size_t o_authorizer_buf_len, o_authorizer_reply_buf_len;
39 unsigned long lru_ttl;
40 int o_marked_for_keepalive;
41 struct list_head o_keepalive_item;
42};
43
44/* an in-flight request */
45struct ceph_osd_request {
46 u64 r_tid; /* unique for this client */
47 struct rb_node r_node;
48 struct list_head r_req_lru_item;
49 struct list_head r_osd_item;
50 struct ceph_osd *r_osd;
51 struct ceph_pg r_pgid;
52 int r_pg_osds[CEPH_PG_MAX_SIZE];
53 int r_num_pg_osds;
54
55 struct ceph_connection *r_con_filling_msg;
56
57 struct ceph_msg *r_request, *r_reply;
58 int r_result;
59 int r_flags; /* any additional flags for the osd */
60 u32 r_sent; /* >0 if r_request is sending/sent */
61 int r_got_reply;
62
63 struct ceph_osd_client *r_osdc;
64 struct kref r_kref;
65 bool r_mempool;
66 struct completion r_completion, r_safe_completion;
67 ceph_osdc_callback_t r_callback, r_safe_callback;
68 struct ceph_eversion r_reassert_version;
69 struct list_head r_unsafe_item;
70
71 struct inode *r_inode; /* for use by callbacks */
72 void *r_priv; /* ditto */
73
74 char r_oid[40]; /* object name */
75 int r_oid_len;
76 unsigned long r_stamp; /* send OR check time */
77 bool r_resend; /* msg send failed, needs retry */
78
79 struct ceph_file_layout r_file_layout;
80 struct ceph_snap_context *r_snapc; /* snap context for writes */
81 unsigned r_num_pages; /* size of page array (follows) */
82 struct page **r_pages; /* pages for data payload */
83 int r_pages_from_pool;
84 int r_own_pages; /* if true, i own page list */
85#ifdef CONFIG_BLOCK
86 struct bio *r_bio; /* instead of pages */
87#endif
88
89 struct ceph_pagelist *r_trail; /* trailing part of the data */
90};
91
92struct ceph_osd_client {
93 struct ceph_client *client;
94
95 struct ceph_osdmap *osdmap; /* current map */
96 struct rw_semaphore map_sem;
97 struct completion map_waiters;
98 u64 last_requested_map;
99
100 struct mutex request_mutex;
101 struct rb_root osds; /* osds */
102 struct list_head osd_lru; /* idle osds */
103 u64 timeout_tid; /* tid of timeout triggering rq */
104 u64 last_tid; /* tid of last request */
105 struct rb_root requests; /* pending requests */
106 struct list_head req_lru; /* pending requests lru */
107 int num_requests;
108 struct delayed_work timeout_work;
109 struct delayed_work osds_timeout_work;
110#ifdef CONFIG_DEBUG_FS
111 struct dentry *debugfs_file;
112#endif
113
114 mempool_t *req_mempool;
115
116 struct ceph_msgpool msgpool_op;
117 struct ceph_msgpool msgpool_op_reply;
118};
119
120struct ceph_osd_req_op {
121 u16 op; /* CEPH_OSD_OP_* */
122 u32 flags; /* CEPH_OSD_FLAG_* */
123 union {
124 struct {
125 u64 offset, length;
126 u64 truncate_size;
127 u32 truncate_seq;
128 } extent;
129 struct {
130 const char *name;
131 u32 name_len;
132 const char *val;
133 u32 value_len;
134 __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */
135 __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */
136 } xattr;
137 struct {
138 const char *class_name;
139 __u8 class_len;
140 const char *method_name;
141 __u8 method_len;
142 __u8 argc;
143 const char *indata;
144 u32 indata_len;
145 } cls;
146 struct {
147 u64 cookie, count;
148 } pgls;
149 struct {
150 u64 snapid;
151 } snap;
152 };
153 u32 payload_len;
154};
155
156extern int ceph_osdc_init(struct ceph_osd_client *osdc,
157 struct ceph_client *client);
158extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
159
160extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
161 struct ceph_msg *msg);
162extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
163 struct ceph_msg *msg);
164
165extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
166 struct ceph_file_layout *layout,
167 u64 snapid,
168 u64 off, u64 *plen, u64 *bno,
169 struct ceph_osd_request *req,
170 struct ceph_osd_req_op *op);
171
172extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
173 int flags,
174 struct ceph_snap_context *snapc,
175 struct ceph_osd_req_op *ops,
176 bool use_mempool,
177 gfp_t gfp_flags,
178 struct page **pages,
179 struct bio *bio);
180
181extern void ceph_osdc_build_request(struct ceph_osd_request *req,
182 u64 off, u64 *plen,
183 struct ceph_osd_req_op *src_ops,
184 struct ceph_snap_context *snapc,
185 struct timespec *mtime,
186 const char *oid,
187 int oid_len);
188
189extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
190 struct ceph_file_layout *layout,
191 struct ceph_vino vino,
192 u64 offset, u64 *len, int op, int flags,
193 struct ceph_snap_context *snapc,
194 int do_sync, u32 truncate_seq,
195 u64 truncate_size,
196 struct timespec *mtime,
197 bool use_mempool, int num_reply);
198
199static inline void ceph_osdc_get_request(struct ceph_osd_request *req)
200{
201 kref_get(&req->r_kref);
202}
203extern void ceph_osdc_release_request(struct kref *kref);
204static inline void ceph_osdc_put_request(struct ceph_osd_request *req)
205{
206 kref_put(&req->r_kref, ceph_osdc_release_request);
207}
208
209extern int ceph_osdc_start_request(struct ceph_osd_client *osdc,
210 struct ceph_osd_request *req,
211 bool nofail);
212extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
213 struct ceph_osd_request *req);
214extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
215
216extern int ceph_osdc_readpages(struct ceph_osd_client *osdc,
217 struct ceph_vino vino,
218 struct ceph_file_layout *layout,
219 u64 off, u64 *plen,
220 u32 truncate_seq, u64 truncate_size,
221 struct page **pages, int nr_pages);
222
223extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
224 struct ceph_vino vino,
225 struct ceph_file_layout *layout,
226 struct ceph_snap_context *sc,
227 u64 off, u64 len,
228 u32 truncate_seq, u64 truncate_size,
229 struct timespec *mtime,
230 struct page **pages, int nr_pages,
231 int flags, int do_sync, bool nofail);
232
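A hedged sketch of the request lifecycle using the calls declared above; the helper name is illustrative and reference handling is simplified (the real callers manage the kref more carefully):

static int submit_and_wait(struct ceph_osd_client *osdc,
                           struct ceph_osd_request *req)
{
        int rc;

        rc = ceph_osdc_start_request(osdc, req, false); /* queue for sending */
        if (rc == 0)
                rc = ceph_osdc_wait_request(osdc, req); /* block until the reply */
        ceph_osdc_put_request(req);                     /* drop our reference */
        return rc;
}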
233#endif
234
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
new file mode 100644
index 00000000000..ba4c205cbb0
--- /dev/null
+++ b/include/linux/ceph/osdmap.h
@@ -0,0 +1,130 @@
1#ifndef _FS_CEPH_OSDMAP_H
2#define _FS_CEPH_OSDMAP_H
3
4#include <linux/rbtree.h>
5#include "types.h"
6#include "ceph_fs.h"
7#include <linux/crush/crush.h>
8
9/*
10 * The osd map describes the current membership of the osd cluster and
11 * specifies the mapping of objects to placement groups and placement
12 * groups to (sets of) osds. That is, it completely specifies the
13 * (desired) distribution of all data objects in the system at some
14 * point in time.
15 *
16 * Each map version is identified by an epoch, which increases monotonically.
17 *
18 * The map can be updated either via an incremental map (diff) describing
19 * the change between two successive epochs, or as a fully encoded map.
20 */
21struct ceph_pg_pool_info {
22 struct rb_node node;
23 int id;
24 struct ceph_pg_pool v;
25 int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask;
26 char *name;
27};
28
29struct ceph_pg_mapping {
30 struct rb_node node;
31 struct ceph_pg pgid;
32 int len;
33 int osds[];
34};
35
36struct ceph_osdmap {
37 struct ceph_fsid fsid;
38 u32 epoch;
39 u32 mkfs_epoch;
40 struct ceph_timespec created, modified;
41
42 u32 flags; /* CEPH_OSDMAP_* */
43
44 u32 max_osd; /* size of osd_state, _offload, _addr arrays */
45 u8 *osd_state; /* CEPH_OSD_* */
46 u32 *osd_weight; /* 0 = failed, 0x10000 = 100% normal */
47 struct ceph_entity_addr *osd_addr;
48
49 struct rb_root pg_temp;
50 struct rb_root pg_pools;
51 u32 pool_max;
52
53 /* the CRUSH map specifies the mapping of placement groups to
54 * the list of osds that store+replicate them. */
55 struct crush_map *crush;
56};
57
58/*
59 * file layout helpers
60 */
61#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit))
62#define ceph_file_layout_stripe_count(l) \
63 ((__s32)le32_to_cpu((l).fl_stripe_count))
64#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size))
65#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
66#define ceph_file_layout_object_su(l) \
67 ((__s32)le32_to_cpu((l).fl_object_stripe_unit))
68#define ceph_file_layout_pg_preferred(l) \
69 ((__s32)le32_to_cpu((l).fl_pg_preferred))
70#define ceph_file_layout_pg_pool(l) \
71 ((__s32)le32_to_cpu((l).fl_pg_pool))
72
73static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l)
74{
75 return le32_to_cpu(l->fl_stripe_unit) *
76 le32_to_cpu(l->fl_stripe_count);
77}
78
79/* "period" == bytes before i start on a new set of objects */
80static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l)
81{
82 return le32_to_cpu(l->fl_object_size) *
83 le32_to_cpu(l->fl_stripe_count);
84}
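A worked example of the two helpers above, using illustrative (non-default) layout values:

/*
 *   fl_stripe_unit  = 65536   (64 KB)
 *   fl_stripe_count = 4
 *   fl_object_size  = 4194304 (4 MB)
 *
 *   stripe width = 65536 * 4   = 262144   (256 KB written per full stripe)
 *   period       = 4194304 * 4 = 16777216 (16 MB before a new set of objects)
 */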
85
86
87static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd)
88{
89 return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP);
90}
91
92static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag)
93{
94 return map && (map->flags & flag);
95}
96
97extern char *ceph_osdmap_state_str(char *str, int len, int state);
98
99static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
100 int osd)
101{
102 if (osd >= map->max_osd)
103 return NULL;
104 return &map->osd_addr[osd];
105}
106
107extern struct ceph_osdmap *osdmap_decode(void **p, void *end);
108extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
109 struct ceph_osdmap *map,
110 struct ceph_messenger *msgr);
111extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
112
113/* calculate mapping of a file extent to an object */
114extern void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
115 u64 off, u64 *plen,
116 u64 *bno, u64 *oxoff, u64 *oxlen);
117
118/* calculate mapping of object to a placement group */
119extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
120 const char *oid,
121 struct ceph_file_layout *fl,
122 struct ceph_osdmap *osdmap);
123extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
124 int *acting);
125extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
126 struct ceph_pg pgid);
127
128extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
129
130#endif
diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h
new file mode 100644
index 00000000000..9660d6b0a35
--- /dev/null
+++ b/include/linux/ceph/pagelist.h
@@ -0,0 +1,75 @@
1#ifndef __FS_CEPH_PAGELIST_H
2#define __FS_CEPH_PAGELIST_H
3
4#include <linux/list.h>
5
6struct ceph_pagelist {
7 struct list_head head;
8 void *mapped_tail;
9 size_t length;
10 size_t room;
11 struct list_head free_list;
12 size_t num_pages_free;
13};
14
15struct ceph_pagelist_cursor {
16 struct ceph_pagelist *pl; /* pagelist, for error checking */
17 struct list_head *page_lru; /* page in list */
18 size_t room; /* room remaining to reset to */
19};
20
21static inline void ceph_pagelist_init(struct ceph_pagelist *pl)
22{
23 INIT_LIST_HEAD(&pl->head);
24 pl->mapped_tail = NULL;
25 pl->length = 0;
26 pl->room = 0;
27 INIT_LIST_HEAD(&pl->free_list);
28 pl->num_pages_free = 0;
29}
30
31extern int ceph_pagelist_release(struct ceph_pagelist *pl);
32
33extern int ceph_pagelist_append(struct ceph_pagelist *pl, const void *d, size_t l);
34
35extern int ceph_pagelist_reserve(struct ceph_pagelist *pl, size_t space);
36
37extern int ceph_pagelist_free_reserve(struct ceph_pagelist *pl);
38
39extern void ceph_pagelist_set_cursor(struct ceph_pagelist *pl,
40 struct ceph_pagelist_cursor *c);
41
42extern int ceph_pagelist_truncate(struct ceph_pagelist *pl,
43 struct ceph_pagelist_cursor *c);
44
45static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v)
46{
47 __le64 ev = cpu_to_le64(v);
48 return ceph_pagelist_append(pl, &ev, sizeof(ev));
49}
50static inline int ceph_pagelist_encode_32(struct ceph_pagelist *pl, u32 v)
51{
52 __le32 ev = cpu_to_le32(v);
53 return ceph_pagelist_append(pl, &ev, sizeof(ev));
54}
55static inline int ceph_pagelist_encode_16(struct ceph_pagelist *pl, u16 v)
56{
57 __le16 ev = cpu_to_le16(v);
58 return ceph_pagelist_append(pl, &ev, sizeof(ev));
59}
60static inline int ceph_pagelist_encode_8(struct ceph_pagelist *pl, u8 v)
61{
62 return ceph_pagelist_append(pl, &v, 1);
63}
64static inline int ceph_pagelist_encode_string(struct ceph_pagelist *pl,
65 char *s, size_t len)
66{
67 int ret = ceph_pagelist_encode_32(pl, len);
68 if (ret)
69 return ret;
70 if (len)
71 return ceph_pagelist_append(pl, s, len);
72 return 0;
73}
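A minimal usage sketch of the encode helpers above, assuming kernel context; the payload and helper name are illustrative:

static int fill_example_pagelist(struct ceph_pagelist *pl)
{
        char name[] = "example";        /* hypothetical payload */
        int ret;

        ceph_pagelist_init(pl);
        ret = ceph_pagelist_encode_string(pl, name, sizeof(name) - 1);
        if (!ret)
                ret = ceph_pagelist_encode_64(pl, 42);
        if (ret)
                ceph_pagelist_release(pl);      /* free pages on failure */
        return ret;
}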
74
75#endif
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
new file mode 100644
index 00000000000..6d5247f2e81
--- /dev/null
+++ b/include/linux/ceph/rados.h
@@ -0,0 +1,405 @@
1#ifndef CEPH_RADOS_H
2#define CEPH_RADOS_H
3
4/*
5 * Data types for the Ceph distributed object storage layer RADOS
6 * (Reliable Autonomic Distributed Object Store).
7 */
8
9#include "msgr.h"
10
11/*
12 * osdmap encoding versions
13 */
14#define CEPH_OSDMAP_INC_VERSION 5
15#define CEPH_OSDMAP_INC_VERSION_EXT 5
16#define CEPH_OSDMAP_VERSION 5
17#define CEPH_OSDMAP_VERSION_EXT 5
18
19/*
20 * fs id
21 */
22struct ceph_fsid {
23 unsigned char fsid[16];
24};
25
26static inline int ceph_fsid_compare(const struct ceph_fsid *a,
27 const struct ceph_fsid *b)
28{
29 return memcmp(a, b, sizeof(*a));
30}
31
32/*
33 * ino, object, etc.
34 */
35typedef __le64 ceph_snapid_t;
36#define CEPH_SNAPDIR ((__u64)(-1)) /* reserved for hidden .snap dir */
37#define CEPH_NOSNAP ((__u64)(-2)) /* "head", "live" revision */
38#define CEPH_MAXSNAP ((__u64)(-3)) /* largest valid snapid */
39
40struct ceph_timespec {
41 __le32 tv_sec;
42 __le32 tv_nsec;
43} __attribute__ ((packed));
44
45
46/*
47 * object layout - how objects are mapped into PGs
48 */
49#define CEPH_OBJECT_LAYOUT_HASH 1
50#define CEPH_OBJECT_LAYOUT_LINEAR 2
51#define CEPH_OBJECT_LAYOUT_HASHINO 3
52
53/*
54 * pg layout -- how PGs are mapped onto (sets of) OSDs
55 */
56#define CEPH_PG_LAYOUT_CRUSH 0
57#define CEPH_PG_LAYOUT_HASH 1
58#define CEPH_PG_LAYOUT_LINEAR 2
59#define CEPH_PG_LAYOUT_HYBRID 3
60
61#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */
62
63/*
64 * placement group.
65 * we encode this into one __le64.
66 */
67struct ceph_pg {
68 __le16 preferred; /* preferred primary osd */
69 __le16 ps; /* placement seed */
70 __le32 pool; /* object pool */
71} __attribute__ ((packed));
72
73/*
74 * pg_pool is a set of pgs storing a pool of objects
75 *
76 * pg_num -- base number of pseudorandomly placed pgs
77 *
78 * pgp_num -- effective number when calculating pg placement. this
79 * is used for pg_num increases. new pgs result in data being "split"
80 * into new pgs. for this to proceed smoothly, new pgs are initially
81 * colocated with their parents; that is, pgp_num doesn't increase
82 * until the new pgs have successfully split. only _then_ are the new
83 * pgs placed independently.
84 *
85 * lpg_num -- localized pg count (per device). replicas are randomly
86 * selected.
87 *
88 * lpgp_num -- as above.
89 */
90#define CEPH_PG_TYPE_REP 1
91#define CEPH_PG_TYPE_RAID4 2
92#define CEPH_PG_POOL_VERSION 2
93struct ceph_pg_pool {
94 __u8 type; /* CEPH_PG_TYPE_* */
95 __u8 size; /* number of osds in each pg */
96 __u8 crush_ruleset; /* crush placement rule */
97 __u8 object_hash; /* hash mapping object name to ps */
98 __le32 pg_num, pgp_num; /* number of pg's */
99 __le32 lpg_num, lpgp_num; /* number of localized pg's */
100 __le32 last_change; /* most recent epoch changed */
101 __le64 snap_seq; /* seq for per-pool snapshot */
102 __le32 snap_epoch; /* epoch of last snap */
103 __le32 num_snaps;
104 __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */
105 __le64 auid; /* who owns the pg */
106} __attribute__ ((packed));
107
108/*
109 * stable_mod func is used to control number of placement groups.
110 * similar to straight-up modulo, but produces a stable mapping as b
111 * increases over time. b is the number of bins, and bmask is the
112 * containing power of 2 minus 1.
113 *
114 * b <= bmask and bmask=(2**n)-1
115 * e.g., b=12 -> bmask=15, b=123 -> bmask=127
116 */
117static inline int ceph_stable_mod(int x, int b, int bmask)
118{
119 if ((x & bmask) < b)
120 return x & bmask;
121 else
122 return x & (bmask >> 1);
123}
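Worked examples of the mapping above, with b = 12 and bmask = 15:

/*
 *   ceph_stable_mod( 5, 12, 15): ( 5 & 15) =  5 <  12 -> 5
 *   ceph_stable_mod(13, 12, 15): (13 & 15) = 13 >= 12 -> 13 & 7 = 5
 *   ceph_stable_mod(21, 12, 15): (21 & 15) =  5 <  12 -> 5
 */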
124
125/*
126 * object layout - how a given object should be stored.
127 */
128struct ceph_object_layout {
129 struct ceph_pg ol_pgid; /* raw pg, with _full_ ps precision. */
130 __le32 ol_stripe_unit; /* for per-object parity, if any */
131} __attribute__ ((packed));
132
133/*
134 * compound epoch+version, used by storage layer to serialize mutations
135 */
136struct ceph_eversion {
137 __le32 epoch;
138 __le64 version;
139} __attribute__ ((packed));
140
141/*
142 * osd map bits
143 */
144
145/* status bits */
146#define CEPH_OSD_EXISTS 1
147#define CEPH_OSD_UP 2
148
149/* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */
150#define CEPH_OSD_IN 0x10000
151#define CEPH_OSD_OUT 0
152
153
154/*
155 * osd map flag bits
156 */
157#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */
158#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */
159#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */
160#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */
161#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */
162
163/*
164 * osd ops
165 */
166#define CEPH_OSD_OP_MODE 0xf000
167#define CEPH_OSD_OP_MODE_RD 0x1000
168#define CEPH_OSD_OP_MODE_WR 0x2000
169#define CEPH_OSD_OP_MODE_RMW 0x3000
170#define CEPH_OSD_OP_MODE_SUB 0x4000
171
172#define CEPH_OSD_OP_TYPE 0x0f00
173#define CEPH_OSD_OP_TYPE_LOCK 0x0100
174#define CEPH_OSD_OP_TYPE_DATA 0x0200
175#define CEPH_OSD_OP_TYPE_ATTR 0x0300
176#define CEPH_OSD_OP_TYPE_EXEC 0x0400
177#define CEPH_OSD_OP_TYPE_PG 0x0500
178
179enum {
180 /** data **/
181 /* read */
182 CEPH_OSD_OP_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 1,
183 CEPH_OSD_OP_STAT = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 2,
184
185 /* fancy read */
186 CEPH_OSD_OP_MASKTRUNC = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 4,
187
188 /* write */
189 CEPH_OSD_OP_WRITE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 1,
190 CEPH_OSD_OP_WRITEFULL = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 2,
191 CEPH_OSD_OP_TRUNCATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 3,
192 CEPH_OSD_OP_ZERO = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 4,
193 CEPH_OSD_OP_DELETE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 5,
194
195 /* fancy write */
196 CEPH_OSD_OP_APPEND = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 6,
197 CEPH_OSD_OP_STARTSYNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 7,
198 CEPH_OSD_OP_SETTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 8,
199 CEPH_OSD_OP_TRIMTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 9,
200
201 CEPH_OSD_OP_TMAPUP = CEPH_OSD_OP_MODE_RMW | CEPH_OSD_OP_TYPE_DATA | 10,
202 CEPH_OSD_OP_TMAPPUT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 11,
203 CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12,
204
205 CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13,
206 CEPH_OSD_OP_ROLLBACK= CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 14,
207
208 /** attrs **/
209 /* read */
210 CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1,
211 CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2,
212 CEPH_OSD_OP_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 3,
213
214 /* write */
215 CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1,
216 CEPH_OSD_OP_SETXATTRS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 2,
217 CEPH_OSD_OP_RESETXATTRS = CEPH_OSD_OP_MODE_WR|CEPH_OSD_OP_TYPE_ATTR | 3,
218 CEPH_OSD_OP_RMXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 4,
219
220 /** subop **/
221 CEPH_OSD_OP_PULL = CEPH_OSD_OP_MODE_SUB | 1,
222 CEPH_OSD_OP_PUSH = CEPH_OSD_OP_MODE_SUB | 2,
223 CEPH_OSD_OP_BALANCEREADS = CEPH_OSD_OP_MODE_SUB | 3,
224 CEPH_OSD_OP_UNBALANCEREADS = CEPH_OSD_OP_MODE_SUB | 4,
225 CEPH_OSD_OP_SCRUB = CEPH_OSD_OP_MODE_SUB | 5,
226
227 /** lock **/
228 CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1,
229 CEPH_OSD_OP_WRUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 2,
230 CEPH_OSD_OP_RDLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 3,
231 CEPH_OSD_OP_RDUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 4,
232 CEPH_OSD_OP_UPLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 5,
233 CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6,
234
235 /** exec **/
236 CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1,
237
238 /** pg **/
239 CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1,
240};
241
242static inline int ceph_osd_op_type_lock(int op)
243{
244 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_LOCK;
245}
246static inline int ceph_osd_op_type_data(int op)
247{
248 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_DATA;
249}
250static inline int ceph_osd_op_type_attr(int op)
251{
252 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_ATTR;
253}
254static inline int ceph_osd_op_type_exec(int op)
255{
256 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_EXEC;
257}
258static inline int ceph_osd_op_type_pg(int op)
259{
260 return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG;
261}
262
263static inline int ceph_osd_op_mode_subop(int op)
264{
265 return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_SUB;
266}
267static inline int ceph_osd_op_mode_read(int op)
268{
269 return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD;
270}
271static inline int ceph_osd_op_mode_modify(int op)
272{
273 return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR;
274}
275
276/*
277 * note that the following tmap stuff is also defined in the ceph librados.h;
278 * any modification here needs to be updated there
279 */
280#define CEPH_OSD_TMAP_HDR 'h'
281#define CEPH_OSD_TMAP_SET 's'
282#define CEPH_OSD_TMAP_RM 'r'
283
284extern const char *ceph_osd_op_name(int op);
285
286
287/*
288 * osd op flags
289 *
290 * An op may be READ, WRITE, or READ|WRITE.
291 */
292enum {
293 CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */
294 CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */
295 CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */
296 CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */
297 CEPH_OSD_FLAG_READ = 16, /* op may read */
298 CEPH_OSD_FLAG_WRITE = 32, /* op may write */
299 CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */
300 CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */
301 CEPH_OSD_FLAG_BALANCE_READS = 256,
302 CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */
303 CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */
304 CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */
305 CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */
306};
307
308enum {
309 CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */
310};
311
312#define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/
313#define EBLACKLISTED ESHUTDOWN /* blacklisted */
314
315/* xattr comparison */
316enum {
317 CEPH_OSD_CMPXATTR_OP_NOP = 0,
318 CEPH_OSD_CMPXATTR_OP_EQ = 1,
319 CEPH_OSD_CMPXATTR_OP_NE = 2,
320 CEPH_OSD_CMPXATTR_OP_GT = 3,
321 CEPH_OSD_CMPXATTR_OP_GTE = 4,
322 CEPH_OSD_CMPXATTR_OP_LT = 5,
323 CEPH_OSD_CMPXATTR_OP_LTE = 6
324};
325
326enum {
327 CEPH_OSD_CMPXATTR_MODE_STRING = 1,
328 CEPH_OSD_CMPXATTR_MODE_U64 = 2
329};
330
331/*
332 * an individual object operation. each may be accompanied by some data
333 * payload
334 */
335struct ceph_osd_op {
336 __le16 op; /* CEPH_OSD_OP_* */
337 __le32 flags; /* CEPH_OSD_FLAG_* */
338 union {
339 struct {
340 __le64 offset, length;
341 __le64 truncate_size;
342 __le32 truncate_seq;
343 } __attribute__ ((packed)) extent;
344 struct {
345 __le32 name_len;
346 __le32 value_len;
347 __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */
348 __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */
349 } __attribute__ ((packed)) xattr;
350 struct {
351 __u8 class_len;
352 __u8 method_len;
353 __u8 argc;
354 __le32 indata_len;
355 } __attribute__ ((packed)) cls;
356 struct {
357 __le64 cookie, count;
358 } __attribute__ ((packed)) pgls;
359 struct {
360 __le64 snapid;
361 } __attribute__ ((packed)) snap;
362 };
363 __le32 payload_len;
364} __attribute__ ((packed));
365
366/*
367 * osd request message header. each request may include multiple
368 * ceph_osd_op object operations.
369 */
370struct ceph_osd_request_head {
371 __le32 client_inc; /* client incarnation */
372 struct ceph_object_layout layout; /* pgid */
373 __le32 osdmap_epoch; /* client's osdmap epoch */
374
375 __le32 flags;
376
377 struct ceph_timespec mtime; /* for mutations only */
378 struct ceph_eversion reassert_version; /* if we are replaying op */
379
380 __le32 object_len; /* length of object name */
381
382 __le64 snapid; /* snapid to read */
383 __le64 snap_seq; /* writer's snap context */
384 __le32 num_snaps;
385
386 __le16 num_ops;
387 struct ceph_osd_op ops[]; /* followed by ops[], obj, ticket, snaps */
388} __attribute__ ((packed));
389
390struct ceph_osd_reply_head {
391 __le32 client_inc; /* client incarnation */
392 __le32 flags;
393 struct ceph_object_layout layout;
394 __le32 osdmap_epoch;
395 struct ceph_eversion reassert_version; /* for replaying uncommitted */
396
397 __le32 result; /* result code */
398
399 __le32 object_len; /* length of object name */
400 __le32 num_ops;
401 struct ceph_osd_op ops[0]; /* ops[], object */
402} __attribute__ ((packed));
403
404
405#endif
diff --git a/include/linux/ceph/types.h b/include/linux/ceph/types.h
new file mode 100644
index 00000000000..28b35a005ec
--- /dev/null
+++ b/include/linux/ceph/types.h
@@ -0,0 +1,29 @@
1#ifndef _FS_CEPH_TYPES_H
2#define _FS_CEPH_TYPES_H
3
4/* needed before including ceph_fs.h */
5#include <linux/in.h>
6#include <linux/types.h>
7#include <linux/fcntl.h>
8#include <linux/string.h>
9
10#include "ceph_fs.h"
11#include "ceph_frag.h"
12#include "ceph_hash.h"
13
14/*
15 * Identify inodes by both their ino AND snapshot id (a u64).
16 */
17struct ceph_vino {
18 u64 ino;
19 u64 snap;
20};
21
22
23/* context for the caps reservation mechanism */
24struct ceph_cap_reservation {
25 int count;
26};
27
28
29#endif
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 8ba66a9d902..ba4b85a6d9b 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -9,37 +9,7 @@
9 * These are the only things you should do on a core-file: use only these 9 * These are the only things you should do on a core-file: use only these
10 * functions to write out all the necessary info. 10 * functions to write out all the necessary info.
11 */ 11 */
12static inline int dump_write(struct file *file, const void *addr, int nr) 12extern int dump_write(struct file *file, const void *addr, int nr);
13{ 13extern int dump_seek(struct file *file, loff_t off);
14 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
15}
16
17static inline int dump_seek(struct file *file, loff_t off)
18{
19 int ret = 1;
20
21 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
22 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
23 return 0;
24 } else {
25 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
26
27 if (!buf)
28 return 0;
29 while (off > 0) {
30 unsigned long n = off;
31
32 if (n > PAGE_SIZE)
33 n = PAGE_SIZE;
34 if (!dump_write(file, buf, n)) {
35 ret = 0;
36 break;
37 }
38 off -= n;
39 }
40 free_page((unsigned long)buf);
41 }
42 return ret;
43}
44 14
45#endif /* _LINUX_COREDUMP_H */ 15#endif /* _LINUX_COREDUMP_H */
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
new file mode 100644
index 00000000000..97e435b191f
--- /dev/null
+++ b/include/linux/crush/crush.h
@@ -0,0 +1,180 @@
1#ifndef CEPH_CRUSH_CRUSH_H
2#define CEPH_CRUSH_CRUSH_H
3
4#include <linux/types.h>
5
6/*
7 * CRUSH is a pseudo-random data distribution algorithm that
8 * efficiently distributes input values (typically, data objects)
9 * across a heterogeneous, structured storage cluster.
10 *
11 * The algorithm was originally described in detail in this paper
12 * (although the algorithm has evolved somewhat since then):
13 *
14 * http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf
15 *
16 * LGPL2
17 */
18
19
20#define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */
21
22
23#define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */
24#define CRUSH_MAX_SET 10 /* max size of a mapping result */
25
26
27/*
28 * CRUSH uses user-defined "rules" to describe how inputs should be
29 * mapped to devices. A rule consists of a sequence of steps to perform
30 * to generate the set of output devices.
31 */
32struct crush_rule_step {
33 __u32 op;
34 __s32 arg1;
35 __s32 arg2;
36};
37
38/* step op codes */
39enum {
40 CRUSH_RULE_NOOP = 0,
41 CRUSH_RULE_TAKE = 1, /* arg1 = value to start with */
42 CRUSH_RULE_CHOOSE_FIRSTN = 2, /* arg1 = num items to pick */
43 /* arg2 = type */
44 CRUSH_RULE_CHOOSE_INDEP = 3, /* same */
45 CRUSH_RULE_EMIT = 4, /* no args */
46 CRUSH_RULE_CHOOSE_LEAF_FIRSTN = 6,
47 CRUSH_RULE_CHOOSE_LEAF_INDEP = 7,
48};
49
50/*
51 * for specifying choose num (arg1) relative to the max parameter
52 * passed to do_rule
53 */
54#define CRUSH_CHOOSE_N 0
55#define CRUSH_CHOOSE_N_MINUS(x) (-(x))
56
57/*
58 * The rule mask is used to describe what the rule is intended for.
59 * Given a ruleset and size of output set, we search through the
60 * rule list for a matching rule_mask.
61 */
62struct crush_rule_mask {
63 __u8 ruleset;
64 __u8 type;
65 __u8 min_size;
66 __u8 max_size;
67};
68
69struct crush_rule {
70 __u32 len;
71 struct crush_rule_mask mask;
72 struct crush_rule_step steps[0];
73};
74
75#define crush_rule_size(len) (sizeof(struct crush_rule) + \
76 (len)*sizeof(struct crush_rule_step))
77
78
79
80/*
81 * A bucket is a named container of other items (either devices or
82 * other buckets). Items within a bucket are chosen using one of a
83 * few different algorithms. The table summarizes how the speed of
84 * each option measures up against mapping stability when items are
85 * added or removed.
86 *
87 * Bucket Alg Speed Additions Removals
88 * ------------------------------------------------
89 * uniform O(1) poor poor
90 * list O(n) optimal poor
91 * tree O(log n) good good
92 * straw O(n) optimal optimal
93 */
94enum {
95 CRUSH_BUCKET_UNIFORM = 1,
96 CRUSH_BUCKET_LIST = 2,
97 CRUSH_BUCKET_TREE = 3,
98 CRUSH_BUCKET_STRAW = 4
99};
100extern const char *crush_bucket_alg_name(int alg);
101
102struct crush_bucket {
103 __s32 id; /* this'll be negative */
104 __u16 type; /* non-zero; type=0 is reserved for devices */
105 __u8 alg; /* one of CRUSH_BUCKET_* */
106 __u8 hash; /* which hash function to use, CRUSH_HASH_* */
107 __u32 weight; /* 16-bit fixed point */
108 __u32 size; /* num items */
109 __s32 *items;
110
111 /*
112 * cached random permutation: used for uniform bucket and for
113 * the linear search fallback for the other bucket types.
114 */
115 __u32 perm_x; /* @x for which *perm is defined */
116 __u32 perm_n; /* num elements of *perm that are permuted/defined */
117 __u32 *perm;
118};
119
120struct crush_bucket_uniform {
121 struct crush_bucket h;
122 __u32 item_weight; /* 16-bit fixed point; all items equally weighted */
123};
124
125struct crush_bucket_list {
126 struct crush_bucket h;
127 __u32 *item_weights; /* 16-bit fixed point */
128 __u32 *sum_weights; /* 16-bit fixed point. element i is sum
129 of weights 0..i, inclusive */
130};
131
132struct crush_bucket_tree {
133 struct crush_bucket h; /* note: h.size is _tree_ size, not number of
134 actual items */
135 __u8 num_nodes;
136 __u32 *node_weights;
137};
138
139struct crush_bucket_straw {
140 struct crush_bucket h;
141 __u32 *item_weights; /* 16-bit fixed point */
142 __u32 *straws; /* 16-bit fixed point */
143};
144
145
146
147/*
148 * CRUSH map includes all buckets, rules, etc.
149 */
150struct crush_map {
151 struct crush_bucket **buckets;
152 struct crush_rule **rules;
153
154 /*
155 * Parent pointers to identify the parent bucket of a device or
156 * bucket in the hierarchy. If an item appears more than
157 * once, this is the _last_ time it appeared (where buckets
158 * are processed in bucket id order, from -1 on down to
159 * -max_buckets).
160 */
161 __u32 *bucket_parents;
162 __u32 *device_parents;
163
164 __s32 max_buckets;
165 __u32 max_rules;
166 __s32 max_devices;
167};
168
169
170/* crush.c */
171extern int crush_get_bucket_item_weight(struct crush_bucket *b, int pos);
172extern void crush_calc_parents(struct crush_map *map);
173extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
174extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
175extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
176extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
177extern void crush_destroy_bucket(struct crush_bucket *b);
178extern void crush_destroy(struct crush_map *map);
179
180#endif
diff --git a/include/linux/crush/hash.h b/include/linux/crush/hash.h
new file mode 100644
index 00000000000..91e884230d5
--- /dev/null
+++ b/include/linux/crush/hash.h
@@ -0,0 +1,17 @@
1#ifndef CEPH_CRUSH_HASH_H
2#define CEPH_CRUSH_HASH_H
3
4#define CRUSH_HASH_RJENKINS1 0
5
6#define CRUSH_HASH_DEFAULT CRUSH_HASH_RJENKINS1
7
8extern const char *crush_hash_name(int type);
9
10extern __u32 crush_hash32(int type, __u32 a);
11extern __u32 crush_hash32_2(int type, __u32 a, __u32 b);
12extern __u32 crush_hash32_3(int type, __u32 a, __u32 b, __u32 c);
13extern __u32 crush_hash32_4(int type, __u32 a, __u32 b, __u32 c, __u32 d);
14extern __u32 crush_hash32_5(int type, __u32 a, __u32 b, __u32 c, __u32 d,
15 __u32 e);
16
17#endif
diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
new file mode 100644
index 00000000000..c46b99c18bb
--- /dev/null
+++ b/include/linux/crush/mapper.h
@@ -0,0 +1,20 @@
1#ifndef CEPH_CRUSH_MAPPER_H
2#define CEPH_CRUSH_MAPPER_H
3
4/*
5 * CRUSH functions for finding rules and then mapping an input to an
6 * output set.
7 *
8 * LGPL2
9 */
10
11#include "crush.h"
12
13extern int crush_find_rule(struct crush_map *map, int pool, int type, int size);
14extern int crush_do_rule(struct crush_map *map,
15 int ruleno,
16 int x, int *result, int result_max,
17 int forcefeed, /* -1 for none */
18 __u32 *weights);
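A hedged usage sketch combining the two calls above; the helper name is illustrative, and the return value of crush_do_rule is assumed to be the number of results written to osds[]:

static int map_input_to_osds(struct crush_map *map, int pool, int type,
                             int size, int x, __u32 *weights, int *osds)
{
        int ruleno = crush_find_rule(map, pool, type, size);

        if (ruleno < 0)
                return ruleno;  /* no rule matches this (pool, type, size) */
        /* fill up to CRUSH_MAX_SET entries of osds[]; -1 disables forcefeed */
        return crush_do_rule(map, ruleno, x, osds, CRUSH_MAX_SET, -1, weights);
}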
19
20#endif
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 29b3ce3f2a1..2833452ea01 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -49,7 +49,6 @@ struct task_struct;
49 49
50#ifdef CONFIG_LOCKDEP 50#ifdef CONFIG_LOCKDEP
51extern void debug_show_all_locks(void); 51extern void debug_show_all_locks(void);
52extern void __debug_show_held_locks(struct task_struct *task);
53extern void debug_show_held_locks(struct task_struct *task); 52extern void debug_show_held_locks(struct task_struct *task);
54extern void debug_check_no_locks_freed(const void *from, unsigned long len); 53extern void debug_check_no_locks_freed(const void *from, unsigned long len);
55extern void debug_check_no_locks_held(struct task_struct *task); 54extern void debug_check_no_locks_held(struct task_struct *task);
@@ -58,10 +57,6 @@ static inline void debug_show_all_locks(void)
58{ 57{
59} 58}
60 59
61static inline void __debug_show_held_locks(struct task_struct *task)
62{
63}
64
65static inline void debug_show_held_locks(struct task_struct *task) 60static inline void debug_show_held_locks(struct task_struct *task)
66{ 61{
67} 62}
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 926b50322a4..4fd978e7eb8 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -93,6 +93,7 @@ struct elevator_queue
93 struct elevator_type *elevator_type; 93 struct elevator_type *elevator_type;
94 struct mutex sysfs_lock; 94 struct mutex sysfs_lock;
95 struct hlist_head *hash; 95 struct hlist_head *hash;
96 unsigned int registered:1;
96}; 97};
97 98
98/* 99/*
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2b0a35e6bc6..1759ba5adce 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -58,7 +58,18 @@ extern const char linux_proc_banner[];
58 58
59#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) 59#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
60#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) 60#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
61#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) 61#define roundup(x, y) ( \
62{ \
63 typeof(y) __y = y; \
64 (((x) + (__y - 1)) / __y) * __y; \
65} \
66)
67#define rounddown(x, y) ( \
68{ \
69 typeof(x) __x = (x); \
70 __x - (__x % (y)); \
71} \
72)
62#define DIV_ROUND_CLOSEST(x, divisor)( \ 73#define DIV_ROUND_CLOSEST(x, divisor)( \
63{ \ 74{ \
64 typeof(divisor) __divisor = divisor; \ 75 typeof(divisor) __divisor = divisor; \
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 06aed8305bf..2186a64ee4b 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -32,6 +32,17 @@ extern int lock_stat;
32#define MAX_LOCKDEP_SUBCLASSES 8UL 32#define MAX_LOCKDEP_SUBCLASSES 8UL
33 33
34/* 34/*
35 * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
36 * cached in the instance of lockdep_map
37 *
38 * Currently the main class (subclass == 0) and a single depth subclass
39 * are cached in lockdep_map. This optimization mainly targets
40 * rq->lock: double_rq_lock() acquires this highly contended lock
41 * with a single depth subclass.
42 */
43#define NR_LOCKDEP_CACHING_CLASSES 2
44
45/*
35 * Lock-classes are keyed via unique addresses, by embedding the 46 * Lock-classes are keyed via unique addresses, by embedding the
36 * lockclass-key into the kernel (or module) .data section. (For 47 * lockclass-key into the kernel (or module) .data section. (For
37 * static locks we use the lock address itself as the key.) 48 * static locks we use the lock address itself as the key.)
@@ -138,7 +149,7 @@ void clear_lock_stats(struct lock_class *class);
138 */ 149 */
139struct lockdep_map { 150struct lockdep_map {
140 struct lock_class_key *key; 151 struct lock_class_key *key;
141 struct lock_class *class_cache; 152 struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
142 const char *name; 153 const char *name;
143#ifdef CONFIG_LOCK_STAT 154#ifdef CONFIG_LOCK_STAT
144 int cpu; 155 int cpu;
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index 9ed534c991b..70cd0603911 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -39,8 +39,9 @@ enum ctattr_type {
39 CTA_TUPLE_MASTER, 39 CTA_TUPLE_MASTER,
40 CTA_NAT_SEQ_ADJ_ORIG, 40 CTA_NAT_SEQ_ADJ_ORIG,
41 CTA_NAT_SEQ_ADJ_REPLY, 41 CTA_NAT_SEQ_ADJ_REPLY,
42 CTA_SECMARK, 42 CTA_SECMARK, /* obsolete */
43 CTA_ZONE, 43 CTA_ZONE,
44 CTA_SECCTX,
44 __CTA_MAX 45 __CTA_MAX
45}; 46};
46#define CTA_MAX (__CTA_MAX - 1) 47#define CTA_MAX (__CTA_MAX - 1)
@@ -172,4 +173,11 @@ enum ctattr_help {
172}; 173};
173#define CTA_HELP_MAX (__CTA_HELP_MAX - 1) 174#define CTA_HELP_MAX (__CTA_HELP_MAX - 1)
174 175
176enum ctattr_secctx {
177 CTA_SECCTX_UNSPEC,
178 CTA_SECCTX_NAME,
179 __CTA_SECCTX_MAX
180};
181#define CTA_SECCTX_MAX (__CTA_SECCTX_MAX - 1)
182
175#endif /* _IPCONNTRACK_NETLINK_H */ 183#endif /* _IPCONNTRACK_NETLINK_H */
diff --git a/include/linux/netfilter/xt_SECMARK.h b/include/linux/netfilter/xt_SECMARK.h
index 6fcd3448b18..989092bd627 100644
--- a/include/linux/netfilter/xt_SECMARK.h
+++ b/include/linux/netfilter/xt_SECMARK.h
@@ -11,18 +11,12 @@
11 * packets are being marked for. 11 * packets are being marked for.
12 */ 12 */
13#define SECMARK_MODE_SEL 0x01 /* SELinux */ 13#define SECMARK_MODE_SEL 0x01 /* SELinux */
14#define SECMARK_SELCTX_MAX 256 14#define SECMARK_SECCTX_MAX 256
15
16struct xt_secmark_target_selinux_info {
17 __u32 selsid;
18 char selctx[SECMARK_SELCTX_MAX];
19};
20 15
21struct xt_secmark_target_info { 16struct xt_secmark_target_info {
22 __u8 mode; 17 __u8 mode;
23 union { 18 __u32 secid;
24 struct xt_secmark_target_selinux_info sel; 19 char secctx[SECMARK_SECCTX_MAX];
25 } u;
26}; 20};
27 21
28#endif /*_XT_SECMARK_H_target */ 22#endif /*_XT_SECMARK_H_target */
diff --git a/include/linux/security.h b/include/linux/security.h
index a22219afff0..b8246a8df7d 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -74,7 +74,7 @@ extern int cap_file_mmap(struct file *file, unsigned long reqprot,
74extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); 74extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags);
75extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, 75extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
76 unsigned long arg4, unsigned long arg5); 76 unsigned long arg4, unsigned long arg5);
77extern int cap_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp); 77extern int cap_task_setscheduler(struct task_struct *p);
78extern int cap_task_setioprio(struct task_struct *p, int ioprio); 78extern int cap_task_setioprio(struct task_struct *p, int ioprio);
79extern int cap_task_setnice(struct task_struct *p, int nice); 79extern int cap_task_setnice(struct task_struct *p, int nice);
80extern int cap_syslog(int type, bool from_file); 80extern int cap_syslog(int type, bool from_file);
@@ -959,6 +959,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
959 * Sets the new child socket's sid to the openreq sid. 959 * Sets the new child socket's sid to the openreq sid.
960 * @inet_conn_established: 960 * @inet_conn_established:
961 * Sets the connection's peersid to the secmark on skb. 961 * Sets the connection's peersid to the secmark on skb.
962 * @secmark_relabel_packet:
963 * check if the process should be allowed to relabel packets to the given secid
964 * @security_secmark_refcount_inc
965 * tells the LSM to increment the number of secmark labeling rules loaded
966 * @security_secmark_refcount_dec
967 * tells the LSM to decrement the number of secmark labeling rules loaded
962 * @req_classify_flow: 968 * @req_classify_flow:
963 * Sets the flow's sid to the openreq sid. 969 * Sets the flow's sid to the openreq sid.
964 * @tun_dev_create: 970 * @tun_dev_create:
@@ -1279,9 +1285,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
1279 * Return 0 if permission is granted. 1285 * Return 0 if permission is granted.
1280 * 1286 *
1281 * @secid_to_secctx: 1287 * @secid_to_secctx:
1282 * Convert secid to security context. 1288 * Convert secid to security context. If secdata is NULL the length of
1289 * the result will be returned in seclen, but no secdata will be returned.
1290 * Note that the length may therefore change between the call that checks the
1291 * length and the later call that actually allocates and returns the secdata.
1283 * @secid contains the security ID. 1292 * @secid contains the security ID.
1284 * @secdata contains the pointer that stores the converted security context. 1293 * @secdata contains the pointer that stores the converted security context.
1294 * @seclen contains the pointer that stores the length of the converted security context.
1285 * @secctx_to_secid: 1295 * @secctx_to_secid:
1286 * Convert security context to secid. 1296 * Convert security context to secid.
1287 * @secid contains the pointer to the generated security ID. 1297 * @secid contains the pointer to the generated security ID.
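
The NULL-secdata length query described above is typically used in two steps. A minimal illustrative sketch (the surrounding message-building code is omitted):

#include <linux/security.h>

/* Illustrative only: report a secid as a context string, using the
 * NULL-secdata length query first (e.g. to size a message attribute). */
static int example_report_secctx(u32 secid)
{
	char *ctx;
	u32 len;
	int err;

	/* first call: secdata == NULL, only the length is returned */
	err = security_secid_to_secctx(secid, NULL, &len);
	if (err)
		return err;

	/* ... reserve 'len' bytes in the message being built ... */

	/* second call: the context string is allocated and returned;
	 * note that 'len' may legitimately differ from the first call */
	err = security_secid_to_secctx(secid, &ctx, &len);
	if (err)
		return err;

	/* ... copy 'ctx' into the message ... */
	security_release_secctx(ctx, len);
	return 0;
}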
@@ -1501,8 +1511,7 @@ struct security_operations {
1501 int (*task_getioprio) (struct task_struct *p); 1511 int (*task_getioprio) (struct task_struct *p);
1502 int (*task_setrlimit) (struct task_struct *p, unsigned int resource, 1512 int (*task_setrlimit) (struct task_struct *p, unsigned int resource,
1503 struct rlimit *new_rlim); 1513 struct rlimit *new_rlim);
1504 int (*task_setscheduler) (struct task_struct *p, int policy, 1514 int (*task_setscheduler) (struct task_struct *p);
1505 struct sched_param *lp);
1506 int (*task_getscheduler) (struct task_struct *p); 1515 int (*task_getscheduler) (struct task_struct *p);
1507 int (*task_movememory) (struct task_struct *p); 1516 int (*task_movememory) (struct task_struct *p);
1508 int (*task_kill) (struct task_struct *p, 1517 int (*task_kill) (struct task_struct *p,
@@ -1594,6 +1603,9 @@ struct security_operations {
1594 struct request_sock *req); 1603 struct request_sock *req);
1595 void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req); 1604 void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req);
1596 void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb); 1605 void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb);
1606 int (*secmark_relabel_packet) (u32 secid);
1607 void (*secmark_refcount_inc) (void);
1608 void (*secmark_refcount_dec) (void);
1597 void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl); 1609 void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl);
1598 int (*tun_dev_create)(void); 1610 int (*tun_dev_create)(void);
1599 void (*tun_dev_post_create)(struct sock *sk); 1611 void (*tun_dev_post_create)(struct sock *sk);
@@ -1752,8 +1764,7 @@ int security_task_setioprio(struct task_struct *p, int ioprio);
1752int security_task_getioprio(struct task_struct *p); 1764int security_task_getioprio(struct task_struct *p);
1753int security_task_setrlimit(struct task_struct *p, unsigned int resource, 1765int security_task_setrlimit(struct task_struct *p, unsigned int resource,
1754 struct rlimit *new_rlim); 1766 struct rlimit *new_rlim);
1755int security_task_setscheduler(struct task_struct *p, 1767int security_task_setscheduler(struct task_struct *p);
1756 int policy, struct sched_param *lp);
1757int security_task_getscheduler(struct task_struct *p); 1768int security_task_getscheduler(struct task_struct *p);
1758int security_task_movememory(struct task_struct *p); 1769int security_task_movememory(struct task_struct *p);
1759int security_task_kill(struct task_struct *p, struct siginfo *info, 1770int security_task_kill(struct task_struct *p, struct siginfo *info,
@@ -2320,11 +2331,9 @@ static inline int security_task_setrlimit(struct task_struct *p,
2320 return 0; 2331 return 0;
2321} 2332}
2322 2333
2323static inline int security_task_setscheduler(struct task_struct *p, 2334static inline int security_task_setscheduler(struct task_struct *p)
2324 int policy,
2325 struct sched_param *lp)
2326{ 2335{
2327 return cap_task_setscheduler(p, policy, lp); 2336 return cap_task_setscheduler(p);
2328} 2337}
2329 2338
2330static inline int security_task_getscheduler(struct task_struct *p) 2339static inline int security_task_getscheduler(struct task_struct *p)
@@ -2551,6 +2560,9 @@ void security_inet_csk_clone(struct sock *newsk,
2551 const struct request_sock *req); 2560 const struct request_sock *req);
2552void security_inet_conn_established(struct sock *sk, 2561void security_inet_conn_established(struct sock *sk,
2553 struct sk_buff *skb); 2562 struct sk_buff *skb);
2563int security_secmark_relabel_packet(u32 secid);
2564void security_secmark_refcount_inc(void);
2565void security_secmark_refcount_dec(void);
2554int security_tun_dev_create(void); 2566int security_tun_dev_create(void);
2555void security_tun_dev_post_create(struct sock *sk); 2567void security_tun_dev_post_create(struct sock *sk);
2556int security_tun_dev_attach(struct sock *sk); 2568int security_tun_dev_attach(struct sock *sk);
@@ -2705,6 +2717,19 @@ static inline void security_inet_conn_established(struct sock *sk,
2705{ 2717{
2706} 2718}
2707 2719
2720static inline int security_secmark_relabel_packet(u32 secid)
2721{
2722 return 0;
2723}
2724
2725static inline void security_secmark_refcount_inc(void)
2726{
2727}
2728
2729static inline void security_secmark_refcount_dec(void)
2730{
2731}
2732
2708static inline int security_tun_dev_create(void) 2733static inline int security_tun_dev_create(void)
2709{ 2734{
2710 return 0; 2735 return 0;
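
A hypothetical LSM would then publish the new hooks, together with the narrowed task_setscheduler, through its security_operations table along these lines (the example_* symbols are placeholders, not real kernel functions):

#include <linux/security.h>

static struct security_operations example_security_ops = {
	/* ... the many other hooks ... */
	.task_setscheduler	= example_task_setscheduler,	/* now takes only the task */
	.secmark_relabel_packet	= example_secmark_relabel_packet,
	.secmark_refcount_inc	= example_secmark_refcount_inc,
	.secmark_refcount_dec	= example_secmark_refcount_dec,
};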
diff --git a/include/linux/selinux.h b/include/linux/selinux.h
index 82e0f26a129..44f45961269 100644
--- a/include/linux/selinux.h
+++ b/include/linux/selinux.h
@@ -21,74 +21,11 @@ struct kern_ipc_perm;
21#ifdef CONFIG_SECURITY_SELINUX 21#ifdef CONFIG_SECURITY_SELINUX
22 22
23/** 23/**
24 * selinux_string_to_sid - map a security context string to a security ID
25 * @str: the security context string to be mapped
26 * @sid: ID value returned via this.
27 *
28 * Returns 0 if successful, with the SID stored in sid. A value
29 * of zero for sid indicates no SID could be determined (but no error
30 * occurred).
31 */
32int selinux_string_to_sid(char *str, u32 *sid);
33
34/**
35 * selinux_secmark_relabel_packet_permission - secmark permission check
36 * @sid: SECMARK ID value to be applied to network packet
37 *
38 * Returns 0 if the current task is allowed to set the SECMARK label of
39 * packets with the supplied security ID. Note that it is implicit that
40 * the packet is always being relabeled from the default unlabeled value,
41 * and that the access control decision is made in the AVC.
42 */
43int selinux_secmark_relabel_packet_permission(u32 sid);
44
45/**
46 * selinux_secmark_refcount_inc - increments the secmark use counter
47 *
48 * SELinux keeps track of the current SECMARK targets in use so it knows
49 * when to apply SECMARK label access checks to network packets. This
50 * function incements this reference count to indicate that a new SECMARK
51 * target has been configured.
52 */
53void selinux_secmark_refcount_inc(void);
54
55/**
56 * selinux_secmark_refcount_dec - decrements the secmark use counter
57 *
58 * SELinux keeps track of the current SECMARK targets in use so it knows
59 * when to apply SECMARK label access checks to network packets. This
60 * function decements this reference count to indicate that one of the
61 * existing SECMARK targets has been removed/flushed.
62 */
63void selinux_secmark_refcount_dec(void);
64
65/**
66 * selinux_is_enabled - is SELinux enabled? 24 * selinux_is_enabled - is SELinux enabled?
67 */ 25 */
68bool selinux_is_enabled(void); 26bool selinux_is_enabled(void);
69#else 27#else
70 28
71static inline int selinux_string_to_sid(const char *str, u32 *sid)
72{
73 *sid = 0;
74 return 0;
75}
76
77static inline int selinux_secmark_relabel_packet_permission(u32 sid)
78{
79 return 0;
80}
81
82static inline void selinux_secmark_refcount_inc(void)
83{
84 return;
85}
86
87static inline void selinux_secmark_refcount_dec(void)
88{
89 return;
90}
91
92static inline bool selinux_is_enabled(void) 29static inline bool selinux_is_enabled(void)
93{ 30{
94 return false; 31 return false;
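
With the SELinux-specific helpers removed, callers such as the SECMARK target are expected to move to the LSM-neutral interfaces added to security.h above. The rough correspondence (an editorial summary, not quoted from the patch) is:

	selinux_string_to_sid(str, &sid)                ->  security_secctx_to_secid(str, strlen(str), &sid)
	selinux_secmark_relabel_packet_permission(sid)  ->  security_secmark_relabel_packet(sid)
	selinux_secmark_refcount_inc()                  ->  security_secmark_refcount_inc()
	selinux_secmark_refcount_dec()                  ->  security_secmark_refcount_dec()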
diff --git a/include/linux/types.h b/include/linux/types.h
index 01a082f56ef..357dbc19606 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -121,7 +121,15 @@ typedef __u64 u_int64_t;
121typedef __s64 int64_t; 121typedef __s64 int64_t;
122#endif 122#endif
123 123
124/* this is a special 64bit data type that is 8-byte aligned */ 124/*
125 * aligned_u64 should be used in defining kernel<->userspace ABIs to avoid
126 * common 32/64-bit compat problems.
127 * 64-bit values align to 4-byte boundaries on x86_32 (and possibly other
128 * architectures) and to 8-byte boundaries on 64-bit architetures. The new
129 * aligned_64 type enforces 8-byte alignment so that structs containing
130 * aligned_64 values have the same alignment on 32-bit and 64-bit architectures.
131 * No conversions are necessary between 32-bit user-space and a 64-bit kernel.
132 */
125#define aligned_u64 __u64 __attribute__((aligned(8))) 133#define aligned_u64 __u64 __attribute__((aligned(8)))
126#define aligned_be64 __be64 __attribute__((aligned(8))) 134#define aligned_be64 __be64 __attribute__((aligned(8)))
127#define aligned_le64 __le64 __attribute__((aligned(8))) 135#define aligned_le64 __le64 __attribute__((aligned(8)))
@@ -178,6 +186,11 @@ typedef __u64 __bitwise __be64;
178typedef __u16 __bitwise __sum16; 186typedef __u16 __bitwise __sum16;
179typedef __u32 __bitwise __wsum; 187typedef __u32 __bitwise __wsum;
180 188
189/* this is a special 64bit data type that is 8-byte aligned */
190#define __aligned_u64 __u64 __attribute__((aligned(8)))
191#define __aligned_be64 __be64 __attribute__((aligned(8)))
192#define __aligned_le64 __le64 __attribute__((aligned(8)))
193
181#ifdef __KERNEL__ 194#ifdef __KERNEL__
182typedef unsigned __bitwise__ gfp_t; 195typedef unsigned __bitwise__ gfp_t;
183typedef unsigned __bitwise__ fmode_t; 196typedef unsigned __bitwise__ fmode_t;
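
To make the alignment comment concrete, a hypothetical structure shared across the kernel/user boundary would use the double-underscore variant so its size and field offsets are identical for 32-bit and 64-bit user space (example struct only, not from the kernel):

#include <linux/types.h>

/* Same layout on x86_32 and x86_64: the 64-bit members are forced to an
 * 8-byte boundary instead of packing to a 4-byte boundary on x86_32. */
struct example_ioctl_args {
	__u32		flags;
	__aligned_u64	buffer_ptr;	/* user pointer carried as a 64-bit value */
	__aligned_u64	buffer_len;
};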
diff --git a/include/media/videobuf-dma-sg.h b/include/media/videobuf-dma-sg.h
index 97e07f46a0f..aa4ebb42a56 100644
--- a/include/media/videobuf-dma-sg.h
+++ b/include/media/videobuf-dma-sg.h
@@ -48,6 +48,7 @@ struct videobuf_dmabuf {
48 48
49 /* for userland buffer */ 49 /* for userland buffer */
50 int offset; 50 int offset;
51 size_t size;
51 struct page **pages; 52 struct page **pages;
52 53
53 /* for kernel buffers */ 54 /* for kernel buffers */
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 27a902d9b3a..30fce0128dd 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -161,12 +161,30 @@ static inline struct sk_buff *bt_skb_send_alloc(struct sock *sk, unsigned long l
161{ 161{
162 struct sk_buff *skb; 162 struct sk_buff *skb;
163 163
164 release_sock(sk);
164 if ((skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err))) { 165 if ((skb = sock_alloc_send_skb(sk, len + BT_SKB_RESERVE, nb, err))) {
165 skb_reserve(skb, BT_SKB_RESERVE); 166 skb_reserve(skb, BT_SKB_RESERVE);
166 bt_cb(skb)->incoming = 0; 167 bt_cb(skb)->incoming = 0;
167 } 168 }
169 lock_sock(sk);
170
171 if (!skb && *err)
172 return NULL;
173
174 *err = sock_error(sk);
175 if (*err)
176 goto out;
177
178 if (sk->sk_shutdown) {
179 *err = -ECONNRESET;
180 goto out;
181 }
168 182
169 return skb; 183 return skb;
184
185out:
186 kfree_skb(skb);
187 return NULL;
170} 188}
171 189
172int bt_err(__u16 code); 190int bt_err(__u16 code);
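
For context, sock_alloc_send_skb() may sleep waiting for socket send-buffer space, so the helper now drops the socket lock around that call and re-validates the socket (pending error, shutdown) after re-acquiring it. A sketch of a typical caller, with hypothetical names; as before, the caller enters with the socket lock held:

#include <net/bluetooth/bluetooth.h>
#include <net/sock.h>

static int example_sendmsg(struct sock *sk, size_t len, int noblock)
{
	struct sk_buff *skb;
	int err = 0;

	lock_sock(sk);
	/* may drop and re-take the socket lock internally while waiting */
	skb = bt_skb_send_alloc(sk, len, noblock, &err);
	if (!skb) {
		release_sock(sk);
		return err;
	}

	/* ... copy the payload into skb and queue it for transmission ... */

	release_sock(sk);
	return len;
}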