Diffstat (limited to 'drivers/block/drbd/drbd_nl.c')
-rw-r--r--  drivers/block/drbd/drbd_nl.c | 3318
 1 file changed, 1969 insertions(+), 1349 deletions(-)
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index c8dda4e8dfce..76bb3a684b86 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -29,159 +29,317 @@
29#include <linux/fs.h> 29#include <linux/fs.h>
30#include <linux/file.h> 30#include <linux/file.h>
31#include <linux/slab.h> 31#include <linux/slab.h>
32#include <linux/connector.h>
33#include <linux/blkpg.h> 32#include <linux/blkpg.h>
34#include <linux/cpumask.h> 33#include <linux/cpumask.h>
35#include "drbd_int.h" 34#include "drbd_int.h"
36#include "drbd_req.h" 35#include "drbd_req.h"
37#include "drbd_wrappers.h" 36#include "drbd_wrappers.h"
38#include <asm/unaligned.h> 37#include <asm/unaligned.h>
39#include <linux/drbd_tag_magic.h>
40#include <linux/drbd_limits.h> 38#include <linux/drbd_limits.h>
41#include <linux/compiler.h>
42#include <linux/kthread.h> 39#include <linux/kthread.h>
43 40
44static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int); 41#include <net/genetlink.h>
45static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *); 42
46static unsigned short *tl_add_int(unsigned short *, enum drbd_tags, const void *); 43/* .doit */
47 44// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
48/* see get_sb_bdev and bd_claim */ 45// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
46
47int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info);
48int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info);
49
50int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
51int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
52int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
53
54int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
55int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
56int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
57int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
58int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
59int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
60int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
61int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
62int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
63int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
64int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
65int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
66int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
67int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
68int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
69int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
70int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
71int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
72int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
73int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
74/* .dumpit */
75int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
76
77#include <linux/drbd_genl_api.h>
78#include "drbd_nla.h"
79#include <linux/genl_magic_func.h>
80
81/* used blkdev_get_by_path, to claim our meta data device(s) */
49static char *drbd_m_holder = "Hands off! this is DRBD's meta data device."; 82static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
50 83
51/* Generate the tag_list to struct functions */ 84/* Configuration is strictly serialized, because generic netlink message
52#define NL_PACKET(name, number, fields) \ 85 * processing is strictly serialized by the genl_lock().
53static int name ## _from_tags(struct drbd_conf *mdev, \ 86 * Which means we can use one static global drbd_config_context struct.
54 unsigned short *tags, struct name *arg) __attribute__ ((unused)); \ 87 */
55static int name ## _from_tags(struct drbd_conf *mdev, \ 88static struct drbd_config_context {
56 unsigned short *tags, struct name *arg) \ 89 /* assigned from drbd_genlmsghdr */
57{ \ 90 unsigned int minor;
58 int tag; \ 91 /* assigned from request attributes, if present */
59 int dlen; \ 92 unsigned int volume;
60 \ 93#define VOLUME_UNSPECIFIED (-1U)
61 while ((tag = get_unaligned(tags++)) != TT_END) { \ 94 /* pointer into the request skb,
62 dlen = get_unaligned(tags++); \ 95 * limited lifetime! */
63 switch (tag_number(tag)) { \ 96 char *resource_name;
64 fields \ 97 struct nlattr *my_addr;
65 default: \ 98 struct nlattr *peer_addr;
66 if (tag & T_MANDATORY) { \ 99
67 dev_err(DEV, "Unknown tag: %d\n", tag_number(tag)); \ 100 /* reply buffer */
68 return 0; \ 101 struct sk_buff *reply_skb;
69 } \ 102 /* pointer into reply buffer */
70 } \ 103 struct drbd_genlmsghdr *reply_dh;
71 tags = (unsigned short *)((char *)tags + dlen); \ 104 /* resolved from attributes, if possible */
72 } \ 105 struct drbd_conf *mdev;
73 return 1; \ 106 struct drbd_tconn *tconn;
74} 107} adm_ctx;
75#define NL_INTEGER(pn, pr, member) \ 108
76 case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \ 109static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
77 arg->member = get_unaligned((int *)(tags)); \ 110{
78 break; 111 genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
79#define NL_INT64(pn, pr, member) \ 112 if (genlmsg_reply(skb, info))
80 case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \ 113 printk(KERN_ERR "drbd: error sending genl reply\n");
81 arg->member = get_unaligned((u64 *)(tags)); \ 114}
115
116/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
117 * reason it could fail was no space in skb, and there are 4k available. */
118int drbd_msg_put_info(const char *info)
119{
120 struct sk_buff *skb = adm_ctx.reply_skb;
121 struct nlattr *nla;
122 int err = -EMSGSIZE;
123
124 if (!info || !info[0])
125 return 0;
126
127 nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
128 if (!nla)
129 return err;
130
131 err = nla_put_string(skb, T_info_text, info);
132 if (err) {
133 nla_nest_cancel(skb, nla);
134 return err;
135 } else
136 nla_nest_end(skb, nla);
137 return 0;
138}
139
140/* This would be a good candidate for a "pre_doit" hook,
141 * and per-family private info->pointers.
142 * But we need to stay compatible with older kernels.
143 * If it returns successfully, adm_ctx members are valid.
144 */
145#define DRBD_ADM_NEED_MINOR 1
146#define DRBD_ADM_NEED_RESOURCE 2
147#define DRBD_ADM_NEED_CONNECTION 4
148static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
149 unsigned flags)
150{
151 struct drbd_genlmsghdr *d_in = info->userhdr;
152 const u8 cmd = info->genlhdr->cmd;
153 int err;
154
155 memset(&adm_ctx, 0, sizeof(adm_ctx));
156
157 /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
158 if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
159 return -EPERM;
160
161 adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
162 if (!adm_ctx.reply_skb) {
163 err = -ENOMEM;
164 goto fail;
165 }
166
167 adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb,
168 info, &drbd_genl_family, 0, cmd);
169 /* put of a few bytes into a fresh skb of >= 4k will always succeed.
170 * but anyways */
171 if (!adm_ctx.reply_dh) {
172 err = -ENOMEM;
173 goto fail;
174 }
175
176 adm_ctx.reply_dh->minor = d_in->minor;
177 adm_ctx.reply_dh->ret_code = NO_ERROR;
178
179 adm_ctx.volume = VOLUME_UNSPECIFIED;
180 if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
181 struct nlattr *nla;
182 /* parse and validate only */
183 err = drbd_cfg_context_from_attrs(NULL, info);
184 if (err)
185 goto fail;
186
187 /* It was present, and valid,
188 * copy it over to the reply skb. */
189 err = nla_put_nohdr(adm_ctx.reply_skb,
190 info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
191 info->attrs[DRBD_NLA_CFG_CONTEXT]);
192 if (err)
193 goto fail;
194
195 /* and assign stuff to the global adm_ctx */
196 nla = nested_attr_tb[__nla_type(T_ctx_volume)];
197 if (nla)
198 adm_ctx.volume = nla_get_u32(nla);
199 nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
200 if (nla)
201 adm_ctx.resource_name = nla_data(nla);
202 adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
203 adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
204 if ((adm_ctx.my_addr &&
205 nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.tconn->my_addr)) ||
206 (adm_ctx.peer_addr &&
207 nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.tconn->peer_addr))) {
208 err = -EINVAL;
209 goto fail;
210 }
211 }
212
213 adm_ctx.minor = d_in->minor;
214 adm_ctx.mdev = minor_to_mdev(d_in->minor);
215 adm_ctx.tconn = conn_get_by_name(adm_ctx.resource_name);
216
217 if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) {
218 drbd_msg_put_info("unknown minor");
219 return ERR_MINOR_INVALID;
220 }
221 if (!adm_ctx.tconn && (flags & DRBD_ADM_NEED_RESOURCE)) {
222 drbd_msg_put_info("unknown resource");
223 return ERR_INVALID_REQUEST;
224 }
225
226 if (flags & DRBD_ADM_NEED_CONNECTION) {
227 if (adm_ctx.tconn && !(flags & DRBD_ADM_NEED_RESOURCE)) {
228 drbd_msg_put_info("no resource name expected");
229 return ERR_INVALID_REQUEST;
230 }
231 if (adm_ctx.mdev) {
232 drbd_msg_put_info("no minor number expected");
233 return ERR_INVALID_REQUEST;
234 }
235 if (adm_ctx.my_addr && adm_ctx.peer_addr)
236 adm_ctx.tconn = conn_get_by_addrs(nla_data(adm_ctx.my_addr),
237 nla_len(adm_ctx.my_addr),
238 nla_data(adm_ctx.peer_addr),
239 nla_len(adm_ctx.peer_addr));
240 if (!adm_ctx.tconn) {
241 drbd_msg_put_info("unknown connection");
242 return ERR_INVALID_REQUEST;
243 }
244 }
245
246 /* some more paranoia, if the request was over-determined */
247 if (adm_ctx.mdev && adm_ctx.tconn &&
248 adm_ctx.mdev->tconn != adm_ctx.tconn) {
249 pr_warning("request: minor=%u, resource=%s; but that minor belongs to connection %s\n",
250 adm_ctx.minor, adm_ctx.resource_name,
251 adm_ctx.mdev->tconn->name);
252 drbd_msg_put_info("minor exists in different resource");
253 return ERR_INVALID_REQUEST;
254 }
255 if (adm_ctx.mdev &&
256 adm_ctx.volume != VOLUME_UNSPECIFIED &&
257 adm_ctx.volume != adm_ctx.mdev->vnr) {
258 pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
259 adm_ctx.minor, adm_ctx.volume,
260 adm_ctx.mdev->vnr, adm_ctx.mdev->tconn->name);
261 drbd_msg_put_info("minor exists as different volume");
262 return ERR_INVALID_REQUEST;
263 }
264
265 return NO_ERROR;
266
267fail:
268 nlmsg_free(adm_ctx.reply_skb);
269 adm_ctx.reply_skb = NULL;
270 return err;
271}
272
273static int drbd_adm_finish(struct genl_info *info, int retcode)
274{
275 if (adm_ctx.tconn) {
276 kref_put(&adm_ctx.tconn->kref, &conn_destroy);
277 adm_ctx.tconn = NULL;
278 }
279
280 if (!adm_ctx.reply_skb)
281 return -ENOMEM;
282
283 adm_ctx.reply_dh->ret_code = retcode;
284 drbd_adm_send_reply(adm_ctx.reply_skb, info);
285 return 0;
286}
287
288static void setup_khelper_env(struct drbd_tconn *tconn, char **envp)
289{
290 char *afs;
291
292 /* FIXME: A future version will not allow this case. */
293 if (tconn->my_addr_len == 0 || tconn->peer_addr_len == 0)
294 return;
295
296 switch (((struct sockaddr *)&tconn->peer_addr)->sa_family) {
297 case AF_INET6:
298 afs = "ipv6";
299 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
300 &((struct sockaddr_in6 *)&tconn->peer_addr)->sin6_addr);
82 break; 301 break;
83#define NL_BIT(pn, pr, member) \ 302 case AF_INET:
84 case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \ 303 afs = "ipv4";
85 arg->member = *(char *)(tags) ? 1 : 0; \ 304 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
305 &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr);
86 break; 306 break;
87#define NL_STRING(pn, pr, member, len) \ 307 default:
88 case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \ 308 afs = "ssocks";
89 if (dlen > len) { \ 309 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
90 dev_err(DEV, "arg too long: %s (%u wanted, max len: %u bytes)\n", \ 310 &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr);
91 #member, dlen, (unsigned int)len); \ 311 }
92 return 0; \ 312 snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
93 } \ 313}
94 arg->member ## _len = dlen; \
95 memcpy(arg->member, tags, min_t(size_t, dlen, len)); \
96 break;
97#include <linux/drbd_nl.h>
98
99/* Generate the struct to tag_list functions */
100#define NL_PACKET(name, number, fields) \
101static unsigned short* \
102name ## _to_tags(struct drbd_conf *mdev, \
103 struct name *arg, unsigned short *tags) __attribute__ ((unused)); \
104static unsigned short* \
105name ## _to_tags(struct drbd_conf *mdev, \
106 struct name *arg, unsigned short *tags) \
107{ \
108 fields \
109 return tags; \
110}
111
112#define NL_INTEGER(pn, pr, member) \
113 put_unaligned(pn | pr | TT_INTEGER, tags++); \
114 put_unaligned(sizeof(int), tags++); \
115 put_unaligned(arg->member, (int *)tags); \
116 tags = (unsigned short *)((char *)tags+sizeof(int));
117#define NL_INT64(pn, pr, member) \
118 put_unaligned(pn | pr | TT_INT64, tags++); \
119 put_unaligned(sizeof(u64), tags++); \
120 put_unaligned(arg->member, (u64 *)tags); \
121 tags = (unsigned short *)((char *)tags+sizeof(u64));
122#define NL_BIT(pn, pr, member) \
123 put_unaligned(pn | pr | TT_BIT, tags++); \
124 put_unaligned(sizeof(char), tags++); \
125 *(char *)tags = arg->member; \
126 tags = (unsigned short *)((char *)tags+sizeof(char));
127#define NL_STRING(pn, pr, member, len) \
128 put_unaligned(pn | pr | TT_STRING, tags++); \
129 put_unaligned(arg->member ## _len, tags++); \
130 memcpy(tags, arg->member, arg->member ## _len); \
131 tags = (unsigned short *)((char *)tags + arg->member ## _len);
132#include <linux/drbd_nl.h>
133
134void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name);
135void drbd_nl_send_reply(struct cn_msg *, int);
136 314
137int drbd_khelper(struct drbd_conf *mdev, char *cmd) 315int drbd_khelper(struct drbd_conf *mdev, char *cmd)
138{ 316{
139 char *envp[] = { "HOME=/", 317 char *envp[] = { "HOME=/",
140 "TERM=linux", 318 "TERM=linux",
141 "PATH=/sbin:/usr/sbin:/bin:/usr/bin", 319 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
142 NULL, /* Will be set to address family */ 320 (char[20]) { }, /* address family */
143 NULL, /* Will be set to address */ 321 (char[60]) { }, /* address */
144 NULL }; 322 NULL };
145 323 char mb[12];
146 char mb[12], af[20], ad[60], *afs;
147 char *argv[] = {usermode_helper, cmd, mb, NULL }; 324 char *argv[] = {usermode_helper, cmd, mb, NULL };
325 struct drbd_tconn *tconn = mdev->tconn;
326 struct sib_info sib;
148 int ret; 327 int ret;
149 328
150 if (current == mdev->worker.task) 329 if (current == tconn->worker.task)
151 drbd_set_flag(mdev, CALLBACK_PENDING); 330 set_bit(CALLBACK_PENDING, &tconn->flags);
152 331
153 snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); 332 snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));
154 333 setup_khelper_env(tconn, envp);
155 if (get_net_conf(mdev)) {
156 switch (((struct sockaddr *)mdev->net_conf->peer_addr)->sa_family) {
157 case AF_INET6:
158 afs = "ipv6";
159 snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI6",
160 &((struct sockaddr_in6 *)mdev->net_conf->peer_addr)->sin6_addr);
161 break;
162 case AF_INET:
163 afs = "ipv4";
164 snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4",
165 &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr);
166 break;
167 default:
168 afs = "ssocks";
169 snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4",
170 &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr);
171 }
172 snprintf(af, 20, "DRBD_PEER_AF=%s", afs);
173 envp[3]=af;
174 envp[4]=ad;
175 put_net_conf(mdev);
176 }
177 334
178 /* The helper may take some time. 335 /* The helper may take some time.
179 * write out any unsynced meta data changes now */ 336 * write out any unsynced meta data changes now */
180 drbd_md_sync(mdev); 337 drbd_md_sync(mdev);
181 338
182 dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb); 339 dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
183 340 sib.sib_reason = SIB_HELPER_PRE;
184 drbd_bcast_ev_helper(mdev, cmd); 341 sib.helper_name = cmd;
342 drbd_bcast_event(mdev, &sib);
185 ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC); 343 ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
186 if (ret) 344 if (ret)
187 dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", 345 dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
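The rewritten drbd_khelper() above (and conn_khelper() below) no longer juggle separate af[]/ad[] buffers: the environment array itself carries two anonymous, writable scratch slots as compound literals, which setup_khelper_env() later fills in. A minimal standalone sketch of that idiom, outside the kernel and with made-up values (not DRBD code; the empty-brace initializer is a GNU C extension, as in the kernel):

#include <stdio.h>

int main(void)
{
	char *envp[] = {
		"HOME=/",
		(char[20]) { },		/* scratch slot, later "DRBD_PEER_AF=..."      */
		(char[60]) { },		/* scratch slot, later "DRBD_PEER_ADDRESS=..." */
		NULL,
	};

	/* A callee can write into the anonymous arrays through envp[1]/envp[2],
	 * just as setup_khelper_env() does with envp[3]/envp[4] in the patch. */
	snprintf(envp[1], 20, "DRBD_PEER_AF=%s", "ipv4");
	snprintf(envp[2], 60, "DRBD_PEER_ADDRESS=%s", "192.0.2.1");

	for (char **p = envp; *p; p++)
		puts(*p);
	return 0;
}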
@@ -191,9 +349,46 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
191 dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", 349 dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
192 usermode_helper, cmd, mb, 350 usermode_helper, cmd, mb,
193 (ret >> 8) & 0xff, ret); 351 (ret >> 8) & 0xff, ret);
352 sib.sib_reason = SIB_HELPER_POST;
353 sib.helper_exit_code = ret;
354 drbd_bcast_event(mdev, &sib);
355
356 if (current == tconn->worker.task)
357 clear_bit(CALLBACK_PENDING, &tconn->flags);
358
359 if (ret < 0) /* Ignore any ERRNOs we got. */
360 ret = 0;
361
362 return ret;
363}
364
365int conn_khelper(struct drbd_tconn *tconn, char *cmd)
366{
367 char *envp[] = { "HOME=/",
368 "TERM=linux",
369 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
370 (char[20]) { }, /* address family */
371 (char[60]) { }, /* address */
372 NULL };
373 char *argv[] = {usermode_helper, cmd, tconn->name, NULL };
374 int ret;
375
376 setup_khelper_env(tconn, envp);
377 conn_md_sync(tconn);
194 378
195 if (current == mdev->worker.task) 379 conn_info(tconn, "helper command: %s %s %s\n", usermode_helper, cmd, tconn->name);
196 drbd_clear_flag(mdev, CALLBACK_PENDING); 380 /* TODO: conn_bcast_event() ?? */
381
382 ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
383 if (ret)
384 conn_warn(tconn, "helper command: %s %s %s exit code %u (0x%x)\n",
385 usermode_helper, cmd, tconn->name,
386 (ret >> 8) & 0xff, ret);
387 else
388 conn_info(tconn, "helper command: %s %s %s exit code %u (0x%x)\n",
389 usermode_helper, cmd, tconn->name,
390 (ret >> 8) & 0xff, ret);
391 /* TODO: conn_bcast_event() ?? */
197 392
198 if (ret < 0) /* Ignore any ERRNOs we got. */ 393 if (ret < 0) /* Ignore any ERRNOs we got. */
199 ret = 0; 394 ret = 0;
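Both helpers decode the call_usermodehelper() result as (ret >> 8) & 0xff. That works because, with UMH_WAIT_PROC, the return value is a wait()-style status word whose bits 8-15 hold the child's exit code. A userspace analogue (illustration only, using system(), which returns the same encoding):

#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>

int main(void)
{
	int status = system("exit 7");	/* run a "helper" that exits with code 7 */

	if (status != -1 && WIFEXITED(status))
		printf("raw=0x%x  WEXITSTATUS=%d  (status >> 8) & 0xff = %d\n",
		       status, WEXITSTATUS(status), (status >> 8) & 0xff);
	return 0;
}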
@@ -201,116 +396,129 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
201 return ret; 396 return ret;
202} 397}
203 398
204enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev) 399static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn)
205{ 400{
401 enum drbd_fencing_p fp = FP_NOT_AVAIL;
402 struct drbd_conf *mdev;
403 int vnr;
404
405 rcu_read_lock();
406 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
407 if (get_ldev_if_state(mdev, D_CONSISTENT)) {
408 fp = max_t(enum drbd_fencing_p, fp,
409 rcu_dereference(mdev->ldev->disk_conf)->fencing);
410 put_ldev(mdev);
411 }
412 }
413 rcu_read_unlock();
414
415 return fp;
416}
417
418bool conn_try_outdate_peer(struct drbd_tconn *tconn)
419{
420 union drbd_state mask = { };
421 union drbd_state val = { };
422 enum drbd_fencing_p fp;
206 char *ex_to_string; 423 char *ex_to_string;
207 int r; 424 int r;
208 enum drbd_disk_state nps;
209 enum drbd_fencing_p fp;
210 425
211 D_ASSERT(mdev->state.pdsk == D_UNKNOWN); 426 if (tconn->cstate >= C_WF_REPORT_PARAMS) {
427 conn_err(tconn, "Expected cstate < C_WF_REPORT_PARAMS\n");
428 return false;
429 }
212 430
213 if (get_ldev_if_state(mdev, D_CONSISTENT)) { 431 fp = highest_fencing_policy(tconn);
214 fp = mdev->ldev->dc.fencing; 432 switch (fp) {
215 put_ldev(mdev); 433 case FP_NOT_AVAIL:
216 } else { 434 conn_warn(tconn, "Not fencing peer, I'm not even Consistent myself.\n");
217 dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
218 nps = mdev->state.pdsk;
219 goto out; 435 goto out;
436 case FP_DONT_CARE:
437 return true;
438 default: ;
220 } 439 }
221 440
222 r = drbd_khelper(mdev, "fence-peer"); 441 r = conn_khelper(tconn, "fence-peer");
223 442
224 switch ((r>>8) & 0xff) { 443 switch ((r>>8) & 0xff) {
225 case 3: /* peer is inconsistent */ 444 case 3: /* peer is inconsistent */
226 ex_to_string = "peer is inconsistent or worse"; 445 ex_to_string = "peer is inconsistent or worse";
227 nps = D_INCONSISTENT; 446 mask.pdsk = D_MASK;
447 val.pdsk = D_INCONSISTENT;
228 break; 448 break;
229 case 4: /* peer got outdated, or was already outdated */ 449 case 4: /* peer got outdated, or was already outdated */
230 ex_to_string = "peer was fenced"; 450 ex_to_string = "peer was fenced";
231 nps = D_OUTDATED; 451 mask.pdsk = D_MASK;
452 val.pdsk = D_OUTDATED;
232 break; 453 break;
233 case 5: /* peer was down */ 454 case 5: /* peer was down */
234 if (mdev->state.disk == D_UP_TO_DATE) { 455 if (conn_highest_disk(tconn) == D_UP_TO_DATE) {
235 /* we will(have) create(d) a new UUID anyways... */ 456 /* we will(have) create(d) a new UUID anyways... */
236 ex_to_string = "peer is unreachable, assumed to be dead"; 457 ex_to_string = "peer is unreachable, assumed to be dead";
237 nps = D_OUTDATED; 458 mask.pdsk = D_MASK;
459 val.pdsk = D_OUTDATED;
238 } else { 460 } else {
239 ex_to_string = "peer unreachable, doing nothing since disk != UpToDate"; 461 ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
240 nps = mdev->state.pdsk;
241 } 462 }
242 break; 463 break;
243 case 6: /* Peer is primary, voluntarily outdate myself. 464 case 6: /* Peer is primary, voluntarily outdate myself.
244 * This is useful when an unconnected R_SECONDARY is asked to 465 * This is useful when an unconnected R_SECONDARY is asked to
245 * become R_PRIMARY, but finds the other peer being active. */ 466 * become R_PRIMARY, but finds the other peer being active. */
246 ex_to_string = "peer is active"; 467 ex_to_string = "peer is active";
247 dev_warn(DEV, "Peer is primary, outdating myself.\n"); 468 conn_warn(tconn, "Peer is primary, outdating myself.\n");
248 nps = D_UNKNOWN; 469 mask.disk = D_MASK;
249 _drbd_request_state(mdev, NS(disk, D_OUTDATED), CS_WAIT_COMPLETE); 470 val.disk = D_OUTDATED;
250 break; 471 break;
251 case 7: 472 case 7:
252 if (fp != FP_STONITH) 473 if (fp != FP_STONITH)
253 dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n"); 474 conn_err(tconn, "fence-peer() = 7 && fencing != Stonith !!!\n");
254 ex_to_string = "peer was stonithed"; 475 ex_to_string = "peer was stonithed";
255 nps = D_OUTDATED; 476 mask.pdsk = D_MASK;
477 val.pdsk = D_OUTDATED;
256 break; 478 break;
257 default: 479 default:
258 /* The script is broken ... */ 480 /* The script is broken ... */
259 nps = D_UNKNOWN; 481 conn_err(tconn, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
260 dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff); 482 return false; /* Eventually leave IO frozen */
261 return nps;
262 } 483 }
263 484
264 dev_info(DEV, "fence-peer helper returned %d (%s)\n", 485 conn_info(tconn, "fence-peer helper returned %d (%s)\n",
265 (r>>8) & 0xff, ex_to_string); 486 (r>>8) & 0xff, ex_to_string);
266 487
267out: 488 out:
268 if (mdev->state.susp_fen && nps >= D_UNKNOWN) {
269 /* The handler was not successful... unfreeze here, the
270 state engine can not unfreeze... */
271 _drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE);
272 }
273 489
274 return nps; 490 /* Not using
491 conn_request_state(tconn, mask, val, CS_VERBOSE);
492 here, because we might were able to re-establish the connection in the
493 meantime. */
494 spin_lock_irq(&tconn->req_lock);
495 if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags))
496 _conn_request_state(tconn, mask, val, CS_VERBOSE);
497 spin_unlock_irq(&tconn->req_lock);
498
499 return conn_highest_pdsk(tconn) <= D_OUTDATED;
275} 500}
276 501
277static int _try_outdate_peer_async(void *data) 502static int _try_outdate_peer_async(void *data)
278{ 503{
279 struct drbd_conf *mdev = (struct drbd_conf *)data; 504 struct drbd_tconn *tconn = (struct drbd_tconn *)data;
280 enum drbd_disk_state nps;
281 union drbd_state ns;
282 505
283 nps = drbd_try_outdate_peer(mdev); 506 conn_try_outdate_peer(tconn);
284
285 /* Not using
286 drbd_request_state(mdev, NS(pdsk, nps));
287 here, because we might were able to re-establish the connection
288 in the meantime. This can only partially be solved in the state's
289 engine is_valid_state() and is_valid_state_transition()
290 functions.
291
292 nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN.
293 pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid,
294 therefore we have to have the pre state change check here.
295 */
296 spin_lock_irq(&mdev->req_lock);
297 ns = mdev->state;
298 if (ns.conn < C_WF_REPORT_PARAMS && !drbd_test_flag(mdev, STATE_SENT)) {
299 ns.pdsk = nps;
300 _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
301 }
302 spin_unlock_irq(&mdev->req_lock);
303 507
508 kref_put(&tconn->kref, &conn_destroy);
304 return 0; 509 return 0;
305} 510}
306 511
307void drbd_try_outdate_peer_async(struct drbd_conf *mdev) 512void conn_try_outdate_peer_async(struct drbd_tconn *tconn)
308{ 513{
309 struct task_struct *opa; 514 struct task_struct *opa;
310 515
311 opa = kthread_run(_try_outdate_peer_async, mdev, "drbd%d_a_helper", mdev_to_minor(mdev)); 516 kref_get(&tconn->kref);
312 if (IS_ERR(opa)) 517 opa = kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h");
313 dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n"); 518 if (IS_ERR(opa)) {
519 conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n");
520 kref_put(&tconn->kref, &conn_destroy);
521 }
314} 522}
315 523
316enum drbd_state_rv 524enum drbd_state_rv
@@ -318,15 +526,15 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 {
 	const int max_tries = 4;
 	enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
+	struct net_conf *nc;
 	int try = 0;
 	int forced = 0;
 	union drbd_state mask, val;
-	enum drbd_disk_state nps;
 
 	if (new_role == R_PRIMARY)
-		request_ping(mdev); /* Detect a dead peer ASAP */
+		request_ping(mdev->tconn); /* Detect a dead peer ASAP */
 
-	mutex_lock(&mdev->state_mutex);
+	mutex_lock(mdev->state_mutex);
 
 	mask.i = 0; mask.role = R_MASK;
 	val.i = 0; val.role = new_role;
@@ -354,38 +562,34 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
354 if (rv == SS_NO_UP_TO_DATE_DISK && 562 if (rv == SS_NO_UP_TO_DATE_DISK &&
355 mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) { 563 mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
356 D_ASSERT(mdev->state.pdsk == D_UNKNOWN); 564 D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
357 nps = drbd_try_outdate_peer(mdev);
358 565
359 if (nps == D_OUTDATED || nps == D_INCONSISTENT) { 566 if (conn_try_outdate_peer(mdev->tconn)) {
360 val.disk = D_UP_TO_DATE; 567 val.disk = D_UP_TO_DATE;
361 mask.disk = D_MASK; 568 mask.disk = D_MASK;
362 } 569 }
363
364 val.pdsk = nps;
365 mask.pdsk = D_MASK;
366
367 continue; 570 continue;
368 } 571 }
369 572
370 if (rv == SS_NOTHING_TO_DO) 573 if (rv == SS_NOTHING_TO_DO)
371 goto fail; 574 goto out;
372 if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) { 575 if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
373 nps = drbd_try_outdate_peer(mdev); 576 if (!conn_try_outdate_peer(mdev->tconn) && force) {
374
375 if (force && nps > D_OUTDATED) {
376 dev_warn(DEV, "Forced into split brain situation!\n"); 577 dev_warn(DEV, "Forced into split brain situation!\n");
377 nps = D_OUTDATED; 578 mask.pdsk = D_MASK;
378 } 579 val.pdsk = D_OUTDATED;
379
380 mask.pdsk = D_MASK;
381 val.pdsk = nps;
382 580
581 }
383 continue; 582 continue;
384 } 583 }
385 if (rv == SS_TWO_PRIMARIES) { 584 if (rv == SS_TWO_PRIMARIES) {
386 /* Maybe the peer is detected as dead very soon... 585 /* Maybe the peer is detected as dead very soon...
387 retry at most once more in this case. */ 586 retry at most once more in this case. */
388 schedule_timeout_interruptible((mdev->net_conf->ping_timeo+1)*HZ/10); 587 int timeo;
588 rcu_read_lock();
589 nc = rcu_dereference(mdev->tconn->net_conf);
590 timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
591 rcu_read_unlock();
592 schedule_timeout_interruptible(timeo);
389 if (try < max_tries) 593 if (try < max_tries)
390 try = max_tries - 1; 594 try = max_tries - 1;
391 continue; 595 continue;
@@ -394,13 +598,13 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 			rv = _drbd_request_state(mdev, mask, val,
 						CS_VERBOSE + CS_WAIT_COMPLETE);
 			if (rv < SS_SUCCESS)
-				goto fail;
+				goto out;
 		}
 		break;
 	}
 
 	if (rv < SS_SUCCESS)
-		goto fail;
+		goto out;
 
 	if (forced)
 		dev_warn(DEV, "Forced to consider local data as UpToDate!\n");
@@ -408,6 +612,8 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 	/* Wait until nothing is on the fly :) */
 	wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);
 
+	/* FIXME also wait for all pending P_BARRIER_ACK? */
+
 	if (new_role == R_SECONDARY) {
 		set_disk_ro(mdev->vdisk, true);
 		if (get_ldev(mdev)) {
@@ -415,10 +621,12 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 			put_ldev(mdev);
 		}
 	} else {
-		if (get_net_conf(mdev)) {
-			mdev->net_conf->want_lose = 0;
-			put_net_conf(mdev);
-		}
+		mutex_lock(&mdev->tconn->conf_update);
+		nc = mdev->tconn->net_conf;
+		if (nc)
+			nc->discard_my_data = 0; /* without copy; single bit op is atomic */
+		mutex_unlock(&mdev->tconn->conf_update);
+
 		set_disk_ro(mdev->vdisk, false);
 		if (get_ldev(mdev)) {
 			if (((mdev->state.conn < C_CONNECTED ||
@@ -444,67 +652,47 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
444 drbd_md_sync(mdev); 652 drbd_md_sync(mdev);
445 653
446 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); 654 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
447 fail: 655out:
448 mutex_unlock(&mdev->state_mutex); 656 mutex_unlock(mdev->state_mutex);
449 return rv; 657 return rv;
450} 658}
451 659
452static struct drbd_conf *ensure_mdev(int minor, int create) 660static const char *from_attrs_err_to_txt(int err)
453{ 661{
454 struct drbd_conf *mdev; 662 return err == -ENOMSG ? "required attribute missing" :
455 663 err == -EOPNOTSUPP ? "unknown mandatory attribute" :
456 if (minor >= minor_count) 664 err == -EEXIST ? "can not change invariant setting" :
457 return NULL; 665 "invalid attribute value";
458
459 mdev = minor_to_mdev(minor);
460
461 if (!mdev && create) {
462 struct gendisk *disk = NULL;
463 mdev = drbd_new_device(minor);
464
465 spin_lock_irq(&drbd_pp_lock);
466 if (minor_table[minor] == NULL) {
467 minor_table[minor] = mdev;
468 disk = mdev->vdisk;
469 mdev = NULL;
470 } /* else: we lost the race */
471 spin_unlock_irq(&drbd_pp_lock);
472
473 if (disk) /* we won the race above */
474 /* in case we ever add a drbd_delete_device(),
475 * don't forget the del_gendisk! */
476 add_disk(disk);
477 else /* we lost the race above */
478 drbd_free_mdev(mdev);
479
480 mdev = minor_to_mdev(minor);
481 }
482
483 return mdev;
484} 666}
485 667
486static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 668int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
487 struct drbd_nl_cfg_reply *reply)
488{ 669{
489 struct primary primary_args; 670 struct set_role_parms parms;
490 671 int err;
491 memset(&primary_args, 0, sizeof(struct primary)); 672 enum drbd_ret_code retcode;
492 if (!primary_from_tags(mdev, nlp->tag_list, &primary_args)) {
493 reply->ret_code = ERR_MANDATORY_TAG;
494 return 0;
495 }
496
497 reply->ret_code =
498 drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force);
499 673
500 return 0; 674 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
501} 675 if (!adm_ctx.reply_skb)
676 return retcode;
677 if (retcode != NO_ERROR)
678 goto out;
502 679
503static int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 680 memset(&parms, 0, sizeof(parms));
504 struct drbd_nl_cfg_reply *reply) 681 if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
505{ 682 err = set_role_parms_from_attrs(&parms, info);
506 reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0); 683 if (err) {
684 retcode = ERR_MANDATORY_TAG;
685 drbd_msg_put_info(from_attrs_err_to_txt(err));
686 goto out;
687 }
688 }
507 689
690 if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
691 retcode = drbd_set_role(adm_ctx.mdev, R_PRIMARY, parms.assume_uptodate);
692 else
693 retcode = drbd_set_role(adm_ctx.mdev, R_SECONDARY, 0);
694out:
695 drbd_adm_finish(info, retcode);
508 return 0; 696 return 0;
509} 697}
510 698
@@ -514,7 +702,12 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
 				       struct drbd_backing_dev *bdev)
 {
 	sector_t md_size_sect = 0;
-	switch (bdev->dc.meta_dev_idx) {
+	int meta_dev_idx;
+
+	rcu_read_lock();
+	meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx;
+
+	switch (meta_dev_idx) {
 	default:
 		/* v07 style fixed size indexed meta data */
 		bdev->md.md_size_sect = MD_RESERVED_SECT;
@@ -533,7 +726,7 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
 	case DRBD_MD_INDEX_FLEX_INT:
 		bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
 		/* al size is still fixed */
-		bdev->md.al_offset = -MD_AL_MAX_SIZE;
+		bdev->md.al_offset = -MD_AL_SECTORS;
 		/* we need (slightly less than) ~ this much bitmap sectors: */
 		md_size_sect = drbd_get_capacity(bdev->backing_bdev);
 		md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
@@ -549,6 +742,7 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
 		bdev->md.bm_offset = -md_size_sect + MD_AL_OFFSET;
 		break;
 	}
+	rcu_read_unlock();
 }
 
 /* input size is expected to be in KB */
@@ -581,17 +775,23 @@ char *ppsize(char *buf, unsigned long long size)
581 * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET: 775 * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
582 * peer may not initiate a resize. 776 * peer may not initiate a resize.
583 */ 777 */
778/* Note these are not to be confused with
779 * drbd_adm_suspend_io/drbd_adm_resume_io,
780 * which are (sub) state changes triggered by admin (drbdsetup),
781 * and can be long lived.
782 * This changes an mdev->flag, is triggered by drbd internals,
783 * and should be short-lived. */
584void drbd_suspend_io(struct drbd_conf *mdev) 784void drbd_suspend_io(struct drbd_conf *mdev)
585{ 785{
586 drbd_set_flag(mdev, SUSPEND_IO); 786 set_bit(SUSPEND_IO, &mdev->flags);
587 if (is_susp(mdev->state)) 787 if (drbd_suspended(mdev))
588 return; 788 return;
589 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); 789 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
590} 790}
591 791
592void drbd_resume_io(struct drbd_conf *mdev) 792void drbd_resume_io(struct drbd_conf *mdev)
593{ 793{
594 drbd_clear_flag(mdev, SUSPEND_IO); 794 clear_bit(SUSPEND_IO, &mdev->flags);
595 wake_up(&mdev->misc_wait); 795 wake_up(&mdev->misc_wait);
596} 796}
597 797
@@ -605,7 +805,7 @@ void drbd_resume_io(struct drbd_conf *mdev)
 enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
 {
 	sector_t prev_first_sect, prev_size; /* previous meta location */
-	sector_t la_size;
+	sector_t la_size, u_size;
 	sector_t size;
 	char ppb[10];
 
@@ -633,7 +833,10 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
 	/* TODO: should only be some assert here, not (re)init... */
 	drbd_md_set_sector_offsets(mdev, mdev->ldev);
 
-	size = drbd_new_dev_size(mdev, mdev->ldev, flags & DDSF_FORCED);
+	rcu_read_lock();
+	u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
+	rcu_read_unlock();
+	size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED);
 
 	if (drbd_get_capacity(mdev->this_bdev) != size ||
 	    drbd_bm_capacity(mdev) != size) {
@@ -696,12 +899,12 @@ out:
 }
 
 sector_t
-drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int assume_peer_has_space)
+drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
+		  sector_t u_size, int assume_peer_has_space)
 {
 	sector_t p_size = mdev->p_size;  /* partner's disk size. */
 	sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */
 	sector_t m_size; /* my size */
-	sector_t u_size = bdev->dc.disk_size; /* size requested by user. */
 	sector_t size = 0;
 
 	m_size = drbd_get_max_capacity(bdev);
@@ -750,24 +953,21 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int ass
750 * failed, and 0 on success. You should call drbd_md_sync() after you called 953 * failed, and 0 on success. You should call drbd_md_sync() after you called
751 * this function. 954 * this function.
752 */ 955 */
753static int drbd_check_al_size(struct drbd_conf *mdev) 956static int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc)
754{ 957{
755 struct lru_cache *n, *t; 958 struct lru_cache *n, *t;
756 struct lc_element *e; 959 struct lc_element *e;
757 unsigned int in_use; 960 unsigned int in_use;
758 int i; 961 int i;
759 962
760 ERR_IF(mdev->sync_conf.al_extents < 7)
761 mdev->sync_conf.al_extents = 127;
762
763 if (mdev->act_log && 963 if (mdev->act_log &&
764 mdev->act_log->nr_elements == mdev->sync_conf.al_extents) 964 mdev->act_log->nr_elements == dc->al_extents)
765 return 0; 965 return 0;
766 966
767 in_use = 0; 967 in_use = 0;
768 t = mdev->act_log; 968 t = mdev->act_log;
769 n = lc_create("act_log", drbd_al_ext_cache, 969 n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
770 mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); 970 dc->al_extents, sizeof(struct lc_element), 0);
771 971
772 if (n == NULL) { 972 if (n == NULL) {
773 dev_err(DEV, "Cannot allocate act_log lru!\n"); 973 dev_err(DEV, "Cannot allocate act_log lru!\n");
@@ -808,7 +1008,9 @@ static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_
 		struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
 
 		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
-		max_segments = mdev->ldev->dc.max_bio_bvecs;
+		rcu_read_lock();
+		max_segments = rcu_dereference(mdev->ldev->disk_conf)->max_bio_bvecs;
+		rcu_read_unlock();
 		put_ldev(mdev);
 	}
 
@@ -852,12 +1054,14 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
 	   Because new from 8.3.8 onwards the peer can use multiple
 	   BIOs for a single peer_request */
 	if (mdev->state.conn >= C_CONNECTED) {
-		if (mdev->agreed_pro_version < 94) {
-			peer = min(mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
+		if (mdev->tconn->agreed_pro_version < 94)
+			peer = min( mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
 		/* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
-		} else if (mdev->agreed_pro_version == 94)
+		else if (mdev->tconn->agreed_pro_version == 94)
 			peer = DRBD_MAX_SIZE_H80_PACKET;
-		else /* drbd 8.3.8 onwards */
+		else if (mdev->tconn->agreed_pro_version < 100)
+			peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */
+		else
 			peer = DRBD_MAX_BIO_SIZE;
 	}
 
@@ -872,36 +1076,27 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
872 drbd_setup_queue_param(mdev, new); 1076 drbd_setup_queue_param(mdev, new);
873} 1077}
874 1078
875/* serialize deconfig (worker exiting, doing cleanup) 1079/* Starts the worker thread */
876 * and reconfig (drbdsetup disk, drbdsetup net) 1080static void conn_reconfig_start(struct drbd_tconn *tconn)
877 *
878 * Wait for a potentially exiting worker, then restart it,
879 * or start a new one. Flush any pending work, there may still be an
880 * after_state_change queued.
881 */
882static void drbd_reconfig_start(struct drbd_conf *mdev)
883{ 1081{
884 wait_event(mdev->state_wait, !drbd_test_and_set_flag(mdev, CONFIG_PENDING)); 1082 drbd_thread_start(&tconn->worker);
885 wait_event(mdev->state_wait, !drbd_test_flag(mdev, DEVICE_DYING)); 1083 conn_flush_workqueue(tconn);
886 drbd_thread_start(&mdev->worker);
887 drbd_flush_workqueue(mdev);
888} 1084}
889 1085
890/* if still unconfigured, stops worker again. 1086/* if still unconfigured, stops worker again. */
891 * if configured now, clears CONFIG_PENDING. 1087static void conn_reconfig_done(struct drbd_tconn *tconn)
892 * wakes potential waiters */
893static void drbd_reconfig_done(struct drbd_conf *mdev)
894{ 1088{
895 spin_lock_irq(&mdev->req_lock); 1089 bool stop_threads;
896 if (mdev->state.disk == D_DISKLESS && 1090 spin_lock_irq(&tconn->req_lock);
897 mdev->state.conn == C_STANDALONE && 1091 stop_threads = conn_all_vols_unconf(tconn) &&
898 mdev->state.role == R_SECONDARY) { 1092 tconn->cstate == C_STANDALONE;
899 drbd_set_flag(mdev, DEVICE_DYING); 1093 spin_unlock_irq(&tconn->req_lock);
900 drbd_thread_stop_nowait(&mdev->worker); 1094 if (stop_threads) {
901 } else 1095 /* asender is implicitly stopped by receiver
902 drbd_clear_flag(mdev, CONFIG_PENDING); 1096 * in conn_disconnect() */
903 spin_unlock_irq(&mdev->req_lock); 1097 drbd_thread_stop(&tconn->receiver);
904 wake_up(&mdev->state_wait); 1098 drbd_thread_stop(&tconn->worker);
1099 }
905} 1100}
906 1101
907/* Make sure IO is suspended before calling this function(). */ 1102/* Make sure IO is suspended before calling this function(). */
@@ -909,42 +1104,182 @@ static void drbd_suspend_al(struct drbd_conf *mdev)
909{ 1104{
910 int s = 0; 1105 int s = 0;
911 1106
912 if (lc_try_lock(mdev->act_log)) { 1107 if (!lc_try_lock(mdev->act_log)) {
913 drbd_al_shrink(mdev);
914 lc_unlock(mdev->act_log);
915 } else {
916 dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n"); 1108 dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n");
917 return; 1109 return;
918 } 1110 }
919 1111
920 spin_lock_irq(&mdev->req_lock); 1112 drbd_al_shrink(mdev);
1113 spin_lock_irq(&mdev->tconn->req_lock);
921 if (mdev->state.conn < C_CONNECTED) 1114 if (mdev->state.conn < C_CONNECTED)
922 s = !drbd_test_and_set_flag(mdev, AL_SUSPENDED); 1115 s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags);
923 1116 spin_unlock_irq(&mdev->tconn->req_lock);
924 spin_unlock_irq(&mdev->req_lock); 1117 lc_unlock(mdev->act_log);
925 1118
926 if (s) 1119 if (s)
927 dev_info(DEV, "Suspended AL updates\n"); 1120 dev_info(DEV, "Suspended AL updates\n");
928} 1121}
929 1122
930/* does always return 0; 1123
931 * interesting return code is in reply->ret_code */ 1124static bool should_set_defaults(struct genl_info *info)
932static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 1125{
933 struct drbd_nl_cfg_reply *reply) 1126 unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
1127 return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
1128}
1129
1130static void enforce_disk_conf_limits(struct disk_conf *dc)
1131{
1132 if (dc->al_extents < DRBD_AL_EXTENTS_MIN)
1133 dc->al_extents = DRBD_AL_EXTENTS_MIN;
1134 if (dc->al_extents > DRBD_AL_EXTENTS_MAX)
1135 dc->al_extents = DRBD_AL_EXTENTS_MAX;
1136
1137 if (dc->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1138 dc->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1139}
1140
1141int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
934{ 1142{
935 enum drbd_ret_code retcode; 1143 enum drbd_ret_code retcode;
1144 struct drbd_conf *mdev;
1145 struct disk_conf *new_disk_conf, *old_disk_conf;
1146 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
1147 int err, fifo_size;
1148
1149 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1150 if (!adm_ctx.reply_skb)
1151 return retcode;
1152 if (retcode != NO_ERROR)
1153 goto out;
1154
1155 mdev = adm_ctx.mdev;
1156
1157 /* we also need a disk
1158 * to change the options on */
1159 if (!get_ldev(mdev)) {
1160 retcode = ERR_NO_DISK;
1161 goto out;
1162 }
1163
1164 new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
1165 if (!new_disk_conf) {
1166 retcode = ERR_NOMEM;
1167 goto fail;
1168 }
1169
1170 mutex_lock(&mdev->tconn->conf_update);
1171 old_disk_conf = mdev->ldev->disk_conf;
1172 *new_disk_conf = *old_disk_conf;
1173 if (should_set_defaults(info))
1174 set_disk_conf_defaults(new_disk_conf);
1175
1176 err = disk_conf_from_attrs_for_change(new_disk_conf, info);
1177 if (err && err != -ENOMSG) {
1178 retcode = ERR_MANDATORY_TAG;
1179 drbd_msg_put_info(from_attrs_err_to_txt(err));
1180 }
1181
1182 if (!expect(new_disk_conf->resync_rate >= 1))
1183 new_disk_conf->resync_rate = 1;
1184
1185 enforce_disk_conf_limits(new_disk_conf);
1186
1187 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1188 if (fifo_size != mdev->rs_plan_s->size) {
1189 new_plan = fifo_alloc(fifo_size);
1190 if (!new_plan) {
1191 dev_err(DEV, "kmalloc of fifo_buffer failed");
1192 retcode = ERR_NOMEM;
1193 goto fail_unlock;
1194 }
1195 }
1196
1197 drbd_suspend_io(mdev);
1198 wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
1199 drbd_al_shrink(mdev);
1200 err = drbd_check_al_size(mdev, new_disk_conf);
1201 lc_unlock(mdev->act_log);
1202 wake_up(&mdev->al_wait);
1203 drbd_resume_io(mdev);
1204
1205 if (err) {
1206 retcode = ERR_NOMEM;
1207 goto fail_unlock;
1208 }
1209
1210 write_lock_irq(&global_state_lock);
1211 retcode = drbd_resync_after_valid(mdev, new_disk_conf->resync_after);
1212 if (retcode == NO_ERROR) {
1213 rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
1214 drbd_resync_after_changed(mdev);
1215 }
1216 write_unlock_irq(&global_state_lock);
1217
1218 if (retcode != NO_ERROR)
1219 goto fail_unlock;
1220
1221 if (new_plan) {
1222 old_plan = mdev->rs_plan_s;
1223 rcu_assign_pointer(mdev->rs_plan_s, new_plan);
1224 }
1225
1226 mutex_unlock(&mdev->tconn->conf_update);
1227
1228 if (new_disk_conf->al_updates)
1229 mdev->ldev->md.flags &= ~MDF_AL_DISABLED;
1230 else
1231 mdev->ldev->md.flags |= MDF_AL_DISABLED;
1232
1233 drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush);
1234
1235 drbd_md_sync(mdev);
1236
1237 if (mdev->state.conn >= C_CONNECTED)
1238 drbd_send_sync_param(mdev);
1239
1240 synchronize_rcu();
1241 kfree(old_disk_conf);
1242 kfree(old_plan);
1243 mod_timer(&mdev->request_timer, jiffies + HZ);
1244 goto success;
1245
1246fail_unlock:
1247 mutex_unlock(&mdev->tconn->conf_update);
1248 fail:
1249 kfree(new_disk_conf);
1250 kfree(new_plan);
1251success:
1252 put_ldev(mdev);
1253 out:
1254 drbd_adm_finish(info, retcode);
1255 return 0;
1256}
1257
1258int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1259{
1260 struct drbd_conf *mdev;
1261 int err;
1262 enum drbd_ret_code retcode;
936 enum determine_dev_size dd; 1263 enum determine_dev_size dd;
937 sector_t max_possible_sectors; 1264 sector_t max_possible_sectors;
938 sector_t min_md_device_sectors; 1265 sector_t min_md_device_sectors;
939 struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */ 1266 struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
1267 struct disk_conf *new_disk_conf = NULL;
940 struct block_device *bdev; 1268 struct block_device *bdev;
941 struct lru_cache *resync_lru = NULL; 1269 struct lru_cache *resync_lru = NULL;
1270 struct fifo_buffer *new_plan = NULL;
942 union drbd_state ns, os; 1271 union drbd_state ns, os;
943 enum drbd_state_rv rv; 1272 enum drbd_state_rv rv;
944 int cp_discovered = 0; 1273 struct net_conf *nc;
945 int logical_block_size;
946 1274
947 drbd_reconfig_start(mdev); 1275 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1276 if (!adm_ctx.reply_skb)
1277 return retcode;
1278 if (retcode != NO_ERROR)
1279 goto finish;
1280
1281 mdev = adm_ctx.mdev;
1282 conn_reconfig_start(mdev->tconn);
948 1283
949 /* if you want to reconfigure, please tear down first */ 1284 /* if you want to reconfigure, please tear down first */
950 if (mdev->state.disk > D_DISKLESS) { 1285 if (mdev->state.disk > D_DISKLESS) {
@@ -958,52 +1293,66 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
958 wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); 1293 wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
959 1294
960 /* make sure there is no leftover from previous force-detach attempts */ 1295 /* make sure there is no leftover from previous force-detach attempts */
961 drbd_clear_flag(mdev, FORCE_DETACH); 1296 clear_bit(FORCE_DETACH, &mdev->flags);
962 drbd_clear_flag(mdev, WAS_IO_ERROR); 1297 clear_bit(WAS_IO_ERROR, &mdev->flags);
963 drbd_clear_flag(mdev, WAS_READ_ERROR); 1298 clear_bit(WAS_READ_ERROR, &mdev->flags);
964 1299
965 /* and no leftover from previously aborted resync or verify, either */ 1300 /* and no leftover from previously aborted resync or verify, either */
966 mdev->rs_total = 0; 1301 mdev->rs_total = 0;
967 mdev->rs_failed = 0; 1302 mdev->rs_failed = 0;
968 atomic_set(&mdev->rs_pending_cnt, 0); 1303 atomic_set(&mdev->rs_pending_cnt, 0);
969 1304
970 /* allocation not in the IO path, cqueue thread context */ 1305 /* allocation not in the IO path, drbdsetup context */
971 nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); 1306 nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
972 if (!nbc) { 1307 if (!nbc) {
973 retcode = ERR_NOMEM; 1308 retcode = ERR_NOMEM;
974 goto fail; 1309 goto fail;
975 } 1310 }
976
977 nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF;
978 nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF;
979 nbc->dc.fencing = DRBD_FENCING_DEF;
980 nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF;
981
982 spin_lock_init(&nbc->md.uuid_lock); 1311 spin_lock_init(&nbc->md.uuid_lock);
983 1312
984 if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) { 1313 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
1314 if (!new_disk_conf) {
1315 retcode = ERR_NOMEM;
1316 goto fail;
1317 }
1318 nbc->disk_conf = new_disk_conf;
1319
1320 set_disk_conf_defaults(new_disk_conf);
1321 err = disk_conf_from_attrs(new_disk_conf, info);
1322 if (err) {
985 retcode = ERR_MANDATORY_TAG; 1323 retcode = ERR_MANDATORY_TAG;
1324 drbd_msg_put_info(from_attrs_err_to_txt(err));
986 goto fail; 1325 goto fail;
987 } 1326 }
988 1327
989 if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { 1328 enforce_disk_conf_limits(new_disk_conf);
1329
1330 new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
1331 if (!new_plan) {
1332 retcode = ERR_NOMEM;
1333 goto fail;
1334 }
1335
1336 if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
990 retcode = ERR_MD_IDX_INVALID; 1337 retcode = ERR_MD_IDX_INVALID;
991 goto fail; 1338 goto fail;
992 } 1339 }
993 1340
994 if (get_net_conf(mdev)) { 1341 rcu_read_lock();
995 int prot = mdev->net_conf->wire_protocol; 1342 nc = rcu_dereference(mdev->tconn->net_conf);
996 put_net_conf(mdev); 1343 if (nc) {
997 if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) { 1344 if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1345 rcu_read_unlock();
998 retcode = ERR_STONITH_AND_PROT_A; 1346 retcode = ERR_STONITH_AND_PROT_A;
999 goto fail; 1347 goto fail;
1000 } 1348 }
1001 } 1349 }
1350 rcu_read_unlock();
1002 1351
1003 bdev = blkdev_get_by_path(nbc->dc.backing_dev, 1352 bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
1004 FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev); 1353 FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev);
1005 if (IS_ERR(bdev)) { 1354 if (IS_ERR(bdev)) {
1006 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, 1355 dev_err(DEV, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
1007 PTR_ERR(bdev)); 1356 PTR_ERR(bdev));
1008 retcode = ERR_OPEN_DISK; 1357 retcode = ERR_OPEN_DISK;
1009 goto fail; 1358 goto fail;
@@ -1018,12 +1367,12 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1018 * should check it for you already; but if you don't, or 1367 * should check it for you already; but if you don't, or
1019 * someone fooled it, we need to double check here) 1368 * someone fooled it, we need to double check here)
1020 */ 1369 */
1021 bdev = blkdev_get_by_path(nbc->dc.meta_dev, 1370 bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
1022 FMODE_READ | FMODE_WRITE | FMODE_EXCL, 1371 FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1023 (nbc->dc.meta_dev_idx < 0) ? 1372 (new_disk_conf->meta_dev_idx < 0) ?
1024 (void *)mdev : (void *)drbd_m_holder); 1373 (void *)mdev : (void *)drbd_m_holder);
1025 if (IS_ERR(bdev)) { 1374 if (IS_ERR(bdev)) {
1026 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, 1375 dev_err(DEV, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
1027 PTR_ERR(bdev)); 1376 PTR_ERR(bdev));
1028 retcode = ERR_OPEN_MD_DISK; 1377 retcode = ERR_OPEN_MD_DISK;
1029 goto fail; 1378 goto fail;
@@ -1031,14 +1380,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1031 nbc->md_bdev = bdev; 1380 nbc->md_bdev = bdev;
1032 1381
1033 if ((nbc->backing_bdev == nbc->md_bdev) != 1382 if ((nbc->backing_bdev == nbc->md_bdev) !=
1034 (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || 1383 (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1035 nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) { 1384 new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1036 retcode = ERR_MD_IDX_INVALID; 1385 retcode = ERR_MD_IDX_INVALID;
1037 goto fail; 1386 goto fail;
1038 } 1387 }
1039 1388
1040 resync_lru = lc_create("resync", drbd_bm_ext_cache, 1389 resync_lru = lc_create("resync", drbd_bm_ext_cache,
1041 61, sizeof(struct bm_extent), 1390 1, 61, sizeof(struct bm_extent),
1042 offsetof(struct bm_extent, lce)); 1391 offsetof(struct bm_extent, lce));
1043 if (!resync_lru) { 1392 if (!resync_lru) {
1044 retcode = ERR_NOMEM; 1393 retcode = ERR_NOMEM;
@@ -1048,21 +1397,21 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1048 /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */ 1397 /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
1049 drbd_md_set_sector_offsets(mdev, nbc); 1398 drbd_md_set_sector_offsets(mdev, nbc);
1050 1399
1051 if (drbd_get_max_capacity(nbc) < nbc->dc.disk_size) { 1400 if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
1052 dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", 1401 dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
1053 (unsigned long long) drbd_get_max_capacity(nbc), 1402 (unsigned long long) drbd_get_max_capacity(nbc),
1054 (unsigned long long) nbc->dc.disk_size); 1403 (unsigned long long) new_disk_conf->disk_size);
1055 retcode = ERR_DISK_TOO_SMALL; 1404 retcode = ERR_DISK_TOO_SMALL;
1056 goto fail; 1405 goto fail;
1057 } 1406 }
1058 1407
1059 if (nbc->dc.meta_dev_idx < 0) { 1408 if (new_disk_conf->meta_dev_idx < 0) {
1060 max_possible_sectors = DRBD_MAX_SECTORS_FLEX; 1409 max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1061 /* at least one MB, otherwise it does not make sense */ 1410 /* at least one MB, otherwise it does not make sense */
1062 min_md_device_sectors = (2<<10); 1411 min_md_device_sectors = (2<<10);
1063 } else { 1412 } else {
1064 max_possible_sectors = DRBD_MAX_SECTORS; 1413 max_possible_sectors = DRBD_MAX_SECTORS;
1065 min_md_device_sectors = MD_RESERVED_SECT * (nbc->dc.meta_dev_idx + 1); 1414 min_md_device_sectors = MD_RESERVED_SECT * (new_disk_conf->meta_dev_idx + 1);
1066 } 1415 }
1067 1416
1068 if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { 1417 if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
@@ -1087,14 +1436,20 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1087 dev_warn(DEV, "==> truncating very big lower level device " 1436 dev_warn(DEV, "==> truncating very big lower level device "
1088 "to currently maximum possible %llu sectors <==\n", 1437 "to currently maximum possible %llu sectors <==\n",
1089 (unsigned long long) max_possible_sectors); 1438 (unsigned long long) max_possible_sectors);
1090 if (nbc->dc.meta_dev_idx >= 0) 1439 if (new_disk_conf->meta_dev_idx >= 0)
1091 dev_warn(DEV, "==>> using internal or flexible " 1440 dev_warn(DEV, "==>> using internal or flexible "
1092 "meta data may help <<==\n"); 1441 "meta data may help <<==\n");
1093 } 1442 }
1094 1443
1095 drbd_suspend_io(mdev); 1444 drbd_suspend_io(mdev);
1096 /* also wait for the last barrier ack. */ 1445 /* also wait for the last barrier ack. */
1097 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state)); 1446 /* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
1447 * We need a way to either ignore barrier acks for barriers sent before a device
1448 * was attached, or a way to wait for all pending barrier acks to come in.
1449 * As barriers are counted per resource,
1450 * we'd need to suspend io on all devices of a resource.
1451 */
1452 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || drbd_suspended(mdev));
1098 /* and for any other previously queued work */ 1453 /* and for any other previously queued work */
1099 drbd_flush_workqueue(mdev); 1454 drbd_flush_workqueue(mdev);
1100 1455
@@ -1109,25 +1464,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1109 1464
1110 drbd_md_set_sector_offsets(mdev, nbc); 1465 drbd_md_set_sector_offsets(mdev, nbc);
1111 1466
1112 /* allocate a second IO page if logical_block_size != 512 */
1113 logical_block_size = bdev_logical_block_size(nbc->md_bdev);
1114 if (logical_block_size == 0)
1115 logical_block_size = MD_SECTOR_SIZE;
1116
1117 if (logical_block_size != MD_SECTOR_SIZE) {
1118 if (!mdev->md_io_tmpp) {
1119 struct page *page = alloc_page(GFP_NOIO);
1120 if (!page)
1121 goto force_diskless_dec;
1122
1123 dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
1124 logical_block_size, MD_SECTOR_SIZE);
1125 dev_warn(DEV, "Workaround engaged (has performance impact).\n");
1126
1127 mdev->md_io_tmpp = page;
1128 }
1129 }
1130
1131 if (!mdev->bitmap) { 1467 if (!mdev->bitmap) {
1132 if (drbd_bm_init(mdev)) { 1468 if (drbd_bm_init(mdev)) {
1133 retcode = ERR_NOMEM; 1469 retcode = ERR_NOMEM;
@@ -1149,30 +1485,25 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1149 } 1485 }
1150 1486
1151 /* Since we are diskless, fix the activity log first... */ 1487 /* Since we are diskless, fix the activity log first... */
1152 if (drbd_check_al_size(mdev)) { 1488 if (drbd_check_al_size(mdev, new_disk_conf)) {
1153 retcode = ERR_NOMEM; 1489 retcode = ERR_NOMEM;
1154 goto force_diskless_dec; 1490 goto force_diskless_dec;
1155 } 1491 }
1156 1492
1157 /* Prevent shrinking of consistent devices ! */ 1493 /* Prevent shrinking of consistent devices ! */
1158 if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && 1494 if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
1159 drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) { 1495 drbd_new_dev_size(mdev, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
1160 dev_warn(DEV, "refusing to truncate a consistent device\n"); 1496 dev_warn(DEV, "refusing to truncate a consistent device\n");
1161 retcode = ERR_DISK_TOO_SMALL; 1497 retcode = ERR_DISK_TOO_SMALL;
1162 goto force_diskless_dec; 1498 goto force_diskless_dec;
1163 } 1499 }
1164 1500
1165 if (!drbd_al_read_log(mdev, nbc)) {
1166 retcode = ERR_IO_MD_DISK;
1167 goto force_diskless_dec;
1168 }
1169
1170 /* Reset the "barriers don't work" bits here, then force meta data to 1501 /* Reset the "barriers don't work" bits here, then force meta data to
1171 * be written, to ensure we determine if barriers are supported. */ 1502 * be written, to ensure we determine if barriers are supported. */
1172 if (nbc->dc.no_md_flush) 1503 if (new_disk_conf->md_flushes)
1173 drbd_set_flag(mdev, MD_NO_FUA); 1504 clear_bit(MD_NO_FUA, &mdev->flags);
1174 else 1505 else
1175 drbd_clear_flag(mdev, MD_NO_FUA); 1506 set_bit(MD_NO_FUA, &mdev->flags);
1176 1507
1177 /* Point of no return reached. 1508 /* Point of no return reached.
1178 * Devices and memory are no longer released by error cleanup below. 1509 * Devices and memory are no longer released by error cleanup below.
@@ -1181,22 +1512,22 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1181 D_ASSERT(mdev->ldev == NULL); 1512 D_ASSERT(mdev->ldev == NULL);
1182 mdev->ldev = nbc; 1513 mdev->ldev = nbc;
1183 mdev->resync = resync_lru; 1514 mdev->resync = resync_lru;
1515 mdev->rs_plan_s = new_plan;
1184 nbc = NULL; 1516 nbc = NULL;
1185 resync_lru = NULL; 1517 resync_lru = NULL;
1518 new_disk_conf = NULL;
1519 new_plan = NULL;
1186 1520
1187 mdev->write_ordering = WO_bdev_flush; 1521 drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush);
1188 drbd_bump_write_ordering(mdev, WO_bdev_flush);
1189 1522
1190 if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY)) 1523 if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY))
1191 drbd_set_flag(mdev, CRASHED_PRIMARY); 1524 set_bit(CRASHED_PRIMARY, &mdev->flags);
1192 else 1525 else
1193 drbd_clear_flag(mdev, CRASHED_PRIMARY); 1526 clear_bit(CRASHED_PRIMARY, &mdev->flags);
1194 1527
1195 if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) && 1528 if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
1196 !(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) { 1529 !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod))
1197 drbd_set_flag(mdev, CRASHED_PRIMARY); 1530 set_bit(CRASHED_PRIMARY, &mdev->flags);
1198 cp_discovered = 1;
1199 }
1200 1531
1201 mdev->send_cnt = 0; 1532 mdev->send_cnt = 0;
1202 mdev->recv_cnt = 0; 1533 mdev->recv_cnt = 0;
@@ -1219,20 +1550,22 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1219 * so we can automatically recover from a crash of a 1550 * so we can automatically recover from a crash of a
1220 * degraded but active "cluster" after a certain timeout. 1551 * degraded but active "cluster" after a certain timeout.
1221 */ 1552 */
1222 drbd_clear_flag(mdev, USE_DEGR_WFC_T); 1553 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
1223 if (mdev->state.role != R_PRIMARY && 1554 if (mdev->state.role != R_PRIMARY &&
1224 drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) && 1555 drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
1225 !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND)) 1556 !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
1226 drbd_set_flag(mdev, USE_DEGR_WFC_T); 1557 set_bit(USE_DEGR_WFC_T, &mdev->flags);
1227 1558
1228 dd = drbd_determine_dev_size(mdev, 0); 1559 dd = drbd_determine_dev_size(mdev, 0);
1229 if (dd == dev_size_error) { 1560 if (dd == dev_size_error) {
1230 retcode = ERR_NOMEM_BITMAP; 1561 retcode = ERR_NOMEM_BITMAP;
1231 goto force_diskless_dec; 1562 goto force_diskless_dec;
1232 } else if (dd == grew) 1563 } else if (dd == grew)
1233 drbd_set_flag(mdev, RESYNC_AFTER_NEG); 1564 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
1234 1565
1235 if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { 1566 if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) ||
1567 (test_bit(CRASHED_PRIMARY, &mdev->flags) &&
1568 drbd_md_test_flag(mdev->ldev, MDF_AL_DISABLED))) {
1236 dev_info(DEV, "Assuming that all blocks are out of sync " 1569 dev_info(DEV, "Assuming that all blocks are out of sync "
1237 "(aka FullSync)\n"); 1570 "(aka FullSync)\n");
1238 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, 1571 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
@@ -1242,16 +1575,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1242 } 1575 }
1243 } else { 1576 } else {
1244 if (drbd_bitmap_io(mdev, &drbd_bm_read, 1577 if (drbd_bitmap_io(mdev, &drbd_bm_read,
1245 "read from attaching", BM_LOCKED_MASK) < 0) { 1578 "read from attaching", BM_LOCKED_MASK)) {
1246 retcode = ERR_IO_MD_DISK;
1247 goto force_diskless_dec;
1248 }
1249 }
1250
1251 if (cp_discovered) {
1252 drbd_al_apply_to_bm(mdev);
1253 if (drbd_bitmap_io(mdev, &drbd_bm_write,
1254 "crashed primary apply AL", BM_LOCKED_MASK)) {
1255 retcode = ERR_IO_MD_DISK; 1579 retcode = ERR_IO_MD_DISK;
1256 goto force_diskless_dec; 1580 goto force_diskless_dec;
1257 } 1581 }
@@ -1260,9 +1584,9 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1260 if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev)) 1584 if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
1261 drbd_suspend_al(mdev); /* IO is still suspended here... */ 1585 drbd_suspend_al(mdev); /* IO is still suspended here... */
1262 1586
1263 spin_lock_irq(&mdev->req_lock); 1587 spin_lock_irq(&mdev->tconn->req_lock);
1264 os = mdev->state; 1588 os = drbd_read_state(mdev);
1265 ns.i = os.i; 1589 ns = os;
1266 /* If MDF_CONSISTENT is not set go into inconsistent state, 1590 /* If MDF_CONSISTENT is not set go into inconsistent state,
1267 otherwise investigate MDF_WasUpToDate... 1591 otherwise investigate MDF_WasUpToDate...
1268 If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state, 1592 If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
@@ -1280,8 +1604,9 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1280 if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED)) 1604 if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED))
1281 ns.pdsk = D_OUTDATED; 1605 ns.pdsk = D_OUTDATED;
1282 1606
1283 if ( ns.disk == D_CONSISTENT && 1607 rcu_read_lock();
1284 (ns.pdsk == D_OUTDATED || mdev->ldev->dc.fencing == FP_DONT_CARE)) 1608 if (ns.disk == D_CONSISTENT &&
1609 (ns.pdsk == D_OUTDATED || rcu_dereference(mdev->ldev->disk_conf)->fencing == FP_DONT_CARE))
1285 ns.disk = D_UP_TO_DATE; 1610 ns.disk = D_UP_TO_DATE;
1286 1611
1287 /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND, 1612 /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
@@ -1289,6 +1614,13 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1289 this point, because drbd_request_state() modifies these 1614 this point, because drbd_request_state() modifies these
1290 flags. */ 1615 flags. */
1291 1616
1617 if (rcu_dereference(mdev->ldev->disk_conf)->al_updates)
1618 mdev->ldev->md.flags &= ~MDF_AL_DISABLED;
1619 else
1620 mdev->ldev->md.flags |= MDF_AL_DISABLED;
1621
1622 rcu_read_unlock();
1623
1292 /* In case we are C_CONNECTED postpone any decision on the new disk 1624 /* In case we are C_CONNECTED postpone any decision on the new disk
1293 state after the negotiation phase. */ 1625 state after the negotiation phase. */
1294 if (mdev->state.conn == C_CONNECTED) { 1626 if (mdev->state.conn == C_CONNECTED) {
@@ -1304,12 +1636,13 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1304 } 1636 }
1305 1637
1306 rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); 1638 rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
1307 ns = mdev->state; 1639 spin_unlock_irq(&mdev->tconn->req_lock);
1308 spin_unlock_irq(&mdev->req_lock);
1309 1640
1310 if (rv < SS_SUCCESS) 1641 if (rv < SS_SUCCESS)
1311 goto force_diskless_dec; 1642 goto force_diskless_dec;
1312 1643
1644 mod_timer(&mdev->request_timer, jiffies + HZ);
1645
1313 if (mdev->state.role == R_PRIMARY) 1646 if (mdev->state.role == R_PRIMARY)
1314 mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1; 1647 mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1;
1315 else 1648 else
@@ -1320,16 +1653,17 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1320 1653
1321 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); 1654 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
1322 put_ldev(mdev); 1655 put_ldev(mdev);
1323 reply->ret_code = retcode; 1656 conn_reconfig_done(mdev->tconn);
1324 drbd_reconfig_done(mdev); 1657 drbd_adm_finish(info, retcode);
1325 return 0; 1658 return 0;
1326 1659
1327 force_diskless_dec: 1660 force_diskless_dec:
1328 put_ldev(mdev); 1661 put_ldev(mdev);
1329 force_diskless: 1662 force_diskless:
1330 drbd_force_state(mdev, NS(disk, D_FAILED)); 1663 drbd_force_state(mdev, NS(disk, D_DISKLESS));
1331 drbd_md_sync(mdev); 1664 drbd_md_sync(mdev);
1332 fail: 1665 fail:
1666 conn_reconfig_done(mdev->tconn);
1333 if (nbc) { 1667 if (nbc) {
1334 if (nbc->backing_bdev) 1668 if (nbc->backing_bdev)
1335 blkdev_put(nbc->backing_bdev, 1669 blkdev_put(nbc->backing_bdev,
@@ -1339,34 +1673,24 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1339 FMODE_READ | FMODE_WRITE | FMODE_EXCL); 1673 FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1340 kfree(nbc); 1674 kfree(nbc);
1341 } 1675 }
1676 kfree(new_disk_conf);
1342 lc_destroy(resync_lru); 1677 lc_destroy(resync_lru);
1678 kfree(new_plan);
1343 1679
1344 reply->ret_code = retcode; 1680 finish:
1345 drbd_reconfig_done(mdev); 1681 drbd_adm_finish(info, retcode);
1346 return 0; 1682 return 0;
1347} 1683}
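
One thing the rewritten attach path above illustrates is the new read-side locking: settings such as net_conf and disk_conf are now looked up with rcu_dereference() inside an rcu_read_lock()/rcu_read_unlock() pair instead of the old get_net_conf()/put_net_conf() reference counting. A minimal sketch of that read-side idiom, using placeholder struct and field names rather than the real drbd ones:

#include <linux/rcupdate.h>

/* Placeholder types; drbd uses struct net_conf / struct disk_conf here. */
struct my_conf {
	int fencing;
};

struct my_dev {
	struct my_conf __rcu *conf;
};

static int read_fencing_policy(struct my_dev *dev)
{
	struct my_conf *conf;
	int fencing = -1;	/* "not configured" */

	rcu_read_lock();
	conf = rcu_dereference(dev->conf);	/* may be NULL while unconfigured */
	if (conf)
		fencing = conf->fencing;
	rcu_read_unlock();
	/* conf must not be dereferenced after rcu_read_unlock() */
	return fencing;
}
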
1348 1684
1349/* Detaching the disk is a process in multiple stages. First we need to lock 1685static int adm_detach(struct drbd_conf *mdev, int force)
1350 * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1351 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1352 * internal references as well.
1353 * Only then we have finally detached. */
1354static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1355 struct drbd_nl_cfg_reply *reply)
1356{ 1686{
1357 enum drbd_ret_code retcode; 1687 enum drbd_state_rv retcode;
1358 int ret; 1688 int ret;
1359 struct detach dt = {};
1360 1689
1361 if (!detach_from_tags(mdev, nlp->tag_list, &dt)) { 1690 if (force) {
1362 reply->ret_code = ERR_MANDATORY_TAG; 1691 set_bit(FORCE_DETACH, &mdev->flags);
1363 goto out;
1364 }
1365
1366 if (dt.detach_force) {
1367 drbd_set_flag(mdev, FORCE_DETACH);
1368 drbd_force_state(mdev, NS(disk, D_FAILED)); 1692 drbd_force_state(mdev, NS(disk, D_FAILED));
1369 reply->ret_code = SS_SUCCESS; 1693 retcode = SS_SUCCESS;
1370 goto out; 1694 goto out;
1371 } 1695 }
1372 1696
@@ -1378,326 +1702,529 @@ static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1378 ret = wait_event_interruptible(mdev->misc_wait, 1702 ret = wait_event_interruptible(mdev->misc_wait,
1379 mdev->state.disk != D_FAILED); 1703 mdev->state.disk != D_FAILED);
1380 drbd_resume_io(mdev); 1704 drbd_resume_io(mdev);
1381
1382 if ((int)retcode == (int)SS_IS_DISKLESS) 1705 if ((int)retcode == (int)SS_IS_DISKLESS)
1383 retcode = SS_NOTHING_TO_DO; 1706 retcode = SS_NOTHING_TO_DO;
1384 if (ret) 1707 if (ret)
1385 retcode = ERR_INTR; 1708 retcode = ERR_INTR;
1386 reply->ret_code = retcode;
1387out: 1709out:
1388 return 0; 1710 return retcode;
1389} 1711}
1390 1712
1391static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 1713/* Detaching the disk is a process in multiple stages. First we need to lock
1392 struct drbd_nl_cfg_reply *reply) 1714 * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1715 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1716 * internal references as well.
1717 * Only then we have finally detached. */
1718int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
1393{ 1719{
1394 int i, ns;
1395 enum drbd_ret_code retcode; 1720 enum drbd_ret_code retcode;
1396 struct net_conf *new_conf = NULL; 1721 struct detach_parms parms = { };
1397 struct crypto_hash *tfm = NULL; 1722 int err;
1398 struct crypto_hash *integrity_w_tfm = NULL;
1399 struct crypto_hash *integrity_r_tfm = NULL;
1400 struct hlist_head *new_tl_hash = NULL;
1401 struct hlist_head *new_ee_hash = NULL;
1402 struct drbd_conf *odev;
1403 char hmac_name[CRYPTO_MAX_ALG_NAME];
1404 void *int_dig_out = NULL;
1405 void *int_dig_in = NULL;
1406 void *int_dig_vv = NULL;
1407 struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr;
1408 1723
1409 drbd_reconfig_start(mdev); 1724 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1725 if (!adm_ctx.reply_skb)
1726 return retcode;
1727 if (retcode != NO_ERROR)
1728 goto out;
1410 1729
1411 if (mdev->state.conn > C_STANDALONE) { 1730 if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
1412 retcode = ERR_NET_CONFIGURED; 1731 err = detach_parms_from_attrs(&parms, info);
1413 goto fail; 1732 if (err) {
1733 retcode = ERR_MANDATORY_TAG;
1734 drbd_msg_put_info(from_attrs_err_to_txt(err));
1735 goto out;
1736 }
1737 }
1738
1739 retcode = adm_detach(adm_ctx.mdev, parms.force_detach);
1740out:
1741 drbd_adm_finish(info, retcode);
1742 return 0;
1743}
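
All of the new drbd_adm_* entry points follow the skeleton visible in drbd_adm_detach() above: drbd_adm_prepare() resolves the netlink request into the global adm_ctx and allocates the reply skb, optional parameter structs are filled from nested attributes, and drbd_adm_finish() sends the result back. A sketch of what a hypothetical additional handler would look like under that convention (the handler itself is made up; the helpers, adm_ctx and the return-code conventions are the ones used throughout this file):

/* Hypothetical handler, shown only to illustrate the common skeleton. */
int drbd_adm_example(struct sk_buff *skb, struct genl_info *info)
{
	enum drbd_ret_code retcode;

	retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
	if (!adm_ctx.reply_skb)
		return retcode;		/* no reply skb: nothing more we can report */
	if (retcode != NO_ERROR)
		goto out;		/* lookup failed; error goes back via netlink */

	/* ... operate on adm_ctx.mdev (or adm_ctx.tconn) here ... */

out:
	drbd_adm_finish(info, retcode);
	return 0;
}
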
1744
1745static bool conn_resync_running(struct drbd_tconn *tconn)
1746{
1747 struct drbd_conf *mdev;
1748 bool rv = false;
1749 int vnr;
1750
1751 rcu_read_lock();
1752 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1753 if (mdev->state.conn == C_SYNC_SOURCE ||
1754 mdev->state.conn == C_SYNC_TARGET ||
1755 mdev->state.conn == C_PAUSED_SYNC_S ||
1756 mdev->state.conn == C_PAUSED_SYNC_T) {
1757 rv = true;
1758 break;
1759 }
1760 }
1761 rcu_read_unlock();
1762
1763 return rv;
1764}
1765
1766static bool conn_ov_running(struct drbd_tconn *tconn)
1767{
1768 struct drbd_conf *mdev;
1769 bool rv = false;
1770 int vnr;
1771
1772 rcu_read_lock();
1773 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1774 if (mdev->state.conn == C_VERIFY_S ||
1775 mdev->state.conn == C_VERIFY_T) {
1776 rv = true;
1777 break;
1778 }
1779 }
1780 rcu_read_unlock();
1781
1782 return rv;
1783}
1784
1785static enum drbd_ret_code
1786_check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct net_conf *new_conf)
1787{
1788 struct drbd_conf *mdev;
1789 int i;
1790
1791 if (old_conf && tconn->cstate == C_WF_REPORT_PARAMS && tconn->agreed_pro_version < 100) {
1792 if (new_conf->wire_protocol != old_conf->wire_protocol)
1793 return ERR_NEED_APV_100;
1794
1795 if (new_conf->two_primaries != old_conf->two_primaries)
1796 return ERR_NEED_APV_100;
1797
1798 if (strcmp(new_conf->integrity_alg, old_conf->integrity_alg))
1799 return ERR_NEED_APV_100;
1800 }
1801
1802 if (!new_conf->two_primaries &&
1803 conn_highest_role(tconn) == R_PRIMARY &&
1804 conn_highest_peer(tconn) == R_PRIMARY)
1805 return ERR_NEED_ALLOW_TWO_PRI;
1806
1807 if (new_conf->two_primaries &&
1808 (new_conf->wire_protocol != DRBD_PROT_C))
1809 return ERR_NOT_PROTO_C;
1810
1811 idr_for_each_entry(&tconn->volumes, mdev, i) {
1812 if (get_ldev(mdev)) {
1813 enum drbd_fencing_p fp = rcu_dereference(mdev->ldev->disk_conf)->fencing;
1814 put_ldev(mdev);
1815 if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
1816 return ERR_STONITH_AND_PROT_A;
1817 }
1818 if (mdev->state.role == R_PRIMARY && new_conf->discard_my_data)
1819 return ERR_DISCARD_IMPOSSIBLE;
1820 }
1821
1822 if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A)
1823 return ERR_CONG_NOT_PROTO_A;
1824
1825 return NO_ERROR;
1826}
1827
1828static enum drbd_ret_code
1829check_net_options(struct drbd_tconn *tconn, struct net_conf *new_conf)
1830{
1831 static enum drbd_ret_code rv;
1832 struct drbd_conf *mdev;
1833 int i;
1834
1835 rcu_read_lock();
1836 rv = _check_net_options(tconn, rcu_dereference(tconn->net_conf), new_conf);
1837 rcu_read_unlock();
1838
1839 /* tconn->volumes protected by genl_lock() here */
1840 idr_for_each_entry(&tconn->volumes, mdev, i) {
1841 if (!mdev->bitmap) {
1842 if(drbd_bm_init(mdev))
1843 return ERR_NOMEM;
1844 }
1845 }
1846
1847 return rv;
1848}
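
Both helpers above walk every volume of the connection with idr_for_each_entry(); as the comment notes, tconn->volumes is protected by genl_lock() here, so the walk needs no extra locking of its own. A minimal, self-contained sketch of that iteration pattern (the struct and the counting are stand-ins, not drbd code):

#include <linux/idr.h>

/* Stand-in for struct drbd_conf; only to illustrate the walk. */
struct volume {
	int busy;
};

/* Walk all entries of an idr the way check_net_options() walks
 * tconn->volumes.  The caller must hold whatever serializes
 * additions and removals (genl_lock() in the drbd case). */
static int count_busy_volumes(struct idr *volumes)
{
	struct volume *vol;
	int vnr, busy = 0;

	idr_for_each_entry(volumes, vol, vnr) {
		if (vol->busy)
			busy++;
	}
	return busy;
}
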
1849
1850struct crypto {
1851 struct crypto_hash *verify_tfm;
1852 struct crypto_hash *csums_tfm;
1853 struct crypto_hash *cram_hmac_tfm;
1854 struct crypto_hash *integrity_tfm;
1855};
1856
1857static int
1858alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg)
1859{
1860 if (!tfm_name[0])
1861 return NO_ERROR;
1862
1863 *tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC);
1864 if (IS_ERR(*tfm)) {
1865 *tfm = NULL;
1866 return err_alg;
1414 } 1867 }
1415 1868
1416 /* allocation not in the IO path, cqueue thread context */ 1869 return NO_ERROR;
1870}
1871
1872static enum drbd_ret_code
1873alloc_crypto(struct crypto *crypto, struct net_conf *new_conf)
1874{
1875 char hmac_name[CRYPTO_MAX_ALG_NAME];
1876 enum drbd_ret_code rv;
1877
1878 rv = alloc_hash(&crypto->csums_tfm, new_conf->csums_alg,
1879 ERR_CSUMS_ALG);
1880 if (rv != NO_ERROR)
1881 return rv;
1882 rv = alloc_hash(&crypto->verify_tfm, new_conf->verify_alg,
1883 ERR_VERIFY_ALG);
1884 if (rv != NO_ERROR)
1885 return rv;
1886 rv = alloc_hash(&crypto->integrity_tfm, new_conf->integrity_alg,
1887 ERR_INTEGRITY_ALG);
1888 if (rv != NO_ERROR)
1889 return rv;
1890 if (new_conf->cram_hmac_alg[0] != 0) {
1891 snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
1892 new_conf->cram_hmac_alg);
1893
1894 rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name,
1895 ERR_AUTH_ALG);
1896 }
1897
1898 return rv;
1899}
1900
1901static void free_crypto(struct crypto *crypto)
1902{
1903 crypto_free_hash(crypto->cram_hmac_tfm);
1904 crypto_free_hash(crypto->integrity_tfm);
1905 crypto_free_hash(crypto->csums_tfm);
1906 crypto_free_hash(crypto->verify_tfm);
1907}
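
alloc_crypto() above returns at the first allocation that fails, with the error code of that algorithm; because crypto_free_hash() accepts a NULL tfm, free_crypto() can then be called on the partially filled struct without further checks. A sketch of the intended calling convention follows; it is not the exact flow of drbd_adm_net_opts(), just the idiom these helpers are built for (the example function is hypothetical, the types and helpers are the ones above):

/* Illustrative use of the crypto helpers above. */
static enum drbd_ret_code example_alloc_and_install(struct drbd_tconn *tconn,
						    struct net_conf *new_conf)
{
	struct crypto crypto = { };	/* all tfm pointers start out NULL */
	enum drbd_ret_code retcode;

	retcode = alloc_crypto(&crypto, new_conf);
	if (retcode != NO_ERROR)
		goto fail;

	/* Hand over the tfms that will actually be used and clear the local
	 * pointers, so the final free_crypto() only drops the leftovers. */
	crypto_free_hash(tconn->verify_tfm);
	tconn->verify_tfm = crypto.verify_tfm;
	crypto.verify_tfm = NULL;

	retcode = NO_ERROR;
fail:
	free_crypto(&crypto);	/* NULL-safe: frees only what is still owned here */
	return retcode;
}
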
1908
1909int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
1910{
1911 enum drbd_ret_code retcode;
1912 struct drbd_tconn *tconn;
1913 struct net_conf *old_conf, *new_conf = NULL;
1914 int err;
1915 int ovr; /* online verify running */
1916 int rsr; /* re-sync running */
1917 struct crypto crypto = { };
1918
1919 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
1920 if (!adm_ctx.reply_skb)
1921 return retcode;
1922 if (retcode != NO_ERROR)
1923 goto out;
1924
1925 tconn = adm_ctx.tconn;
1926
1417 new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); 1927 new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
1418 if (!new_conf) { 1928 if (!new_conf) {
1419 retcode = ERR_NOMEM; 1929 retcode = ERR_NOMEM;
1930 goto out;
1931 }
1932
1933 conn_reconfig_start(tconn);
1934
1935 mutex_lock(&tconn->data.mutex);
1936 mutex_lock(&tconn->conf_update);
1937 old_conf = tconn->net_conf;
1938
1939 if (!old_conf) {
1940 drbd_msg_put_info("net conf missing, try connect");
1941 retcode = ERR_INVALID_REQUEST;
1420 goto fail; 1942 goto fail;
1421 } 1943 }
1422 1944
1423 new_conf->timeout = DRBD_TIMEOUT_DEF; 1945 *new_conf = *old_conf;
1424 new_conf->try_connect_int = DRBD_CONNECT_INT_DEF; 1946 if (should_set_defaults(info))
1425 new_conf->ping_int = DRBD_PING_INT_DEF; 1947 set_net_conf_defaults(new_conf);
1426 new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF; 1948
1427 new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF; 1949 err = net_conf_from_attrs_for_change(new_conf, info);
1428 new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; 1950 if (err && err != -ENOMSG) {
1429 new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF;
1430 new_conf->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF;
1431 new_conf->ko_count = DRBD_KO_COUNT_DEF;
1432 new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF;
1433 new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF;
1434 new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF;
1435 new_conf->want_lose = 0;
1436 new_conf->two_primaries = 0;
1437 new_conf->wire_protocol = DRBD_PROT_C;
1438 new_conf->ping_timeo = DRBD_PING_TIMEO_DEF;
1439 new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF;
1440 new_conf->on_congestion = DRBD_ON_CONGESTION_DEF;
1441 new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF;
1442
1443 if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) {
1444 retcode = ERR_MANDATORY_TAG; 1951 retcode = ERR_MANDATORY_TAG;
1952 drbd_msg_put_info(from_attrs_err_to_txt(err));
1445 goto fail; 1953 goto fail;
1446 } 1954 }
1447 1955
1448 if (new_conf->two_primaries 1956 retcode = check_net_options(tconn, new_conf);
1449 && (new_conf->wire_protocol != DRBD_PROT_C)) { 1957 if (retcode != NO_ERROR)
1450 retcode = ERR_NOT_PROTO_C;
1451 goto fail; 1958 goto fail;
1452 }
1453 1959
1454 if (get_ldev(mdev)) { 1960 /* re-sync running */
1455 enum drbd_fencing_p fp = mdev->ldev->dc.fencing; 1961 rsr = conn_resync_running(tconn);
1456 put_ldev(mdev); 1962 if (rsr && strcmp(new_conf->csums_alg, old_conf->csums_alg)) {
1457 if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) { 1963 retcode = ERR_CSUMS_RESYNC_RUNNING;
1458 retcode = ERR_STONITH_AND_PROT_A; 1964 goto fail;
1459 goto fail;
1460 }
1461 } 1965 }
1462 1966
1463 if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) { 1967 /* online verify running */
1464 retcode = ERR_CONG_NOT_PROTO_A; 1968 ovr = conn_ov_running(tconn);
1969 if (ovr && strcmp(new_conf->verify_alg, old_conf->verify_alg)) {
1970 retcode = ERR_VERIFY_RUNNING;
1465 goto fail; 1971 goto fail;
1466 } 1972 }
1467 1973
1468 if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { 1974 retcode = alloc_crypto(&crypto, new_conf);
1469 retcode = ERR_DISCARD; 1975 if (retcode != NO_ERROR)
1470 goto fail; 1976 goto fail;
1471 }
1472 1977
1473 retcode = NO_ERROR; 1978 rcu_assign_pointer(tconn->net_conf, new_conf);
1474 1979
1475 new_my_addr = (struct sockaddr *)&new_conf->my_addr; 1980 if (!rsr) {
1476 new_peer_addr = (struct sockaddr *)&new_conf->peer_addr; 1981 crypto_free_hash(tconn->csums_tfm);
1477 for (i = 0; i < minor_count; i++) { 1982 tconn->csums_tfm = crypto.csums_tfm;
1478 odev = minor_to_mdev(i); 1983 crypto.csums_tfm = NULL;
1479 if (!odev || odev == mdev) 1984 }
1480 continue; 1985 if (!ovr) {
1481 if (get_net_conf(odev)) { 1986 crypto_free_hash(tconn->verify_tfm);
1482 taken_addr = (struct sockaddr *)&odev->net_conf->my_addr; 1987 tconn->verify_tfm = crypto.verify_tfm;
1483 if (new_conf->my_addr_len == odev->net_conf->my_addr_len && 1988 crypto.verify_tfm = NULL;
1484 !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len))
1485 retcode = ERR_LOCAL_ADDR;
1486
1487 taken_addr = (struct sockaddr *)&odev->net_conf->peer_addr;
1488 if (new_conf->peer_addr_len == odev->net_conf->peer_addr_len &&
1489 !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len))
1490 retcode = ERR_PEER_ADDR;
1491
1492 put_net_conf(odev);
1493 if (retcode != NO_ERROR)
1494 goto fail;
1495 }
1496 } 1989 }
1497 1990
1498 if (new_conf->cram_hmac_alg[0] != 0) { 1991 crypto_free_hash(tconn->integrity_tfm);
1499 snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", 1992 tconn->integrity_tfm = crypto.integrity_tfm;
1500 new_conf->cram_hmac_alg); 1993 if (tconn->cstate >= C_WF_REPORT_PARAMS && tconn->agreed_pro_version >= 100)
1501 tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC); 1994 /* Do this without trying to take tconn->data.mutex again. */
1502 if (IS_ERR(tfm)) { 1995 __drbd_send_protocol(tconn, P_PROTOCOL_UPDATE);
1503 tfm = NULL;
1504 retcode = ERR_AUTH_ALG;
1505 goto fail;
1506 }
1507 1996
1508 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) { 1997 crypto_free_hash(tconn->cram_hmac_tfm);
1509 retcode = ERR_AUTH_ALG_ND; 1998 tconn->cram_hmac_tfm = crypto.cram_hmac_tfm;
1510 goto fail;
1511 }
1512 }
1513 1999
1514 if (new_conf->integrity_alg[0]) { 2000 mutex_unlock(&tconn->conf_update);
1515 integrity_w_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC); 2001 mutex_unlock(&tconn->data.mutex);
1516 if (IS_ERR(integrity_w_tfm)) { 2002 synchronize_rcu();
1517 integrity_w_tfm = NULL; 2003 kfree(old_conf);
1518 retcode=ERR_INTEGRITY_ALG;
1519 goto fail;
1520 }
1521 2004
1522 if (!drbd_crypto_is_hash(crypto_hash_tfm(integrity_w_tfm))) { 2005 if (tconn->cstate >= C_WF_REPORT_PARAMS)
1523 retcode=ERR_INTEGRITY_ALG_ND; 2006 drbd_send_sync_param(minor_to_mdev(conn_lowest_minor(tconn)));
1524 goto fail;
1525 }
1526 2007
1527 integrity_r_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC); 2008 goto done;
1528 if (IS_ERR(integrity_r_tfm)) { 2009
1529 integrity_r_tfm = NULL; 2010 fail:
1530 retcode=ERR_INTEGRITY_ALG; 2011 mutex_unlock(&tconn->conf_update);
1531 goto fail; 2012 mutex_unlock(&tconn->data.mutex);
1532 } 2013 free_crypto(&crypto);
2014 kfree(new_conf);
2015 done:
2016 conn_reconfig_done(tconn);
2017 out:
2018 drbd_adm_finish(info, retcode);
2019 return 0;
2020}
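
The update side of the new RCU scheme is what drbd_adm_net_opts() implements above: copy the live net_conf into a freshly allocated object while holding conf_update (and data.mutex), publish the new object with rcu_assign_pointer(), drop the locks, and only then wait out existing readers with synchronize_rcu() before kfree()ing the old copy. A condensed, self-contained sketch of that publish-then-reclaim idiom, with placeholder types rather than the actual drbd structures:

#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_conf { int timeout; };

struct my_conn {
	struct mutex conf_update;	/* serializes configuration writers */
	struct my_conf __rcu *conf;
};

static int update_timeout(struct my_conn *conn, int timeout)
{
	struct my_conf *old_conf, *new_conf;

	new_conf = kmalloc(sizeof(*new_conf), GFP_KERNEL);
	if (!new_conf)
		return -ENOMEM;

	mutex_lock(&conn->conf_update);
	old_conf = rcu_dereference_protected(conn->conf,
			lockdep_is_held(&conn->conf_update));
	*new_conf = *old_conf;			/* start from the current values */
	new_conf->timeout = timeout;
	rcu_assign_pointer(conn->conf, new_conf);	/* publish */
	mutex_unlock(&conn->conf_update);

	synchronize_rcu();			/* wait for readers of old_conf */
	kfree(old_conf);			/* now safe to reclaim */
	return 0;
}
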
2021
2022int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
2023{
2024 struct drbd_conf *mdev;
2025 struct net_conf *old_conf, *new_conf = NULL;
2026 struct crypto crypto = { };
2027 struct drbd_tconn *tconn;
2028 enum drbd_ret_code retcode;
2029 int i;
2030 int err;
2031
2032 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
2033
2034 if (!adm_ctx.reply_skb)
2035 return retcode;
2036 if (retcode != NO_ERROR)
2037 goto out;
2038 if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
2039 drbd_msg_put_info("connection endpoint(s) missing");
2040 retcode = ERR_INVALID_REQUEST;
2041 goto out;
1533 } 2042 }
1534 2043
1535 ns = new_conf->max_epoch_size/8; 2044 /* No need for _rcu here. All reconfiguration is
1536 if (mdev->tl_hash_s != ns) { 2045 * strictly serialized on genl_lock(). We are protected against
1537 new_tl_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL); 2046 * concurrent reconfiguration/addition/deletion */
1538 if (!new_tl_hash) { 2047 list_for_each_entry(tconn, &drbd_tconns, all_tconn) {
1539 retcode = ERR_NOMEM; 2048 if (nla_len(adm_ctx.my_addr) == tconn->my_addr_len &&
1540 goto fail; 2049 !memcmp(nla_data(adm_ctx.my_addr), &tconn->my_addr, tconn->my_addr_len)) {
2050 retcode = ERR_LOCAL_ADDR;
2051 goto out;
1541 } 2052 }
1542 }
1543 2053
1544 ns = new_conf->max_buffers/8; 2054 if (nla_len(adm_ctx.peer_addr) == tconn->peer_addr_len &&
1545 if (new_conf->two_primaries && (mdev->ee_hash_s != ns)) { 2055 !memcmp(nla_data(adm_ctx.peer_addr), &tconn->peer_addr, tconn->peer_addr_len)) {
1546 new_ee_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL); 2056 retcode = ERR_PEER_ADDR;
1547 if (!new_ee_hash) { 2057 goto out;
1548 retcode = ERR_NOMEM;
1549 goto fail;
1550 } 2058 }
1551 } 2059 }
1552 2060
1553 ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0; 2061 tconn = adm_ctx.tconn;
2062 conn_reconfig_start(tconn);
1554 2063
1555 if (integrity_w_tfm) { 2064 if (tconn->cstate > C_STANDALONE) {
1556 i = crypto_hash_digestsize(integrity_w_tfm); 2065 retcode = ERR_NET_CONFIGURED;
1557 int_dig_out = kmalloc(i, GFP_KERNEL); 2066 goto fail;
1558 if (!int_dig_out) {
1559 retcode = ERR_NOMEM;
1560 goto fail;
1561 }
1562 int_dig_in = kmalloc(i, GFP_KERNEL);
1563 if (!int_dig_in) {
1564 retcode = ERR_NOMEM;
1565 goto fail;
1566 }
1567 int_dig_vv = kmalloc(i, GFP_KERNEL);
1568 if (!int_dig_vv) {
1569 retcode = ERR_NOMEM;
1570 goto fail;
1571 }
1572 } 2067 }
1573 2068
1574 if (!mdev->bitmap) { 2069 /* allocation not in the IO path, drbdsetup / netlink process context */
1575 if(drbd_bm_init(mdev)) { 2070 new_conf = kzalloc(sizeof(*new_conf), GFP_KERNEL);
1576 retcode = ERR_NOMEM; 2071 if (!new_conf) {
1577 goto fail; 2072 retcode = ERR_NOMEM;
1578 } 2073 goto fail;
1579 } 2074 }
1580 2075
1581 drbd_flush_workqueue(mdev); 2076 set_net_conf_defaults(new_conf);
1582 spin_lock_irq(&mdev->req_lock); 2077
1583 if (mdev->net_conf != NULL) { 2078 err = net_conf_from_attrs(new_conf, info);
1584 retcode = ERR_NET_CONFIGURED; 2079 if (err && err != -ENOMSG) {
1585 spin_unlock_irq(&mdev->req_lock); 2080 retcode = ERR_MANDATORY_TAG;
2081 drbd_msg_put_info(from_attrs_err_to_txt(err));
1586 goto fail; 2082 goto fail;
1587 } 2083 }
1588 mdev->net_conf = new_conf;
1589 2084
1590 mdev->send_cnt = 0; 2085 retcode = check_net_options(tconn, new_conf);
1591 mdev->recv_cnt = 0; 2086 if (retcode != NO_ERROR)
2087 goto fail;
1592 2088
1593 if (new_tl_hash) { 2089 retcode = alloc_crypto(&crypto, new_conf);
1594 kfree(mdev->tl_hash); 2090 if (retcode != NO_ERROR)
1595 mdev->tl_hash_s = mdev->net_conf->max_epoch_size/8; 2091 goto fail;
1596 mdev->tl_hash = new_tl_hash;
1597 }
1598 2092
1599 if (new_ee_hash) { 2093 ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
1600 kfree(mdev->ee_hash); 2094
1601 mdev->ee_hash_s = mdev->net_conf->max_buffers/8; 2095 conn_flush_workqueue(tconn);
1602 mdev->ee_hash = new_ee_hash; 2096
2097 mutex_lock(&tconn->conf_update);
2098 old_conf = tconn->net_conf;
2099 if (old_conf) {
2100 retcode = ERR_NET_CONFIGURED;
2101 mutex_unlock(&tconn->conf_update);
2102 goto fail;
1603 } 2103 }
2104 rcu_assign_pointer(tconn->net_conf, new_conf);
1604 2105
1605 crypto_free_hash(mdev->cram_hmac_tfm); 2106 conn_free_crypto(tconn);
1606 mdev->cram_hmac_tfm = tfm; 2107 tconn->cram_hmac_tfm = crypto.cram_hmac_tfm;
2108 tconn->integrity_tfm = crypto.integrity_tfm;
2109 tconn->csums_tfm = crypto.csums_tfm;
2110 tconn->verify_tfm = crypto.verify_tfm;
1607 2111
1608 crypto_free_hash(mdev->integrity_w_tfm); 2112 tconn->my_addr_len = nla_len(adm_ctx.my_addr);
1609 mdev->integrity_w_tfm = integrity_w_tfm; 2113 memcpy(&tconn->my_addr, nla_data(adm_ctx.my_addr), tconn->my_addr_len);
2114 tconn->peer_addr_len = nla_len(adm_ctx.peer_addr);
2115 memcpy(&tconn->peer_addr, nla_data(adm_ctx.peer_addr), tconn->peer_addr_len);
1610 2116
1611 crypto_free_hash(mdev->integrity_r_tfm); 2117 mutex_unlock(&tconn->conf_update);
1612 mdev->integrity_r_tfm = integrity_r_tfm;
1613 2118
1614 kfree(mdev->int_dig_out); 2119 rcu_read_lock();
1615 kfree(mdev->int_dig_in); 2120 idr_for_each_entry(&tconn->volumes, mdev, i) {
1616 kfree(mdev->int_dig_vv); 2121 mdev->send_cnt = 0;
1617 mdev->int_dig_out=int_dig_out; 2122 mdev->recv_cnt = 0;
1618 mdev->int_dig_in=int_dig_in; 2123 }
1619 mdev->int_dig_vv=int_dig_vv; 2124 rcu_read_unlock();
1620 retcode = _drbd_set_state(_NS(mdev, conn, C_UNCONNECTED), CS_VERBOSE, NULL);
1621 spin_unlock_irq(&mdev->req_lock);
1622 2125
1623 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); 2126 retcode = conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
1624 reply->ret_code = retcode; 2127
1625 drbd_reconfig_done(mdev); 2128 conn_reconfig_done(tconn);
2129 drbd_adm_finish(info, retcode);
1626 return 0; 2130 return 0;
1627 2131
1628fail: 2132fail:
1629 kfree(int_dig_out); 2133 free_crypto(&crypto);
1630 kfree(int_dig_in);
1631 kfree(int_dig_vv);
1632 crypto_free_hash(tfm);
1633 crypto_free_hash(integrity_w_tfm);
1634 crypto_free_hash(integrity_r_tfm);
1635 kfree(new_tl_hash);
1636 kfree(new_ee_hash);
1637 kfree(new_conf); 2134 kfree(new_conf);
1638 2135
1639 reply->ret_code = retcode; 2136 conn_reconfig_done(tconn);
1640 drbd_reconfig_done(mdev); 2137out:
2138 drbd_adm_finish(info, retcode);
1641 return 0; 2139 return 0;
1642} 2140}
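
drbd_adm_connect() above rejects endpoints that are already in use by comparing the binary my_addr/peer_addr netlink attributes against the addresses stored in every existing tconn, using nla_len() and nla_data(). A small sketch of that comparison factored into a helper; the helper itself is hypothetical, while the nla_* accessors are the standard netlink ones:

#include <linux/string.h>
#include <linux/types.h>
#include <net/netlink.h>

/* Compare a binary netlink attribute (e.g. a packed sockaddr) against a
 * stored copy, as done for the ERR_LOCAL_ADDR / ERR_PEER_ADDR checks above. */
static bool nla_matches_stored(const struct nlattr *attr,
			       const void *stored, int stored_len)
{
	return nla_len(attr) == stored_len &&
	       memcmp(nla_data(attr), stored, stored_len) == 0;
}
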
1643 2141
1644static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 2142static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool force)
1645 struct drbd_nl_cfg_reply *reply)
1646{ 2143{
1647 int retcode; 2144 enum drbd_state_rv rv;
1648 struct disconnect dc;
1649
1650 memset(&dc, 0, sizeof(struct disconnect));
1651 if (!disconnect_from_tags(mdev, nlp->tag_list, &dc)) {
1652 retcode = ERR_MANDATORY_TAG;
1653 goto fail;
1654 }
1655
1656 if (dc.force) {
1657 spin_lock_irq(&mdev->req_lock);
1658 if (mdev->state.conn >= C_WF_CONNECTION)
1659 _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL);
1660 spin_unlock_irq(&mdev->req_lock);
1661 goto done;
1662 }
1663 2145
1664 retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED); 2146 rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING),
2147 force ? CS_HARD : 0);
1665 2148
1666 if (retcode == SS_NOTHING_TO_DO) 2149 switch (rv) {
1667 goto done; 2150 case SS_NOTHING_TO_DO:
1668 else if (retcode == SS_ALREADY_STANDALONE) 2151 break;
1669 goto done; 2152 case SS_ALREADY_STANDALONE:
1670 else if (retcode == SS_PRIMARY_NOP) { 2153 return SS_SUCCESS;
1671 /* Our statche checking code wants to see the peer outdated. */ 2154 case SS_PRIMARY_NOP:
1672 retcode = drbd_request_state(mdev, NS2(conn, C_DISCONNECTING, 2155 /* Our state checking code wants to see the peer outdated. */
1673 pdsk, D_OUTDATED)); 2156 rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
1674 } else if (retcode == SS_CW_FAILED_BY_PEER) { 2157 pdsk, D_OUTDATED), CS_VERBOSE);
2158 break;
2159 case SS_CW_FAILED_BY_PEER:
1675 /* The peer probably wants to see us outdated. */ 2160 /* The peer probably wants to see us outdated. */
1676 retcode = _drbd_request_state(mdev, NS2(conn, C_DISCONNECTING, 2161 rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
1677 disk, D_OUTDATED), 2162 disk, D_OUTDATED), 0);
1678 CS_ORDERED); 2163 if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
1679 if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) { 2164 rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING),
1680 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 2165 CS_HARD);
1681 retcode = SS_SUCCESS;
1682 } 2166 }
2167 break;
2168 default:;
2169 /* no special handling necessary */
2170 }
2171
2172 if (rv >= SS_SUCCESS) {
2173 enum drbd_state_rv rv2;
2174 /* No one else can reconfigure the network while I am here.
2175 * The state handling only uses drbd_thread_stop_nowait(),
2176 * we want to really wait here until the receiver is no more.
2177 */
2178 drbd_thread_stop(&adm_ctx.tconn->receiver);
2179
2180 /* Race breaker. This additional state change request may be
2181 * necessary, if this was a forced disconnect during a receiver
2182 * restart. We may have "killed" the receiver thread just
2183 * after drbdd_init() returned. Typically, we should be
2184 * C_STANDALONE already, now, and this becomes a no-op.
2185 */
2186 rv2 = conn_request_state(tconn, NS(conn, C_STANDALONE),
2187 CS_VERBOSE | CS_HARD);
2188 if (rv2 < SS_SUCCESS)
2189 conn_err(tconn,
2190 "unexpected rv2=%d in conn_try_disconnect()\n",
2191 rv2);
1683 } 2192 }
2193 return rv;
2194}
1684 2195
1685 if (retcode < SS_SUCCESS) 2196int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
1686 goto fail; 2197{
2198 struct disconnect_parms parms;
2199 struct drbd_tconn *tconn;
2200 enum drbd_state_rv rv;
2201 enum drbd_ret_code retcode;
2202 int err;
1687 2203
1688 if (wait_event_interruptible(mdev->state_wait, 2204 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION);
1689 mdev->state.conn != C_DISCONNECTING)) { 2205 if (!adm_ctx.reply_skb)
1690 /* Do not test for mdev->state.conn == C_STANDALONE, since 2206 return retcode;
1691 someone else might connect us in the mean time! */ 2207 if (retcode != NO_ERROR)
1692 retcode = ERR_INTR;
1693 goto fail; 2208 goto fail;
2209
2210 tconn = adm_ctx.tconn;
2211 memset(&parms, 0, sizeof(parms));
2212 if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
2213 err = disconnect_parms_from_attrs(&parms, info);
2214 if (err) {
2215 retcode = ERR_MANDATORY_TAG;
2216 drbd_msg_put_info(from_attrs_err_to_txt(err));
2217 goto fail;
2218 }
1694 } 2219 }
1695 2220
1696 done: 2221 rv = conn_try_disconnect(tconn, parms.force_disconnect);
1697 retcode = NO_ERROR; 2222 if (rv < SS_SUCCESS)
2223 retcode = rv; /* FIXME: Type mismatch. */
2224 else
2225 retcode = NO_ERROR;
1698 fail: 2226 fail:
1699 drbd_md_sync(mdev); 2227 drbd_adm_finish(info, retcode);
1700 reply->ret_code = retcode;
1701 return 0; 2228 return 0;
1702} 2229}
1703 2230
@@ -1709,7 +2236,7 @@ void resync_after_online_grow(struct drbd_conf *mdev)
1709 if (mdev->state.role != mdev->state.peer) 2236 if (mdev->state.role != mdev->state.peer)
1710 iass = (mdev->state.role == R_PRIMARY); 2237 iass = (mdev->state.role == R_PRIMARY);
1711 else 2238 else
1712 iass = drbd_test_flag(mdev, DISCARD_CONCURRENT); 2239 iass = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags);
1713 2240
1714 if (iass) 2241 if (iass)
1715 drbd_start_resync(mdev, C_SYNC_SOURCE); 2242 drbd_start_resync(mdev, C_SYNC_SOURCE);
@@ -1717,20 +2244,34 @@ void resync_after_online_grow(struct drbd_conf *mdev)
1717 _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE); 2244 _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
1718} 2245}
1719 2246
1720static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 2247int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
1721 struct drbd_nl_cfg_reply *reply)
1722{ 2248{
1723 struct resize rs; 2249 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
1724 int retcode = NO_ERROR; 2250 struct resize_parms rs;
2251 struct drbd_conf *mdev;
2252 enum drbd_ret_code retcode;
1725 enum determine_dev_size dd; 2253 enum determine_dev_size dd;
1726 enum dds_flags ddsf; 2254 enum dds_flags ddsf;
2255 sector_t u_size;
2256 int err;
1727 2257
1728 memset(&rs, 0, sizeof(struct resize)); 2258 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1729 if (!resize_from_tags(mdev, nlp->tag_list, &rs)) { 2259 if (!adm_ctx.reply_skb)
1730 retcode = ERR_MANDATORY_TAG; 2260 return retcode;
2261 if (retcode != NO_ERROR)
1731 goto fail; 2262 goto fail;
2263
2264 memset(&rs, 0, sizeof(struct resize_parms));
2265 if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
2266 err = resize_parms_from_attrs(&rs, info);
2267 if (err) {
2268 retcode = ERR_MANDATORY_TAG;
2269 drbd_msg_put_info(from_attrs_err_to_txt(err));
2270 goto fail;
2271 }
1732 } 2272 }
1733 2273
2274 mdev = adm_ctx.mdev;
1734 if (mdev->state.conn > C_CONNECTED) { 2275 if (mdev->state.conn > C_CONNECTED) {
1735 retcode = ERR_RESIZE_RESYNC; 2276 retcode = ERR_RESIZE_RESYNC;
1736 goto fail; 2277 goto fail;
@@ -1747,15 +2288,36 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1747 goto fail; 2288 goto fail;
1748 } 2289 }
1749 2290
1750 if (rs.no_resync && mdev->agreed_pro_version < 93) { 2291 if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) {
1751 retcode = ERR_NEED_APV_93; 2292 retcode = ERR_NEED_APV_93;
1752 goto fail_ldev; 2293 goto fail_ldev;
1753 } 2294 }
1754 2295
2296 rcu_read_lock();
2297 u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
2298 rcu_read_unlock();
2299 if (u_size != (sector_t)rs.resize_size) {
2300 new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
2301 if (!new_disk_conf) {
2302 retcode = ERR_NOMEM;
2303 goto fail_ldev;
2304 }
2305 }
2306
1755 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) 2307 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
1756 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); 2308 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
1757 2309
1758 mdev->ldev->dc.disk_size = (sector_t)rs.resize_size; 2310 if (new_disk_conf) {
2311 mutex_lock(&mdev->tconn->conf_update);
2312 old_disk_conf = mdev->ldev->disk_conf;
2313 *new_disk_conf = *old_disk_conf;
2314 new_disk_conf->disk_size = (sector_t)rs.resize_size;
2315 rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
2316 mutex_unlock(&mdev->tconn->conf_update);
2317 synchronize_rcu();
2318 kfree(old_disk_conf);
2319 }
2320
1759 ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0); 2321 ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
1760 dd = drbd_determine_dev_size(mdev, ddsf); 2322 dd = drbd_determine_dev_size(mdev, ddsf);
1761 drbd_md_sync(mdev); 2323 drbd_md_sync(mdev);
@@ -1767,14 +2329,14 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1767 2329
1768 if (mdev->state.conn == C_CONNECTED) { 2330 if (mdev->state.conn == C_CONNECTED) {
1769 if (dd == grew) 2331 if (dd == grew)
1770 drbd_set_flag(mdev, RESIZE_PENDING); 2332 set_bit(RESIZE_PENDING, &mdev->flags);
1771 2333
1772 drbd_send_uuids(mdev); 2334 drbd_send_uuids(mdev);
1773 drbd_send_sizes(mdev, 1, ddsf); 2335 drbd_send_sizes(mdev, 1, ddsf);
1774 } 2336 }
1775 2337
1776 fail: 2338 fail:
1777 reply->ret_code = retcode; 2339 drbd_adm_finish(info, retcode);
1778 return 0; 2340 return 0;
1779 2341
1780 fail_ldev: 2342 fail_ldev:
@@ -1782,210 +2344,61 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1782 goto fail; 2344 goto fail;
1783} 2345}
1784 2346
1785static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 2347int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
1786 struct drbd_nl_cfg_reply *reply)
1787{ 2348{
1788 int retcode = NO_ERROR; 2349 enum drbd_ret_code retcode;
2350 struct drbd_tconn *tconn;
2351 struct res_opts res_opts;
1789 int err; 2352 int err;
1790 int ovr; /* online verify running */
1791 int rsr; /* re-sync running */
1792 struct crypto_hash *verify_tfm = NULL;
1793 struct crypto_hash *csums_tfm = NULL;
1794 struct syncer_conf sc;
1795 cpumask_var_t new_cpu_mask;
1796 int *rs_plan_s = NULL;
1797 int fifo_size;
1798
1799 if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
1800 retcode = ERR_NOMEM;
1801 goto fail;
1802 }
1803
1804 if (nlp->flags & DRBD_NL_SET_DEFAULTS) {
1805 memset(&sc, 0, sizeof(struct syncer_conf));
1806 sc.rate = DRBD_RATE_DEF;
1807 sc.after = DRBD_AFTER_DEF;
1808 sc.al_extents = DRBD_AL_EXTENTS_DEF;
1809 sc.on_no_data = DRBD_ON_NO_DATA_DEF;
1810 sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF;
1811 sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF;
1812 sc.c_fill_target = DRBD_C_FILL_TARGET_DEF;
1813 sc.c_max_rate = DRBD_C_MAX_RATE_DEF;
1814 sc.c_min_rate = DRBD_C_MIN_RATE_DEF;
1815 } else
1816 memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
1817 2353
1818 if (!syncer_conf_from_tags(mdev, nlp->tag_list, &sc)) { 2354 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
1819 retcode = ERR_MANDATORY_TAG; 2355 if (!adm_ctx.reply_skb)
1820 goto fail; 2356 return retcode;
1821 }
1822
1823 /* re-sync running */
1824 rsr = ( mdev->state.conn == C_SYNC_SOURCE ||
1825 mdev->state.conn == C_SYNC_TARGET ||
1826 mdev->state.conn == C_PAUSED_SYNC_S ||
1827 mdev->state.conn == C_PAUSED_SYNC_T );
1828
1829 if (rsr && strcmp(sc.csums_alg, mdev->sync_conf.csums_alg)) {
1830 retcode = ERR_CSUMS_RESYNC_RUNNING;
1831 goto fail;
1832 }
1833
1834 if (!rsr && sc.csums_alg[0]) {
1835 csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC);
1836 if (IS_ERR(csums_tfm)) {
1837 csums_tfm = NULL;
1838 retcode = ERR_CSUMS_ALG;
1839 goto fail;
1840 }
1841
1842 if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) {
1843 retcode = ERR_CSUMS_ALG_ND;
1844 goto fail;
1845 }
1846 }
1847
1848 /* online verify running */
1849 ovr = (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T);
1850
1851 if (ovr) {
1852 if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) {
1853 retcode = ERR_VERIFY_RUNNING;
1854 goto fail;
1855 }
1856 }
1857
1858 if (!ovr && sc.verify_alg[0]) {
1859 verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC);
1860 if (IS_ERR(verify_tfm)) {
1861 verify_tfm = NULL;
1862 retcode = ERR_VERIFY_ALG;
1863 goto fail;
1864 }
1865
1866 if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) {
1867 retcode = ERR_VERIFY_ALG_ND;
1868 goto fail;
1869 }
1870 }
1871
1872 /* silently ignore cpu mask on UP kernel */
1873 if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) {
1874 err = bitmap_parse(sc.cpu_mask, 32,
1875 cpumask_bits(new_cpu_mask), nr_cpu_ids);
1876 if (err) {
1877 dev_warn(DEV, "bitmap_parse() failed with %d\n", err);
1878 retcode = ERR_CPU_MASK_PARSE;
1879 goto fail;
1880 }
1881 }
1882
1883 ERR_IF (sc.rate < 1) sc.rate = 1;
1884 ERR_IF (sc.al_extents < 7) sc.al_extents = 127; /* arbitrary minimum */
1885#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT)
1886 if (sc.al_extents > AL_MAX) {
1887 dev_err(DEV, "sc.al_extents > %d\n", AL_MAX);
1888 sc.al_extents = AL_MAX;
1889 }
1890#undef AL_MAX
1891
1892 /* to avoid spurious errors when configuring minors before configuring
1893 * the minors they depend on: if necessary, first create the minor we
1894 * depend on */
1895 if (sc.after >= 0)
1896 ensure_mdev(sc.after, 1);
1897
1898 /* most sanity checks done, try to assign the new sync-after
1899 * dependency. need to hold the global lock in there,
1900 * to avoid a race in the dependency loop check. */
1901 retcode = drbd_alter_sa(mdev, sc.after);
1902 if (retcode != NO_ERROR) 2357 if (retcode != NO_ERROR)
1903 goto fail; 2358 goto fail;
2359 tconn = adm_ctx.tconn;
1904 2360
1905 fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ; 2361 res_opts = tconn->res_opts;
1906 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { 2362 if (should_set_defaults(info))
1907 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); 2363 set_res_opts_defaults(&res_opts);
1908 if (!rs_plan_s) {
1909 dev_err(DEV, "kmalloc of fifo_buffer failed");
1910 retcode = ERR_NOMEM;
1911 goto fail;
1912 }
1913 }
1914 2364
1915 /* ok, assign the rest of it as well. 2365 err = res_opts_from_attrs(&res_opts, info);
1916 * lock against receive_SyncParam() */ 2366 if (err && err != -ENOMSG) {
1917 spin_lock(&mdev->peer_seq_lock); 2367 retcode = ERR_MANDATORY_TAG;
1918 mdev->sync_conf = sc; 2368 drbd_msg_put_info(from_attrs_err_to_txt(err));
1919 2369 goto fail;
1920 if (!rsr) {
1921 crypto_free_hash(mdev->csums_tfm);
1922 mdev->csums_tfm = csums_tfm;
1923 csums_tfm = NULL;
1924 }
1925
1926 if (!ovr) {
1927 crypto_free_hash(mdev->verify_tfm);
1928 mdev->verify_tfm = verify_tfm;
1929 verify_tfm = NULL;
1930 }
1931
1932 if (fifo_size != mdev->rs_plan_s.size) {
1933 kfree(mdev->rs_plan_s.values);
1934 mdev->rs_plan_s.values = rs_plan_s;
1935 mdev->rs_plan_s.size = fifo_size;
1936 mdev->rs_planed = 0;
1937 rs_plan_s = NULL;
1938 } 2370 }
1939 2371
1940 spin_unlock(&mdev->peer_seq_lock); 2372 err = set_resource_options(tconn, &res_opts);
1941 2373 if (err) {
1942 if (get_ldev(mdev)) { 2374 retcode = ERR_INVALID_REQUEST;
1943 wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); 2375 if (err == -ENOMEM)
1944 drbd_al_shrink(mdev);
1945 err = drbd_check_al_size(mdev);
1946 lc_unlock(mdev->act_log);
1947 wake_up(&mdev->al_wait);
1948
1949 put_ldev(mdev);
1950 drbd_md_sync(mdev);
1951
1952 if (err) {
1953 retcode = ERR_NOMEM; 2376 retcode = ERR_NOMEM;
1954 goto fail;
1955 }
1956 }
1957
1958 if (mdev->state.conn >= C_CONNECTED)
1959 drbd_send_sync_param(mdev, &sc);
1960
1961 if (!cpumask_equal(mdev->cpu_mask, new_cpu_mask)) {
1962 cpumask_copy(mdev->cpu_mask, new_cpu_mask);
1963 drbd_calc_cpu_mask(mdev);
1964 mdev->receiver.reset_cpu_mask = 1;
1965 mdev->asender.reset_cpu_mask = 1;
1966 mdev->worker.reset_cpu_mask = 1;
1967 } 2377 }
1968 2378
1969 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
1970fail: 2379fail:
1971 kfree(rs_plan_s); 2380 drbd_adm_finish(info, retcode);
1972 free_cpumask_var(new_cpu_mask);
1973 crypto_free_hash(csums_tfm);
1974 crypto_free_hash(verify_tfm);
1975 reply->ret_code = retcode;
1976 return 0; 2381 return 0;
1977} 2382}
1978 2383
1979static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 2384int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
1980 struct drbd_nl_cfg_reply *reply)
1981{ 2385{
1982 int retcode; 2386 struct drbd_conf *mdev;
2387 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2388
2389 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2390 if (!adm_ctx.reply_skb)
2391 return retcode;
2392 if (retcode != NO_ERROR)
2393 goto out;
2394
2395 mdev = adm_ctx.mdev;
1983 2396
1984 /* If there is still bitmap IO pending, probably because of a previous 2397 /* If there is still bitmap IO pending, probably because of a previous
1985 * resync just being finished, wait for it before requesting a new resync. 2398 * resync just being finished, wait for it before requesting a new resync.
1986 * Also wait for it's after_state_ch(). */ 2399 * Also wait for it's after_state_ch(). */
1987 drbd_suspend_io(mdev); 2400 drbd_suspend_io(mdev);
1988 wait_event(mdev->misc_wait, !drbd_test_flag(mdev, BITMAP_IO)); 2401 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
1989 drbd_flush_workqueue(mdev); 2402 drbd_flush_workqueue(mdev);
1990 2403
1991 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); 2404 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
@@ -1994,10 +2407,10 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
1994 retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); 2407 retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
1995 2408
1996 while (retcode == SS_NEED_CONNECTION) { 2409 while (retcode == SS_NEED_CONNECTION) {
1997 spin_lock_irq(&mdev->req_lock); 2410 spin_lock_irq(&mdev->tconn->req_lock);
1998 if (mdev->state.conn < C_CONNECTED) 2411 if (mdev->state.conn < C_CONNECTED)
1999 retcode = _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_VERBOSE, NULL); 2412 retcode = _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_VERBOSE, NULL);
2000 spin_unlock_irq(&mdev->req_lock); 2413 spin_unlock_irq(&mdev->tconn->req_lock);
2001 2414
2002 if (retcode != SS_NEED_CONNECTION) 2415 if (retcode != SS_NEED_CONNECTION)
2003 break; 2416 break;
@@ -2006,7 +2419,25 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
2006 } 2419 }
2007 drbd_resume_io(mdev); 2420 drbd_resume_io(mdev);
2008 2421
2009 reply->ret_code = retcode; 2422out:
2423 drbd_adm_finish(info, retcode);
2424 return 0;
2425}
2426
2427static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2428 union drbd_state mask, union drbd_state val)
2429{
2430 enum drbd_ret_code retcode;
2431
2432 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2433 if (!adm_ctx.reply_skb)
2434 return retcode;
2435 if (retcode != NO_ERROR)
2436 goto out;
2437
2438 retcode = drbd_request_state(adm_ctx.mdev, mask, val);
2439out:
2440 drbd_adm_finish(info, retcode);
2010 return 0; 2441 return 0;
2011} 2442}
2012 2443
@@ -2019,29 +2450,36 @@ static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
2019 return rv; 2450 return rv;
2020} 2451}
2021 2452
2022static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 2453int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
2023 struct drbd_nl_cfg_reply *reply)
2024{ 2454{
2025 int retcode; 2455 int retcode; /* drbd_ret_code, drbd_state_rv */
2456 struct drbd_conf *mdev;
2457
2458 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2459 if (!adm_ctx.reply_skb)
2460 return retcode;
2461 if (retcode != NO_ERROR)
2462 goto out;
2463
2464 mdev = adm_ctx.mdev;
2026 2465
2027 /* If there is still bitmap IO pending, probably because of a previous 2466 /* If there is still bitmap IO pending, probably because of a previous
2028 * resync just being finished, wait for it before requesting a new resync. 2467 * resync just being finished, wait for it before requesting a new resync.
2029 * Also wait for it's after_state_ch(). */ 2468 * Also wait for it's after_state_ch(). */
2030 drbd_suspend_io(mdev); 2469 drbd_suspend_io(mdev);
2031 wait_event(mdev->misc_wait, !drbd_test_flag(mdev, BITMAP_IO)); 2470 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
2032 drbd_flush_workqueue(mdev); 2471 drbd_flush_workqueue(mdev);
2033 2472
2034 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); 2473 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
2035
2036 if (retcode < SS_SUCCESS) { 2474 if (retcode < SS_SUCCESS) {
2037 if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) { 2475 if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) {
2038 /* The peer will get a resync upon connect anyways. Just make that 2476 /* The peer will get a resync upon connect anyways.
2039 into a full resync. */ 2477 * Just make that into a full resync. */
2040 retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); 2478 retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
2041 if (retcode >= SS_SUCCESS) { 2479 if (retcode >= SS_SUCCESS) {
2042 if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, 2480 if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
2043 "set_n_write from invalidate_peer", 2481 "set_n_write from invalidate_peer",
2044 BM_LOCKED_SET_ALLOWED)) 2482 BM_LOCKED_SET_ALLOWED))
2045 retcode = ERR_IO_MD_DISK; 2483 retcode = ERR_IO_MD_DISK;
2046 } 2484 }
2047 } else 2485 } else
@@ -2049,30 +2487,41 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re
2049 } 2487 }
2050 drbd_resume_io(mdev); 2488 drbd_resume_io(mdev);
2051 2489
2052 reply->ret_code = retcode; 2490out:
2491 drbd_adm_finish(info, retcode);
2053 return 0; 2492 return 0;
2054} 2493}
2055 2494
2056static int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 2495int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
2057 struct drbd_nl_cfg_reply *reply)
2058{ 2496{
2059 int retcode = NO_ERROR; 2497 enum drbd_ret_code retcode;
2060 2498
2061 if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO) 2499 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2062 retcode = ERR_PAUSE_IS_SET; 2500 if (!adm_ctx.reply_skb)
2501 return retcode;
2502 if (retcode != NO_ERROR)
2503 goto out;
2063 2504
2064 reply->ret_code = retcode; 2505 if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2506 retcode = ERR_PAUSE_IS_SET;
2507out:
2508 drbd_adm_finish(info, retcode);
2065 return 0; 2509 return 0;
2066} 2510}
2067 2511
2068static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 2512int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
2069 struct drbd_nl_cfg_reply *reply)
2070{ 2513{
2071 int retcode = NO_ERROR; 2514 union drbd_dev_state s;
2072 union drbd_state s; 2515 enum drbd_ret_code retcode;
2516
2517 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2518 if (!adm_ctx.reply_skb)
2519 return retcode;
2520 if (retcode != NO_ERROR)
2521 goto out;
2073 2522
2074 if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) { 2523 if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2075 s = mdev->state; 2524 s = adm_ctx.mdev->state;
2076 if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) { 2525 if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2077 retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP : 2526 retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2078 s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR; 2527 s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
@@ -2081,178 +2530,482 @@ static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
2081 } 2530 }
2082 } 2531 }
2083 2532
2084 reply->ret_code = retcode; 2533out:
2534 drbd_adm_finish(info, retcode);
2085 return 0; 2535 return 0;
2086} 2536}
2087 2537
2088static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 2538int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
2089 struct drbd_nl_cfg_reply *reply)
2090{ 2539{
2091 reply->ret_code = drbd_request_state(mdev, NS(susp, 1)); 2540 return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
2092
2093 return 0;
2094} 2541}
2095 2542
2096static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 2543int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
2097 struct drbd_nl_cfg_reply *reply)
2098{ 2544{
2099 if (drbd_test_flag(mdev, NEW_CUR_UUID)) { 2545 struct drbd_conf *mdev;
2546 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2547
2548 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2549 if (!adm_ctx.reply_skb)
2550 return retcode;
2551 if (retcode != NO_ERROR)
2552 goto out;
2553
2554 mdev = adm_ctx.mdev;
2555 if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
2100 drbd_uuid_new_current(mdev); 2556 drbd_uuid_new_current(mdev);
2101 drbd_clear_flag(mdev, NEW_CUR_UUID); 2557 clear_bit(NEW_CUR_UUID, &mdev->flags);
2102 } 2558 }
2103 drbd_suspend_io(mdev); 2559 drbd_suspend_io(mdev);
2104 reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); 2560 retcode = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2105 if (reply->ret_code == SS_SUCCESS) { 2561 if (retcode == SS_SUCCESS) {
2106 if (mdev->state.conn < C_CONNECTED) 2562 if (mdev->state.conn < C_CONNECTED)
2107 tl_clear(mdev); 2563 tl_clear(mdev->tconn);
2108 if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED) 2564 if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED)
2109 tl_restart(mdev, fail_frozen_disk_io); 2565 tl_restart(mdev->tconn, FAIL_FROZEN_DISK_IO);
2110 } 2566 }
2111 drbd_resume_io(mdev); 2567 drbd_resume_io(mdev);
2112 2568
2569out:
2570 drbd_adm_finish(info, retcode);
2113 return 0; 2571 return 0;
2114} 2572}
2115 2573
2116static int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 2574int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
2117 struct drbd_nl_cfg_reply *reply)
2118{ 2575{
2119 reply->ret_code = drbd_request_state(mdev, NS(disk, D_OUTDATED)); 2576 return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
2120 return 0;
2121} 2577}
2122 2578
2123static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 2579int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_tconn *tconn, unsigned vnr)
2124 struct drbd_nl_cfg_reply *reply)
2125{ 2580{
2126 unsigned short *tl; 2581 struct nlattr *nla;
2582 nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
2583 if (!nla)
2584 goto nla_put_failure;
2585 if (vnr != VOLUME_UNSPECIFIED &&
2586 nla_put_u32(skb, T_ctx_volume, vnr))
2587 goto nla_put_failure;
2588 if (nla_put_string(skb, T_ctx_resource_name, tconn->name))
2589 goto nla_put_failure;
2590 if (tconn->my_addr_len &&
2591 nla_put(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr))
2592 goto nla_put_failure;
2593 if (tconn->peer_addr_len &&
2594 nla_put(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr))
2595 goto nla_put_failure;
2596 nla_nest_end(skb, nla);
2597 return 0;
2127 2598
2128 tl = reply->tag_list; 2599nla_put_failure:
2600 if (nla)
2601 nla_nest_cancel(skb, nla);
2602 return -EMSGSIZE;
2603}
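
The helper above follows the usual kernel pattern for emitting a nested netlink attribute: open the nest, put the members, and on any failure cancel the partially written nest before reporting -EMSGSIZE. A minimal sketch of the same pattern, with the attribute types and function name invented purely for illustration:

	static int put_example_ctx(struct sk_buff *skb, const char *name, u32 vol)
	{
		struct nlattr *nla;

		nla = nla_nest_start(skb, EXAMPLE_NLA_CTX);	/* open the nest */
		if (!nla)
			return -EMSGSIZE;
		if (nla_put_string(skb, EXAMPLE_T_NAME, name) ||	/* members of the nest */
		    nla_put_u32(skb, EXAMPLE_T_VOLUME, vol)) {
			nla_nest_cancel(skb, nla);		/* roll back the partial nest */
			return -EMSGSIZE;
		}
		nla_nest_end(skb, nla);				/* close the nest */
		return 0;
	}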
2129 2604
2130 if (get_ldev(mdev)) { 2605int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
2131 tl = disk_conf_to_tags(mdev, &mdev->ldev->dc, tl); 2606 const struct sib_info *sib)
2132 put_ldev(mdev); 2607{
2133 } 2608 struct state_info *si = NULL; /* for sizeof(si->member); */
2609 struct net_conf *nc;
2610 struct nlattr *nla;
2611 int got_ldev;
2612 int err = 0;
2613 int exclude_sensitive;
2614
2615 /* If sib != NULL, this is drbd_bcast_event, which anyone can listen
2616 * to. So we better exclude_sensitive information.
2617 *
2618 * If sib == NULL, this is drbd_adm_get_status, executed synchronously
2619 * in the context of the requesting user process. Exclude sensitive
2620 * information, unless current has superuser.
2621 *
2622 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
2623 * relies on the current implementation of netlink_dump(), which
2624 * executes the dump callback successively from netlink_recvmsg(),
2625 * always in the context of the receiving process */
2626 exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
2627
2628 got_ldev = get_ldev(mdev);
2629
2630 /* We need to add connection name and volume number information still.
2631 * Minor number is in drbd_genlmsghdr. */
2632 if (nla_put_drbd_cfg_context(skb, mdev->tconn, mdev->vnr))
2633 goto nla_put_failure;
2634
2635 if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive))
2636 goto nla_put_failure;
2637
2638 rcu_read_lock();
2639 if (got_ldev)
2640 if (disk_conf_to_skb(skb, rcu_dereference(mdev->ldev->disk_conf), exclude_sensitive))
2641 goto nla_put_failure;
2642
2643 nc = rcu_dereference(mdev->tconn->net_conf);
2644 if (nc)
2645 err = net_conf_to_skb(skb, nc, exclude_sensitive);
2646 rcu_read_unlock();
2647 if (err)
2648 goto nla_put_failure;
2649
2650 nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
2651 if (!nla)
2652 goto nla_put_failure;
2653 if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
2654 nla_put_u32(skb, T_current_state, mdev->state.i) ||
2655 nla_put_u64(skb, T_ed_uuid, mdev->ed_uuid) ||
2656 nla_put_u64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)) ||
2657 nla_put_u64(skb, T_send_cnt, mdev->send_cnt) ||
2658 nla_put_u64(skb, T_recv_cnt, mdev->recv_cnt) ||
2659 nla_put_u64(skb, T_read_cnt, mdev->read_cnt) ||
2660 nla_put_u64(skb, T_writ_cnt, mdev->writ_cnt) ||
2661 nla_put_u64(skb, T_al_writ_cnt, mdev->al_writ_cnt) ||
2662 nla_put_u64(skb, T_bm_writ_cnt, mdev->bm_writ_cnt) ||
2663 nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&mdev->ap_bio_cnt)) ||
2664 nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&mdev->ap_pending_cnt)) ||
2665 nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&mdev->rs_pending_cnt)))
2666 goto nla_put_failure;
2667
2668 if (got_ldev) {
2669 int err;
2134 2670
2135 if (get_net_conf(mdev)) { 2671 spin_lock_irq(&mdev->ldev->md.uuid_lock);
2136 tl = net_conf_to_tags(mdev, mdev->net_conf, tl); 2672 err = nla_put(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid);
2137 put_net_conf(mdev); 2673 spin_unlock_irq(&mdev->ldev->md.uuid_lock);
2674
2675 if (err)
2676 goto nla_put_failure;
2677
2678 if (nla_put_u32(skb, T_disk_flags, mdev->ldev->md.flags) ||
2679 nla_put_u64(skb, T_bits_total, drbd_bm_bits(mdev)) ||
2680 nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(mdev)))
2681 goto nla_put_failure;
2682 if (C_SYNC_SOURCE <= mdev->state.conn &&
2683 C_PAUSED_SYNC_T >= mdev->state.conn) {
2684 if (nla_put_u64(skb, T_bits_rs_total, mdev->rs_total) ||
2685 nla_put_u64(skb, T_bits_rs_failed, mdev->rs_failed))
2686 goto nla_put_failure;
2687 }
2138 } 2688 }
2139 tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl);
2140 2689
2141 put_unaligned(TT_END, tl++); /* Close the tag list */ 2690 if (sib) {
2691 switch(sib->sib_reason) {
2692 case SIB_SYNC_PROGRESS:
2693 case SIB_GET_STATUS_REPLY:
2694 break;
2695 case SIB_STATE_CHANGE:
2696 if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
2697 nla_put_u32(skb, T_new_state, sib->ns.i))
2698 goto nla_put_failure;
2699 break;
2700 case SIB_HELPER_POST:
2701 if (nla_put_u32(skb, T_helper_exit_code,
2702 sib->helper_exit_code))
2703 goto nla_put_failure;
2704 /* fall through */
2705 case SIB_HELPER_PRE:
2706 if (nla_put_string(skb, T_helper, sib->helper_name))
2707 goto nla_put_failure;
2708 break;
2709 }
2710 }
2711 nla_nest_end(skb, nla);
2142 2712
2143 return (int)((char *)tl - (char *)reply->tag_list); 2713 if (0)
2714nla_put_failure:
2715 err = -EMSGSIZE;
2716 if (got_ldev)
2717 put_ldev(mdev);
2718 return err;
2144} 2719}
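
The "if (0) nla_put_failure:" construct above lets the success path and the goto targets share the trailing put_ldev()/return, while only the error path overwrites err. A minimal, hypothetical illustration of the same idiom (do_step_one() and friends are invented names):

	static int fill_example(struct example *e)
	{
		int err = 0;

		if (do_step_one(e))
			goto fail;
		if (do_step_two(e))
			goto fail;

		if (0)
	fail:
			err = -EMSGSIZE;	/* only reached via the gotos above */
		cleanup_example(e);		/* shared by success and failure paths */
		return err;
	}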
2145 2720
2146static int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 2721int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
2147 struct drbd_nl_cfg_reply *reply)
2148{ 2722{
2149 unsigned short *tl = reply->tag_list; 2723 enum drbd_ret_code retcode;
2150 union drbd_state s = mdev->state; 2724 int err;
2151 unsigned long rs_left;
2152 unsigned int res;
2153 2725
2154 tl = get_state_to_tags(mdev, (struct get_state *)&s, tl); 2726 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2727 if (!adm_ctx.reply_skb)
2728 return retcode;
2729 if (retcode != NO_ERROR)
2730 goto out;
2155 2731
2156 /* no local ref, no bitmap, no syncer progress. */ 2732 err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.mdev, NULL);
2157 if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) { 2733 if (err) {
2158 if (get_ldev(mdev)) { 2734 nlmsg_free(adm_ctx.reply_skb);
2159 drbd_get_syncer_progress(mdev, &rs_left, &res); 2735 return err;
2160 tl = tl_add_int(tl, T_sync_progress, &res);
2161 put_ldev(mdev);
2162 }
2163 } 2736 }
2164 put_unaligned(TT_END, tl++); /* Close the tag list */ 2737out:
2165 2738 drbd_adm_finish(info, retcode);
2166 return (int)((char *)tl - (char *)reply->tag_list); 2739 return 0;
2167} 2740}
2168 2741
2169static int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 2742int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
2170 struct drbd_nl_cfg_reply *reply)
2171{ 2743{
2172 unsigned short *tl; 2744 struct drbd_conf *mdev;
2173 2745 struct drbd_genlmsghdr *dh;
2174 tl = reply->tag_list; 2746 struct drbd_tconn *pos = (struct drbd_tconn*)cb->args[0];
2747 struct drbd_tconn *tconn = NULL;
2748 struct drbd_tconn *tmp;
2749 unsigned volume = cb->args[1];
2750
2751 /* Open coded, deferred, iteration:
2752 * list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) {
2753 * idr_for_each_entry(&tconn->volumes, mdev, i) {
2754 * ...
2755 * }
2756 * }
2757 * where tconn is cb->args[0];
2758 * and i is cb->args[1];
2759 *
2760 * cb->args[2] indicates if we shall loop over all resources,
2761 * or just dump all volumes of a single resource.
2762 *
2763 * This may miss entries inserted after this dump started,
2764 * or entries deleted before they are reached.
2765 *
2766 * We need to make sure the mdev won't disappear while
2767 * we are looking at it, and revalidate our iterators
2768 * on each iteration.
2769 */
2175 2770
2176 if (get_ldev(mdev)) { 2771 /* synchronize with conn_create()/conn_destroy() */
2177 unsigned long flags; 2772 rcu_read_lock();
2178 spin_lock_irqsave(&mdev->ldev->md.uuid_lock, flags); 2773 /* revalidate iterator position */
2179 tl = tl_add_blob(tl, T_uuids, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64)); 2774 list_for_each_entry_rcu(tmp, &drbd_tconns, all_tconn) {
2180 tl = tl_add_int(tl, T_uuids_flags, &mdev->ldev->md.flags); 2775 if (pos == NULL) {
2181 spin_unlock_irqrestore(&mdev->ldev->md.uuid_lock, flags); 2776 /* first iteration */
2182 put_ldev(mdev); 2777 pos = tmp;
2778 tconn = pos;
2779 break;
2780 }
2781 if (tmp == pos) {
2782 tconn = pos;
2783 break;
2784 }
2183 } 2785 }
2184 put_unaligned(TT_END, tl++); /* Close the tag list */ 2786 if (tconn) {
2787next_tconn:
2788 mdev = idr_get_next(&tconn->volumes, &volume);
2789 if (!mdev) {
2790 /* No more volumes to dump on this tconn.
2791 * Advance tconn iterator. */
2792 pos = list_entry_rcu(tconn->all_tconn.next,
2793 struct drbd_tconn, all_tconn);
2794 /* Did we dump any volume on this tconn yet? */
2795 if (volume != 0) {
2796 /* If we reached the end of the list,
2797 * or only a single resource dump was requested,
2798 * we are done. */
2799 if (&pos->all_tconn == &drbd_tconns || cb->args[2])
2800 goto out;
2801 volume = 0;
2802 tconn = pos;
2803 goto next_tconn;
2804 }
2805 }
2185 2806
2186 return (int)((char *)tl - (char *)reply->tag_list); 2807 dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
2808 cb->nlh->nlmsg_seq, &drbd_genl_family,
2809 NLM_F_MULTI, DRBD_ADM_GET_STATUS);
2810 if (!dh)
2811 goto out;
2812
2813 if (!mdev) {
2814 /* This is a tconn without a single volume.
2815 * Surprisingly enough, it may have a network
2816 * configuration. */
2817 struct net_conf *nc;
2818 dh->minor = -1U;
2819 dh->ret_code = NO_ERROR;
2820 if (nla_put_drbd_cfg_context(skb, tconn, VOLUME_UNSPECIFIED))
2821 goto cancel;
2822 nc = rcu_dereference(tconn->net_conf);
2823 if (nc && net_conf_to_skb(skb, nc, 1) != 0)
2824 goto cancel;
2825 goto done;
2826 }
2827
2828 D_ASSERT(mdev->vnr == volume);
2829 D_ASSERT(mdev->tconn == tconn);
2830
2831 dh->minor = mdev_to_minor(mdev);
2832 dh->ret_code = NO_ERROR;
2833
2834 if (nla_put_status_info(skb, mdev, NULL)) {
2835cancel:
2836 genlmsg_cancel(skb, dh);
2837 goto out;
2838 }
2839done:
2840 genlmsg_end(skb, dh);
2841 }
2842
2843out:
2844 rcu_read_unlock();
2845 /* where to start the next iteration */
2846 cb->args[0] = (long)pos;
2847 cb->args[1] = (pos == tconn) ? volume + 1 : 0;
2848
2849 /* No more tconns/volumes/minors found results in an empty skb.
2850 * Which will terminate the dump. */
2851 return skb->len;
2187} 2852}
2188 2853
2189/** 2854/*
2190 * drbd_nl_get_timeout_flag() - Used by drbdsetup to find out which timeout value to use 2855 * Request status of all resources, or of all volumes within a single resource.
2191 * @mdev: DRBD device. 2856 *
2192 * @nlp: Netlink/connector packet from drbdsetup 2857 * This is a dump, as the answer may not fit in a single reply skb otherwise.
2193 * @reply: Reply packet for drbdsetup 2858 * Which means we cannot use the family->attrbuf or other such members, because
2859 * dump is NOT protected by the genl_lock(). During dump, we only have access
2860 * to the incoming skb, and need to opencode "parsing" of the nlattr payload.
2861 *
2862 * Once things are set up properly, we call into get_one_status().
2194 */ 2863 */
2195static int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 2864int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
2196 struct drbd_nl_cfg_reply *reply)
2197{ 2865{
2198 unsigned short *tl; 2866 const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
2199 char rv; 2867 struct nlattr *nla;
2868 const char *resource_name;
2869 struct drbd_tconn *tconn;
2870 int maxtype;
2871
2872 /* Is this a followup call? */
2873 if (cb->args[0]) {
2874 /* ... of a single resource dump,
2875 * and the resource iterator has been advanced already? */
2876 if (cb->args[2] && cb->args[2] != cb->args[0])
2877 return 0; /* DONE. */
2878 goto dump;
2879 }
2880
2881 /* First call (from netlink_dump_start). We need to figure out
2882 * which resource(s) the user wants us to dump. */
2883 nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
2884 nlmsg_attrlen(cb->nlh, hdrlen),
2885 DRBD_NLA_CFG_CONTEXT);
2886
2887 /* No explicit context given. Dump all. */
2888 if (!nla)
2889 goto dump;
2890 maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
2891 nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
2892 if (IS_ERR(nla))
2893 return PTR_ERR(nla);
2894 /* context given, but no name present? */
2895 if (!nla)
2896 return -EINVAL;
2897 resource_name = nla_data(nla);
2898 tconn = conn_get_by_name(resource_name);
2899
2900 if (!tconn)
2901 return -ENODEV;
2902
2903 kref_put(&tconn->kref, &conn_destroy); /* get_one_status() (re)validates tconn by itself */
2904
2905 /* prime iterators, and set "filter" mode mark:
2906 * only dump this tconn. */
2907 cb->args[0] = (long)tconn;
2908 /* cb->args[1] = 0; passed in this way. */
2909 cb->args[2] = (long)tconn;
2910
2911dump:
2912 return get_one_status(skb, cb);
2913}
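
get_one_status() and drbd_adm_get_status_all() cooperate across successive netlink_dump() passes purely through cb->args[], which the netlink core preserves between calls. A minimal sketch of that resume pattern, assuming a hypothetical example_genl_family, example_nr_items() and example_fill_one() (not part of DRBD):

	#include <net/genetlink.h>

	static int example_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
	{
		unsigned long idx = cb->args[0];	/* position saved by the previous pass */
		void *hdr;

		for (; idx < example_nr_items(); idx++) {
			hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
					  cb->nlh->nlmsg_seq, &example_genl_family,
					  NLM_F_MULTI, EXAMPLE_CMD_GET);
			if (!hdr)
				break;			/* skb full: stop, resume here next pass */
			if (example_fill_one(skb, idx)) {
				genlmsg_cancel(skb, hdr);
				break;
			}
			genlmsg_end(skb, hdr);
		}
		cb->args[0] = idx;			/* where to continue on the next pass */
		return skb->len;			/* an empty skb (0) terminates the dump */
	}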
2200 2914
2201 tl = reply->tag_list; 2915int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
2916{
2917 enum drbd_ret_code retcode;
2918 struct timeout_parms tp;
2919 int err;
2202 2920
2203 rv = mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED : 2921 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2204 drbd_test_flag(mdev, USE_DEGR_WFC_T) ? UT_DEGRADED : UT_DEFAULT; 2922 if (!adm_ctx.reply_skb)
2923 return retcode;
2924 if (retcode != NO_ERROR)
2925 goto out;
2205 2926
2206 tl = tl_add_blob(tl, T_use_degraded, &rv, sizeof(rv)); 2927 tp.timeout_type =
2207 put_unaligned(TT_END, tl++); /* Close the tag list */ 2928 adm_ctx.mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
2929 test_bit(USE_DEGR_WFC_T, &adm_ctx.mdev->flags) ? UT_DEGRADED :
2930 UT_DEFAULT;
2208 2931
2209 return (int)((char *)tl - (char *)reply->tag_list); 2932 err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
2933 if (err) {
2934 nlmsg_free(adm_ctx.reply_skb);
2935 return err;
2936 }
2937out:
2938 drbd_adm_finish(info, retcode);
2939 return 0;
2210} 2940}
2211 2941
2212static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 2942int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
2213 struct drbd_nl_cfg_reply *reply)
2214{ 2943{
2215 /* default to resume from last known position, if possible */ 2944 struct drbd_conf *mdev;
2216 struct start_ov args = { 2945 enum drbd_ret_code retcode;
2217 .start_sector = mdev->ov_start_sector, 2946 struct start_ov_parms parms;
2218 .stop_sector = ULLONG_MAX,
2219 };
2220 2947
2221 if (!start_ov_from_tags(mdev, nlp->tag_list, &args)) { 2948 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2222 reply->ret_code = ERR_MANDATORY_TAG; 2949 if (!adm_ctx.reply_skb)
2223 return 0; 2950 return retcode;
2951 if (retcode != NO_ERROR)
2952 goto out;
2953
2954 mdev = adm_ctx.mdev;
2955
2956 /* resume from last known position, if possible */
2957 parms.ov_start_sector = mdev->ov_start_sector;
2958 parms.ov_stop_sector = ULLONG_MAX;
2959 if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
2960 int err = start_ov_parms_from_attrs(&parms, info);
2961 if (err) {
2962 retcode = ERR_MANDATORY_TAG;
2963 drbd_msg_put_info(from_attrs_err_to_txt(err));
2964 goto out;
2965 }
2224 } 2966 }
2967 /* w_make_ov_request expects position to be aligned */
2968 mdev->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
2969 mdev->ov_stop_sector = parms.ov_stop_sector;
2225 2970
2226 /* If there is still bitmap IO pending, e.g. previous resync or verify 2971 /* If there is still bitmap IO pending, e.g. previous resync or verify
2227 * just being finished, wait for it before requesting a new resync. */ 2972 * just being finished, wait for it before requesting a new resync. */
2228 drbd_suspend_io(mdev); 2973 drbd_suspend_io(mdev);
2229 wait_event(mdev->misc_wait, !drbd_test_flag(mdev, BITMAP_IO)); 2974 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
2230 2975 retcode = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
2231 /* w_make_ov_request expects start position to be aligned */
2232 mdev->ov_start_sector = args.start_sector & ~(BM_SECT_PER_BIT-1);
2233 mdev->ov_stop_sector = args.stop_sector;
2234 reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
2235 drbd_resume_io(mdev); 2976 drbd_resume_io(mdev);
2977out:
2978 drbd_adm_finish(info, retcode);
2236 return 0; 2979 return 0;
2237} 2980}
2238 2981
2239 2982
2240static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 2983int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
2241 struct drbd_nl_cfg_reply *reply)
2242{ 2984{
2243 int retcode = NO_ERROR; 2985 struct drbd_conf *mdev;
2986 enum drbd_ret_code retcode;
2244 int skip_initial_sync = 0; 2987 int skip_initial_sync = 0;
2245 int err; 2988 int err;
2989 struct new_c_uuid_parms args;
2246 2990
2247 struct new_c_uuid args; 2991 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2992 if (!adm_ctx.reply_skb)
2993 return retcode;
2994 if (retcode != NO_ERROR)
2995 goto out_nolock;
2248 2996
2249 memset(&args, 0, sizeof(struct new_c_uuid)); 2997 mdev = adm_ctx.mdev;
2250 if (!new_c_uuid_from_tags(mdev, nlp->tag_list, &args)) { 2998 memset(&args, 0, sizeof(args));
2251 reply->ret_code = ERR_MANDATORY_TAG; 2999 if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
2252 return 0; 3000 err = new_c_uuid_parms_from_attrs(&args, info);
3001 if (err) {
3002 retcode = ERR_MANDATORY_TAG;
3003 drbd_msg_put_info(from_attrs_err_to_txt(err));
3004 goto out_nolock;
3005 }
2253 } 3006 }
2254 3007
2255 mutex_lock(&mdev->state_mutex); /* Protects us against serialized state changes. */ 3008 mutex_lock(mdev->state_mutex); /* Protects us against serialized state changes. */
2256 3009
2257 if (!get_ldev(mdev)) { 3010 if (!get_ldev(mdev)) {
2258 retcode = ERR_NO_DISK; 3011 retcode = ERR_NO_DISK;
@@ -2260,7 +3013,7 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
2260 } 3013 }
2261 3014
2262 /* this is "skip initial sync", assume to be clean */ 3015 /* this is "skip initial sync", assume to be clean */
2263 if (mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 && 3016 if (mdev->state.conn == C_CONNECTED && mdev->tconn->agreed_pro_version >= 90 &&
2264 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) { 3017 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
2265 dev_info(DEV, "Preparing to skip initial sync\n"); 3018 dev_info(DEV, "Preparing to skip initial sync\n");
2266 skip_initial_sync = 1; 3019 skip_initial_sync = 1;
@@ -2283,10 +3036,10 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
2283 drbd_send_uuids_skip_initial_sync(mdev); 3036 drbd_send_uuids_skip_initial_sync(mdev);
2284 _drbd_uuid_set(mdev, UI_BITMAP, 0); 3037 _drbd_uuid_set(mdev, UI_BITMAP, 0);
2285 drbd_print_uuids(mdev, "cleared bitmap UUID"); 3038 drbd_print_uuids(mdev, "cleared bitmap UUID");
2286 spin_lock_irq(&mdev->req_lock); 3039 spin_lock_irq(&mdev->tconn->req_lock);
2287 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), 3040 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
2288 CS_VERBOSE, NULL); 3041 CS_VERBOSE, NULL);
2289 spin_unlock_irq(&mdev->req_lock); 3042 spin_unlock_irq(&mdev->tconn->req_lock);
2290 } 3043 }
2291 } 3044 }
2292 3045
@@ -2294,416 +3047,283 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
2294out_dec: 3047out_dec:
2295 put_ldev(mdev); 3048 put_ldev(mdev);
2296out: 3049out:
2297 mutex_unlock(&mdev->state_mutex); 3050 mutex_unlock(mdev->state_mutex);
2298 3051out_nolock:
2299 reply->ret_code = retcode; 3052 drbd_adm_finish(info, retcode);
2300 return 0; 3053 return 0;
2301} 3054}
2302 3055
2303struct cn_handler_struct { 3056static enum drbd_ret_code
2304 int (*function)(struct drbd_conf *, 3057drbd_check_resource_name(const char *name)
2305 struct drbd_nl_cfg_req *,
2306 struct drbd_nl_cfg_reply *);
2307 int reply_body_size;
2308};
2309
2310static struct cn_handler_struct cnd_table[] = {
2311 [ P_primary ] = { &drbd_nl_primary, 0 },
2312 [ P_secondary ] = { &drbd_nl_secondary, 0 },
2313 [ P_disk_conf ] = { &drbd_nl_disk_conf, 0 },
2314 [ P_detach ] = { &drbd_nl_detach, 0 },
2315 [ P_net_conf ] = { &drbd_nl_net_conf, 0 },
2316 [ P_disconnect ] = { &drbd_nl_disconnect, 0 },
2317 [ P_resize ] = { &drbd_nl_resize, 0 },
2318 [ P_syncer_conf ] = { &drbd_nl_syncer_conf, 0 },
2319 [ P_invalidate ] = { &drbd_nl_invalidate, 0 },
2320 [ P_invalidate_peer ] = { &drbd_nl_invalidate_peer, 0 },
2321 [ P_pause_sync ] = { &drbd_nl_pause_sync, 0 },
2322 [ P_resume_sync ] = { &drbd_nl_resume_sync, 0 },
2323 [ P_suspend_io ] = { &drbd_nl_suspend_io, 0 },
2324 [ P_resume_io ] = { &drbd_nl_resume_io, 0 },
2325 [ P_outdate ] = { &drbd_nl_outdate, 0 },
2326 [ P_get_config ] = { &drbd_nl_get_config,
2327 sizeof(struct syncer_conf_tag_len_struct) +
2328 sizeof(struct disk_conf_tag_len_struct) +
2329 sizeof(struct net_conf_tag_len_struct) },
2330 [ P_get_state ] = { &drbd_nl_get_state,
2331 sizeof(struct get_state_tag_len_struct) +
2332 sizeof(struct sync_progress_tag_len_struct) },
2333 [ P_get_uuids ] = { &drbd_nl_get_uuids,
2334 sizeof(struct get_uuids_tag_len_struct) },
2335 [ P_get_timeout_flag ] = { &drbd_nl_get_timeout_flag,
2336 sizeof(struct get_timeout_flag_tag_len_struct)},
2337 [ P_start_ov ] = { &drbd_nl_start_ov, 0 },
2338 [ P_new_c_uuid ] = { &drbd_nl_new_c_uuid, 0 },
2339};
2340
2341static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp)
2342{ 3058{
2343 struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data; 3059 if (!name || !name[0]) {
2344 struct cn_handler_struct *cm; 3060 drbd_msg_put_info("resource name missing");
2345 struct cn_msg *cn_reply; 3061 return ERR_MANDATORY_TAG;
2346 struct drbd_nl_cfg_reply *reply;
2347 struct drbd_conf *mdev;
2348 int retcode, rr;
2349 int reply_size = sizeof(struct cn_msg)
2350 + sizeof(struct drbd_nl_cfg_reply)
2351 + sizeof(short int);
2352
2353 if (!try_module_get(THIS_MODULE)) {
2354 printk(KERN_ERR "drbd: try_module_get() failed!\n");
2355 return;
2356 }
2357
2358 if (!capable(CAP_SYS_ADMIN)) {
2359 retcode = ERR_PERM;
2360 goto fail;
2361 } 3062 }
2362 3063 /* if we want to use these in sysfs/configfs/debugfs some day,
2363 mdev = ensure_mdev(nlp->drbd_minor, 3064 * we must not allow slashes */
2364 (nlp->flags & DRBD_NL_CREATE_DEVICE)); 3065 if (strchr(name, '/')) {
2365 if (!mdev) { 3066 drbd_msg_put_info("invalid resource name");
2366 retcode = ERR_MINOR_INVALID; 3067 return ERR_INVALID_REQUEST;
2367 goto fail;
2368 } 3068 }
3069 return NO_ERROR;
3070}
2369 3071
2370 if (nlp->packet_type >= P_nl_after_last_packet || 3072int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
2371 nlp->packet_type == P_return_code_only) { 3073{
2372 retcode = ERR_PACKET_NR; 3074 enum drbd_ret_code retcode;
2373 goto fail; 3075 struct res_opts res_opts;
2374 } 3076 int err;
2375 3077
2376 cm = cnd_table + nlp->packet_type; 3078 retcode = drbd_adm_prepare(skb, info, 0);
3079 if (!adm_ctx.reply_skb)
3080 return retcode;
3081 if (retcode != NO_ERROR)
3082 goto out;
2377 3083
2378 /* This may happen if packet number is 0: */ 3084 set_res_opts_defaults(&res_opts);
2379 if (cm->function == NULL) { 3085 err = res_opts_from_attrs(&res_opts, info);
2380 retcode = ERR_PACKET_NR; 3086 if (err && err != -ENOMSG) {
2381 goto fail; 3087 retcode = ERR_MANDATORY_TAG;
3088 drbd_msg_put_info(from_attrs_err_to_txt(err));
3089 goto out;
2382 } 3090 }
2383 3091
2384 reply_size += cm->reply_body_size; 3092 retcode = drbd_check_resource_name(adm_ctx.resource_name);
3093 if (retcode != NO_ERROR)
3094 goto out;
2385 3095
2386 /* allocation not in the IO path, cqueue thread context */ 3096 if (adm_ctx.tconn) {
2387 cn_reply = kzalloc(reply_size, GFP_KERNEL); 3097 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
2388 if (!cn_reply) { 3098 retcode = ERR_INVALID_REQUEST;
2389 retcode = ERR_NOMEM; 3099 drbd_msg_put_info("resource exists");
2390 goto fail; 3100 }
3101 /* else: still NO_ERROR */
3102 goto out;
2391 } 3103 }
2392 reply = (struct drbd_nl_cfg_reply *) cn_reply->data;
2393
2394 reply->packet_type =
2395 cm->reply_body_size ? nlp->packet_type : P_return_code_only;
2396 reply->minor = nlp->drbd_minor;
2397 reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */
2398 /* reply->tag_list; might be modified by cm->function. */
2399
2400 rr = cm->function(mdev, nlp, reply);
2401
2402 cn_reply->id = req->id;
2403 cn_reply->seq = req->seq;
2404 cn_reply->ack = req->ack + 1;
2405 cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr;
2406 cn_reply->flags = 0;
2407 3104
2408 rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); 3105 if (!conn_create(adm_ctx.resource_name, &res_opts))
2409 if (rr && rr != -ESRCH) 3106 retcode = ERR_NOMEM;
2410 printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); 3107out:
2411 3108 drbd_adm_finish(info, retcode);
2412 kfree(cn_reply); 3109 return 0;
2413 module_put(THIS_MODULE);
2414 return;
2415 fail:
2416 drbd_nl_send_reply(req, retcode);
2417 module_put(THIS_MODULE);
2418} 3110}
2419 3111
2420static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */ 3112int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info)
2421
2422static unsigned short *
2423__tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data,
2424 unsigned short len, int nul_terminated)
2425{ 3113{
2426 unsigned short l = tag_descriptions[tag_number(tag)].max_len; 3114 struct drbd_genlmsghdr *dh = info->userhdr;
2427 len = (len < l) ? len : l; 3115 enum drbd_ret_code retcode;
2428 put_unaligned(tag, tl++);
2429 put_unaligned(len, tl++);
2430 memcpy(tl, data, len);
2431 tl = (unsigned short*)((char*)tl + len);
2432 if (nul_terminated)
2433 *((char*)tl - 1) = 0;
2434 return tl;
2435}
2436 3116
2437static unsigned short * 3117 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
2438tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len) 3118 if (!adm_ctx.reply_skb)
2439{ 3119 return retcode;
2440 return __tl_add_blob(tl, tag, data, len, 0); 3120 if (retcode != NO_ERROR)
2441} 3121 goto out;
2442 3122
2443static unsigned short * 3123 if (dh->minor > MINORMASK) {
2444tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str) 3124 drbd_msg_put_info("requested minor out of range");
2445{ 3125 retcode = ERR_INVALID_REQUEST;
2446 return __tl_add_blob(tl, tag, str, strlen(str)+1, 0); 3126 goto out;
2447} 3127 }
3128 if (adm_ctx.volume > DRBD_VOLUME_MAX) {
3129 drbd_msg_put_info("requested volume id out of range");
3130 retcode = ERR_INVALID_REQUEST;
3131 goto out;
3132 }
2448 3133
2449static unsigned short * 3134 /* drbd_adm_prepare made sure already
2450tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val) 3135 * that mdev->tconn and mdev->vnr match the request. */
2451{ 3136 if (adm_ctx.mdev) {
2452 put_unaligned(tag, tl++); 3137 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
2453 switch(tag_type(tag)) { 3138 retcode = ERR_MINOR_EXISTS;
2454 case TT_INTEGER: 3139 /* else: still NO_ERROR */
2455 put_unaligned(sizeof(int), tl++); 3140 goto out;
2456 put_unaligned(*(int *)val, (int *)tl);
2457 tl = (unsigned short*)((char*)tl+sizeof(int));
2458 break;
2459 case TT_INT64:
2460 put_unaligned(sizeof(u64), tl++);
2461 put_unaligned(*(u64 *)val, (u64 *)tl);
2462 tl = (unsigned short*)((char*)tl+sizeof(u64));
2463 break;
2464 default:
2465 /* someone did something stupid. */
2466 ;
2467 } 3141 }
2468 return tl; 3142
3143 retcode = conn_new_minor(adm_ctx.tconn, dh->minor, adm_ctx.volume);
3144out:
3145 drbd_adm_finish(info, retcode);
3146 return 0;
2469} 3147}
2470 3148
2471void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state) 3149static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev)
2472{ 3150{
2473 char buffer[sizeof(struct cn_msg)+ 3151 if (mdev->state.disk == D_DISKLESS &&
2474 sizeof(struct drbd_nl_cfg_reply)+ 3152 /* no need to be mdev->state.conn == C_STANDALONE &&
2475 sizeof(struct get_state_tag_len_struct)+ 3153 * we may want to delete a minor from a live replication group.
2476 sizeof(short int)]; 3154 */
2477 struct cn_msg *cn_reply = (struct cn_msg *) buffer; 3155 mdev->state.role == R_SECONDARY) {
2478 struct drbd_nl_cfg_reply *reply = 3156 _drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS),
2479 (struct drbd_nl_cfg_reply *)cn_reply->data; 3157 CS_VERBOSE + CS_WAIT_COMPLETE);
2480 unsigned short *tl = reply->tag_list; 3158 idr_remove(&mdev->tconn->volumes, mdev->vnr);
2481 3159 idr_remove(&minors, mdev_to_minor(mdev));
2482 /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ 3160 del_gendisk(mdev->vdisk);
2483 3161 synchronize_rcu();
2484 tl = get_state_to_tags(mdev, (struct get_state *)&state, tl); 3162 kref_put(&mdev->kref, &drbd_minor_destroy);
2485 3163 return NO_ERROR;
2486 put_unaligned(TT_END, tl++); /* Close the tag list */ 3164 } else
2487 3165 return ERR_MINOR_CONFIGURED;
2488 cn_reply->id.idx = CN_IDX_DRBD;
2489 cn_reply->id.val = CN_VAL_DRBD;
2490
2491 cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
2492 cn_reply->ack = 0; /* not used here. */
2493 cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
2494 (int)((char *)tl - (char *)reply->tag_list);
2495 cn_reply->flags = 0;
2496
2497 reply->packet_type = P_get_state;
2498 reply->minor = mdev_to_minor(mdev);
2499 reply->ret_code = NO_ERROR;
2500
2501 cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
2502} 3166}
2503 3167
2504void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) 3168int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info)
2505{ 3169{
2506 char buffer[sizeof(struct cn_msg)+ 3170 enum drbd_ret_code retcode;
2507 sizeof(struct drbd_nl_cfg_reply)+
2508 sizeof(struct call_helper_tag_len_struct)+
2509 sizeof(short int)];
2510 struct cn_msg *cn_reply = (struct cn_msg *) buffer;
2511 struct drbd_nl_cfg_reply *reply =
2512 (struct drbd_nl_cfg_reply *)cn_reply->data;
2513 unsigned short *tl = reply->tag_list;
2514
2515 /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */
2516
2517 tl = tl_add_str(tl, T_helper, helper_name);
2518 put_unaligned(TT_END, tl++); /* Close the tag list */
2519
2520 cn_reply->id.idx = CN_IDX_DRBD;
2521 cn_reply->id.val = CN_VAL_DRBD;
2522
2523 cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
2524 cn_reply->ack = 0; /* not used here. */
2525 cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
2526 (int)((char *)tl - (char *)reply->tag_list);
2527 cn_reply->flags = 0;
2528 3171
2529 reply->packet_type = P_call_helper; 3172 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2530 reply->minor = mdev_to_minor(mdev); 3173 if (!adm_ctx.reply_skb)
2531 reply->ret_code = NO_ERROR; 3174 return retcode;
3175 if (retcode != NO_ERROR)
3176 goto out;
2532 3177
2533 cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); 3178 retcode = adm_delete_minor(adm_ctx.mdev);
3179out:
3180 drbd_adm_finish(info, retcode);
3181 return 0;
2534} 3182}
2535 3183
2536void drbd_bcast_ee(struct drbd_conf *mdev, 3184int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
2537 const char *reason, const int dgs,
2538 const char* seen_hash, const char* calc_hash,
2539 const struct drbd_epoch_entry* e)
2540{ 3185{
2541 struct cn_msg *cn_reply; 3186 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2542 struct drbd_nl_cfg_reply *reply; 3187 struct drbd_conf *mdev;
2543 unsigned short *tl; 3188 unsigned i;
2544 struct page *page;
2545 unsigned len;
2546 3189
2547 if (!e) 3190 retcode = drbd_adm_prepare(skb, info, 0);
2548 return; 3191 if (!adm_ctx.reply_skb)
2549 if (!reason || !reason[0]) 3192 return retcode;
2550 return; 3193 if (retcode != NO_ERROR)
3194 goto out;
2551 3195
2552 /* apparently we have to memcpy twice, first to prepare the data for the 3196 if (!adm_ctx.tconn) {
2553 * struct cn_msg, then within cn_netlink_send from the cn_msg to the 3197 retcode = ERR_RES_NOT_KNOWN;
2554 * netlink skb. */ 3198 goto out;
2555 /* receiver thread context, which is not in the writeout path (of this node),
2556 * but may be in the writeout path of the _other_ node.
2557 * GFP_NOIO to avoid potential "distributed deadlock". */
2558 cn_reply = kzalloc(
2559 sizeof(struct cn_msg)+
2560 sizeof(struct drbd_nl_cfg_reply)+
2561 sizeof(struct dump_ee_tag_len_struct)+
2562 sizeof(short int),
2563 GFP_NOIO);
2564
2565 if (!cn_reply) {
2566 dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n",
2567 (unsigned long long)e->sector, e->size);
2568 return;
2569 } 3199 }
2570 3200
2571 reply = (struct drbd_nl_cfg_reply*)cn_reply->data; 3201 /* demote */
2572 tl = reply->tag_list; 3202 idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
2573 3203 retcode = drbd_set_role(mdev, R_SECONDARY, 0);
2574 tl = tl_add_str(tl, T_dump_ee_reason, reason); 3204 if (retcode < SS_SUCCESS) {
2575 tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs); 3205 drbd_msg_put_info("failed to demote");
2576 tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs); 3206 goto out;
2577 tl = tl_add_int(tl, T_ee_sector, &e->sector); 3207 }
2578 tl = tl_add_int(tl, T_ee_block_id, &e->block_id);
2579
2580 /* dump the first 32k */
2581 len = min_t(unsigned, e->size, 32 << 10);
2582 put_unaligned(T_ee_data, tl++);
2583 put_unaligned(len, tl++);
2584
2585 page = e->pages;
2586 page_chain_for_each(page) {
2587 void *d = kmap_atomic(page);
2588 unsigned l = min_t(unsigned, len, PAGE_SIZE);
2589 memcpy(tl, d, l);
2590 kunmap_atomic(d);
2591 tl = (unsigned short*)((char*)tl + l);
2592 len -= l;
2593 if (len == 0)
2594 break;
2595 } 3208 }
2596 put_unaligned(TT_END, tl++); /* Close the tag list */
2597
2598 cn_reply->id.idx = CN_IDX_DRBD;
2599 cn_reply->id.val = CN_VAL_DRBD;
2600 3209
2601 cn_reply->seq = atomic_add_return(1,&drbd_nl_seq); 3210 retcode = conn_try_disconnect(adm_ctx.tconn, 0);
2602 cn_reply->ack = 0; // not used here. 3211 if (retcode < SS_SUCCESS) {
2603 cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + 3212 drbd_msg_put_info("failed to disconnect");
2604 (int)((char*)tl - (char*)reply->tag_list); 3213 goto out;
2605 cn_reply->flags = 0; 3214 }
2606
2607 reply->packet_type = P_dump_ee;
2608 reply->minor = mdev_to_minor(mdev);
2609 reply->ret_code = NO_ERROR;
2610
2611 cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
2612 kfree(cn_reply);
2613}
2614
2615void drbd_bcast_sync_progress(struct drbd_conf *mdev)
2616{
2617 char buffer[sizeof(struct cn_msg)+
2618 sizeof(struct drbd_nl_cfg_reply)+
2619 sizeof(struct sync_progress_tag_len_struct)+
2620 sizeof(short int)];
2621 struct cn_msg *cn_reply = (struct cn_msg *) buffer;
2622 struct drbd_nl_cfg_reply *reply =
2623 (struct drbd_nl_cfg_reply *)cn_reply->data;
2624 unsigned short *tl = reply->tag_list;
2625 unsigned long rs_left;
2626 unsigned int res;
2627 3215
2628 /* no local ref, no bitmap, no syncer progress, no broadcast. */ 3216 /* detach */
2629 if (!get_ldev(mdev)) 3217 idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
2630 return; 3218 retcode = adm_detach(mdev, 0);
2631 drbd_get_syncer_progress(mdev, &rs_left, &res); 3219 if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
2632 put_ldev(mdev); 3220 drbd_msg_put_info("failed to detach");
3221 goto out;
3222 }
3223 }
2633 3224
2634 tl = tl_add_int(tl, T_sync_progress, &res); 3225 /* If we reach this, all volumes (of this tconn) are Secondary,
2635 put_unaligned(TT_END, tl++); /* Close the tag list */ 3226 * Disconnected, Diskless, aka Unconfigured. Make sure all threads have
3227 * actually stopped, state handling only does drbd_thread_stop_nowait(). */
3228 drbd_thread_stop(&adm_ctx.tconn->worker);
2636 3229
2637 cn_reply->id.idx = CN_IDX_DRBD; 3230 /* Now, nothing can fail anymore */
2638 cn_reply->id.val = CN_VAL_DRBD;
2639 3231
2640 cn_reply->seq = atomic_add_return(1, &drbd_nl_seq); 3232 /* delete volumes */
2641 cn_reply->ack = 0; /* not used here. */ 3233 idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
2642 cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + 3234 retcode = adm_delete_minor(mdev);
2643 (int)((char *)tl - (char *)reply->tag_list); 3235 if (retcode != NO_ERROR) {
2644 cn_reply->flags = 0; 3236 /* "can not happen" */
3237 drbd_msg_put_info("failed to delete volume");
3238 goto out;
3239 }
3240 }
2645 3241
2646 reply->packet_type = P_sync_progress; 3242 /* delete connection */
2647 reply->minor = mdev_to_minor(mdev); 3243 if (conn_lowest_minor(adm_ctx.tconn) < 0) {
2648 reply->ret_code = NO_ERROR; 3244 list_del_rcu(&adm_ctx.tconn->all_tconn);
3245 synchronize_rcu();
3246 kref_put(&adm_ctx.tconn->kref, &conn_destroy);
2649 3247
2650 cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); 3248 retcode = NO_ERROR;
3249 } else {
3250 /* "can not happen" */
3251 retcode = ERR_RES_IN_USE;
3252 drbd_msg_put_info("failed to delete connection");
3253 }
3254 goto out;
3255out:
3256 drbd_adm_finish(info, retcode);
3257 return 0;
2651} 3258}
2652 3259
2653int __init drbd_nl_init(void) 3260int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
2654{ 3261{
2655 static struct cb_id cn_id_drbd; 3262 enum drbd_ret_code retcode;
2656 int err, try=10;
2657 3263
2658 cn_id_drbd.val = CN_VAL_DRBD; 3264 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE);
2659 do { 3265 if (!adm_ctx.reply_skb)
2660 cn_id_drbd.idx = cn_idx; 3266 return retcode;
2661 err = cn_add_callback(&cn_id_drbd, "cn_drbd", &drbd_connector_callback); 3267 if (retcode != NO_ERROR)
2662 if (!err) 3268 goto out;
2663 break;
2664 cn_idx = (cn_idx + CN_IDX_STEP);
2665 } while (try--);
2666 3269
2667 if (err) { 3270 if (conn_lowest_minor(adm_ctx.tconn) < 0) {
2668 printk(KERN_ERR "drbd: cn_drbd failed to register\n"); 3271 list_del_rcu(&adm_ctx.tconn->all_tconn);
2669 return err; 3272 synchronize_rcu();
3273 kref_put(&adm_ctx.tconn->kref, &conn_destroy);
3274
3275 retcode = NO_ERROR;
3276 } else {
3277 retcode = ERR_RES_IN_USE;
2670 } 3278 }
2671 3279
3280 if (retcode == NO_ERROR)
3281 drbd_thread_stop(&adm_ctx.tconn->worker);
3282out:
3283 drbd_adm_finish(info, retcode);
2672 return 0; 3284 return 0;
2673} 3285}
2674 3286
2675void drbd_nl_cleanup(void) 3287void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib)
2676{ 3288{
2677 static struct cb_id cn_id_drbd; 3289 static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
2678 3290 struct sk_buff *msg;
2679 cn_id_drbd.idx = cn_idx; 3291 struct drbd_genlmsghdr *d_out;
2680 cn_id_drbd.val = CN_VAL_DRBD; 3292 unsigned seq;
2681 3293 int err = -ENOMEM;
2682 cn_del_callback(&cn_id_drbd); 3294
2683} 3295 if (sib->sib_reason == SIB_SYNC_PROGRESS &&
2684 3296 time_after(jiffies, mdev->rs_last_bcast + HZ))
2685void drbd_nl_send_reply(struct cn_msg *req, int ret_code) 3297 mdev->rs_last_bcast = jiffies;
2686{ 3298 else
2687 char buffer[sizeof(struct cn_msg)+sizeof(struct drbd_nl_cfg_reply)]; 3299 return;
2688 struct cn_msg *cn_reply = (struct cn_msg *) buffer;
2689 struct drbd_nl_cfg_reply *reply =
2690 (struct drbd_nl_cfg_reply *)cn_reply->data;
2691 int rr;
2692
2693 memset(buffer, 0, sizeof(buffer));
2694 cn_reply->id = req->id;
2695 3300
2696 cn_reply->seq = req->seq; 3301 seq = atomic_inc_return(&drbd_genl_seq);
2697 cn_reply->ack = req->ack + 1; 3302 msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
2698 cn_reply->len = sizeof(struct drbd_nl_cfg_reply); 3303 if (!msg)
2699 cn_reply->flags = 0; 3304 goto failed;
3305
3306 err = -EMSGSIZE;
3307 d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
3308 if (!d_out) /* cannot happen, but anyways. */
3309 goto nla_put_failure;
3310 d_out->minor = mdev_to_minor(mdev);
3311 d_out->ret_code = NO_ERROR;
3312
3313 if (nla_put_status_info(msg, mdev, sib))
3314 goto nla_put_failure;
3315 genlmsg_end(msg, d_out);
3316 err = drbd_genl_multicast_events(msg, 0);
3317 /* msg has been consumed or freed in netlink_broadcast() */
3318 if (err && err != -ESRCH)
3319 goto failed;
2700 3320
2701 reply->packet_type = P_return_code_only; 3321 return;
2702 reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor;
2703 reply->ret_code = ret_code;
2704 3322
2705 rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); 3323nla_put_failure:
2706 if (rr && rr != -ESRCH) 3324 nlmsg_free(msg);
2707 printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); 3325failed:
3326 dev_err(DEV, "Error %d while broadcasting event. "
3327 "Event seq:%u sib_reason:%u\n",
3328 err, seq, sib->sib_reason);
2708} 3329}
2709
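
On the receiving side, any process may subscribe to these broadcasts through the generic netlink multicast group. A minimal, hypothetical userspace listener using libnl-3, assuming the family and group are registered as "drbd" and "events" (as the drbd_genl_multicast_events name suggests); error handling is omitted:

	#include <stdio.h>
	#include <netlink/netlink.h>
	#include <netlink/genl/genl.h>
	#include <netlink/genl/ctrl.h>

	static int on_event(struct nl_msg *msg, void *arg)
	{
		/* a real tool would parse the drbd_genlmsghdr and nlattrs here */
		printf("received a DRBD event notification\n");
		return NL_OK;
	}

	int main(void)
	{
		struct nl_sock *sk = nl_socket_alloc();
		int grp;

		nl_socket_disable_seq_check(sk);	/* multicast messages carry their own seq */
		genl_connect(sk);
		grp = genl_ctrl_resolve_grp(sk, "drbd", "events");
		nl_socket_add_membership(sk, grp);	/* join the events multicast group */
		nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, on_event, NULL);
		while (1)
			nl_recvmsgs_default(sk);	/* dispatches to on_event() */
		return 0;
	}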