aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/bluetooth/btintel.c
blob: 54410479f2f596250889a7dccbc89fe228d8150e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
/*
 *
 *  Bluetooth support for Intel devices
 *
 *  Copyright (C) 2015  Intel Corporation
 *
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

#include <linux/module.h>
#include <linux/firmware.h>
#include <linux/regmap.h>

#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>

#include "btintel.h"

#define VERSION "0.1"

#define BDADDR_INTEL (&(bdaddr_t) {{0x00, 0x8b, 0x9e, 0x19, 0x03, 0x00}})

int btintel_check_bdaddr(struct hci_dev *hdev)
{
	struct hci_rp_read_bd_addr *bda;
	struct sk_buff *skb;

	skb = __hci_cmd_sync(hdev, HCI_OP_READ_BD_ADDR, 0, NULL,
			     HCI_INIT_TIMEOUT);
	if (IS_ERR(skb)) {
		int err = PTR_ERR(skb);
		BT_ERR("%s: Reading Intel device address failed (%d)",
		       hdev->name, err);
		return err;
	}

	if (skb->len != sizeof(*bda)) {
		BT_ERR("%s: Intel device address length mismatch", hdev->name);
		kfree_skb(skb);
		return -EIO;
	}

	bda = (struct hci_rp_read_bd_addr *)skb->data;

	/* For some Intel based controllers, the default Bluetooth device
	 * address 00:03:19:9E:8B:00 can be found. These controllers are
	 * fully operational, but have the danger of duplicate addresses
	 * and that in turn can cause problems with Bluetooth operation.
	 */
	if (!bacmp(&bda->bdaddr, BDADDR_INTEL)) {
		BT_ERR("%s: Found Intel default device address (%pMR)",
		       hdev->name, &bda->bdaddr);
		set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
	}

	kfree_skb(skb);

	return 0;
}
EXPORT_SYMBOL_GPL(btintel_check_bdaddr);

int btintel_enter_mfg(struct hci_dev *hdev)
{
	const u8 param[] = { 0x01, 0x00 };
	struct sk_buff *skb;

	skb = __hci_cmd_sync(hdev, 0xfc11, 2, param, HCI_CMD_TIMEOUT);
	if (IS_ERR(skb)) {
		bt_dev_err(hdev, "Entering manufacturer mode failed (%ld)",
			   PTR_ERR(skb));
		return PTR_ERR(skb);
	}
	kfree_skb(skb);

	return 0;
}
EXPORT_SYMBOL_GPL(btintel_enter_mfg);

int btintel_exit_mfg(struct hci_dev *hdev, bool reset, bool patched)
{
	u8 param[] = { 0x00, 0x00 };
	struct sk_buff *skb;

	/* The 2nd command parameter specifies the manufacturing exit method:
	 * 0x00: Just disable the manufacturing mode (0x00).
	 * 0x01: Disable manufacturing mode and reset with patches deactivated.
	 * 0x02: Disable manufacturing mode and reset with patches activated.
	 */
	if (reset)
		param[1] |= patched ? 0x02 : 0x01;

	skb = __hci_cmd_sync(hdev, 0xfc11, 2, param, HCI_CMD_TIMEOUT);
	if (IS_ERR(skb)) {
		bt_dev_err(hdev, "Exiting manufacturer mode failed (%ld)",
			   PTR_ERR(skb));
		return PTR_ERR(skb);
	}
	kfree_skb(skb);

	return 0;
}
EXPORT_SYMBOL_GPL(btintel_exit_mfg);

int btintel_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
{
	struct sk_buff *skb;
	int err;

	skb = __hci_cmd_sync(hdev, 0xfc31, 6, bdaddr, HCI_INIT_TIMEOUT);
	if (IS_ERR(skb)) {
		err = PTR_ERR(skb);
		BT_ERR("%s: Changing Intel device address failed (%d)",
		       hdev->name, err);
		return err;
	}
	kfree_skb(skb);

	return 0;
}
EXPORT_SYMBOL_GPL(btintel_set_bdaddr);

int btintel_set_diag(struct hci_dev *hdev, bool enable)
{
	struct sk_buff *skb;
	u8 param[3];
	int err;

	if (enable) {
		param[0] = 0x03;
		param[1] = 0x03;
		param[2] = 0x03;
	} else {
		param[0] = 0x00;
		param[1] = 0x00;
		param[2] = 0x00;
	}

	skb = __hci_cmd_sync(hdev, 0xfc43, 3, param, HCI_INIT_TIMEOUT);
	if (IS_ERR(skb)) {
		err = PTR_ERR(skb);
		if (err == -ENODATA)
			goto done;
		BT_ERR("%s: Changing Intel diagnostic mode failed (%d)",
		       hdev->name, err);
		return err;
	}
	kfree_skb(skb);

done:
	btintel_set_event_mask(hdev, enable);
	return 0;
}
EXPORT_SYMBOL_GPL(btintel_set_diag);

int btintel_set_diag_mfg(struct hci_dev *hdev, bool enable)
{
	int err, ret;

	err = btintel_enter_mfg(hdev);
	if (err)
		return err;

	ret = btintel_set_diag(hdev, enable);

	err = btintel_exit_mfg(hdev, false, false);
	if (err)
		return err;

	return ret;
}
EXPORT_SYMBOL_GPL(btintel_set_diag_mfg);

void btintel_hw_error(struct hci_dev *hdev, u8 code)
{
	struct sk_buff *skb;
	u8 type = 0x00;

	BT_ERR("%s: Hardware error 0x%2.2x", hdev->name, code);

	skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
	if (IS_ERR(skb)) {
		BT_ERR("%s: Reset after hardware error failed (%ld)",
		       hdev->name, PTR_ERR(skb));
		return;
	}
	kfree_skb(skb);

	skb = __hci_cmd_sync(hdev, 0xfc22, 1, &type, HCI_INIT_TIMEOUT);
	if (IS_ERR(skb)) {
		BT_ERR("%s: Retrieving Intel exception info failed (%ld)",
		       hdev->name, PTR_ERR(skb));
		return;
	}

	if (skb->len != 13) {
		BT_ERR("%s: Exception info size mismatch", hdev->name);
		kfree_skb(skb);
		return;
	}

	BT_ERR("%s: Exception info %s", hdev->name, (char *)(skb->data + 1));

	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(btintel_hw_error);

void btintel_version_info(struct hci_dev *hdev, struct intel_version *ver)
{
	const char *variant;

	switch (ver->fw_variant) {
	case 0x06:
		variant = "Bootloader";
		break;
	case 0x23:
		variant = "Firmware";
		break;
	default:
		return;
	}

	BT_INFO("%s: %s revision %u.%u build %u week %u %u", hdev->name,
		variant, ver->fw_revision >> 4, ver->fw_revision & 0x0f,
		ver->fw_build_num, ver->fw_build_ww, 2000 + ver->fw_build_yy);
}
EXPORT_SYMBOL_GPL(btintel_version_info);

int btintel_secure_send(struct hci_dev *hdev, u8 fragment_type, u32 plen,
			const void *param)
{
	while (plen > 0) {
		struct sk_buff *skb;
		u8 cmd_param[253], fragment_len = (plen > 252) ? 252 : plen;

		cmd_param[0] = fragment_type;
		memcpy(cmd_param + 1, param, fragment_len);

		skb = __hci_cmd_sync(hdev, 0xfc09, fragment_len + 1,
				     cmd_param, HCI_INIT_TIMEOUT);
		if (IS_ERR(skb))
			return PTR_ERR(skb);

		kfree_skb(skb);

		plen -= fragment_len;
		param += fragment_len;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(btintel_secure_send);

int btintel_load_ddc_config(struct hci_dev *hdev, const char *ddc_name)
{
	const struct firmware *fw;
	struct sk_buff *skb;
	const u8 *fw_ptr;
	int err;

	err = request_firmware_direct(&fw, ddc_name, &hdev->dev);
	if (err < 0) {
		bt_dev_err(hdev, "Failed to load Intel DDC file %s (%d)",
			   ddc_name, err);
		return err;
	}

	bt_dev_info(hdev, "Found Intel DDC parameters: %s", ddc_name);

	fw_ptr = fw->data;

	/* DDC file contains one or more DDC structure which has
	 * Length (1 byte), DDC ID (2 bytes), and DDC value (Length - 2).
	 */
	while (fw->size > fw_ptr - fw->data) {
		u8 cmd_plen = fw_ptr[0] + sizeof(u8);

		skb = __hci_cmd_sync(hdev, 0xfc8b, cmd_plen, fw_ptr,
				     HCI_INIT_TIMEOUT);
		if (IS_ERR(skb)) {
			bt_dev_err(hdev, "Failed to send Intel_Write_DDC (%ld)",
				   PTR_ERR(skb));
			release_firmware(fw);
			return PTR_ERR(skb);
		}

		fw_ptr += cmd_plen;
		kfree_skb(skb);
	}

	release_firmware(fw);

	bt_dev_info(hdev, "Applying Intel DDC parameters completed");

	return 0;
}
EXPORT_SYMBOL_GPL(btintel_load_ddc_config);

int btintel_set_event_mask(struct hci_dev *hdev, bool debug)
{
	u8 mask[8] = { 0x87, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
	struct sk_buff *skb;
	int err;

	if (debug)
		mask[1] |= 0x62;

	skb = __hci_cmd_sync(hdev, 0xfc52, 8, mask, HCI_INIT_TIMEOUT);
	if (IS_ERR(skb)) {
		err = PTR_ERR(skb);
		BT_ERR("%s: Setting Intel event mask failed (%d)",
		       hdev->name, err);
		return err;
	}
	kfree_skb(skb);

	return 0;
}
EXPORT_SYMBOL_GPL(btintel_set_event_mask);

int btintel_set_event_mask_mfg(struct hci_dev *hdev, bool debug)
{
	int err, ret;

	err = btintel_enter_mfg(hdev);
	if (err)
		return err;

	ret = btintel_set_event_mask(hdev, debug);

	err = btintel_exit_mfg(hdev, false, false);
	if (err)
		return err;

	return ret;
}
EXPORT_SYMBOL_GPL(btintel_set_event_mask_mfg);

/* ------- REGMAP IBT SUPPORT ------- */

#define IBT_REG_MODE_8BIT  0x00
#define IBT_REG_MODE_16BIT 0x01
#define IBT_REG_MODE_32BIT 0x02

struct regmap_ibt_context {
	struct hci_dev *hdev;
	__u16 op_write;
	__u16 op_read;
};

struct ibt_cp_reg_access {
	__le32  addr;
	__u8    mode;
	__u8    len;
	__u8    data[0];
} __packed;

struct ibt_rp_reg_access {
	__u8    status;
	__le32  addr;
	__u8    data[0];
} __packed;

static int regmap_ibt_read(void *context, const void *addr, size_t reg_size,
			   void *val, size_t val_size)
{
	struct regmap_ibt_context *ctx = context;
	struct ibt_cp_reg_access cp;
	struct ibt_rp_reg_access *rp;
	struct sk_buff *skb;
	int err = 0;

	if (reg_size != sizeof(__le32))
		return -EINVAL;

	switch (val_size) {
	case 1:
		cp.mode = IBT_REG_MODE_8BIT;
		break;
	case 2:
		cp.mode = IBT_REG_MODE_16BIT;
		break;
	case 4:
		cp.mode = IBT_REG_MODE_32BIT;
		break;
	default:
		return -EINVAL;
	}

	/* regmap provides a little-endian formatted addr */
	cp.addr = *(__le32 *)addr;
	cp.len = val_size;

	bt_dev_dbg(ctx->hdev, "Register (0x%x) read", le32_to_cpu(cp.addr));

	skb = hci_cmd_sync(ctx->hdev, ctx->op_read, sizeof(cp), &cp,
			   HCI_CMD_TIMEOUT);
	if (IS_ERR(skb)) {
		err = PTR_ERR(skb);
		bt_dev_err(ctx->hdev, "regmap: Register (0x%x) read error (%d)",
			   le32_to_cpu(cp.addr), err);
		return err;
	}

	if (skb->len != sizeof(*rp) + val_size) {
		bt_dev_err(ctx->hdev, "regmap: Register (0x%x) read error, bad len",
			   le32_to_cpu(cp.addr));
		err = -EINVAL;
		goto done;
	}

	rp = (struct ibt_rp_reg_access *)skb->data;

	if (rp->addr != cp.addr) {
		bt_dev_err(ctx->hdev, "regmap: Register (0x%x) read error, bad addr",
			   le32_to_cpu(rp->addr));
		err = -EINVAL;
		goto done;
	}

	memcpy(val, rp->data, val_size);

done:
	kfree_skb(skb);
	return err;
}

static int regmap_ibt_gather_write(void *context,
				   const void *addr, size_t reg_size,
				   const void *val, size_t val_size)
{
	struct regmap_ibt_context *ctx = context;
	struct ibt_cp_reg_access *cp;
	struct sk_buff *skb;
	int plen = sizeof(*cp) + val_size;
	u8 mode;
	int err = 0;

	if (reg_size != sizeof(__le32))
		return -EINVAL;

	switch (val_size) {
	case 1:
		mode = IBT_REG_MODE_8BIT;
		break;
	case 2:
		mode = IBT_REG_MODE_16BIT;
		break;
	case 4:
		mode = IBT_REG_MODE_32BIT;
		break;
	default:
		return -EINVAL;
	}

	cp = kmalloc(plen, GFP_KERNEL);
	if (!cp)
		return -ENOMEM;

	/* regmap provides a little-endian formatted addr/value */
	cp->addr = *(__le32 *)addr;
	cp->mode = mode;
	cp->len = val_size;
	memcpy(&cp->data, val, val_size);

	bt_dev_dbg(ctx->hdev, "Register (0x%x) write", le32_to_cpu(cp->addr));

	skb = hci_cmd_sync(ctx->hdev, ctx->op_write, plen, cp, HCI_CMD_TIMEOUT);
	if (IS_ERR(skb)) {
		err = PTR_ERR(skb);
		bt_dev_err(ctx->hdev, "regmap: Register (0x%x) write error (%d)",
			   le32_to_cpu(cp->addr), err);
		goto done;
	}
	kfree_skb(skb);

done:
	kfree(cp);
	return err;
}

static int regmap_ibt_write(void *context, const void *data, size_t count)
{
	/* data contains register+value, since we only support 32bit addr,
	 * minimum data size is 4 bytes.
	 */
	if (WARN_ONCE(count < 4, "Invalid register access"))
		return -EINVAL;

	return regmap_ibt_gather_write(context, data, 4, data + 4, count - 4);
}

static void regmap_ibt_free_context(void *context)
{
	kfree(context);
}

static struct regmap_bus regmap_ibt = {
	.read = regmap_ibt_read,
	.write = regmap_ibt_write,
	.gather_write = regmap_ibt_gather_write,
	.free_context = regmap_ibt_free_context,
	.reg_format_endian_default = REGMAP_ENDIAN_LITTLE,
	.val_format_endian_default = REGMAP_ENDIAN_LITTLE,
};

/* Config is the same for all register regions */
static const struct regmap_config regmap_ibt_cfg = {
	.name      = "btintel_regmap",
	.reg_bits  = 32,
	.val_bits  = 32,
};

struct regmap *btintel_regmap_init(struct hci_dev *hdev, u16 opcode_read,
				   u16 opcode_write)
{
	struct regmap_ibt_context *ctx;

	bt_dev_info(hdev, "regmap: Init R%x-W%x region", opcode_read,
		    opcode_write);

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	ctx->op_read = opcode_read;
	ctx->op_write = opcode_write;
	ctx->hdev = hdev;

	return regmap_init(&hdev->dev, &regmap_ibt, ctx, &regmap_ibt_cfg);
}
EXPORT_SYMBOL_GPL(btintel_regmap_init);

MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
MODULE_DESCRIPTION("Bluetooth support for Intel devices ver " VERSION);
MODULE_VERSION(VERSION);
MODULE_LICENSE("GPL");
MODULE_FIRMWARE("intel/ibt-11-5.sfi");
MODULE_FIRMWARE("intel/ibt-11-5.ddc");
l opt">; vif->rx.sring->req_event = prod + 1; /* Make sure event is visible before we check prod * again. */ mb(); } while (vif->rx.sring->req_prod != prod); return false; } /* * Returns true if we should start a new receive buffer instead of * adding 'size' bytes to a buffer which currently contains 'offset' * bytes. */ static bool start_new_rx_buffer(int offset, unsigned long size, int head) { /* simple case: we have completely filled the current buffer. */ if (offset == MAX_BUFFER_OFFSET) return true; /* * complex case: start a fresh buffer if the current frag * would overflow the current buffer but only if: * (i) this frag would fit completely in the next buffer * and (ii) there is already some data in the current buffer * and (iii) this is not the head buffer. * * Where: * - (i) stops us splitting a frag into two copies * unless the frag is too large for a single buffer. * - (ii) stops us from leaving a buffer pointlessly empty. * - (iii) stops us leaving the first buffer * empty. Strictly speaking this is already covered * by (ii) but is explicitly checked because * netfront relies on the first buffer being * non-empty and can crash otherwise. * * This means we will effectively linearise small * frags but do not needlessly split large buffers * into multiple copies tend to give large frags their * own buffers as before. */ if ((offset + size > MAX_BUFFER_OFFSET) && (size <= MAX_BUFFER_OFFSET) && offset && !head) return true; return false; } struct netrx_pending_operations { unsigned copy_prod, copy_cons; unsigned meta_prod, meta_cons; struct gnttab_copy *copy; struct xenvif_rx_meta *meta; int copy_off; grant_ref_t copy_gref; }; static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif, struct netrx_pending_operations *npo) { struct xenvif_rx_meta *meta; struct xen_netif_rx_request *req; req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); meta = npo->meta + npo->meta_prod++; meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; meta->gso_size = 0; meta->size = 0; meta->id = req->id; npo->copy_off = 0; npo->copy_gref = req->gref; return meta; } /* * Set up the grant operations for this fragment. If it's a flipping * interface, we also set up the unmap request from here. */ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, struct netrx_pending_operations *npo, struct page *page, unsigned long size, unsigned long offset, int *head) { struct gnttab_copy *copy_gop; struct xenvif_rx_meta *meta; unsigned long bytes; int gso_type; /* Data must not cross a page boundary. */ BUG_ON(size + offset > PAGE_SIZE<<compound_order(page)); meta = npo->meta + npo->meta_prod - 1; /* Skip unused frames from start of page */ page += offset >> PAGE_SHIFT; offset &= ~PAGE_MASK; while (size > 0) { BUG_ON(offset >= PAGE_SIZE); BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); bytes = PAGE_SIZE - offset; if (bytes > size) bytes = size; if (start_new_rx_buffer(npo->copy_off, bytes, *head)) { /* * Netfront requires there to be some data in the head * buffer. */ BUG_ON(*head); meta = get_next_rx_buffer(vif, npo); } if (npo->copy_off + bytes > MAX_BUFFER_OFFSET) bytes = MAX_BUFFER_OFFSET - npo->copy_off; copy_gop = npo->copy + npo->copy_prod++; copy_gop->flags = GNTCOPY_dest_gref; copy_gop->len = bytes; copy_gop->source.domid = DOMID_SELF; copy_gop->source.u.gmfn = virt_to_mfn(page_address(page)); copy_gop->source.offset = offset; copy_gop->dest.domid = vif->domid; copy_gop->dest.offset = npo->copy_off; copy_gop->dest.u.ref = npo->copy_gref; npo->copy_off += bytes; meta->size += bytes; offset += bytes; size -= bytes; /* Next frame */ if (offset == PAGE_SIZE && size) { BUG_ON(!PageCompound(page)); page++; offset = 0; } /* Leave a gap for the GSO descriptor. */ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) gso_type = XEN_NETIF_GSO_TYPE_TCPV4; else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) gso_type = XEN_NETIF_GSO_TYPE_TCPV6; else gso_type = XEN_NETIF_GSO_TYPE_NONE; if (*head && ((1 << gso_type) & vif->gso_mask)) vif->rx.req_cons++; *head = 0; /* There must be something in this buffer now. */ } } /* * Prepare an SKB to be transmitted to the frontend. * * This function is responsible for allocating grant operations, meta * structures, etc. * * It returns the number of meta structures consumed. The number of * ring slots used is always equal to the number of meta slots used * plus the number of GSO descriptors used. Currently, we use either * zero GSO descriptors (for non-GSO packets) or one descriptor (for * frontend-side LRO). */ static int xenvif_gop_skb(struct sk_buff *skb, struct netrx_pending_operations *npo) { struct xenvif *vif = netdev_priv(skb->dev); int nr_frags = skb_shinfo(skb)->nr_frags; int i; struct xen_netif_rx_request *req; struct xenvif_rx_meta *meta; unsigned char *data; int head = 1; int old_meta_prod; int gso_type; int gso_size; old_meta_prod = npo->meta_prod; if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { gso_type = XEN_NETIF_GSO_TYPE_TCPV4; gso_size = skb_shinfo(skb)->gso_size; } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { gso_type = XEN_NETIF_GSO_TYPE_TCPV6; gso_size = skb_shinfo(skb)->gso_size; } else { gso_type = XEN_NETIF_GSO_TYPE_NONE; gso_size = 0; } /* Set up a GSO prefix descriptor, if necessary */ if ((1 << gso_type) & vif->gso_prefix_mask) { req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); meta = npo->meta + npo->meta_prod++; meta->gso_type = gso_type; meta->gso_size = gso_size; meta->size = 0; meta->id = req->id; } req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); meta = npo->meta + npo->meta_prod++; if ((1 << gso_type) & vif->gso_mask) { meta->gso_type = gso_type; meta->gso_size = gso_size; } else { meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; meta->gso_size = 0; } meta->size = 0; meta->id = req->id; npo->copy_off = 0; npo->copy_gref = req->gref; data = skb->data; while (data < skb_tail_pointer(skb)) { unsigned int offset = offset_in_page(data); unsigned int len = PAGE_SIZE - offset; if (data + len > skb_tail_pointer(skb)) len = skb_tail_pointer(skb) - data; xenvif_gop_frag_copy(vif, skb, npo, virt_to_page(data), len, offset, &head); data += len; } for (i = 0; i < nr_frags; i++) { xenvif_gop_frag_copy(vif, skb, npo, skb_frag_page(&skb_shinfo(skb)->frags[i]), skb_frag_size(&skb_shinfo(skb)->frags[i]), skb_shinfo(skb)->frags[i].page_offset, &head); } return npo->meta_prod - old_meta_prod; } /* * This is a twin to xenvif_gop_skb. Assume that xenvif_gop_skb was * used to set up the operations on the top of * netrx_pending_operations, which have since been done. Check that * they didn't give any errors and advance over them. */ static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots, struct netrx_pending_operations *npo) { struct gnttab_copy *copy_op; int status = XEN_NETIF_RSP_OKAY; int i; for (i = 0; i < nr_meta_slots; i++) { copy_op = npo->copy + npo->copy_cons++; if (copy_op->status != GNTST_okay) { netdev_dbg(vif->dev, "Bad status %d from copy to DOM%d.\n", copy_op->status, vif->domid); status = XEN_NETIF_RSP_ERROR; } } return status; } static void xenvif_add_frag_responses(struct xenvif *vif, int status, struct xenvif_rx_meta *meta, int nr_meta_slots) { int i; unsigned long offset; /* No fragments used */ if (nr_meta_slots <= 1) return; nr_meta_slots--; for (i = 0; i < nr_meta_slots; i++) { int flags; if (i == nr_meta_slots - 1) flags = 0; else flags = XEN_NETRXF_more_data; offset = 0; make_rx_response(vif, meta[i].id, status, offset, meta[i].size, flags); } } struct skb_cb_overlay { int meta_slots_used; }; void xenvif_kick_thread(struct xenvif *vif) { wake_up(&vif->wq); } static void xenvif_rx_action(struct xenvif *vif) { s8 status; u16 flags; struct xen_netif_rx_response *resp; struct sk_buff_head rxq; struct sk_buff *skb; LIST_HEAD(notify); int ret; unsigned long offset; struct skb_cb_overlay *sco; bool need_to_notify = false; struct netrx_pending_operations npo = { .copy = vif->grant_copy_op, .meta = vif->meta, }; skb_queue_head_init(&rxq); while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) { RING_IDX max_slots_needed; int i; /* We need a cheap worse case estimate for the number of * slots we'll use. */ max_slots_needed = DIV_ROUND_UP(offset_in_page(skb->data) + skb_headlen(skb), PAGE_SIZE); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { unsigned int size; size = skb_frag_size(&skb_shinfo(skb)->frags[i]); max_slots_needed += DIV_ROUND_UP(size, PAGE_SIZE); } if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 || skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) max_slots_needed++; /* If the skb may not fit then bail out now */ if (!xenvif_rx_ring_slots_available(vif, max_slots_needed)) { skb_queue_head(&vif->rx_queue, skb); need_to_notify = true; vif->rx_last_skb_slots = max_slots_needed; break; } else vif->rx_last_skb_slots = 0; sco = (struct skb_cb_overlay *)skb->cb; sco->meta_slots_used = xenvif_gop_skb(skb, &npo); BUG_ON(sco->meta_slots_used > max_slots_needed); __skb_queue_tail(&rxq, skb); } BUG_ON(npo.meta_prod > ARRAY_SIZE(vif->meta)); if (!npo.copy_prod) goto done; BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS); gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod); while ((skb = __skb_dequeue(&rxq)) != NULL) { sco = (struct skb_cb_overlay *)skb->cb; if ((1 << vif->meta[npo.meta_cons].gso_type) & vif->gso_prefix_mask) { resp = RING_GET_RESPONSE(&vif->rx, vif->rx.rsp_prod_pvt++); resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data; resp->offset = vif->meta[npo.meta_cons].gso_size; resp->id = vif->meta[npo.meta_cons].id; resp->status = sco->meta_slots_used; npo.meta_cons++; sco->meta_slots_used--; } vif->dev->stats.tx_bytes += skb->len; vif->dev->stats.tx_packets++; status = xenvif_check_gop(vif, sco->meta_slots_used, &npo); if (sco->meta_slots_used == 1) flags = 0; else flags = XEN_NETRXF_more_data; if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated; else if (skb->ip_summed == CHECKSUM_UNNECESSARY) /* remote but checksummed. */ flags |= XEN_NETRXF_data_validated; offset = 0; resp = make_rx_response(vif, vif->meta[npo.meta_cons].id, status, offset, vif->meta[npo.meta_cons].size, flags); if ((1 << vif->meta[npo.meta_cons].gso_type) & vif->gso_mask) { struct xen_netif_extra_info *gso = (struct xen_netif_extra_info *) RING_GET_RESPONSE(&vif->rx, vif->rx.rsp_prod_pvt++); resp->flags |= XEN_NETRXF_extra_info; gso->u.gso.type = vif->meta[npo.meta_cons].gso_type; gso->u.gso.size = vif->meta[npo.meta_cons].gso_size; gso->u.gso.pad = 0; gso->u.gso.features = 0; gso->type = XEN_NETIF_EXTRA_TYPE_GSO; gso->flags = 0; } xenvif_add_frag_responses(vif, status, vif->meta + npo.meta_cons + 1, sco->meta_slots_used); RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret); need_to_notify |= !!ret; npo.meta_cons += sco->meta_slots_used; dev_kfree_skb(skb); } done: if (need_to_notify) notify_remote_via_irq(vif->rx_irq); } void xenvif_check_rx_xenvif(struct xenvif *vif) { int more_to_do; RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do); if (more_to_do) napi_schedule(&vif->napi); } static void tx_add_credit(struct xenvif *vif) { unsigned long max_burst, max_credit; /* * Allow a burst big enough to transmit a jumbo packet of up to 128kB. * Otherwise the interface can seize up due to insufficient credit. */ max_burst = RING_GET_REQUEST(&vif->tx, vif->tx.req_cons)->size; max_burst = min(max_burst, 131072UL); max_burst = max(max_burst, vif->credit_bytes); /* Take care that adding a new chunk of credit doesn't wrap to zero. */ max_credit = vif->remaining_credit + vif->credit_bytes; if (max_credit < vif->remaining_credit) max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */ vif->remaining_credit = min(max_credit, max_burst); } static void tx_credit_callback(unsigned long data) { struct xenvif *vif = (struct xenvif *)data; tx_add_credit(vif); xenvif_check_rx_xenvif(vif); } static void xenvif_tx_err(struct xenvif *vif, struct xen_netif_tx_request *txp, RING_IDX end) { RING_IDX cons = vif->tx.req_cons; do { make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR); if (cons == end) break; txp = RING_GET_REQUEST(&vif->tx, cons++); } while (1); vif->tx.req_cons = cons; } static void xenvif_fatal_tx_err(struct xenvif *vif) { netdev_err(vif->dev, "fatal error; disabling device\n"); xenvif_carrier_off(vif); } static int xenvif_count_requests(struct xenvif *vif, struct xen_netif_tx_request *first, struct xen_netif_tx_request *txp, int work_to_do) { RING_IDX cons = vif->tx.req_cons; int slots = 0; int drop_err = 0; int more_data; if (!(first->flags & XEN_NETTXF_more_data)) return 0; do { struct xen_netif_tx_request dropped_tx = { 0 }; if (slots >= work_to_do) { netdev_err(vif->dev, "Asked for %d slots but exceeds this limit\n", work_to_do); xenvif_fatal_tx_err(vif); return -ENODATA; } /* This guest is really using too many slots and * considered malicious. */ if (unlikely(slots >= fatal_skb_slots)) { netdev_err(vif->dev, "Malicious frontend using %d slots, threshold %u\n", slots, fatal_skb_slots); xenvif_fatal_tx_err(vif); return -E2BIG; } /* Xen network protocol had implicit dependency on * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to * the historical MAX_SKB_FRAGS value 18 to honor the * same behavior as before. Any packet using more than * 18 slots but less than fatal_skb_slots slots is * dropped */ if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) { if (net_ratelimit()) netdev_dbg(vif->dev, "Too many slots (%d) exceeding limit (%d), dropping packet\n", slots, XEN_NETBK_LEGACY_SLOTS_MAX); drop_err = -E2BIG; } if (drop_err) txp = &dropped_tx; memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots), sizeof(*txp)); /* If the guest submitted a frame >= 64 KiB then * first->size overflowed and following slots will * appear to be larger than the frame. * * This cannot be fatal error as there are buggy * frontends that do this. * * Consume all slots and drop the packet. */ if (!drop_err && txp->size > first->size) { if (net_ratelimit()) netdev_dbg(vif->dev, "Invalid tx request, slot size %u > remaining size %u\n", txp->size, first->size); drop_err = -EIO; } first->size -= txp->size; slots++; if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n", txp->offset, txp->size); xenvif_fatal_tx_err(vif); return -EINVAL; } more_data = txp->flags & XEN_NETTXF_more_data; if (!drop_err) txp++; } while (more_data); if (drop_err) { xenvif_tx_err(vif, first, cons + slots); return drop_err; } return slots; } static struct page *xenvif_alloc_page(struct xenvif *vif, u16 pending_idx) { struct page *page; page = alloc_page(GFP_ATOMIC|__GFP_COLD); if (!page) return NULL; vif->mmap_pages[pending_idx] = page; return page; } static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif, struct sk_buff *skb, struct xen_netif_tx_request *txp, struct gnttab_copy *gop) { struct skb_shared_info *shinfo = skb_shinfo(skb); skb_frag_t *frags = shinfo->frags; u16 pending_idx = *((u16 *)skb->data); u16 head_idx = 0; int slot, start; struct page *page; pending_ring_idx_t index, start_idx = 0; uint16_t dst_offset; unsigned int nr_slots; struct pending_tx_info *first = NULL; /* At this point shinfo->nr_frags is in fact the number of * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. */ nr_slots = shinfo->nr_frags; /* Skip first skb fragment if it is on same page as header fragment. */ start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); /* Coalesce tx requests, at this point the packet passed in * should be <= 64K. Any packets larger than 64K have been * handled in xenvif_count_requests(). */ for (shinfo->nr_frags = slot = start; slot < nr_slots; shinfo->nr_frags++) { struct pending_tx_info *pending_tx_info = vif->pending_tx_info; page = alloc_page(GFP_ATOMIC|__GFP_COLD); if (!page) goto err; dst_offset = 0; first = NULL; while (dst_offset < PAGE_SIZE && slot < nr_slots) { gop->flags = GNTCOPY_source_gref; gop->source.u.ref = txp->gref; gop->source.domid = vif->domid; gop->source.offset = txp->offset; gop->dest.domid = DOMID_SELF; gop->dest.offset = dst_offset; gop->dest.u.gmfn = virt_to_mfn(page_address(page)); if (dst_offset + txp->size > PAGE_SIZE) { /* This page can only merge a portion * of tx request. Do not increment any * pointer / counter here. The txp * will be dealt with in future * rounds, eventually hitting the * `else` branch. */ gop->len = PAGE_SIZE - dst_offset; txp->offset += gop->len; txp->size -= gop->len; dst_offset += gop->len; /* quit loop */ } else { /* This tx request can be merged in the page */ gop->len = txp->size; dst_offset += gop->len; index = pending_index(vif->pending_cons++); pending_idx = vif->pending_ring[index]; memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp)); /* Poison these fields, corresponding * fields for head tx req will be set * to correct values after the loop. */ vif->mmap_pages[pending_idx] = (void *)(~0UL); pending_tx_info[pending_idx].head = INVALID_PENDING_RING_IDX; if (!first) { first = &pending_tx_info[pending_idx]; start_idx = index; head_idx = pending_idx; } txp++; slot++; } gop++; } first->req.offset = 0; first->req.size = dst_offset; first->head = start_idx; vif->mmap_pages[head_idx] = page; frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx); } BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS); return gop; err: /* Unwind, freeing all pages and sending error responses. */ while (shinfo->nr_frags-- > start) { xenvif_idx_release(vif, frag_get_pending_idx(&frags[shinfo->nr_frags]), XEN_NETIF_RSP_ERROR); } /* The head too, if necessary. */ if (start) xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); return NULL; } static int xenvif_tx_check_gop(struct xenvif *vif, struct sk_buff *skb, struct gnttab_copy **gopp) { struct gnttab_copy *gop = *gopp; u16 pending_idx = *((u16 *)skb->data); struct skb_shared_info *shinfo = skb_shinfo(skb); struct pending_tx_info *tx_info; int nr_frags = shinfo->nr_frags; int i, err, start; u16 peek; /* peek into next tx request */ /* Check status of header. */ err = gop->status; if (unlikely(err)) xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); /* Skip first skb fragment if it is on same page as header fragment. */ start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); for (i = start; i < nr_frags; i++) { int j, newerr; pending_ring_idx_t head; pending_idx = frag_get_pending_idx(&shinfo->frags[i]); tx_info = &vif->pending_tx_info[pending_idx]; head = tx_info->head; /* Check error status: if okay then remember grant handle. */ do { newerr = (++gop)->status; if (newerr) break; peek = vif->pending_ring[pending_index(++head)]; } while (!pending_tx_is_head(vif, peek)); if (likely(!newerr)) { /* Had a previous error? Invalidate this fragment. */ if (unlikely(err)) xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY); continue; } /* Error on this fragment: respond to client with an error. */ xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR); /* Not the first error? Preceding frags already invalidated. */ if (err) continue; /* First error: invalidate header and preceding fragments. */ pending_idx = *((u16 *)skb->data); xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY); for (j = start; j < i; j++) { pending_idx = frag_get_pending_idx(&shinfo->frags[j]); xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY); } /* Remember the error: invalidate all subsequent fragments. */ err = newerr; } *gopp = gop + 1; return err; } static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb) { struct skb_shared_info *shinfo = skb_shinfo(skb); int nr_frags = shinfo->nr_frags; int i; for (i = 0; i < nr_frags; i++) { skb_frag_t *frag = shinfo->frags + i; struct xen_netif_tx_request *txp; struct page *page; u16 pending_idx; pending_idx = frag_get_pending_idx(frag); txp = &vif->pending_tx_info[pending_idx].req; page = virt_to_page(idx_to_kaddr(vif, pending_idx)); __skb_fill_page_desc(skb, i, page, txp->offset, txp->size); skb->len += txp->size; skb->data_len += txp->size; skb->truesize += txp->size; /* Take an extra reference to offset xenvif_idx_release */ get_page(vif->mmap_pages[pending_idx]); xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY); } } static int xenvif_get_extras(struct xenvif *vif, struct xen_netif_extra_info *extras, int work_to_do) { struct xen_netif_extra_info extra; RING_IDX cons = vif->tx.req_cons; do { if (unlikely(work_to_do-- <= 0)) { netdev_err(vif->dev, "Missing extra info\n"); xenvif_fatal_tx_err(vif); return -EBADR; } memcpy(&extra, RING_GET_REQUEST(&vif->tx, cons), sizeof(extra)); if (unlikely(!extra.type || extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { vif->tx.req_cons = ++cons; netdev_err(vif->dev, "Invalid extra type: %d\n", extra.type); xenvif_fatal_tx_err(vif); return -EINVAL; } memcpy(&extras[extra.type - 1], &extra, sizeof(extra)); vif->tx.req_cons = ++cons; } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); return work_to_do; } static int xenvif_set_skb_gso(struct xenvif *vif, struct sk_buff *skb, struct xen_netif_extra_info *gso) { if (!gso->u.gso.size) { netdev_err(vif->dev, "GSO size must not be zero.\n"); xenvif_fatal_tx_err(vif); return -EINVAL; } switch (gso->u.gso.type) { case XEN_NETIF_GSO_TYPE_TCPV4: skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; break; case XEN_NETIF_GSO_TYPE_TCPV6: skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; break; default: netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type); xenvif_fatal_tx_err(vif); return -EINVAL; } skb_shinfo(skb)->gso_size = gso->u.gso.size; /* gso_segs will be calculated later */ return 0; } static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) { bool recalculate_partial_csum = false; /* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy * peers can fail to set NETRXF_csum_blank when sending a GSO * frame. In this case force the SKB to CHECKSUM_PARTIAL and * recalculate the partial checksum. */ if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) { vif->rx_gso_checksum_fixup++; skb->ip_summed = CHECKSUM_PARTIAL; recalculate_partial_csum = true; } /* A non-CHECKSUM_PARTIAL SKB does not require setup. */ if (skb->ip_summed != CHECKSUM_PARTIAL) return 0; return skb_checksum_setup(skb, recalculate_partial_csum); } static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) { u64 now = get_jiffies_64(); u64 next_credit = vif->credit_window_start + msecs_to_jiffies(vif->credit_usec / 1000); /* Timer could already be pending in rare cases. */ if (timer_pending(&vif->credit_timeout)) return true; /* Passed the point where we can replenish credit? */ if (time_after_eq64(now, next_credit)) { vif->credit_window_start = now; tx_add_credit(vif); } /* Still too big to send right now? Set a callback. */ if (size > vif->remaining_credit) { vif->credit_timeout.data = (unsigned long)vif; vif->credit_timeout.function = tx_credit_callback; mod_timer(&vif->credit_timeout, next_credit); vif->credit_window_start = next_credit; return true; } return false; } static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget) { struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop; struct sk_buff *skb; int ret; while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX < MAX_PENDING_REQS) && (skb_queue_len(&vif->tx_queue) < budget)) { struct xen_netif_tx_request txreq; struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX]; struct page *page; struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; u16 pending_idx; RING_IDX idx; int work_to_do; unsigned int data_len; pending_ring_idx_t index; if (vif->tx.sring->req_prod - vif->tx.req_cons > XEN_NETIF_TX_RING_SIZE) { netdev_err(vif->dev, "Impossible number of requests. " "req_prod %d, req_cons %d, size %ld\n", vif->tx.sring->req_prod, vif->tx.req_cons, XEN_NETIF_TX_RING_SIZE); xenvif_fatal_tx_err(vif); continue; } work_to_do = RING_HAS_UNCONSUMED_REQUESTS(&vif->tx); if (!work_to_do) break; idx = vif->tx.req_cons; rmb(); /* Ensure that we see the request before we copy it. */ memcpy(&txreq, RING_GET_REQUEST(&vif->tx, idx), sizeof(txreq)); /* Credit-based scheduling. */ if (txreq.size > vif->remaining_credit && tx_credit_exceeded(vif, txreq.size)) break; vif->remaining_credit -= txreq.size; work_to_do--; vif->tx.req_cons = ++idx; memset(extras, 0, sizeof(extras)); if (txreq.flags & XEN_NETTXF_extra_info) { work_to_do = xenvif_get_extras(vif, extras, work_to_do); idx = vif->tx.req_cons; if (unlikely(work_to_do < 0)) break; } ret = xenvif_count_requests(vif, &txreq, txfrags, work_to_do); if (unlikely(ret < 0)) break; idx += ret; if (unlikely(txreq.size < ETH_HLEN)) { netdev_dbg(vif->dev, "Bad packet size: %d\n", txreq.size); xenvif_tx_err(vif, &txreq, idx); break; } /* No crossing a page as the payload mustn't fragment. */ if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { netdev_err(vif->dev, "txreq.offset: %x, size: %u, end: %lu\n", txreq.offset, txreq.size, (txreq.offset&~PAGE_MASK) + txreq.size); xenvif_fatal_tx_err(vif); break; } index = pending_index(vif->pending_cons); pending_idx = vif->pending_ring[index]; data_len = (txreq.size > PKT_PROT_LEN && ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? PKT_PROT_LEN : txreq.size; skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(skb == NULL)) { netdev_dbg(vif->dev, "Can't allocate a skb in start_xmit.\n"); xenvif_tx_err(vif, &txreq, idx); break; } /* Packets passed to netif_rx() must have some headroom. */ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { struct xen_netif_extra_info *gso; gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; if (xenvif_set_skb_gso(vif, skb, gso)) { /* Failure in xenvif_set_skb_gso is fatal. */ kfree_skb(skb); break; } } /* XXX could copy straight to head */ page = xenvif_alloc_page(vif, pending_idx); if (!page) { kfree_skb(skb); xenvif_tx_err(vif, &txreq, idx); break; } gop->source.u.ref = txreq.gref; gop->source.domid = vif->domid; gop->source.offset = txreq.offset; gop->dest.u.gmfn = virt_to_mfn(page_address(page)); gop->dest.domid = DOMID_SELF; gop->dest.offset = txreq.offset; gop->len = txreq.size; gop->flags = GNTCOPY_source_gref; gop++; memcpy(&vif->pending_tx_info[pending_idx].req, &txreq, sizeof(txreq)); vif->pending_tx_info[pending_idx].head = index; *((u16 *)skb->data) = pending_idx; __skb_put(skb, data_len); skb_shinfo(skb)->nr_frags = ret; if (data_len < txreq.size) { skb_shinfo(skb)->nr_frags++; frag_set_pending_idx(&skb_shinfo(skb)->frags[0], pending_idx); } else { frag_set_pending_idx(&skb_shinfo(skb)->frags[0], INVALID_PENDING_IDX); } vif->pending_cons++; request_gop = xenvif_get_requests(vif, skb, txfrags, gop); if (request_gop == NULL) { kfree_skb(skb); xenvif_tx_err(vif, &txreq, idx); break; } gop = request_gop; __skb_queue_tail(&vif->tx_queue, skb); vif->tx.req_cons = idx; if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops)) break; } return gop - vif->tx_copy_ops; } static int xenvif_tx_submit(struct xenvif *vif) { struct gnttab_copy *gop = vif->tx_copy_ops; struct sk_buff *skb; int work_done = 0; while ((skb = __skb_dequeue(&vif->tx_queue)) != NULL) { struct xen_netif_tx_request *txp; u16 pending_idx; unsigned data_len; pending_idx = *((u16 *)skb->data); txp = &vif->pending_tx_info[pending_idx].req; /* Check the remap error code. */ if (unlikely(xenvif_tx_check_gop(vif, skb, &gop))) { netdev_dbg(vif->dev, "netback grant failed.\n"); skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); continue; } data_len = skb->len; memcpy(skb->data, (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset), data_len); if (data_len < txp->size) { /* Append the packet payload as a fragment. */ txp->offset += data_len; txp->size -= data_len; } else { /* Schedule a response immediately. */ xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY); } if (txp->flags & XEN_NETTXF_csum_blank) skb->ip_summed = CHECKSUM_PARTIAL; else if (txp->flags & XEN_NETTXF_data_validated) skb->ip_summed = CHECKSUM_UNNECESSARY; xenvif_fill_frags(vif, skb); if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) { int target = min_t(int, skb->len, PKT_PROT_LEN); __pskb_pull_tail(skb, target - skb_headlen(skb)); } skb->dev = vif->dev; skb->protocol = eth_type_trans(skb, skb->dev); skb_reset_network_header(skb); if (checksum_setup(vif, skb)) { netdev_dbg(vif->dev, "Can't setup checksum in net_tx_action\n"); kfree_skb(skb); continue; } skb_probe_transport_header(skb, 0); /* If the packet is GSO then we will have just set up the * transport header offset in checksum_setup so it's now * straightforward to calculate gso_segs. */ if (skb_is_gso(skb)) { int mss = skb_shinfo(skb)->gso_size; int hdrlen = skb_transport_header(skb) - skb_mac_header(skb) + tcp_hdrlen(skb); skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len - hdrlen, mss); } vif->dev->stats.rx_bytes += skb->len; vif->dev->stats.rx_packets++; work_done++; netif_receive_skb(skb); } return work_done; } /* Called after netfront has transmitted */ int xenvif_tx_action(struct xenvif *vif, int budget) { unsigned nr_gops; int work_done; if (unlikely(!tx_work_todo(vif))) return 0; nr_gops = xenvif_tx_build_gops(vif, budget); if (nr_gops == 0) return 0; gnttab_batch_copy(vif->tx_copy_ops, nr_gops); work_done = xenvif_tx_submit(vif); return work_done; } static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx, u8 status) { struct pending_tx_info *pending_tx_info; pending_ring_idx_t head; u16 peek; /* peek into next tx request */ BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL)); /* Already complete? */ if (vif->mmap_pages[pending_idx] == NULL) return; pending_tx_info = &vif->pending_tx_info[pending_idx]; head = pending_tx_info->head; BUG_ON(!pending_tx_is_head(vif, head)); BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx); do { pending_ring_idx_t index; pending_ring_idx_t idx = pending_index(head); u16 info_idx = vif->pending_ring[idx]; pending_tx_info = &vif->pending_tx_info[info_idx]; make_tx_response(vif, &pending_tx_info->req, status); /* Setting any number other than * INVALID_PENDING_RING_IDX indicates this slot is * starting a new packet / ending a previous packet. */ pending_tx_info->head = 0; index = pending_index(vif->pending_prod++); vif->pending_ring[index] = vif->pending_ring[info_idx]; peek = vif->pending_ring[pending_index(++head)]; } while (!pending_tx_is_head(vif, peek)); put_page(vif->mmap_pages[pending_idx]); vif->mmap_pages[pending_idx] = NULL; } static void make_tx_response(struct xenvif *vif, struct xen_netif_tx_request *txp, s8 st) { RING_IDX i = vif->tx.rsp_prod_pvt; struct xen_netif_tx_response *resp; int notify; resp = RING_GET_RESPONSE(&vif->tx, i); resp->id = txp->id; resp->status = st; if (txp->flags & XEN_NETTXF_extra_info) RING_GET_RESPONSE(&vif->tx, ++i)->status = XEN_NETIF_RSP_NULL; vif->tx.rsp_prod_pvt = ++i; RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify); if (notify) notify_remote_via_irq(vif->tx_irq); } static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif, u16 id, s8 st, u16 offset, u16 size, u16 flags) { RING_IDX i = vif->rx.rsp_prod_pvt; struct xen_netif_rx_response *resp; resp = RING_GET_RESPONSE(&vif->rx, i); resp->offset = offset; resp->flags = flags; resp->id = id; resp->status = (s16)size; if (st < 0) resp->status = (s16)st; vif->rx.rsp_prod_pvt = ++i; return resp; } static inline int rx_work_todo(struct xenvif *vif) { return !skb_queue_empty(&vif->rx_queue) && xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots); } static inline int tx_work_todo(struct xenvif *vif) { if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) && (nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX < MAX_PENDING_REQS)) return 1; return 0; } void xenvif_unmap_frontend_rings(struct xenvif *vif) { if (vif->tx.sring) xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif), vif->tx.sring); if (vif->rx.sring) xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif), vif->rx.sring); } int xenvif_map_frontend_rings(struct xenvif *vif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref) { void *addr; struct xen_netif_tx_sring *txs; struct xen_netif_rx_sring *rxs; int err = -ENOMEM; err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), tx_ring_ref, &addr); if (err) goto err; txs = (struct xen_netif_tx_sring *)addr; BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE); err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), rx_ring_ref, &addr); if (err) goto err; rxs = (struct xen_netif_rx_sring *)addr; BACK_RING_INIT(&vif->rx, rxs, PAGE_SIZE); return 0; err: xenvif_unmap_frontend_rings(vif); return err; } void xenvif_stop_queue(struct xenvif *vif) { if (!vif->can_queue) return; netif_stop_queue(vif->dev); } static void xenvif_start_queue(struct xenvif *vif) { if (xenvif_schedulable(vif)) netif_wake_queue(vif->dev); } int xenvif_kthread(void *data) { struct xenvif *vif = data; struct sk_buff *skb; while (!kthread_should_stop()) { wait_event_interruptible(vif->wq, rx_work_todo(vif) || kthread_should_stop()); if (kthread_should_stop()) break; if (!skb_queue_empty(&vif->rx_queue)) xenvif_rx_action(vif); if (skb_queue_empty(&vif->rx_queue) && netif_queue_stopped(vif->dev)) xenvif_start_queue(vif); cond_resched(); } /* Bin any remaining skbs */ while ((skb = skb_dequeue(&vif->rx_queue)) != NULL) dev_kfree_skb(skb); return 0; } static int __init netback_init(void) { int rc = 0; if (!xen_domain()) return -ENODEV; if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) { pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX); fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX; } rc = xenvif_xenbus_init(); if (rc) goto failed_init; return 0; failed_init: return rc; } module_init(netback_init); static void __exit netback_fini(void) { xenvif_xenbus_fini(); } module_exit(netback_fini); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS("xen-backend:vif");