path: root/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
/*
 * Copyright (C) 2015 Cavium, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 */

/* ETHTOOL Support for VNIC_VF Device */
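/* These ops back the standard ethtool commands; e.g. (assuming an
 * interface name of eth0) `ethtool -S eth0` reads the stats exposed by
 * get_strings/get_ethtool_stats, `ethtool -d eth0` dumps registers via
 * get_regs, and `ethtool -L eth0 rx 4 tx 4` exercises set_channels.
 */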

#include <linux/pci.h>

#include "nic_reg.h"
#include "nic.h"
#include "nicvf_queues.h"
#include "q_struct.h"
#include "thunder_bgx.h"

#define DRV_NAME	"thunder-nicvf"
#define DRV_VERSION     "1.0"

struct nicvf_stat {
	char name[ETH_GSTRING_LEN];
	unsigned int index;
};
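
/* Each stats structure is laid out as consecutive u64 counters, so a
 * field's index is its byte offset divided by sizeof(u64) (e.g. the
 * second u64 member gets index 1), and the stringified field name
 * doubles as the name reported to ethtool.
 */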

#define NICVF_HW_STAT(stat) { \
	.name = #stat, \
	.index = offsetof(struct nicvf_hw_stats, stat) / sizeof(u64), \
}

#define NICVF_DRV_STAT(stat) { \
	.name = #stat, \
	.index = offsetof(struct nicvf_drv_stats, stat) / sizeof(u64), \
}

static const struct nicvf_stat nicvf_hw_stats[] = {
	NICVF_HW_STAT(rx_bytes),
	NICVF_HW_STAT(rx_ucast_frames),
	NICVF_HW_STAT(rx_bcast_frames),
	NICVF_HW_STAT(rx_mcast_frames),
	NICVF_HW_STAT(rx_fcs_errors),
	NICVF_HW_STAT(rx_l2_errors),
	NICVF_HW_STAT(rx_drop_red),
	NICVF_HW_STAT(rx_drop_red_bytes),
	NICVF_HW_STAT(rx_drop_overrun),
	NICVF_HW_STAT(rx_drop_overrun_bytes),
	NICVF_HW_STAT(rx_drop_bcast),
	NICVF_HW_STAT(rx_drop_mcast),
	NICVF_HW_STAT(rx_drop_l3_bcast),
	NICVF_HW_STAT(rx_drop_l3_mcast),
	NICVF_HW_STAT(rx_bgx_truncated_pkts),
	NICVF_HW_STAT(rx_jabber_errs),
	NICVF_HW_STAT(rx_fcs_errs),
	NICVF_HW_STAT(rx_bgx_errs),
	NICVF_HW_STAT(rx_prel2_errs),
	NICVF_HW_STAT(rx_l2_hdr_malformed),
	NICVF_HW_STAT(rx_oversize),
	NICVF_HW_STAT(rx_undersize),
	NICVF_HW_STAT(rx_l2_len_mismatch),
	NICVF_HW_STAT(rx_l2_pclp),
	NICVF_HW_STAT(rx_ip_ver_errs),
	NICVF_HW_STAT(rx_ip_csum_errs),
	NICVF_HW_STAT(rx_ip_hdr_malformed),
	NICVF_HW_STAT(rx_ip_payload_malformed),
	NICVF_HW_STAT(rx_ip_ttl_errs),
	NICVF_HW_STAT(rx_l3_pclp),
	NICVF_HW_STAT(rx_l4_malformed),
	NICVF_HW_STAT(rx_l4_csum_errs),
	NICVF_HW_STAT(rx_udp_len_errs),
	NICVF_HW_STAT(rx_l4_port_errs),
	NICVF_HW_STAT(rx_tcp_flag_errs),
	NICVF_HW_STAT(rx_tcp_offset_errs),
	NICVF_HW_STAT(rx_l4_pclp),
	NICVF_HW_STAT(rx_truncated_pkts),
	NICVF_HW_STAT(tx_bytes_ok),
	NICVF_HW_STAT(tx_ucast_frames_ok),
	NICVF_HW_STAT(tx_bcast_frames_ok),
	NICVF_HW_STAT(tx_mcast_frames_ok),
};

static const struct nicvf_stat nicvf_drv_stats[] = {
	NICVF_DRV_STAT(rx_frames_ok),
	NICVF_DRV_STAT(rx_frames_64),
	NICVF_DRV_STAT(rx_frames_127),
	NICVF_DRV_STAT(rx_frames_255),
	NICVF_DRV_STAT(rx_frames_511),
	NICVF_DRV_STAT(rx_frames_1023),
	NICVF_DRV_STAT(rx_frames_1518),
	NICVF_DRV_STAT(rx_frames_jumbo),
	NICVF_DRV_STAT(rx_drops),
	NICVF_DRV_STAT(rcv_buffer_alloc_failures),
	NICVF_DRV_STAT(tx_frames_ok),
	NICVF_DRV_STAT(tx_tso),
	NICVF_DRV_STAT(tx_drops),
	NICVF_DRV_STAT(tx_timeout),
	NICVF_DRV_STAT(txq_stop),
	NICVF_DRV_STAT(txq_wake),
};

static const struct nicvf_stat nicvf_queue_stats[] = {
	{ "bytes", 0 },
	{ "frames", 1 },
};

static const unsigned int nicvf_n_hw_stats = ARRAY_SIZE(nicvf_hw_stats);
static const unsigned int nicvf_n_drv_stats = ARRAY_SIZE(nicvf_drv_stats);
static const unsigned int nicvf_n_queue_stats = ARRAY_SIZE(nicvf_queue_stats);

static int nicvf_get_settings(struct net_device *netdev,
			      struct ethtool_cmd *cmd)
{
	struct nicvf *nic = netdev_priv(netdev);

	cmd->supported = 0;
	cmd->transceiver = XCVR_EXTERNAL;

	if (!nic->link_up) {
		cmd->duplex = DUPLEX_UNKNOWN;
		ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
		return 0;
	}

	if (nic->speed <= 1000) {
		cmd->port = PORT_MII;
		cmd->autoneg = AUTONEG_ENABLE;
	} else {
		cmd->port = PORT_FIBRE;
		cmd->autoneg = AUTONEG_DISABLE;
	}
	cmd->duplex = nic->duplex;
	ethtool_cmd_speed_set(cmd, nic->speed);

	return 0;
}

static u32 nicvf_get_link(struct net_device *netdev)
{
	struct nicvf *nic = netdev_priv(netdev);

	return nic->link_up;
}

static void nicvf_get_drvinfo(struct net_device *netdev,
			      struct ethtool_drvinfo *info)
{
	struct nicvf *nic = netdev_priv(netdev);

	strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
	strlcpy(info->bus_info, pci_name(nic->pdev), sizeof(info->bus_info));
}

static u32 nicvf_get_msglevel(struct net_device *netdev)
{
	struct nicvf *nic = netdev_priv(netdev);

	return nic->msg_enable;
}

static void nicvf_set_msglevel(struct net_device *netdev, u32 lvl)
{
	struct nicvf *nic = netdev_priv(netdev);

	nic->msg_enable = lvl;
}
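
/* Emit per-queue stat names such as "rxq0: bytes" or "txq3: frames".
 * For a secondary qset, start_qidx offsets the queue number so that
 * names stay unique across all qsets.
 */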

static void nicvf_get_qset_strings(struct nicvf *nic, u8 **data, int qset)
{
	int stats, qidx;
	int start_qidx = qset * MAX_RCV_QUEUES_PER_QS;

	for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) {
		for (stats = 0; stats < nicvf_n_queue_stats; stats++) {
			sprintf(*data, "rxq%d: %s", qidx + start_qidx,
				nicvf_queue_stats[stats].name);
			*data += ETH_GSTRING_LEN;
		}
	}

	for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) {
		for (stats = 0; stats < nicvf_n_queue_stats; stats++) {
			sprintf(*data, "txq%d: %s", qidx + start_qidx,
				nicvf_queue_stats[stats].name);
			*data += ETH_GSTRING_LEN;
		}
	}
}

static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
{
	struct nicvf *nic = netdev_priv(netdev);
	int stats;
	int sqs;

	if (sset != ETH_SS_STATS)
		return;

	for (stats = 0; stats < nicvf_n_hw_stats; stats++) {
		memcpy(data, nicvf_hw_stats[stats].name, ETH_GSTRING_LEN);
		data += ETH_GSTRING_LEN;
	}

	for (stats = 0; stats < nicvf_n_drv_stats; stats++) {
		memcpy(data, nicvf_drv_stats[stats].name, ETH_GSTRING_LEN);
		data += ETH_GSTRING_LEN;
	}

	nicvf_get_qset_strings(nic, &data, 0);

	for (sqs = 0; sqs < nic->sqs_count; sqs++) {
		if (!nic->snicvf[sqs])
			continue;
		nicvf_get_qset_strings(nic->snicvf[sqs], &data, sqs + 1);
	}

	for (stats = 0; stats < BGX_RX_STATS_COUNT; stats++) {
		sprintf(data, "bgx_rxstat%d: ", stats);
		data += ETH_GSTRING_LEN;
	}

	for (stats = 0; stats < BGX_TX_STATS_COUNT; stats++) {
		sprintf(data, "bgx_txstat%d: ", stats);
		data += ETH_GSTRING_LEN;
	}
}

static int nicvf_get_sset_count(struct net_device *netdev, int sset)
{
	struct nicvf *nic = netdev_priv(netdev);
	int qstats_count;
	int sqs;

	if (sset != ETH_SS_STATS)
		return -EINVAL;

	qstats_count = nicvf_n_queue_stats *
		       (nic->qs->rq_cnt + nic->qs->sq_cnt);
	for (sqs = 0; sqs < nic->sqs_count; sqs++) {
		struct nicvf *snic;

		snic = nic->snicvf[sqs];
		if (!snic)
			continue;
		qstats_count += nicvf_n_queue_stats *
				(snic->qs->rq_cnt + snic->qs->sq_cnt);
	}

	return nicvf_n_hw_stats + nicvf_n_drv_stats +
		qstats_count +
		BGX_RX_STATS_COUNT + BGX_TX_STATS_COUNT;
}

static void nicvf_get_qset_stats(struct nicvf *nic,
				 struct ethtool_stats *stats, u64 **data)
{
	int stat, qidx;

	if (!nic)
		return;

	for (qidx = 0; qidx < nic->qs->rq_cnt; qidx++) {
		nicvf_update_rq_stats(nic, qidx);
		for (stat = 0; stat < nicvf_n_queue_stats; stat++)
			*((*data)++) = ((u64 *)&nic->qs->rq[qidx].stats)
					[nicvf_queue_stats[stat].index];
	}

	for (qidx = 0; qidx < nic->qs->sq_cnt; qidx++) {
		nicvf_update_sq_stats(nic, qidx);
		for (stat = 0; stat < nicvf_n_queue_stats; stat++)
			*((*data)++) = ((u64 *)&nic->qs->sq[qidx].stats)
					[nicvf_queue_stats[stat].index];
	}
}

static void nicvf_get_ethtool_stats(struct net_device *netdev,
				    struct ethtool_stats *stats, u64 *data)
{
	struct nicvf *nic = netdev_priv(netdev);
	int stat;
	int sqs;

	nicvf_update_stats(nic);

	/* Update LMAC stats */
	nicvf_update_lmac_stats(nic);

	for (stat = 0; stat < nicvf_n_hw_stats; stat++)
		*(data++) = ((u64 *)&nic->hw_stats)
				[nicvf_hw_stats[stat].index];
	for (stat = 0; stat < nicvf_n_drv_stats; stat++)
		*(data++) = ((u64 *)&nic->drv_stats)
				[nicvf_drv_stats[stat].index];

	nicvf_get_qset_stats(nic, stats, &data);

	for (sqs = 0; sqs < nic->sqs_count; sqs++) {
		if (!nic->snicvf[sqs])
			continue;
		nicvf_get_qset_stats(nic->snicvf[sqs], stats, &data);
	}

	for (stat = 0; stat < BGX_RX_STATS_COUNT; stat++)
		*(data++) = nic->bgx_stats.rx_stats[stat];
	for (stat = 0; stat < BGX_TX_STATS_COUNT; stat++)
		*(data++) = nic->bgx_stats.tx_stats[stat];
}

static int nicvf_get_regs_len(struct net_device *dev)
{
	return sizeof(u64) * NIC_VF_REG_COUNT;
}

static void nicvf_get_regs(struct net_device *dev,
			   struct ethtool_regs *regs, void *reg)
{
	struct nicvf *nic = netdev_priv(dev);
	u64 *p = (u64 *)reg;
	u64 reg_offset;
	int mbox, key, stat, q;
	int i = 0;

	regs->version = 0;
	memset(p, 0, NIC_VF_REG_COUNT * sizeof(u64));

	p[i++] = nicvf_reg_read(nic, NIC_VNIC_CFG);
	/* Mailbox registers */
	for (mbox = 0; mbox < NIC_PF_VF_MAILBOX_SIZE; mbox++)
		p[i++] = nicvf_reg_read(nic,
					NIC_VF_PF_MAILBOX_0_1 | (mbox << 3));

	p[i++] = nicvf_reg_read(nic, NIC_VF_INT);
	p[i++] = nicvf_reg_read(nic, NIC_VF_INT_W1S);
	p[i++] = nicvf_reg_read(nic, NIC_VF_ENA_W1C);
	p[i++] = nicvf_reg_read(nic, NIC_VF_ENA_W1S);
	p[i++] = nicvf_reg_read(nic, NIC_VNIC_RSS_CFG);

	for (key = 0; key < RSS_HASH_KEY_SIZE; key++)
		p[i++] = nicvf_reg_read(nic, NIC_VNIC_RSS_KEY_0_4 | (key << 3));

	/* Tx/Rx statistics */
	for (stat = 0; stat < TX_STATS_ENUM_LAST; stat++)
		p[i++] = nicvf_reg_read(nic,
					NIC_VNIC_TX_STAT_0_4 | (stat << 3));

	for (stat = 0; stat < RX_STATS_ENUM_LAST; stat++)
		p[i++] = nicvf_reg_read(nic,
					NIC_VNIC_RX_STAT_0_13 | (stat << 3));

	p[i++] = nicvf_reg_read(nic, NIC_QSET_RQ_GEN_CFG);

	/* All completion queue's registers */
	for (q = 0; q < MAX_CMP_QUEUES_PER_QS; q++) {
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_CFG, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_CFG2, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_THRESH, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_BASE, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_TAIL, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_DOOR, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS2, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_DEBUG, q);
	}

	/* All receive queue's registers */
	for (q = 0; q < MAX_RCV_QUEUES_PER_QS; q++) {
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RQ_0_7_CFG, q);
		p[i++] = nicvf_queue_reg_read(nic,
					      NIC_QSET_RQ_0_7_STAT_0_1, q);
		reg_offset = NIC_QSET_RQ_0_7_STAT_0_1 | (1 << 3);
		p[i++] = nicvf_queue_reg_read(nic, reg_offset, q);
	}

	for (q = 0; q < MAX_SND_QUEUES_PER_QS; q++) {
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_THRESH, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_BASE, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DOOR, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STATUS, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DEBUG, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CNM_CHG, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1, q);
		reg_offset = NIC_QSET_SQ_0_7_STAT_0_1 | (1 << 3);
		p[i++] = nicvf_queue_reg_read(nic, reg_offset, q);
	}

	for (q = 0; q < MAX_RCV_BUF_DESC_RINGS_PER_QS; q++) {
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_CFG, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_THRESH, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_BASE, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_HEAD, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, q);
		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_DOOR, q);
		p[i++] = nicvf_queue_reg_read(nic,
					      NIC_QSET_RBDR_0_1_STATUS0, q);
		p[i++] = nicvf_queue_reg_read(nic,
					      NIC_QSET_RBDR_0_1_STATUS1, q);
		reg_offset = NIC_QSET_RBDR_0_1_PREFETCH_STATUS;
		p[i++] = nicvf_queue_reg_read(nic, reg_offset, q);
	}
}

static int nicvf_get_coalesce(struct net_device *netdev,
			      struct ethtool_coalesce *cmd)
{
	struct nicvf *nic = netdev_priv(netdev);

	cmd->rx_coalesce_usecs = nic->cq_coalesce_usecs;
	return 0;
}

static void nicvf_get_ringparam(struct net_device *netdev,
				struct ethtool_ringparam *ring)
{
	struct nicvf *nic = netdev_priv(netdev);
	struct queue_set *qs = nic->qs;

	ring->rx_max_pending = MAX_RCV_BUF_COUNT;
	ring->rx_pending = qs->rbdr_len;
	ring->tx_max_pending = MAX_SND_QUEUE_LEN;
	ring->tx_pending = qs->sq_len;
}

static int nicvf_get_rss_hash_opts(struct nicvf *nic,
				   struct ethtool_rxnfc *info)
{
	info->data = 0;

	switch (info->flow_type) {
	case TCP_V4_FLOW:
	case TCP_V6_FLOW:
	case UDP_V4_FLOW:
	case UDP_V6_FLOW:
	case SCTP_V4_FLOW:
	case SCTP_V6_FLOW:
		info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
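		/* fall through: L4 flows also hash on the IP src/dst */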
	case IPV4_FLOW:
	case IPV6_FLOW:
		info->data |= RXH_IP_SRC | RXH_IP_DST;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static int nicvf_get_rxnfc(struct net_device *dev,
			   struct ethtool_rxnfc *info, u32 *rules)
{
	struct nicvf *nic = netdev_priv(dev);
	int ret = -EOPNOTSUPP;

	switch (info->cmd) {
	case ETHTOOL_GRXRINGS:
		info->data = nic->rx_queues;
		ret = 0;
		break;
	case ETHTOOL_GRXFH:
		return nicvf_get_rss_hash_opts(nic, info);
	default:
		break;
	}
	return ret;
}

static int nicvf_set_rss_hash_opts(struct nicvf *nic,
				   struct ethtool_rxnfc *info)
{
	struct nicvf_rss_info *rss = &nic->rss_info;
	u64 rss_cfg = nicvf_reg_read(nic, NIC_VNIC_RSS_CFG);

	if (!rss->enable) {
		netdev_err(nic->netdev,
			   "RSS is disabled, hash cannot be set\n");
		return -EIO;
	}

	netdev_info(nic->netdev, "Set RSS flow type = %d, data = %lld\n",
		    info->flow_type, info->data);

	if (!(info->data & RXH_IP_SRC) || !(info->data & RXH_IP_DST))
		return -EINVAL;

	switch (info->flow_type) {
	case TCP_V4_FLOW:
	case TCP_V6_FLOW:
		switch (info->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
		case 0:
			rss_cfg &= ~(1ULL << RSS_HASH_TCP);
			break;
		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
			rss_cfg |= (1ULL << RSS_HASH_TCP);
			break;
		default:
			return -EINVAL;
		}
		break;
	case UDP_V4_FLOW:
	case UDP_V6_FLOW:
		switch (info->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
		case 0:
			rss_cfg &= ~(1ULL << RSS_HASH_UDP);
			break;
		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
			rss_cfg |= (1ULL << RSS_HASH_UDP);
			break;
		default:
			return -EINVAL;
		}
		break;
	case SCTP_V4_FLOW:
	case SCTP_V6_FLOW:
		switch (info->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
		case 0:
			rss_cfg &= ~(1ULL << RSS_HASH_L4ETC);
			break;
		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
			rss_cfg |= (1ULL << RSS_HASH_L4ETC);
			break;
		default:
			return -EINVAL;
		}
		break;
	case IPV4_FLOW:
	case IPV6_FLOW:
		rss_cfg = RSS_HASH_IP;
		break;
	default:
		return -EINVAL;
	}

	nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss_cfg);
	return 0;
}

static int nicvf_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info)
{
	struct nicvf *nic = netdev_priv(dev);

	switch (info->cmd) {
	case ETHTOOL_SRXFH:
		return nicvf_set_rss_hash_opts(nic, info);
	default:
		break;
	}
	return -EOPNOTSUPP;
}
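
/* The RSS hash key is stored as RSS_HASH_KEY_SIZE u64 words, hence the
 * byte size reported below.  These hooks serve `ethtool -x` (show) and
 * `ethtool -X` (set) for the RSS key and indirection table.
 */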

static u32 nicvf_get_rxfh_key_size(struct net_device *netdev)
{
	return RSS_HASH_KEY_SIZE * sizeof(u64);
}

static u32 nicvf_get_rxfh_indir_size(struct net_device *dev)
{
	struct nicvf *nic = netdev_priv(dev);

	return nic->rss_info.rss_size;
}

static int nicvf_get_rxfh(struct net_device *dev, u32 *indir, u8 *hkey,
			  u8 *hfunc)
{
	struct nicvf *nic = netdev_priv(dev);
	struct nicvf_rss_info *rss = &nic->rss_info;
	int idx;

	if (indir) {
		for (idx = 0; idx < rss->rss_size; idx++)
			indir[idx] = rss->ind_tbl[idx];
	}

	if (hkey)
		memcpy(hkey, rss->key, RSS_HASH_KEY_SIZE * sizeof(u64));

	if (hfunc)
		*hfunc = ETH_RSS_HASH_TOP;

	return 0;
}

static int nicvf_set_rxfh(struct net_device *dev, const u32 *indir,
			  const u8 *hkey, u8 hfunc)
{
	struct nicvf *nic = netdev_priv(dev);
	struct nicvf_rss_info *rss = &nic->rss_info;
	int idx;

	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
		return -EOPNOTSUPP;

	if (!rss->enable) {
		netdev_err(nic->netdev,
			   "RSS is disabled, cannot change settings\n");
		return -EIO;
	}

	if (indir) {
		for (idx = 0; idx < rss->rss_size; idx++)
			rss->ind_tbl[idx] = indir[idx];
	}

	if (hkey) {
		memcpy(rss->key, hkey, RSS_HASH_KEY_SIZE * sizeof(u64));
		nicvf_set_rss_key(nic);
	}

	nicvf_config_rss(nic);
	return 0;
}

/* Get the number of queues the device supports and the current queue count */
static void nicvf_get_channels(struct net_device *dev,
			       struct ethtool_channels *channel)
{
	struct nicvf *nic = netdev_priv(dev);

	memset(channel, 0, sizeof(*channel));

	channel->max_rx = nic->max_queues;
	channel->max_tx = nic->max_queues;

	channel->rx_count = nic->rx_queues;
	channel->tx_count = nic->tx_queues;
}

/* Set the number of Tx and Rx queues to be used */
static int nicvf_set_channels(struct net_device *dev,
			      struct ethtool_channels *channel)
{
	struct nicvf *nic = netdev_priv(dev);
	int err = 0;
	bool if_up = netif_running(dev);
	int cqcount;

	if (!channel->rx_count || !channel->tx_count)
		return -EINVAL;
	if (channel->rx_count > nic->max_queues)
		return -EINVAL;
	if (channel->tx_count > nic->max_queues)
		return -EINVAL;

	if (if_up)
		nicvf_stop(dev);
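
	/* Each qset provides up to MAX_CMP_QUEUES_PER_QS completion
	 * queues; any excess is spread over secondary qsets.  For
	 * example, with a limit of 8 (as the NIC_QSET_CQ_0_7_* register
	 * names suggest) and cqcount = 12: roundup(12, 8) = 16 and
	 * 16 / 8 - 1 = 1 secondary qset.
	 */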

	cqcount = max(channel->rx_count, channel->tx_count);

	if (cqcount > MAX_CMP_QUEUES_PER_QS) {
		nic->sqs_count = roundup(cqcount, MAX_CMP_QUEUES_PER_QS);
		nic->sqs_count = (nic->sqs_count / MAX_CMP_QUEUES_PER_QS) - 1;
	} else {
		nic->sqs_count = 0;
	}

	nic->qs->rq_cnt = min_t(u32, channel->rx_count, MAX_RCV_QUEUES_PER_QS);
	nic->qs->sq_cnt = min_t(u32, channel->tx_count, MAX_SND_QUEUES_PER_QS);
	nic->qs->cq_cnt = max(nic->qs->rq_cnt, nic->qs->sq_cnt);

	nic->rx_queues = channel->rx_count;
	nic->tx_queues = channel->tx_count;
	err = nicvf_set_real_num_queues(dev, nic->tx_queues, nic->rx_queues);
	if (err)
		return err;

	if (if_up)
		nicvf_open(dev);

	netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n",
		    nic->tx_queues, nic->rx_queues);

	return err;
}

static const struct ethtool_ops nicvf_ethtool_ops = {
	.get_settings		= nicvf_get_settings,
	.get_link		= nicvf_get_link,
	.get_drvinfo		= nicvf_get_drvinfo,
	.get_msglevel		= nicvf_get_msglevel,
	.set_msglevel		= nicvf_set_msglevel,
	.get_strings		= nicvf_get_strings,
	.get_sset_count		= nicvf_get_sset_count,
	.get_ethtool_stats	= nicvf_get_ethtool_stats,
	.get_regs_len		= nicvf_get_regs_len,
	.get_regs		= nicvf_get_regs,
	.get_coalesce		= nicvf_get_coalesce,
	.get_ringparam		= nicvf_get_ringparam,
	.get_rxnfc		= nicvf_get_rxnfc,
	.set_rxnfc		= nicvf_set_rxnfc,
	.get_rxfh_key_size	= nicvf_get_rxfh_key_size,
	.get_rxfh_indir_size	= nicvf_get_rxfh_indir_size,
	.get_rxfh		= nicvf_get_rxfh,
	.set_rxfh		= nicvf_set_rxfh,
	.get_channels		= nicvf_get_channels,
	.set_channels		= nicvf_set_channels,
	.get_ts_info		= ethtool_op_get_ts_info,
};

void nicvf_set_ethtool_ops(struct net_device *netdev)
{
	netdev->ethtool_ops = &nicvf_ethtool_ops;
}
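
path: root/fs/btrfs/check-integrity.c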
f='#n2988'>2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382
/*
 * Copyright (C) STRATO AG 2011.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

/*
 * This module can be used to catch cases when the btrfs kernel
 * code executes write requests to the disk that bring the file
 * system into an inconsistent state. In such a state, a power-loss
 * or kernel panic event would cause the data on disk to be
 * lost or at least damaged.
 *
 * Code is added that examines all block write requests during
 * runtime (including writes of the super block). Three rules
 * are verified and an error is printed on violation of the
 * rules:
 * 1. It is not allowed to write a disk block which is
 *    currently referenced by the super block (either directly
 *    or indirectly).
 * 2. When a super block is written, it is verified that all
 *    referenced (directly or indirectly) blocks fulfill the
 *    following requirements:
 *    2a. All referenced blocks have either been present when
 *        the file system was mounted (i.e., they have been
 *        referenced by the super block), or they have been
 *        written since then and the write completion callback
 *        was called and no write error was indicated and a
 *        FLUSH request to the device where these blocks are
 *        located was received and completed.
 *    2b. All referenced blocks need to have a generation
 *        number which is equal to the parent's number.
 *
 * One issue that was found using this module was that the log
 * tree on disk became temporarily corrupted because disk blocks
 * that had been in use for the log tree had been freed and
 * reused too early, while being referenced by the written super
 * block.
 *
 * The search term in the kernel log that can be used to filter
 * on the existence of detected integrity issues is
 * "btrfs: attempt".
 *
 * The integrity check is enabled via mount options. These
 * mount options are only supported if the integrity check
 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
 *
 * Example #1, apply integrity checks to all metadata:
 * mount /dev/sdb1 /mnt -o check_int
 *
 * Example #2, apply integrity checks to all metadata and
 * to data extents:
 * mount /dev/sdb1 /mnt -o check_int_data
 *
 * Example #3, apply integrity checks to all metadata and dump
 * the tree that the super block references to kernel messages
 * each time after a super block was written:
 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
 *
 * If the integrity check tool is included and activated in
 * the mount options, plenty of kernel memory is used, and
 * plenty of additional CPU cycles are spent. Enabling this
 * functionality is not intended for normal use. In most
 * cases, unless you are a btrfs developer who needs to verify
 * the integrity of (super)-block write requests, do not
 * enable the config option BTRFS_FS_CHECK_INTEGRITY to
 * include and compile the integrity check tool.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/buffer_head.h>
#include <linux/mutex.h>
#include <linux/crc32c.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "extent_io.h"
#include "volumes.h"
#include "print-tree.h"
#include "locking.h"
#include "check-integrity.h"
#include "rcu-string.h"

#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6)	/* in characters,
							 * excluding " [...]" */
#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)

/*
 * The definition of the bitmask fields for the print_mask.
 * They are specified with the mount option check_integrity_print_mask.
 */
#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE			0x00000001
#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION		0x00000002
#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE			0x00000004
#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE			0x00000008
#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH			0x00000010
#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH			0x00000020
#define BTRFSIC_PRINT_MASK_VERBOSE				0x00000040
#define BTRFSIC_PRINT_MASK_VERY_VERBOSE				0x00000080
#define BTRFSIC_PRINT_MASK_INITIAL_TREE				0x00000100
#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES			0x00000200
#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE			0x00000400
#define BTRFSIC_PRINT_MASK_NUM_COPIES				0x00000800
#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS		0x00001000
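
/*
 * The mask is additive; e.g. the value 263 in example #3 above is
 * 0x107 = SUPERBLOCK_WRITE (0x001) | ROOT_CHUNK_LOG_TREE_LOCATION
 * (0x002) | TREE_AFTER_SB_WRITE (0x004) | INITIAL_TREE (0x100).
 */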

struct btrfsic_dev_state;
struct btrfsic_state;

struct btrfsic_block {
	u32 magic_num;		/* only used for debug purposes */
	unsigned int is_metadata:1;	/* if it is meta-data, not data-data */
	unsigned int is_superblock:1;	/* if it is one of the superblocks */
	unsigned int is_iodone:1;	/* if is done by lower subsystem */
	unsigned int iodone_w_error:1;	/* error was indicated to endio */
	unsigned int never_written:1;	/* block was added because it was
					 * referenced, not because it was
					 * written */
	unsigned int mirror_num;	/* large enough to hold
					 * BTRFS_SUPER_MIRROR_MAX */
	struct btrfsic_dev_state *dev_state;
	u64 dev_bytenr;		/* key, physical byte num on disk */
	u64 logical_bytenr;	/* logical byte num on disk */
	u64 generation;
	struct btrfs_disk_key disk_key;	/* extra info to print in case of
					 * issues, will not always be correct */
	struct list_head collision_resolving_node;	/* list node */
	struct list_head all_blocks_node;	/* list node */

	/* the following two lists contain block_link items */
	struct list_head ref_to_list;	/* list */
	struct list_head ref_from_list;	/* list */
	struct btrfsic_block *next_in_same_bio;
	void *orig_bio_bh_private;
	union {
		bio_end_io_t *bio;
		bh_end_io_t *bh;
	} orig_bio_bh_end_io;
	int submit_bio_bh_rw;
	u64 flush_gen; /* only valid if !never_written */
};

/*
 * Elements of this type are allocated dynamically and required because
 * each block object can refer to and can be referenced from
 * multiple blocks. The key to look them up in the hashtable is
 * the dev_bytenr of the referenced block combined with the
 * dev_bytenr of the referencing block.
 * The fact that they are searchable via a hashtable and that a
 * ref_cnt is maintained is not required for the btrfs integrity
 * check algorithm itself; it is only used to make the output more
 * readable when an error is detected (an error is defined
 * as a write operation to a block while that block is still referenced).
 */
struct btrfsic_block_link {
	u32 magic_num;		/* only used for debug purposes */
	u32 ref_cnt;
	struct list_head node_ref_to;	/* list node */
	struct list_head node_ref_from;	/* list node */
	struct list_head collision_resolving_node;	/* list node */
	struct btrfsic_block *block_ref_to;
	struct btrfsic_block *block_ref_from;
	u64 parent_generation;
};

struct btrfsic_dev_state {
	u32 magic_num;		/* only used for debug purposes */
	struct block_device *bdev;
	struct btrfsic_state *state;
	struct list_head collision_resolving_node;	/* list node */
	struct btrfsic_block dummy_block_for_bio_bh_flush;
	u64 last_flush_gen;
	char name[BDEVNAME_SIZE];
};

struct btrfsic_block_hashtable {
	struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
};

struct btrfsic_block_link_hashtable {
	struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
};

struct btrfsic_dev_state_hashtable {
	struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
};

struct btrfsic_block_data_ctx {
	u64 start;		/* virtual bytenr */
	u64 dev_bytenr;		/* physical bytenr on device */
	u32 len;
	struct btrfsic_dev_state *dev;
	char **datav;
	struct page **pagev;
	void *mem_to_free;
};

/* This structure is used to implement recursion without occupying
 * any stack space; see btrfsic_process_metablock(). */
struct btrfsic_stack_frame {
	u32 magic;
	u32 nr;
	int error;
	int i;
	int limit_nesting;
	int num_copies;
	int mirror_num;
	struct btrfsic_block *block;
	struct btrfsic_block_data_ctx *block_ctx;
	struct btrfsic_block *next_block;
	struct btrfsic_block_data_ctx next_block_ctx;
	struct btrfs_header *hdr;
	struct btrfsic_stack_frame *prev;
};

/* Some state per mounted filesystem */
struct btrfsic_state {
	u32 print_mask;
	int include_extent_data;
	int csum_size;
	struct list_head all_blocks_list;
	struct btrfsic_block_hashtable block_hashtable;
	struct btrfsic_block_link_hashtable block_link_hashtable;
	struct btrfs_root *root;
	u64 max_superblock_generation;
	struct btrfsic_block *latest_superblock;
	u32 metablock_size;
	u32 datablock_size;
};

static void btrfsic_block_init(struct btrfsic_block *b);
static struct btrfsic_block *btrfsic_block_alloc(void);
static void btrfsic_block_free(struct btrfsic_block *b);
static void btrfsic_block_link_init(struct btrfsic_block_link *n);
static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
static void btrfsic_block_link_free(struct btrfsic_block_link *n);
static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
					struct btrfsic_block_hashtable *h);
static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
static struct btrfsic_block *btrfsic_block_hashtable_lookup(
		struct block_device *bdev,
		u64 dev_bytenr,
		struct btrfsic_block_hashtable *h);
static void btrfsic_block_link_hashtable_init(
		struct btrfsic_block_link_hashtable *h);
static void btrfsic_block_link_hashtable_add(
		struct btrfsic_block_link *l,
		struct btrfsic_block_link_hashtable *h);
static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
		struct block_device *bdev_ref_to,
		u64 dev_bytenr_ref_to,
		struct block_device *bdev_ref_from,
		u64 dev_bytenr_ref_from,
		struct btrfsic_block_link_hashtable *h);
static void btrfsic_dev_state_hashtable_init(
		struct btrfsic_dev_state_hashtable *h);
static void btrfsic_dev_state_hashtable_add(
		struct btrfsic_dev_state *ds,
		struct btrfsic_dev_state_hashtable *h);
static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
		struct block_device *bdev,
		struct btrfsic_dev_state_hashtable *h);
static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
static int btrfsic_process_superblock(struct btrfsic_state *state,
				      struct btrfs_fs_devices *fs_devices);
static int btrfsic_process_metablock(struct btrfsic_state *state,
				     struct btrfsic_block *block,
				     struct btrfsic_block_data_ctx *block_ctx,
				     int limit_nesting, int force_iodone_flag);
static void btrfsic_read_from_block_data(
	struct btrfsic_block_data_ctx *block_ctx,
	void *dst, u32 offset, size_t len);
static int btrfsic_create_link_to_next_block(
		struct btrfsic_state *state,
		struct btrfsic_block *block,
		struct btrfsic_block_data_ctx
		*block_ctx, u64 next_bytenr,
		int limit_nesting,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block **next_blockp,
		int force_iodone_flag,
		int *num_copiesp, int *mirror_nump,
		struct btrfs_disk_key *disk_key,
		u64 parent_generation);
static int btrfsic_handle_extent_data(struct btrfsic_state *state,
				      struct btrfsic_block *block,
				      struct btrfsic_block_data_ctx *block_ctx,
				      u32 item_offset, int force_iodone_flag);
static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
			     struct btrfsic_block_data_ctx *block_ctx_out,
			     int mirror_num);
static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
				  u32 len, struct block_device *bdev,
				  struct btrfsic_block_data_ctx *block_ctx_out);
static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
static int btrfsic_read_block(struct btrfsic_state *state,
			      struct btrfsic_block_data_ctx *block_ctx);
static void btrfsic_dump_database(struct btrfsic_state *state);
static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
static int btrfsic_test_for_metadata(struct btrfsic_state *state,
				     char **datav, unsigned int num_pages);
static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
					  u64 dev_bytenr, char **mapped_datav,
					  unsigned int num_pages,
					  struct bio *bio, int *bio_is_patched,
					  struct buffer_head *bh,
					  int submit_bio_bh_rw);
static int btrfsic_process_written_superblock(
		struct btrfsic_state *state,
		struct btrfsic_block *const block,
		struct btrfs_super_block *const super_hdr);
static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status);
static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
					      const struct btrfsic_block *block,
					      int recursion_level);
static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
					struct btrfsic_block *const block,
					int recursion_level);
static void btrfsic_print_add_link(const struct btrfsic_state *state,
				   const struct btrfsic_block_link *l);
static void btrfsic_print_rem_link(const struct btrfsic_state *state,
				   const struct btrfsic_block_link *l);
static char btrfsic_get_block_type(const struct btrfsic_state *state,
				   const struct btrfsic_block *block);
static void btrfsic_dump_tree(const struct btrfsic_state *state);
static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
				  const struct btrfsic_block *block,
				  int indent_level);
static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
		struct btrfsic_state *state,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block *next_block,
		struct btrfsic_block *from_block,
		u64 parent_generation);
static struct btrfsic_block *btrfsic_block_lookup_or_add(
		struct btrfsic_state *state,
		struct btrfsic_block_data_ctx *block_ctx,
		const char *additional_string,
		int is_metadata,
		int is_iodone,
		int never_written,
		int mirror_num,
		int *was_created);
static int btrfsic_process_superblock_dev_mirror(
		struct btrfsic_state *state,
		struct btrfsic_dev_state *dev_state,
		struct btrfs_device *device,
		int superblock_mirror_num,
		struct btrfsic_dev_state **selected_dev_state,
		struct btrfs_super_block *selected_super);
static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
		struct block_device *bdev);
static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
					   u64 bytenr,
					   struct btrfsic_dev_state *dev_state,
					   u64 dev_bytenr);

static struct mutex btrfsic_mutex;
static int btrfsic_is_initialized;
static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;


static void btrfsic_block_init(struct btrfsic_block *b)
{
	b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
	b->dev_state = NULL;
	b->dev_bytenr = 0;
	b->logical_bytenr = 0;
	b->generation = BTRFSIC_GENERATION_UNKNOWN;
	b->disk_key.objectid = 0;
	b->disk_key.type = 0;
	b->disk_key.offset = 0;
	b->is_metadata = 0;
	b->is_superblock = 0;
	b->is_iodone = 0;
	b->iodone_w_error = 0;
	b->never_written = 0;
	b->mirror_num = 0;
	b->next_in_same_bio = NULL;
	b->orig_bio_bh_private = NULL;
	b->orig_bio_bh_end_io.bio = NULL;
	INIT_LIST_HEAD(&b->collision_resolving_node);
	INIT_LIST_HEAD(&b->all_blocks_node);
	INIT_LIST_HEAD(&b->ref_to_list);
	INIT_LIST_HEAD(&b->ref_from_list);
	b->submit_bio_bh_rw = 0;
	b->flush_gen = 0;
}

static struct btrfsic_block *btrfsic_block_alloc(void)
{
	struct btrfsic_block *b;

	b = kzalloc(sizeof(*b), GFP_NOFS);
	if (NULL != b)
		btrfsic_block_init(b);

	return b;
}

static void btrfsic_block_free(struct btrfsic_block *b)
{
	BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
	kfree(b);
}

static void btrfsic_block_link_init(struct btrfsic_block_link *l)
{
	l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
	l->ref_cnt = 1;
	INIT_LIST_HEAD(&l->node_ref_to);
	INIT_LIST_HEAD(&l->node_ref_from);
	INIT_LIST_HEAD(&l->collision_resolving_node);
	l->block_ref_to = NULL;
	l->block_ref_from = NULL;
}

static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
{
	struct btrfsic_block_link *l;

	l = kzalloc(sizeof(*l), GFP_NOFS);
	if (NULL != l)
		btrfsic_block_link_init(l);

	return l;
}

static void btrfsic_block_link_free(struct btrfsic_block_link *l)
{
	BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
	kfree(l);
}

static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
{
	ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
	ds->bdev = NULL;
	ds->state = NULL;
	ds->name[0] = '\0';
	INIT_LIST_HEAD(&ds->collision_resolving_node);
	ds->last_flush_gen = 0;
	btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
	ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
	ds->dummy_block_for_bio_bh_flush.dev_state = ds;
}

static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
{
	struct btrfsic_dev_state *ds;

	ds = kzalloc(sizeof(*ds), GFP_NOFS);
	if (NULL != ds)
		btrfsic_dev_state_init(ds);

	return ds;
}

static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
{
	BUG_ON(!(NULL == ds ||
		 BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
	kfree(ds);
}

static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
{
	int i;

	for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
		INIT_LIST_HEAD(h->table + i);
}
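
/* Blocks are hashed by XOR-ing the device byte offset (shifted down
 * by 16, past the mostly-zero low bits of aligned blocks) with the
 * device pointer, masked to the table size, so equal offsets on
 * different devices land in different buckets.
 */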

static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
					struct btrfsic_block_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)(b->dev_bytenr >> 16)) ^
	     ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
	     (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);

	list_add(&b->collision_resolving_node, h->table + hashval);
}

static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
{
	list_del(&b->collision_resolving_node);
}

static struct btrfsic_block *btrfsic_block_hashtable_lookup(
		struct block_device *bdev,
		u64 dev_bytenr,
		struct btrfsic_block_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)(dev_bytenr >> 16)) ^
	     ((unsigned int)((uintptr_t)bdev))) &
	     (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
	struct list_head *elem;

	list_for_each(elem, h->table + hashval) {
		struct btrfsic_block *const b =
		    list_entry(elem, struct btrfsic_block,
			       collision_resolving_node);

		if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
			return b;
	}

	return NULL;
}

static void btrfsic_block_link_hashtable_init(
		struct btrfsic_block_link_hashtable *h)
{
	int i;

	for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
		INIT_LIST_HEAD(h->table + i);
}

static void btrfsic_block_link_hashtable_add(
		struct btrfsic_block_link *l,
		struct btrfsic_block_link_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
	     ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
	     ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
	     ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
	     & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);

	BUG_ON(NULL == l->block_ref_to);
	BUG_ON(NULL == l->block_ref_from);
	list_add(&l->collision_resolving_node, h->table + hashval);
}

static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
{
	list_del(&l->collision_resolving_node);
}

static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
		struct block_device *bdev_ref_to,
		u64 dev_bytenr_ref_to,
		struct block_device *bdev_ref_from,
		u64 dev_bytenr_ref_from,
		struct btrfsic_block_link_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
	     ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
	     ((unsigned int)((uintptr_t)bdev_ref_to)) ^
	     ((unsigned int)((uintptr_t)bdev_ref_from))) &
	     (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
	struct list_head *elem;

	list_for_each(elem, h->table + hashval) {
		struct btrfsic_block_link *const l =
		    list_entry(elem, struct btrfsic_block_link,
			       collision_resolving_node);

		BUG_ON(NULL == l->block_ref_to);
		BUG_ON(NULL == l->block_ref_from);
		if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
		    l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
		    l->block_ref_from->dev_state->bdev == bdev_ref_from &&
		    l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
			return l;
	}

	return NULL;
}

static void btrfsic_dev_state_hashtable_init(
		struct btrfsic_dev_state_hashtable *h)
{
	int i;

	for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
		INIT_LIST_HEAD(h->table + i);
}

static void btrfsic_dev_state_hashtable_add(
		struct btrfsic_dev_state *ds,
		struct btrfsic_dev_state_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)((uintptr_t)ds->bdev)) &
	     (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));

	list_add(&ds->collision_resolving_node, h->table + hashval);
}

static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
{
	list_del(&ds->collision_resolving_node);
}

static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
		struct block_device *bdev,
		struct btrfsic_dev_state_hashtable *h)
{
	const unsigned int hashval =
	    (((unsigned int)((uintptr_t)bdev)) &
	     (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
	struct list_head *elem;

	list_for_each(elem, h->table + hashval) {
		struct btrfsic_dev_state *const ds =
		    list_entry(elem, struct btrfsic_dev_state,
			       collision_resolving_node);

		if (ds->bdev == bdev)
			return ds;
	}

	return NULL;
}

static int btrfsic_process_superblock(struct btrfsic_state *state,
				      struct btrfs_fs_devices *fs_devices)
{
	int ret = 0;
	struct btrfs_super_block *selected_super;
	struct list_head *dev_head = &fs_devices->devices;
	struct btrfs_device *device;
	struct btrfsic_dev_state *selected_dev_state = NULL;
	int pass;

	BUG_ON(NULL == state);
	selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
	if (NULL == selected_super) {
		printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
		return -1;
	}

	list_for_each_entry(device, dev_head, dev_list) {
		int i;
		struct btrfsic_dev_state *dev_state;

		if (!device->bdev || !device->name)
			continue;

		dev_state = btrfsic_dev_state_lookup(device->bdev);
		BUG_ON(NULL == dev_state);
		for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
			ret = btrfsic_process_superblock_dev_mirror(
					state, dev_state, device, i,
					&selected_dev_state, selected_super);
			if (0 != ret && 0 == i) {
				kfree(selected_super);
				return ret;
			}
		}
	}

	if (NULL == state->latest_superblock) {
		printk(KERN_INFO "btrfsic: no superblock found!\n");
		kfree(selected_super);
		return -1;
	}

	state->csum_size = btrfs_super_csum_size(selected_super);

	for (pass = 0; pass < 3; pass++) {
		int num_copies;
		int mirror_num;
		u64 next_bytenr;

		switch (pass) {
		case 0:
			next_bytenr = btrfs_super_root(selected_super);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "root@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		case 1:
			next_bytenr = btrfs_super_chunk_root(selected_super);
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "chunk@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		case 2:
			next_bytenr = btrfs_super_log_root(selected_super);
			if (0 == next_bytenr)
				continue;
			if (state->print_mask &
			    BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
				printk(KERN_INFO "log@%llu\n",
				       (unsigned long long)next_bytenr);
			break;
		}

		num_copies =
		    btrfs_num_copies(state->root->fs_info,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, num_copies);

		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			struct btrfsic_block *next_block;
			struct btrfsic_block_data_ctx tmp_next_block_ctx;
			struct btrfsic_block_link *l;

			ret = btrfsic_map_block(state, next_bytenr,
						state->metablock_size,
						&tmp_next_block_ctx,
						mirror_num);
			if (ret) {
				printk(KERN_INFO "btrfsic:"
				       " btrfsic_map_block(root @%llu,"
				       " mirror %d) failed!\n",
				       (unsigned long long)next_bytenr,
				       mirror_num);
				kfree(selected_super);
				return -1;
			}

			next_block = btrfsic_block_hashtable_lookup(
					tmp_next_block_ctx.dev->bdev,
					tmp_next_block_ctx.dev_bytenr,
					&state->block_hashtable);
			BUG_ON(NULL == next_block);

			l = btrfsic_block_link_hashtable_lookup(
					tmp_next_block_ctx.dev->bdev,
					tmp_next_block_ctx.dev_bytenr,
					state->latest_superblock->dev_state->
					bdev,
					state->latest_superblock->dev_bytenr,
					&state->block_link_hashtable);
			BUG_ON(NULL == l);

			ret = btrfsic_read_block(state, &tmp_next_block_ctx);
			if (ret < (int)PAGE_CACHE_SIZE) {
				printk(KERN_INFO
				       "btrfsic: read @logical %llu failed!\n",
				       (unsigned long long)
				       tmp_next_block_ctx.start);
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				kfree(selected_super);
				return -1;
			}

			ret = btrfsic_process_metablock(state,
							next_block,
							&tmp_next_block_ctx,
							BTRFS_MAX_LEVEL + 3, 1);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
		}
	}

	kfree(selected_super);
	return ret;
}

static int btrfsic_process_superblock_dev_mirror(
		struct btrfsic_state *state,
		struct btrfsic_dev_state *dev_state,
		struct btrfs_device *device,
		int superblock_mirror_num,
		struct btrfsic_dev_state **selected_dev_state,
		struct btrfs_super_block *selected_super)
{
	struct btrfs_super_block *super_tmp;
	u64 dev_bytenr;
	struct buffer_head *bh;
	struct btrfsic_block *superblock_tmp;
	int pass;
	struct block_device *const superblock_bdev = device->bdev;

	/* super block bytenr is always the unmapped device bytenr */
	dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
	if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
		return -1;
	bh = __bread(superblock_bdev, dev_bytenr / 4096,
		     BTRFS_SUPER_INFO_SIZE);
	if (NULL == bh)
		return -1;
	super_tmp = (struct btrfs_super_block *)
	    (bh->b_data + (dev_bytenr & 4095));

	if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
	    strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
		    sizeof(super_tmp->magic)) ||
	    memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
	    btrfs_super_nodesize(super_tmp) != state->metablock_size ||
	    btrfs_super_leafsize(super_tmp) != state->metablock_size ||
	    btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
		brelse(bh);
		return 0;
	}

	superblock_tmp =
	    btrfsic_block_hashtable_lookup(superblock_bdev,
					   dev_bytenr,
					   &state->block_hashtable);
	if (NULL == superblock_tmp) {
		superblock_tmp = btrfsic_block_alloc();
		if (NULL == superblock_tmp) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			brelse(bh);
			return -1;
		}
		/* for superblock, only the dev_bytenr makes sense */
		superblock_tmp->dev_bytenr = dev_bytenr;
		superblock_tmp->dev_state = dev_state;
		superblock_tmp->logical_bytenr = dev_bytenr;
		superblock_tmp->generation = btrfs_super_generation(super_tmp);
		superblock_tmp->is_metadata = 1;
		superblock_tmp->is_superblock = 1;
		superblock_tmp->is_iodone = 1;
		superblock_tmp->never_written = 0;
		superblock_tmp->mirror_num = 1 + superblock_mirror_num;
		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
			printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
				     " @%llu (%s/%llu/%d)\n",
				     superblock_bdev,
				     rcu_str_deref(device->name),
				     (unsigned long long)dev_bytenr,
				     dev_state->name,
				     (unsigned long long)dev_bytenr,
				     superblock_mirror_num);
		list_add(&superblock_tmp->all_blocks_node,
			 &state->all_blocks_list);
		btrfsic_block_hashtable_add(superblock_tmp,
					    &state->block_hashtable);
	}

	/* select the one with the highest generation field */
	if (btrfs_super_generation(super_tmp) >
	    state->max_superblock_generation ||
	    0 == state->max_superblock_generation) {
		memcpy(selected_super, super_tmp, sizeof(*selected_super));
		*selected_dev_state = dev_state;
		state->max_superblock_generation =
		    btrfs_super_generation(super_tmp);
		state->latest_superblock = superblock_tmp;
	}

	for (pass = 0; pass < 3; pass++) {
		u64 next_bytenr;
		int num_copies;
		int mirror_num;
		const char *additional_string = NULL;
		struct btrfs_disk_key tmp_disk_key;

		tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
		tmp_disk_key.offset = 0;
		switch (pass) {
		case 0:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
			additional_string = "initial root ";
			next_bytenr = btrfs_super_root(super_tmp);
			break;
		case 1:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
			additional_string = "initial chunk ";
			next_bytenr = btrfs_super_chunk_root(super_tmp);
			break;
		case 2:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
			additional_string = "initial log ";
			next_bytenr = btrfs_super_log_root(super_tmp);
			if (0 == next_bytenr)
				continue;
			break;
		}

		num_copies =
		    btrfs_num_copies(state->root->fs_info,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, num_copies);
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			struct btrfsic_block *next_block;
			struct btrfsic_block_data_ctx tmp_next_block_ctx;
			struct btrfsic_block_link *l;

			if (btrfsic_map_block(state, next_bytenr,
					      state->metablock_size,
					      &tmp_next_block_ctx,
					      mirror_num)) {
				printk(KERN_INFO "btrfsic: btrfsic_map_block("
				       "bytenr @%llu, mirror %d) failed!\n",
				       (unsigned long long)next_bytenr,
				       mirror_num);
				brelse(bh);
				return -1;
			}

			next_block = btrfsic_block_lookup_or_add(
					state, &tmp_next_block_ctx,
					additional_string, 1, 1, 0,
					mirror_num, NULL);
			if (NULL == next_block) {
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				brelse(bh);
				return -1;
			}

			next_block->disk_key = tmp_disk_key;
			next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
			l = btrfsic_block_link_lookup_or_add(
					state, &tmp_next_block_ctx,
					next_block, superblock_tmp,
					BTRFSIC_GENERATION_UNKNOWN);
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
			if (NULL == l) {
				brelse(bh);
				return -1;
			}
		}
	}
	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
		btrfsic_dump_tree_sub(state, superblock_tmp, 0);

	brelse(bh);
	return 0;
}

static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
{
	struct btrfsic_stack_frame *sf;

	sf = kzalloc(sizeof(*sf), GFP_NOFS);
	if (NULL == sf)
		printk(KERN_INFO "btrfsic: alloc memory failed!\n");
	else
		sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
	return sf;
}

static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
{
	BUG_ON(!(NULL == sf ||
		 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
	kfree(sf);
}
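
/* Walk the metadata tree iteratively: rather than recursing (btrfs
 * trees can be several levels deep and kernel stacks are small), each
 * descent allocates a btrfsic_stack_frame, and the goto labels below
 * serve as the resume points of the resulting state machine.
 */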

static int btrfsic_process_metablock(
		struct btrfsic_state *state,
		struct btrfsic_block *const first_block,
		struct btrfsic_block_data_ctx *const first_block_ctx,
		int first_limit_nesting, int force_iodone_flag)
{
	struct btrfsic_stack_frame initial_stack_frame = { 0 };
	struct btrfsic_stack_frame *sf;
	struct btrfsic_stack_frame *next_stack;
	struct btrfs_header *const first_hdr =
		(struct btrfs_header *)first_block_ctx->datav[0];

	BUG_ON(!first_hdr);
	sf = &initial_stack_frame;
	sf->error = 0;
	sf->i = -1;
	sf->limit_nesting = first_limit_nesting;
	sf->block = first_block;
	sf->block_ctx = first_block_ctx;
	sf->next_block = NULL;
	sf->hdr = first_hdr;
	sf->prev = NULL;

continue_with_new_stack_frame:
	sf->block->generation = le64_to_cpu(sf->hdr->generation);
	if (0 == sf->hdr->level) {
		struct btrfs_leaf *const leafhdr =
		    (struct btrfs_leaf *)sf->hdr;

		if (-1 == sf->i) {
			sf->nr = le32_to_cpu(leafhdr->header.nritems);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO
				       "leaf %llu items %d generation %llu"
				       " owner %llu\n",
				       (unsigned long long)
				       sf->block_ctx->start,
				       sf->nr,
				       (unsigned long long)
				       le64_to_cpu(leafhdr->header.generation),
				       (unsigned long long)
				       le64_to_cpu(leafhdr->header.owner));
		}

continue_with_current_leaf_stack_frame:
		if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
			sf->i++;
			sf->num_copies = 0;
		}

		if (sf->i < sf->nr) {
			struct btrfs_item disk_item;
			u32 disk_item_offset =
				(uintptr_t)(leafhdr->items + sf->i) -
				(uintptr_t)leafhdr;
			struct btrfs_disk_key *disk_key;
			u8 type;
			u32 item_offset;
			u32 item_size;

			if (disk_item_offset + sizeof(struct btrfs_item) >
			    sf->block_ctx->len) {
leaf_item_out_of_bounds_error:
				printk(KERN_INFO
				       "btrfsic: leaf item out of bounds at logical %llu, dev %s\n",
				       sf->block_ctx->start,
				       sf->block_ctx->dev->name);
				goto one_stack_frame_backwards;
			}
			btrfsic_read_from_block_data(sf->block_ctx,
						     &disk_item,
						     disk_item_offset,
						     sizeof(struct btrfs_item));
			item_offset = le32_to_cpu(disk_item.offset);
			item_size = le32_to_cpu(disk_item.size);
			disk_key = &disk_item.key;
			type = disk_key->type;

			if (BTRFS_ROOT_ITEM_KEY == type) {
				struct btrfs_root_item root_item;
				u32 root_item_offset;
				u64 next_bytenr;

				root_item_offset = item_offset +
					offsetof(struct btrfs_leaf, items);
				if (root_item_offset + item_size >
				    sf->block_ctx->len)
					goto leaf_item_out_of_bounds_error;
				btrfsic_read_from_block_data(
					sf->block_ctx, &root_item,
					root_item_offset,
					item_size);
				next_bytenr = le64_to_cpu(root_item.bytenr);

				sf->error =
				    btrfsic_create_link_to_next_block(
						state,
						sf->block,
						sf->block_ctx,
						next_bytenr,
						sf->limit_nesting,
						&sf->next_block_ctx,
						&sf->next_block,
						force_iodone_flag,
						&sf->num_copies,
						&sf->mirror_num,
						disk_key,
						le64_to_cpu(root_item.generation));
				if (sf->error)
					goto one_stack_frame_backwards;

				if (NULL != sf->next_block) {
					struct btrfs_header *const next_hdr =
					    (struct btrfs_header *)
					    sf->next_block_ctx.datav[0];

					next_stack =
					    btrfsic_stack_frame_alloc();
					if (NULL == next_stack) {
						btrfsic_release_block_ctx(
							&sf->next_block_ctx);
						goto one_stack_frame_backwards;
					}

					next_stack->i = -1;
					next_stack->block = sf->next_block;
					next_stack->block_ctx =
					    &sf->next_block_ctx;
					next_stack->next_block = NULL;
					next_stack->hdr = next_hdr;
					next_stack->limit_nesting =
					    sf->limit_nesting - 1;
					next_stack->prev = sf;
					sf = next_stack;
					goto continue_with_new_stack_frame;
				}
			} else if (BTRFS_EXTENT_DATA_KEY == type &&
				   state->include_extent_data) {
				sf->error = btrfsic_handle_extent_data(
						state,
						sf->block,
						sf->block_ctx,
						item_offset,
						force_iodone_flag);
				if (sf->error)
					goto one_stack_frame_backwards;
			}

			goto continue_with_current_leaf_stack_frame;
		}
	} else {
		struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;

		if (-1 == sf->i) {
			sf->nr = le32_to_cpu(nodehdr->header.nritems);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO "node %llu level %d items %d"
				       " generation %llu owner %llu\n",
				       (unsigned long long)
				       sf->block_ctx->start,
				       nodehdr->header.level, sf->nr,
				       (unsigned long long)
				       le64_to_cpu(nodehdr->header.generation),
				       (unsigned long long)
				       le64_to_cpu(nodehdr->header.owner));
		}

continue_with_current_node_stack_frame:
		if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
			sf->i++;
			sf->num_copies = 0;
		}

		if (sf->i < sf->nr) {
			struct btrfs_key_ptr key_ptr;
			u32 key_ptr_offset;
			u64 next_bytenr;

			key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
					  (uintptr_t)nodehdr;
			if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
			    sf->block_ctx->len) {
				printk(KERN_INFO
				       "btrfsic: node item out of bounce at logical %llu, dev %s\n",
				       sf->block_ctx->start,
				       sf->block_ctx->dev->name);
				goto one_stack_frame_backwards;
			}
			btrfsic_read_from_block_data(
				sf->block_ctx, &key_ptr, key_ptr_offset,
				sizeof(struct btrfs_key_ptr));
			next_bytenr = le64_to_cpu(key_ptr.blockptr);

			sf->error = btrfsic_create_link_to_next_block(
					state,
					sf->block,
					sf->block_ctx,
					next_bytenr,
					sf->limit_nesting,
					&sf->next_block_ctx,
					&sf->next_block,
					force_iodone_flag,
					&sf->num_copies,
					&sf->mirror_num,
					&key_ptr.key,
					le64_to_cpu(key_ptr.generation));
			if (sf->error)
				goto one_stack_frame_backwards;

			if (NULL != sf->next_block) {
				struct btrfs_header *const next_hdr =
				    (struct btrfs_header *)
				    sf->next_block_ctx.datav[0];

				next_stack = btrfsic_stack_frame_alloc();
				if (NULL == next_stack) {
					btrfsic_release_block_ctx(
							&sf->next_block_ctx);
					goto one_stack_frame_backwards;
				}

				next_stack->i = -1;
				next_stack->block = sf->next_block;
				next_stack->block_ctx = &sf->next_block_ctx;
				next_stack->next_block = NULL;
				next_stack->hdr = next_hdr;
				next_stack->limit_nesting =
				    sf->limit_nesting - 1;
				next_stack->prev = sf;
				sf = next_stack;
				goto continue_with_new_stack_frame;
			}

			goto continue_with_current_node_stack_frame;
		}
	}

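/*
 * The current block is done (or failed): pop one stack frame, propagate
 * any error to the parent frame and continue with the parent block. Only
 * the initial, on-stack frame is left for the caller.
 */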
one_stack_frame_backwards:
	if (NULL != sf->prev) {
		struct btrfsic_stack_frame *const prev = sf->prev;

		/* the one for the initial block is freed in the caller */
		btrfsic_release_block_ctx(sf->block_ctx);

		if (sf->error) {
			prev->error = sf->error;
			btrfsic_stack_frame_free(sf);
			sf = prev;
			goto one_stack_frame_backwards;
		}

		btrfsic_stack_frame_free(sf);
		sf = prev;
		goto continue_with_new_stack_frame;
	} else {
		BUG_ON(&initial_stack_frame != sf);
	}

	return sf->error;
}

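/*
 * Copy len bytes of block data, starting at offset within the block, from
 * the per-page kmap()ed buffers in block_ctx->datav to dstv.
 */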
static void btrfsic_read_from_block_data(
	struct btrfsic_block_data_ctx *block_ctx,
	void *dstv, u32 offset, size_t len)
{
	size_t cur;
	size_t offset_in_page;
	char *kaddr;
	char *dst = (char *)dstv;
	size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;

	WARN_ON(offset + len > block_ctx->len);
	offset_in_page = (start_offset + offset) &
			 ((unsigned long)PAGE_CACHE_SIZE - 1);

	while (len > 0) {
		cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
		BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
			    PAGE_CACHE_SHIFT);
		kaddr = block_ctx->datav[i];
		memcpy(dst, kaddr + offset_in_page, cur);

		dst += cur;
		len -= cur;
		offset_in_page = 0;
		i++;
	}
}

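/*
 * Establish (or refresh) the link from block to the tree block at
 * next_bytenr for the mirror selected by *mirror_nump. On the first call
 * for a given bytenr, *num_copiesp is initialized via btrfs_num_copies().
 * If a new block link was created and the nesting limit is not yet
 * reached, the referenced block is read from disk and returned in
 * *next_blockp so that the caller can descend into it.
 */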
static int btrfsic_create_link_to_next_block(
		struct btrfsic_state *state,
		struct btrfsic_block *block,
		struct btrfsic_block_data_ctx *block_ctx,
		u64 next_bytenr,
		int limit_nesting,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block **next_blockp,
		int force_iodone_flag,
		int *num_copiesp, int *mirror_nump,
		struct btrfs_disk_key *disk_key,
		u64 parent_generation)
{
	struct btrfsic_block *next_block = NULL;
	int ret;
	struct btrfsic_block_link *l;
	int did_alloc_block_link;
	int block_was_created;

	*next_blockp = NULL;
	if (0 == *num_copiesp) {
		*num_copiesp =
		    btrfs_num_copies(state->root->fs_info,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, *num_copiesp);
		*mirror_nump = 1;
	}

	if (*mirror_nump > *num_copiesp)
		return 0;

	if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
		printk(KERN_INFO
		       "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
		       *mirror_nump);
	ret = btrfsic_map_block(state, next_bytenr,
				state->metablock_size,
				next_block_ctx, *mirror_nump);
	if (ret) {
		printk(KERN_INFO
		       "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
		       (unsigned long long)next_bytenr, *mirror_nump);
		btrfsic_release_block_ctx(next_block_ctx);
		*next_blockp = NULL;
		return -1;
	}

	next_block = btrfsic_block_lookup_or_add(state,
						 next_block_ctx, "referenced ",
						 1, force_iodone_flag,
						 !force_iodone_flag,
						 *mirror_nump,
						 &block_was_created);
	if (NULL == next_block) {
		btrfsic_release_block_ctx(next_block_ctx);
		*next_blockp = NULL;
		return -1;
	}
	if (block_was_created) {
		l = NULL;
		next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
	} else {
		if (next_block->logical_bytenr != next_bytenr &&
		    !(!next_block->is_metadata &&
		      0 == next_block->logical_bytenr)) {
			printk(KERN_INFO
			       "Referenced block @%llu (%s/%llu/%d)"
			       " found in hash table, %c,"
			       " bytenr mismatch (!= stored %llu).\n",
			       (unsigned long long)next_bytenr,
			       next_block_ctx->dev->name,
			       (unsigned long long)next_block_ctx->dev_bytenr,
			       *mirror_nump,
			       btrfsic_get_block_type(state, next_block),
			       (unsigned long long)next_block->logical_bytenr);
		} else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "Referenced block @%llu (%s/%llu/%d)"
			       " found in hash table, %c.\n",
			       (unsigned long long)next_bytenr,
			       next_block_ctx->dev->name,
			       (unsigned long long)next_block_ctx->dev_bytenr,
			       *mirror_nump,
			       btrfsic_get_block_type(state, next_block));
		next_block->logical_bytenr = next_bytenr;

		next_block->mirror_num = *mirror_nump;
		l = btrfsic_block_link_hashtable_lookup(
				next_block_ctx->dev->bdev,
				next_block_ctx->dev_bytenr,
				block_ctx->dev->bdev,
				block_ctx->dev_bytenr,
				&state->block_link_hashtable);
	}

	next_block->disk_key = *disk_key;
	if (NULL == l) {
		l = btrfsic_block_link_alloc();
		if (NULL == l) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			btrfsic_release_block_ctx(next_block_ctx);
			*next_blockp = NULL;
			return -1;
		}

		did_alloc_block_link = 1;
		l->block_ref_to = next_block;
		l->block_ref_from = block;
		l->ref_cnt = 1;
		l->parent_generation = parent_generation;

		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			btrfsic_print_add_link(state, l);

		list_add(&l->node_ref_to, &block->ref_to_list);
		list_add(&l->node_ref_from, &next_block->ref_from_list);

		btrfsic_block_link_hashtable_add(l,
						 &state->block_link_hashtable);
	} else {
		did_alloc_block_link = 0;
		if (0 == limit_nesting) {
			l->ref_cnt++;
			l->parent_generation = parent_generation;
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				btrfsic_print_add_link(state, l);
		}
	}

	if (limit_nesting > 0 && did_alloc_block_link) {
		ret = btrfsic_read_block(state, next_block_ctx);
		if (ret < (int)next_block_ctx->len) {
			printk(KERN_INFO
			       "btrfsic: read block @logical %llu failed!\n",
			       (unsigned long long)next_bytenr);
			btrfsic_release_block_ctx(next_block_ctx);
			*next_blockp = NULL;
			return -1;
		}

		*next_blockp = next_block;
	} else {
		*next_blockp = NULL;
	}
	(*mirror_nump)++;

	return 0;
}

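/*
 * Handle a BTRFS_EXTENT_DATA_KEY item in a leaf: for a regular, non-hole
 * extent, create a link from the metadata block to every mirror of every
 * data block covered by the extent, in chunks of state->datablock_size.
 */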
static int btrfsic_handle_extent_data(
		struct btrfsic_state *state,
		struct btrfsic_block *block,
		struct btrfsic_block_data_ctx *block_ctx,
		u32 item_offset, int force_iodone_flag)
{
	int ret;
	struct btrfs_file_extent_item file_extent_item;
	u64 file_extent_item_offset;
	u64 next_bytenr;
	u64 num_bytes;
	u64 generation;
	struct btrfsic_block_link *l;

	file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
				  item_offset;
	if (file_extent_item_offset +
	    offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
	    block_ctx->len) {
		printk(KERN_INFO
		       "btrfsic: file item out of bounce at logical %llu, dev %s\n",
		       block_ctx->start, block_ctx->dev->name);
		return -1;
	}

	btrfsic_read_from_block_data(block_ctx, &file_extent_item,
		file_extent_item_offset,
		offsetof(struct btrfs_file_extent_item, disk_num_bytes));
	if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
	    ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) {
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
			printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
			       file_extent_item.type,
			       (unsigned long long)
			       le64_to_cpu(file_extent_item.disk_bytenr));
		return 0;
	}

	if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
	    block_ctx->len) {
		printk(KERN_INFO
		       "btrfsic: file item out of bounce at logical %llu, dev %s\n",
		       block_ctx->start, block_ctx->dev->name);
		return -1;
	}
	btrfsic_read_from_block_data(block_ctx, &file_extent_item,
				     file_extent_item_offset,
				     sizeof(struct btrfs_file_extent_item));
	next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) +
		      le64_to_cpu(file_extent_item.offset);
	generation = le64_to_cpu(file_extent_item.generation);
	num_bytes = le64_to_cpu(file_extent_item.num_bytes);

	if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
		printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
		       " offset = %llu, num_bytes = %llu\n",
		       file_extent_item.type,
		       (unsigned long long)
		       le64_to_cpu(file_extent_item.disk_bytenr),
		       (unsigned long long)le64_to_cpu(file_extent_item.offset),
		       (unsigned long long)num_bytes);
	while (num_bytes > 0) {
		u32 chunk_len;
		int num_copies;
		int mirror_num;

		if (num_bytes > state->datablock_size)
			chunk_len = state->datablock_size;
		else
			chunk_len = num_bytes;

		num_copies =
		    btrfs_num_copies(state->root->fs_info,
				     next_bytenr, state->datablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, num_copies);
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			struct btrfsic_block_data_ctx next_block_ctx;
			struct btrfsic_block *next_block;
			int block_was_created;

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO "btrfsic_handle_extent_data("
				       "mirror_num=%d)\n", mirror_num);
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
				printk(KERN_INFO
				       "\tdisk_bytenr = %llu, num_bytes %u\n",
				       (unsigned long long)next_bytenr,
				       chunk_len);
			ret = btrfsic_map_block(state, next_bytenr,
						chunk_len, &next_block_ctx,
						mirror_num);
			if (ret) {
				printk(KERN_INFO
				       "btrfsic: btrfsic_map_block(@%llu,"
				       " mirror=%d) failed!\n",
				       (unsigned long long)next_bytenr,
				       mirror_num);
				return -1;
			}

			next_block = btrfsic_block_lookup_or_add(
					state,
					&next_block_ctx,
					"referenced ",
					0,
					force_iodone_flag,
					!force_iodone_flag,
					mirror_num,
					&block_was_created);
			if (NULL == next_block) {
				printk(KERN_INFO
				       "btrfsic: error, kmalloc failed!\n");
				btrfsic_release_block_ctx(&next_block_ctx);
				return -1;
			}
			if (!block_was_created) {
				if (next_block->logical_bytenr != next_bytenr &&
				    !(!next_block->is_metadata &&
				      0 == next_block->logical_bytenr)) {
					printk(KERN_INFO
					       "Referenced block"
					       " @%llu (%s/%llu/%d)"
					       " found in hash table, D,"
					       " bytenr mismatch"
					       " (!= stored %llu).\n",
					       (unsigned long long)next_bytenr,
					       next_block_ctx.dev->name,
					       (unsigned long long)
					       next_block_ctx.dev_bytenr,
					       mirror_num,
					       (unsigned long long)
					       next_block->logical_bytenr);
				}
				next_block->logical_bytenr = next_bytenr;
				next_block->mirror_num = mirror_num;
			}

			l = btrfsic_block_link_lookup_or_add(state,
							     &next_block_ctx,
							     next_block, block,
							     generation);
			btrfsic_release_block_ctx(&next_block_ctx);
			if (NULL == l)
				return -1;
		}

		next_bytenr += chunk_len;
		num_bytes -= chunk_len;
	}

	return 0;
}

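/*
 * Map a logical bytenr to the device and physical offset of the given
 * mirror and fill in *block_ctx_out. No data is read here; that happens
 * later in btrfsic_read_block().
 */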
static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
			     struct btrfsic_block_data_ctx *block_ctx_out,
			     int mirror_num)
{
	int ret;
	u64 length;
	struct btrfs_bio *multi = NULL;
	struct btrfs_device *device;

	length = len;
	ret = btrfs_map_block(state->root->fs_info, READ,
			      bytenr, &length, &multi, mirror_num);

	if (ret) {
		block_ctx_out->start = 0;
		block_ctx_out->dev_bytenr = 0;
		block_ctx_out->len = 0;
		block_ctx_out->dev = NULL;
		block_ctx_out->datav = NULL;
		block_ctx_out->pagev = NULL;
		block_ctx_out->mem_to_free = NULL;

		return ret;
	}

	device = multi->stripes[0].dev;
	block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
	block_ctx_out->dev_bytenr = multi->stripes[0].physical;
	block_ctx_out->start = bytenr;
	block_ctx_out->len = len;
	block_ctx_out->datav = NULL;
	block_ctx_out->pagev = NULL;
	block_ctx_out->mem_to_free = NULL;

	kfree(multi);
	if (NULL == block_ctx_out->dev) {
		ret = -ENXIO;
		printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
	}

	return ret;
}

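/*
 * Like btrfsic_map_block(), but for superblock copies, where the device
 * is already known and the device offset equals the logical bytenr.
 */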
static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
				  u32 len, struct block_device *bdev,
				  struct btrfsic_block_data_ctx *block_ctx_out)
{
	block_ctx_out->dev = btrfsic_dev_state_lookup(bdev);
	block_ctx_out->dev_bytenr = bytenr;
	block_ctx_out->start = bytenr;
	block_ctx_out->len = len;
	block_ctx_out->datav = NULL;
	block_ctx_out->pagev = NULL;
	block_ctx_out->mem_to_free = NULL;
	if (NULL != block_ctx_out->dev) {
		return 0;
	} else {
		printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n");
		return -ENXIO;
	}
}

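/*
 * Undo btrfsic_read_block(): kunmap() and free all pages of the block
 * and release the shared page/data pointer array.
 */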
static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
{
	if (block_ctx->mem_to_free) {
		unsigned int num_pages;

		BUG_ON(!block_ctx->datav);
		BUG_ON(!block_ctx->pagev);
		num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
			    PAGE_CACHE_SHIFT;
		while (num_pages > 0) {
			num_pages--;
			if (block_ctx->datav[num_pages]) {
				kunmap(block_ctx->pagev[num_pages]);
				block_ctx->datav[num_pages] = NULL;
			}
			if (block_ctx->pagev[num_pages]) {
				__free_page(block_ctx->pagev[num_pages]);
				block_ctx->pagev[num_pages] = NULL;
			}
		}

		kfree(block_ctx->mem_to_free);
		block_ctx->mem_to_free = NULL;
		block_ctx->pagev = NULL;
		block_ctx->datav = NULL;
	}
}

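/*
 * Read the block described by block_ctx from disk into freshly allocated,
 * kmap()ed pages. Returns the number of bytes read (block_ctx->len) on
 * success and -1 on failure; partially allocated resources are freed
 * later by btrfsic_release_block_ctx().
 */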
static int btrfsic_read_block(struct btrfsic_state *state,
			      struct btrfsic_block_data_ctx *block_ctx)
{
	unsigned int num_pages;
	unsigned int i;
	u64 dev_bytenr;
	int ret;

	BUG_ON(block_ctx->datav);
	BUG_ON(block_ctx->pagev);
	BUG_ON(block_ctx->mem_to_free);
	if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
		printk(KERN_INFO
		       "btrfsic: read_block() with unaligned bytenr %llu\n",
		       (unsigned long long)block_ctx->dev_bytenr);
		return -1;
	}

	num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
		    PAGE_CACHE_SHIFT;
	block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
					  sizeof(*block_ctx->pagev)) *
					 num_pages, GFP_NOFS);
	if (!block_ctx->mem_to_free)
		return -1;
	block_ctx->datav = block_ctx->mem_to_free;
	block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
	for (i = 0; i < num_pages; i++) {
		block_ctx->pagev[i] = alloc_page(GFP_NOFS);
		if (!block_ctx->pagev[i])
			return -1;
	}

	dev_bytenr = block_ctx->dev_bytenr;
	for (i = 0; i < num_pages;) {
		struct bio *bio;
		unsigned int j;
		DECLARE_COMPLETION_ONSTACK(complete);

		bio = bio_alloc(GFP_NOFS, num_pages - i);
		if (!bio) {
			printk(KERN_INFO
			       "btrfsic: bio_alloc() for %u pages failed!\n",
			       num_pages - i);
			return -1;
		}
		bio->bi_bdev = block_ctx->dev->bdev;
		bio->bi_sector = dev_bytenr >> 9;
		bio->bi_end_io = btrfsic_complete_bio_end_io;
		bio->bi_private = &complete;

		for (j = i; j < num_pages; j++) {
			ret = bio_add_page(bio, block_ctx->pagev[j],
					   PAGE_CACHE_SIZE, 0);
			if (PAGE_CACHE_SIZE != ret)
				break;
		}
		if (j == i) {
			printk(KERN_INFO
			       "btrfsic: error, failed to add a single page!\n");
			bio_put(bio);
			return -1;
		}
		submit_bio(READ, bio);

		/* this will also unplug the queue */
		wait_for_completion(&complete);

		if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
			printk(KERN_INFO
			       "btrfsic: read error at logical %llu dev %s!\n",
			       block_ctx->start, block_ctx->dev->name);
			bio_put(bio);
			return -1;
		}
		bio_put(bio);
		dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
		i = j;
	}
	for (i = 0; i < num_pages; i++) {
		block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
		if (!block_ctx->datav[i]) {
			printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
			       block_ctx->dev->name);
			return -1;
		}
	}

	return block_ctx->len;
}

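/* Completion callback for the synchronous reads in btrfsic_read_block() */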
static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
{
	complete((struct completion *)bio->bi_private);
}

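/*
 * Dump the whole block database: every known block together with all of
 * its "refers to" and "is referenced from" links.
 */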
static void btrfsic_dump_database(struct btrfsic_state *state)
{
	struct list_head *elem_all;

	BUG_ON(NULL == state);

	printk(KERN_INFO "all_blocks_list:\n");
	list_for_each(elem_all, &state->all_blocks_list) {
		const struct btrfsic_block *const b_all =
		    list_entry(elem_all, struct btrfsic_block,
			       all_blocks_node);
		struct list_head *elem_ref_to;
		struct list_head *elem_ref_from;

		printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
		       btrfsic_get_block_type(state, b_all),
		       (unsigned long long)b_all->logical_bytenr,
		       b_all->dev_state->name,
		       (unsigned long long)b_all->dev_bytenr,
		       b_all->mirror_num);

		list_for_each(elem_ref_to, &b_all->ref_to_list) {
			const struct btrfsic_block_link *const l =
			    list_entry(elem_ref_to,
				       struct btrfsic_block_link,
				       node_ref_to);

			printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
			       " refers %u* to"
			       " %c @%llu (%s/%llu/%d)\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_to),
			       (unsigned long long)
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       (unsigned long long)l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num);
		}

		list_for_each(elem_ref_from, &b_all->ref_from_list) {
			const struct btrfsic_block_link *const l =
			    list_entry(elem_ref_from,
				       struct btrfsic_block_link,
				       node_ref_from);

			printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
			       " is ref %u* from"
			       " %c @%llu (%s/%llu/%d)\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_from),
			       (unsigned long long)
			       l->block_ref_from->logical_bytenr,
			       l->block_ref_from->dev_state->name,
			       (unsigned long long)
			       l->block_ref_from->dev_bytenr,
			       l->block_ref_from->mirror_num);
		}

		printk(KERN_INFO "\n");
	}
}

/*
 * Test whether the disk block contains a tree block (leaf or node)
 * (note that this test fails for the super block)
 */
static int btrfsic_test_for_metadata(struct btrfsic_state *state,
				     char **datav, unsigned int num_pages)
{
	struct btrfs_header *h;
	u8 csum[BTRFS_CSUM_SIZE];
	u32 crc = ~(u32)0;
	unsigned int i;

	if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
		return 1; /* not metadata */
	num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
	h = (struct btrfs_header *)datav[0];

	if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
		return 1;

	for (i = 0; i < num_pages; i++) {
		u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
		size_t sublen = i ? PAGE_CACHE_SIZE :
				    (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);

		crc = crc32c(crc, data, sublen);
	}
	btrfs_csum_final(crc, csum);
	if (memcmp(csum, h->csum, state->csum_size))
		return 1;

	return 0; /* is metadata */
}

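/*
 * Called for each block that is seen being written to disk: based on the
 * mapped page contents the block is classified as data or metadata (see
 * btrfsic_test_for_metadata() above) and the integrity checker's state
 * for that block is updated.
 */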
static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
					  u64 dev_bytenr, char **mapped_datav,
					  unsigned int num_pages,
					  struct bio *bio, int *bio_is_patched,
					  struct buffer_head *bh,