Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/amiflop.c             |  60
-rw-r--r--  drivers/block/ataflop.c             |  50
-rw-r--r--  drivers/block/cciss.c               | 864
-rw-r--r--  drivers/block/drbd/drbd_actlog.c    |  41
-rw-r--r--  drivers/block/drbd/drbd_bitmap.c    |   2
-rw-r--r--  drivers/block/drbd/drbd_int.h       | 216
-rw-r--r--  drivers/block/drbd/drbd_main.c      | 593
-rw-r--r--  drivers/block/drbd/drbd_nl.c        | 270
-rw-r--r--  drivers/block/drbd/drbd_proc.c      |  34
-rw-r--r--  drivers/block/drbd/drbd_receiver.c  | 946
-rw-r--r--  drivers/block/drbd/drbd_req.c       | 165
-rw-r--r--  drivers/block/drbd/drbd_req.h       |  62
-rw-r--r--  drivers/block/drbd/drbd_worker.c    | 292
-rw-r--r--  drivers/block/floppy.c              |  66
-rw-r--r--  drivers/block/loop.c                | 101
15 files changed, 2390 insertions, 1372 deletions
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 4b852c962266..a1725e6488d3 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -115,8 +115,6 @@ static unsigned long int fd_def_df0 = FD_DD_3; /* default for df0 if it does
115module_param(fd_def_df0, ulong, 0); 115module_param(fd_def_df0, ulong, 0);
116MODULE_LICENSE("GPL"); 116MODULE_LICENSE("GPL");
117 117
118static struct request_queue *floppy_queue;
119
120/* 118/*
121 * Macros 119 * Macros
122 */ 120 */
@@ -165,6 +163,7 @@ static volatile int selected = -1; /* currently selected drive */
165static int writepending; 163static int writepending;
166static int writefromint; 164static int writefromint;
167static char *raw_buf; 165static char *raw_buf;
166static int fdc_queue;
168 167
169static DEFINE_SPINLOCK(amiflop_lock); 168static DEFINE_SPINLOCK(amiflop_lock);
170 169
@@ -1335,6 +1334,42 @@ static int get_track(int drive, int track)
1335 return -1; 1334 return -1;
1336} 1335}
1337 1336
1337/*
1338 * Round-robin between our available drives, doing one request from each
1339 */
1340static struct request *set_next_request(void)
1341{
1342 struct request_queue *q;
1343 int cnt = FD_MAX_UNITS;
1344 struct request *rq;
1345
1346 /* Find next queue we can dispatch from */
1347 fdc_queue = fdc_queue + 1;
1348 if (fdc_queue == FD_MAX_UNITS)
1349 fdc_queue = 0;
1350
1351 for(cnt = FD_MAX_UNITS; cnt > 0; cnt--) {
1352
1353 if (unit[fdc_queue].type->code == FD_NODRIVE) {
1354 if (++fdc_queue == FD_MAX_UNITS)
1355 fdc_queue = 0;
1356 continue;
1357 }
1358
1359 q = unit[fdc_queue].gendisk->queue;
1360 if (q) {
1361 rq = blk_fetch_request(q);
1362 if (rq)
1363 break;
1364 }
1365
1366 if (++fdc_queue == FD_MAX_UNITS)
1367 fdc_queue = 0;
1368 }
1369
1370 return rq;
1371}
1372
1338static void redo_fd_request(void) 1373static void redo_fd_request(void)
1339{ 1374{
1340 struct request *rq; 1375 struct request *rq;
@@ -1346,7 +1381,7 @@ static void redo_fd_request(void)
1346 int err; 1381 int err;
1347 1382
1348next_req: 1383next_req:
1349 rq = blk_fetch_request(floppy_queue); 1384 rq = set_next_request();
1350 if (!rq) { 1385 if (!rq) {
1351 /* Nothing left to do */ 1386 /* Nothing left to do */
1352 return; 1387 return;
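
[Note] The two hunks above replace the single shared floppy_queue with one request queue per unit: set_next_request() walks the units round-robin, remembering where it stopped in fdc_queue, so one busy drive cannot starve the others. A minimal, self-contained C sketch of that scan pattern follows; NUM_UNITS, struct queue and fetch_request() are stand-ins for the driver's FD_MAX_UNITS, gendisk queues and blk_fetch_request(), not kernel APIs.

#include <stdio.h>

#define NUM_UNITS 4                         /* stand-in for FD_MAX_UNITS */

struct request { int id; };
struct queue   { struct request *pending; };

static struct queue units[NUM_UNITS];       /* one request queue per drive */
static int next_unit;                       /* persists across calls, like fdc_queue */

/* Stand-in for blk_fetch_request(): pop this queue's pending request, if any. */
static struct request *fetch_request(struct queue *q)
{
    struct request *rq = q->pending;

    q->pending = NULL;
    return rq;
}

/*
 * Scan all units round-robin, starting one past where the previous call
 * stopped, and return the first pending request found (or NULL if idle).
 */
static struct request *set_next_request(void)
{
    struct request *rq = NULL;
    int i;

    for (i = 0; i < NUM_UNITS; i++) {
        struct queue *q = &units[next_unit];

        if (++next_unit == NUM_UNITS)
            next_unit = 0;
        rq = fetch_request(q);
        if (rq)
            break;
    }
    return rq;
}

int main(void)
{
    struct request a = { 1 }, b = { 2 };
    struct request *rq;

    units[1].pending = &a;
    units[3].pending = &b;
    while ((rq = set_next_request()) != NULL)
        printf("served request %d\n", rq->id);
    return 0;
}

Because next_unit always advances past the unit that was just served, consecutive calls visit the drives in rotation, which is the fairness property the driver comment describes.
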
@@ -1683,6 +1718,13 @@ static int __init fd_probe_drives(void)
1683 continue; 1718 continue;
1684 } 1719 }
1685 unit[drive].gendisk = disk; 1720 unit[drive].gendisk = disk;
1721
1722 disk->queue = blk_init_queue(do_fd_request, &amiflop_lock);
1723 if (!disk->queue) {
1724 unit[drive].type->code = FD_NODRIVE;
1725 continue;
1726 }
1727
1686 drives++; 1728 drives++;
1687 if ((unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL)) == NULL) { 1729 if ((unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL)) == NULL) {
1688 printk("no mem for "); 1730 printk("no mem for ");
@@ -1696,7 +1738,6 @@ static int __init fd_probe_drives(void)
1696 disk->fops = &floppy_fops; 1738 disk->fops = &floppy_fops;
1697 sprintf(disk->disk_name, "fd%d", drive); 1739 sprintf(disk->disk_name, "fd%d", drive);
1698 disk->private_data = &unit[drive]; 1740 disk->private_data = &unit[drive];
1699 disk->queue = floppy_queue;
1700 set_capacity(disk, 880*2); 1741 set_capacity(disk, 880*2);
1701 add_disk(disk); 1742 add_disk(disk);
1702 } 1743 }
@@ -1744,11 +1785,6 @@ static int __init amiga_floppy_probe(struct platform_device *pdev)
1744 goto out_irq2; 1785 goto out_irq2;
1745 } 1786 }
1746 1787
1747 ret = -ENOMEM;
1748 floppy_queue = blk_init_queue(do_fd_request, &amiflop_lock);
1749 if (!floppy_queue)
1750 goto out_queue;
1751
1752 ret = -ENODEV; 1788 ret = -ENODEV;
1753 if (fd_probe_drives() < 1) /* No usable drives */ 1789 if (fd_probe_drives() < 1) /* No usable drives */
1754 goto out_probe; 1790 goto out_probe;
@@ -1792,8 +1828,6 @@ static int __init amiga_floppy_probe(struct platform_device *pdev)
1792 return 0; 1828 return 0;
1793 1829
1794out_probe: 1830out_probe:
1795 blk_cleanup_queue(floppy_queue);
1796out_queue:
1797 free_irq(IRQ_AMIGA_CIAA_TB, NULL); 1831 free_irq(IRQ_AMIGA_CIAA_TB, NULL);
1798out_irq2: 1832out_irq2:
1799 free_irq(IRQ_AMIGA_DSKBLK, NULL); 1833 free_irq(IRQ_AMIGA_DSKBLK, NULL);
@@ -1811,9 +1845,12 @@ static int __exit amiga_floppy_remove(struct platform_device *pdev)
1811 1845
1812 for( i = 0; i < FD_MAX_UNITS; i++) { 1846 for( i = 0; i < FD_MAX_UNITS; i++) {
1813 if (unit[i].type->code != FD_NODRIVE) { 1847 if (unit[i].type->code != FD_NODRIVE) {
1848 struct request_queue *q = unit[i].gendisk->queue;
1814 del_gendisk(unit[i].gendisk); 1849 del_gendisk(unit[i].gendisk);
1815 put_disk(unit[i].gendisk); 1850 put_disk(unit[i].gendisk);
1816 kfree(unit[i].trackbuf); 1851 kfree(unit[i].trackbuf);
1852 if (q)
1853 blk_cleanup_queue(q);
1817 } 1854 }
1818 } 1855 }
1819 blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); 1856 blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
@@ -1821,7 +1858,6 @@ static int __exit amiga_floppy_remove(struct platform_device *pdev)
1821 free_irq(IRQ_AMIGA_DSKBLK, NULL); 1858 free_irq(IRQ_AMIGA_DSKBLK, NULL);
1822 custom.dmacon = DMAF_DISK; /* disable DMA */ 1859 custom.dmacon = DMAF_DISK; /* disable DMA */
1823 amiga_chip_free(raw_buf); 1860 amiga_chip_free(raw_buf);
1824 blk_cleanup_queue(floppy_queue);
1825 unregister_blkdev(FLOPPY_MAJOR, "fd"); 1861 unregister_blkdev(FLOPPY_MAJOR, "fd");
1826} 1862}
1827#endif 1863#endif
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 8c021bb7a991..4e4cc6c828cb 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -80,8 +80,8 @@
80#undef DEBUG 80#undef DEBUG
81 81
82static DEFINE_MUTEX(ataflop_mutex); 82static DEFINE_MUTEX(ataflop_mutex);
83static struct request_queue *floppy_queue;
84static struct request *fd_request; 83static struct request *fd_request;
84static int fdc_queue;
85 85
86/* Disk types: DD, HD, ED */ 86/* Disk types: DD, HD, ED */
87static struct atari_disk_type { 87static struct atari_disk_type {
@@ -1392,6 +1392,29 @@ static void setup_req_params( int drive )
1392 ReqTrack, ReqSector, (unsigned long)ReqData )); 1392 ReqTrack, ReqSector, (unsigned long)ReqData ));
1393} 1393}
1394 1394
1395/*
1396 * Round-robin between our available drives, doing one request from each
1397 */
1398static struct request *set_next_request(void)
1399{
1400 struct request_queue *q;
1401 int old_pos = fdc_queue;
1402 struct request *rq;
1403
1404 do {
1405 q = unit[fdc_queue].disk->queue;
1406 if (++fdc_queue == FD_MAX_UNITS)
1407 fdc_queue = 0;
1408 if (q) {
1409 rq = blk_fetch_request(q);
1410 if (rq)
1411 break;
1412 }
1413 } while (fdc_queue != old_pos);
1414
1415 return rq;
1416}
1417
1395 1418
1396static void redo_fd_request(void) 1419static void redo_fd_request(void)
1397{ 1420{
@@ -1406,7 +1429,7 @@ static void redo_fd_request(void)
1406 1429
1407repeat: 1430repeat:
1408 if (!fd_request) { 1431 if (!fd_request) {
1409 fd_request = blk_fetch_request(floppy_queue); 1432 fd_request = set_next_request();
1410 if (!fd_request) 1433 if (!fd_request)
1411 goto the_end; 1434 goto the_end;
1412 } 1435 }
@@ -1933,10 +1956,6 @@ static int __init atari_floppy_init (void)
1933 PhysTrackBuffer = virt_to_phys(TrackBuffer); 1956 PhysTrackBuffer = virt_to_phys(TrackBuffer);
1934 BufferDrive = BufferSide = BufferTrack = -1; 1957 BufferDrive = BufferSide = BufferTrack = -1;
1935 1958
1936 floppy_queue = blk_init_queue(do_fd_request, &ataflop_lock);
1937 if (!floppy_queue)
1938 goto Enomem;
1939
1940 for (i = 0; i < FD_MAX_UNITS; i++) { 1959 for (i = 0; i < FD_MAX_UNITS; i++) {
1941 unit[i].track = -1; 1960 unit[i].track = -1;
1942 unit[i].flags = 0; 1961 unit[i].flags = 0;
@@ -1945,7 +1964,10 @@ static int __init atari_floppy_init (void)
1945 sprintf(unit[i].disk->disk_name, "fd%d", i); 1964 sprintf(unit[i].disk->disk_name, "fd%d", i);
1946 unit[i].disk->fops = &floppy_fops; 1965 unit[i].disk->fops = &floppy_fops;
1947 unit[i].disk->private_data = &unit[i]; 1966 unit[i].disk->private_data = &unit[i];
1948 unit[i].disk->queue = floppy_queue; 1967 unit[i].disk->queue = blk_init_queue(do_fd_request,
1968 &ataflop_lock);
1969 if (!unit[i].disk->queue)
1970 goto Enomem;
1949 set_capacity(unit[i].disk, MAX_DISK_SIZE * 2); 1971 set_capacity(unit[i].disk, MAX_DISK_SIZE * 2);
1950 add_disk(unit[i].disk); 1972 add_disk(unit[i].disk);
1951 } 1973 }
@@ -1960,10 +1982,14 @@ static int __init atari_floppy_init (void)
1960 1982
1961 return 0; 1983 return 0;
1962Enomem: 1984Enomem:
1963 while (i--) 1985 while (i--) {
1986 struct request_queue *q = unit[i].disk->queue;
1987
1964 put_disk(unit[i].disk); 1988 put_disk(unit[i].disk);
1965 if (floppy_queue) 1989 if (q)
1966 blk_cleanup_queue(floppy_queue); 1990 blk_cleanup_queue(q);
1991 }
1992
1967 unregister_blkdev(FLOPPY_MAJOR, "fd"); 1993 unregister_blkdev(FLOPPY_MAJOR, "fd");
1968 return -ENOMEM; 1994 return -ENOMEM;
1969} 1995}
@@ -2012,12 +2038,14 @@ static void __exit atari_floppy_exit(void)
2012 int i; 2038 int i;
2013 blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); 2039 blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
2014 for (i = 0; i < FD_MAX_UNITS; i++) { 2040 for (i = 0; i < FD_MAX_UNITS; i++) {
2041 struct request_queue *q = unit[i].disk->queue;
2042
2015 del_gendisk(unit[i].disk); 2043 del_gendisk(unit[i].disk);
2016 put_disk(unit[i].disk); 2044 put_disk(unit[i].disk);
2045 blk_cleanup_queue(q);
2017 } 2046 }
2018 unregister_blkdev(FLOPPY_MAJOR, "fd"); 2047 unregister_blkdev(FLOPPY_MAJOR, "fd");
2019 2048
2020 blk_cleanup_queue(floppy_queue);
2021 del_timer_sync(&fd_timer); 2049 del_timer_sync(&fd_timer);
2022 atari_stram_free( DMABuffer ); 2050 atari_stram_free( DMABuffer );
2023} 2051}
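
[Note] Both floppy drivers now allocate one request queue per gendisk in their init paths and tear the queues down per disk in the Enomem unwind, the remove path and the exit path, instead of creating and destroying a single shared floppy_queue. A hedged sketch of that allocate-per-slot / unwind-on-failure shape in plain C; make_queue() and destroy_queue() are placeholders for blk_init_queue() and blk_cleanup_queue(), not the real block-layer calls.

#include <stdlib.h>

#define NUM_UNITS 4

struct queue { int dummy; };

static struct queue *queues[NUM_UNITS];

static struct queue *make_queue(void)        { return calloc(1, sizeof(struct queue)); }
static void destroy_queue(struct queue *q)   { free(q); }

/* Allocate one queue per unit; on failure, unwind the ones already set up. */
static int init_units(void)
{
    int i;

    for (i = 0; i < NUM_UNITS; i++) {
        queues[i] = make_queue();
        if (!queues[i])
            goto unwind;
    }
    return 0;

unwind:
    while (i--) {                /* mirrors the driver's "while (i--)" Enomem path */
        destroy_queue(queues[i]);
        queues[i] = NULL;
    }
    return -1;
}

static void exit_units(void)
{
    int i;

    for (i = 0; i < NUM_UNITS; i++) {
        destroy_queue(queues[i]);
        queues[i] = NULL;
    }
}

int main(void)
{
    if (init_units() == 0)
        exit_units();
    return 0;
}
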
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index c484c96e22a6..f09e6df15aa7 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -105,11 +105,12 @@ static const struct pci_device_id cciss_pci_device_id[] = {
105 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3249}, 105 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3249},
106 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324A}, 106 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324A},
107 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324B}, 107 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324B},
108 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3250}, 108 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3350},
109 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3251}, 109 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3351},
110 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3252}, 110 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3352},
111 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3253}, 111 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3353},
112 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3254}, 112 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3354},
113 {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSF, 0x103C, 0x3355},
113 {0,} 114 {0,}
114}; 115};
115 116
@@ -149,11 +150,12 @@ static struct board_type products[] = {
149 {0x3249103C, "Smart Array P812", &SA5_access}, 150 {0x3249103C, "Smart Array P812", &SA5_access},
150 {0x324A103C, "Smart Array P712m", &SA5_access}, 151 {0x324A103C, "Smart Array P712m", &SA5_access},
151 {0x324B103C, "Smart Array P711m", &SA5_access}, 152 {0x324B103C, "Smart Array P711m", &SA5_access},
152 {0x3250103C, "Smart Array", &SA5_access}, 153 {0x3350103C, "Smart Array", &SA5_access},
153 {0x3251103C, "Smart Array", &SA5_access}, 154 {0x3351103C, "Smart Array", &SA5_access},
154 {0x3252103C, "Smart Array", &SA5_access}, 155 {0x3352103C, "Smart Array", &SA5_access},
155 {0x3253103C, "Smart Array", &SA5_access}, 156 {0x3353103C, "Smart Array", &SA5_access},
156 {0x3254103C, "Smart Array", &SA5_access}, 157 {0x3354103C, "Smart Array", &SA5_access},
158 {0x3355103C, "Smart Array", &SA5_access},
157}; 159};
158 160
159/* How long to wait (in milliseconds) for board to go into simple mode */ 161/* How long to wait (in milliseconds) for board to go into simple mode */
@@ -1232,470 +1234,452 @@ static void check_ioctl_unit_attention(ctlr_info_t *h, CommandList_struct *c)
1232 c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION) 1234 c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
1233 (void)check_for_unit_attention(h, c); 1235 (void)check_for_unit_attention(h, c);
1234} 1236}
1235/* 1237
1236 * ioctl 1238static int cciss_getpciinfo(ctlr_info_t *h, void __user *argp)
1237 */
1238static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
1239 unsigned int cmd, unsigned long arg)
1240{ 1239{
1241 struct gendisk *disk = bdev->bd_disk; 1240 cciss_pci_info_struct pciinfo;
1242 ctlr_info_t *h = get_host(disk);
1243 drive_info_struct *drv = get_drv(disk);
1244 void __user *argp = (void __user *)arg;
1245 1241
1246 dev_dbg(&h->pdev->dev, "cciss_ioctl: Called with cmd=%x %lx\n", 1242 if (!argp)
1247 cmd, arg); 1243 return -EINVAL;
1248 switch (cmd) { 1244 pciinfo.domain = pci_domain_nr(h->pdev->bus);
1249 case CCISS_GETPCIINFO: 1245 pciinfo.bus = h->pdev->bus->number;
1250 { 1246 pciinfo.dev_fn = h->pdev->devfn;
1251 cciss_pci_info_struct pciinfo; 1247 pciinfo.board_id = h->board_id;
1252 1248 if (copy_to_user(argp, &pciinfo, sizeof(cciss_pci_info_struct)))
1253 if (!arg) 1249 return -EFAULT;
1254 return -EINVAL; 1250 return 0;
1255 pciinfo.domain = pci_domain_nr(h->pdev->bus); 1251}
1256 pciinfo.bus = h->pdev->bus->number;
1257 pciinfo.dev_fn = h->pdev->devfn;
1258 pciinfo.board_id = h->board_id;
1259 if (copy_to_user
1260 (argp, &pciinfo, sizeof(cciss_pci_info_struct)))
1261 return -EFAULT;
1262 return 0;
1263 }
1264 case CCISS_GETINTINFO:
1265 {
1266 cciss_coalint_struct intinfo;
1267 if (!arg)
1268 return -EINVAL;
1269 intinfo.delay =
1270 readl(&h->cfgtable->HostWrite.CoalIntDelay);
1271 intinfo.count =
1272 readl(&h->cfgtable->HostWrite.CoalIntCount);
1273 if (copy_to_user
1274 (argp, &intinfo, sizeof(cciss_coalint_struct)))
1275 return -EFAULT;
1276 return 0;
1277 }
1278 case CCISS_SETINTINFO:
1279 {
1280 cciss_coalint_struct intinfo;
1281 unsigned long flags;
1282 int i;
1283
1284 if (!arg)
1285 return -EINVAL;
1286 if (!capable(CAP_SYS_ADMIN))
1287 return -EPERM;
1288 if (copy_from_user
1289 (&intinfo, argp, sizeof(cciss_coalint_struct)))
1290 return -EFAULT;
1291 if ((intinfo.delay == 0) && (intinfo.count == 0))
1292 return -EINVAL;
1293 spin_lock_irqsave(&h->lock, flags);
1294 /* Update the field, and then ring the doorbell */
1295 writel(intinfo.delay,
1296 &(h->cfgtable->HostWrite.CoalIntDelay));
1297 writel(intinfo.count,
1298 &(h->cfgtable->HostWrite.CoalIntCount));
1299 writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
1300
1301 for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
1302 if (!(readl(h->vaddr + SA5_DOORBELL)
1303 & CFGTBL_ChangeReq))
1304 break;
1305 /* delay and try again */
1306 udelay(1000);
1307 }
1308 spin_unlock_irqrestore(&h->lock, flags);
1309 if (i >= MAX_IOCTL_CONFIG_WAIT)
1310 return -EAGAIN;
1311 return 0;
1312 }
1313 case CCISS_GETNODENAME:
1314 {
1315 NodeName_type NodeName;
1316 int i;
1317
1318 if (!arg)
1319 return -EINVAL;
1320 for (i = 0; i < 16; i++)
1321 NodeName[i] =
1322 readb(&h->cfgtable->ServerName[i]);
1323 if (copy_to_user(argp, NodeName, sizeof(NodeName_type)))
1324 return -EFAULT;
1325 return 0;
1326 }
1327 case CCISS_SETNODENAME:
1328 {
1329 NodeName_type NodeName;
1330 unsigned long flags;
1331 int i;
1332 1252
1333 if (!arg) 1253static int cciss_getintinfo(ctlr_info_t *h, void __user *argp)
1334 return -EINVAL; 1254{
1335 if (!capable(CAP_SYS_ADMIN)) 1255 cciss_coalint_struct intinfo;
1336 return -EPERM;
1337 1256
1338 if (copy_from_user 1257 if (!argp)
1339 (NodeName, argp, sizeof(NodeName_type))) 1258 return -EINVAL;
1340 return -EFAULT; 1259 intinfo.delay = readl(&h->cfgtable->HostWrite.CoalIntDelay);
1260 intinfo.count = readl(&h->cfgtable->HostWrite.CoalIntCount);
1261 if (copy_to_user
1262 (argp, &intinfo, sizeof(cciss_coalint_struct)))
1263 return -EFAULT;
1264 return 0;
1265}
1341 1266
1342 spin_lock_irqsave(&h->lock, flags); 1267static int cciss_setintinfo(ctlr_info_t *h, void __user *argp)
1268{
1269 cciss_coalint_struct intinfo;
1270 unsigned long flags;
1271 int i;
1343 1272
1344 /* Update the field, and then ring the doorbell */ 1273 if (!argp)
1345 for (i = 0; i < 16; i++) 1274 return -EINVAL;
1346 writeb(NodeName[i], 1275 if (!capable(CAP_SYS_ADMIN))
1347 &h->cfgtable->ServerName[i]); 1276 return -EPERM;
1277 if (copy_from_user(&intinfo, argp, sizeof(intinfo)))
1278 return -EFAULT;
1279 if ((intinfo.delay == 0) && (intinfo.count == 0))
1280 return -EINVAL;
1281 spin_lock_irqsave(&h->lock, flags);
1282 /* Update the field, and then ring the doorbell */
1283 writel(intinfo.delay, &(h->cfgtable->HostWrite.CoalIntDelay));
1284 writel(intinfo.count, &(h->cfgtable->HostWrite.CoalIntCount));
1285 writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
1348 1286
1349 writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL); 1287 for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
1288 if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
1289 break;
1290 udelay(1000); /* delay and try again */
1291 }
1292 spin_unlock_irqrestore(&h->lock, flags);
1293 if (i >= MAX_IOCTL_CONFIG_WAIT)
1294 return -EAGAIN;
1295 return 0;
1296}
1350 1297
1351 for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) { 1298static int cciss_getnodename(ctlr_info_t *h, void __user *argp)
1352 if (!(readl(h->vaddr + SA5_DOORBELL) 1299{
1353 & CFGTBL_ChangeReq)) 1300 NodeName_type NodeName;
1354 break; 1301 int i;
1355 /* delay and try again */
1356 udelay(1000);
1357 }
1358 spin_unlock_irqrestore(&h->lock, flags);
1359 if (i >= MAX_IOCTL_CONFIG_WAIT)
1360 return -EAGAIN;
1361 return 0;
1362 }
1363 1302
1364 case CCISS_GETHEARTBEAT: 1303 if (!argp)
1365 { 1304 return -EINVAL;
1366 Heartbeat_type heartbeat; 1305 for (i = 0; i < 16; i++)
1367 1306 NodeName[i] = readb(&h->cfgtable->ServerName[i]);
1368 if (!arg) 1307 if (copy_to_user(argp, NodeName, sizeof(NodeName_type)))
1369 return -EINVAL; 1308 return -EFAULT;
1370 heartbeat = readl(&h->cfgtable->HeartBeat); 1309 return 0;
1371 if (copy_to_user 1310}
1372 (argp, &heartbeat, sizeof(Heartbeat_type)))
1373 return -EFAULT;
1374 return 0;
1375 }
1376 case CCISS_GETBUSTYPES:
1377 {
1378 BusTypes_type BusTypes;
1379
1380 if (!arg)
1381 return -EINVAL;
1382 BusTypes = readl(&h->cfgtable->BusTypes);
1383 if (copy_to_user
1384 (argp, &BusTypes, sizeof(BusTypes_type)))
1385 return -EFAULT;
1386 return 0;
1387 }
1388 case CCISS_GETFIRMVER:
1389 {
1390 FirmwareVer_type firmware;
1391 1311
1392 if (!arg) 1312static int cciss_setnodename(ctlr_info_t *h, void __user *argp)
1393 return -EINVAL; 1313{
1394 memcpy(firmware, h->firm_ver, 4); 1314 NodeName_type NodeName;
1315 unsigned long flags;
1316 int i;
1395 1317
1396 if (copy_to_user 1318 if (!argp)
1397 (argp, firmware, sizeof(FirmwareVer_type))) 1319 return -EINVAL;
1398 return -EFAULT; 1320 if (!capable(CAP_SYS_ADMIN))
1399 return 0; 1321 return -EPERM;
1400 } 1322 if (copy_from_user(NodeName, argp, sizeof(NodeName_type)))
1401 case CCISS_GETDRIVVER: 1323 return -EFAULT;
1402 { 1324 spin_lock_irqsave(&h->lock, flags);
1403 DriverVer_type DriverVer = DRIVER_VERSION; 1325 /* Update the field, and then ring the doorbell */
1326 for (i = 0; i < 16; i++)
1327 writeb(NodeName[i], &h->cfgtable->ServerName[i]);
1328 writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
1329 for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
1330 if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
1331 break;
1332 udelay(1000); /* delay and try again */
1333 }
1334 spin_unlock_irqrestore(&h->lock, flags);
1335 if (i >= MAX_IOCTL_CONFIG_WAIT)
1336 return -EAGAIN;
1337 return 0;
1338}
1404 1339
1405 if (!arg) 1340static int cciss_getheartbeat(ctlr_info_t *h, void __user *argp)
1406 return -EINVAL; 1341{
1342 Heartbeat_type heartbeat;
1407 1343
1408 if (copy_to_user 1344 if (!argp)
1409 (argp, &DriverVer, sizeof(DriverVer_type))) 1345 return -EINVAL;
1410 return -EFAULT; 1346 heartbeat = readl(&h->cfgtable->HeartBeat);
1411 return 0; 1347 if (copy_to_user(argp, &heartbeat, sizeof(Heartbeat_type)))
1412 } 1348 return -EFAULT;
1349 return 0;
1350}
1413 1351
1414 case CCISS_DEREGDISK: 1352static int cciss_getbustypes(ctlr_info_t *h, void __user *argp)
1415 case CCISS_REGNEWD: 1353{
1416 case CCISS_REVALIDVOLS: 1354 BusTypes_type BusTypes;
1417 return rebuild_lun_table(h, 0, 1); 1355
1356 if (!argp)
1357 return -EINVAL;
1358 BusTypes = readl(&h->cfgtable->BusTypes);
1359 if (copy_to_user(argp, &BusTypes, sizeof(BusTypes_type)))
1360 return -EFAULT;
1361 return 0;
1362}
1418 1363
1419 case CCISS_GETLUNINFO:{ 1364static int cciss_getfirmver(ctlr_info_t *h, void __user *argp)
1420 LogvolInfo_struct luninfo; 1365{
1366 FirmwareVer_type firmware;
1421 1367
1422 memcpy(&luninfo.LunID, drv->LunID, 1368 if (!argp)
1423 sizeof(luninfo.LunID)); 1369 return -EINVAL;
1424 luninfo.num_opens = drv->usage_count; 1370 memcpy(firmware, h->firm_ver, 4);
1425 luninfo.num_parts = 0; 1371
1426 if (copy_to_user(argp, &luninfo, 1372 if (copy_to_user
1427 sizeof(LogvolInfo_struct))) 1373 (argp, firmware, sizeof(FirmwareVer_type)))
1428 return -EFAULT; 1374 return -EFAULT;
1429 return 0; 1375 return 0;
1376}
1377
1378static int cciss_getdrivver(ctlr_info_t *h, void __user *argp)
1379{
1380 DriverVer_type DriverVer = DRIVER_VERSION;
1381
1382 if (!argp)
1383 return -EINVAL;
1384 if (copy_to_user(argp, &DriverVer, sizeof(DriverVer_type)))
1385 return -EFAULT;
1386 return 0;
1387}
1388
1389static int cciss_getluninfo(ctlr_info_t *h,
1390 struct gendisk *disk, void __user *argp)
1391{
1392 LogvolInfo_struct luninfo;
1393 drive_info_struct *drv = get_drv(disk);
1394
1395 if (!argp)
1396 return -EINVAL;
1397 memcpy(&luninfo.LunID, drv->LunID, sizeof(luninfo.LunID));
1398 luninfo.num_opens = drv->usage_count;
1399 luninfo.num_parts = 0;
1400 if (copy_to_user(argp, &luninfo, sizeof(LogvolInfo_struct)))
1401 return -EFAULT;
1402 return 0;
1403}
1404
1405static int cciss_passthru(ctlr_info_t *h, void __user *argp)
1406{
1407 IOCTL_Command_struct iocommand;
1408 CommandList_struct *c;
1409 char *buff = NULL;
1410 u64bit temp64;
1411 DECLARE_COMPLETION_ONSTACK(wait);
1412
1413 if (!argp)
1414 return -EINVAL;
1415
1416 if (!capable(CAP_SYS_RAWIO))
1417 return -EPERM;
1418
1419 if (copy_from_user
1420 (&iocommand, argp, sizeof(IOCTL_Command_struct)))
1421 return -EFAULT;
1422 if ((iocommand.buf_size < 1) &&
1423 (iocommand.Request.Type.Direction != XFER_NONE)) {
1424 return -EINVAL;
1425 }
1426 if (iocommand.buf_size > 0) {
1427 buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
1428 if (buff == NULL)
1429 return -EFAULT;
1430 }
1431 if (iocommand.Request.Type.Direction == XFER_WRITE) {
1432 /* Copy the data into the buffer we created */
1433 if (copy_from_user(buff, iocommand.buf, iocommand.buf_size)) {
1434 kfree(buff);
1435 return -EFAULT;
1430 } 1436 }
1431 case CCISS_PASSTHRU: 1437 } else {
1432 { 1438 memset(buff, 0, iocommand.buf_size);
1433 IOCTL_Command_struct iocommand; 1439 }
1434 CommandList_struct *c; 1440 c = cmd_special_alloc(h);
1435 char *buff = NULL; 1441 if (!c) {
1436 u64bit temp64; 1442 kfree(buff);
1437 DECLARE_COMPLETION_ONSTACK(wait); 1443 return -ENOMEM;
1438 1444 }
1439 if (!arg) 1445 /* Fill in the command type */
1440 return -EINVAL; 1446 c->cmd_type = CMD_IOCTL_PEND;
1441 1447 /* Fill in Command Header */
1442 if (!capable(CAP_SYS_RAWIO)) 1448 c->Header.ReplyQueue = 0; /* unused in simple mode */
1443 return -EPERM; 1449 if (iocommand.buf_size > 0) { /* buffer to fill */
1444 1450 c->Header.SGList = 1;
1445 if (copy_from_user 1451 c->Header.SGTotal = 1;
1446 (&iocommand, argp, sizeof(IOCTL_Command_struct))) 1452 } else { /* no buffers to fill */
1447 return -EFAULT; 1453 c->Header.SGList = 0;
1448 if ((iocommand.buf_size < 1) && 1454 c->Header.SGTotal = 0;
1449 (iocommand.Request.Type.Direction != XFER_NONE)) { 1455 }
1450 return -EINVAL; 1456 c->Header.LUN = iocommand.LUN_info;
1451 } 1457 /* use the kernel address the cmd block for tag */
1452#if 0 /* 'buf_size' member is 16-bits, and always smaller than kmalloc limit */ 1458 c->Header.Tag.lower = c->busaddr;
1453 /* Check kmalloc limits */
1454 if (iocommand.buf_size > 128000)
1455 return -EINVAL;
1456#endif
1457 if (iocommand.buf_size > 0) {
1458 buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
1459 if (buff == NULL)
1460 return -EFAULT;
1461 }
1462 if (iocommand.Request.Type.Direction == XFER_WRITE) {
1463 /* Copy the data into the buffer we created */
1464 if (copy_from_user
1465 (buff, iocommand.buf, iocommand.buf_size)) {
1466 kfree(buff);
1467 return -EFAULT;
1468 }
1469 } else {
1470 memset(buff, 0, iocommand.buf_size);
1471 }
1472 c = cmd_special_alloc(h);
1473 if (!c) {
1474 kfree(buff);
1475 return -ENOMEM;
1476 }
1477 /* Fill in the command type */
1478 c->cmd_type = CMD_IOCTL_PEND;
1479 /* Fill in Command Header */
1480 c->Header.ReplyQueue = 0; /* unused in simple mode */
1481 if (iocommand.buf_size > 0) /* buffer to fill */
1482 {
1483 c->Header.SGList = 1;
1484 c->Header.SGTotal = 1;
1485 } else /* no buffers to fill */
1486 {
1487 c->Header.SGList = 0;
1488 c->Header.SGTotal = 0;
1489 }
1490 c->Header.LUN = iocommand.LUN_info;
1491 /* use the kernel address the cmd block for tag */
1492 c->Header.Tag.lower = c->busaddr;
1493
1494 /* Fill in Request block */
1495 c->Request = iocommand.Request;
1496
1497 /* Fill in the scatter gather information */
1498 if (iocommand.buf_size > 0) {
1499 temp64.val = pci_map_single(h->pdev, buff,
1500 iocommand.buf_size,
1501 PCI_DMA_BIDIRECTIONAL);
1502 c->SG[0].Addr.lower = temp64.val32.lower;
1503 c->SG[0].Addr.upper = temp64.val32.upper;
1504 c->SG[0].Len = iocommand.buf_size;
1505 c->SG[0].Ext = 0; /* we are not chaining */
1506 }
1507 c->waiting = &wait;
1508 1459
1509 enqueue_cmd_and_start_io(h, c); 1460 /* Fill in Request block */
1510 wait_for_completion(&wait); 1461 c->Request = iocommand.Request;
1511 1462
1512 /* unlock the buffers from DMA */ 1463 /* Fill in the scatter gather information */
1513 temp64.val32.lower = c->SG[0].Addr.lower; 1464 if (iocommand.buf_size > 0) {
1514 temp64.val32.upper = c->SG[0].Addr.upper; 1465 temp64.val = pci_map_single(h->pdev, buff,
1515 pci_unmap_single(h->pdev, (dma_addr_t) temp64.val, 1466 iocommand.buf_size, PCI_DMA_BIDIRECTIONAL);
1516 iocommand.buf_size, 1467 c->SG[0].Addr.lower = temp64.val32.lower;
1517 PCI_DMA_BIDIRECTIONAL); 1468 c->SG[0].Addr.upper = temp64.val32.upper;
1469 c->SG[0].Len = iocommand.buf_size;
1470 c->SG[0].Ext = 0; /* we are not chaining */
1471 }
1472 c->waiting = &wait;
1518 1473
1519 check_ioctl_unit_attention(h, c); 1474 enqueue_cmd_and_start_io(h, c);
1475 wait_for_completion(&wait);
1520 1476
1521 /* Copy the error information out */ 1477 /* unlock the buffers from DMA */
1522 iocommand.error_info = *(c->err_info); 1478 temp64.val32.lower = c->SG[0].Addr.lower;
1523 if (copy_to_user 1479 temp64.val32.upper = c->SG[0].Addr.upper;
1524 (argp, &iocommand, sizeof(IOCTL_Command_struct))) { 1480 pci_unmap_single(h->pdev, (dma_addr_t) temp64.val, iocommand.buf_size,
1525 kfree(buff); 1481 PCI_DMA_BIDIRECTIONAL);
1526 cmd_special_free(h, c); 1482 check_ioctl_unit_attention(h, c);
1527 return -EFAULT; 1483
1528 } 1484 /* Copy the error information out */
1485 iocommand.error_info = *(c->err_info);
1486 if (copy_to_user(argp, &iocommand, sizeof(IOCTL_Command_struct))) {
1487 kfree(buff);
1488 cmd_special_free(h, c);
1489 return -EFAULT;
1490 }
1529 1491
1530 if (iocommand.Request.Type.Direction == XFER_READ) { 1492 if (iocommand.Request.Type.Direction == XFER_READ) {
1531 /* Copy the data out of the buffer we created */ 1493 /* Copy the data out of the buffer we created */
1532 if (copy_to_user 1494 if (copy_to_user(iocommand.buf, buff, iocommand.buf_size)) {
1533 (iocommand.buf, buff, iocommand.buf_size)) {
1534 kfree(buff);
1535 cmd_special_free(h, c);
1536 return -EFAULT;
1537 }
1538 }
1539 kfree(buff); 1495 kfree(buff);
1540 cmd_special_free(h, c); 1496 cmd_special_free(h, c);
1541 return 0; 1497 return -EFAULT;
1542 } 1498 }
1543 case CCISS_BIG_PASSTHRU:{ 1499 }
1544 BIG_IOCTL_Command_struct *ioc; 1500 kfree(buff);
1545 CommandList_struct *c; 1501 cmd_special_free(h, c);
1546 unsigned char **buff = NULL; 1502 return 0;
1547 int *buff_size = NULL; 1503}
1548 u64bit temp64; 1504
1549 BYTE sg_used = 0; 1505static int cciss_bigpassthru(ctlr_info_t *h, void __user *argp)
1550 int status = 0; 1506{
1551 int i; 1507 BIG_IOCTL_Command_struct *ioc;
1552 DECLARE_COMPLETION_ONSTACK(wait); 1508 CommandList_struct *c;
1553 __u32 left; 1509 unsigned char **buff = NULL;
1554 __u32 sz; 1510 int *buff_size = NULL;
1555 BYTE __user *data_ptr; 1511 u64bit temp64;
1556 1512 BYTE sg_used = 0;
1557 if (!arg) 1513 int status = 0;
1558 return -EINVAL; 1514 int i;
1559 if (!capable(CAP_SYS_RAWIO)) 1515 DECLARE_COMPLETION_ONSTACK(wait);
1560 return -EPERM; 1516 __u32 left;
1561 ioc = (BIG_IOCTL_Command_struct *) 1517 __u32 sz;
1562 kmalloc(sizeof(*ioc), GFP_KERNEL); 1518 BYTE __user *data_ptr;
1563 if (!ioc) { 1519
1564 status = -ENOMEM; 1520 if (!argp)
1565 goto cleanup1; 1521 return -EINVAL;
1566 } 1522 if (!capable(CAP_SYS_RAWIO))
1567 if (copy_from_user(ioc, argp, sizeof(*ioc))) { 1523 return -EPERM;
1524 ioc = (BIG_IOCTL_Command_struct *)
1525 kmalloc(sizeof(*ioc), GFP_KERNEL);
1526 if (!ioc) {
1527 status = -ENOMEM;
1528 goto cleanup1;
1529 }
1530 if (copy_from_user(ioc, argp, sizeof(*ioc))) {
1531 status = -EFAULT;
1532 goto cleanup1;
1533 }
1534 if ((ioc->buf_size < 1) &&
1535 (ioc->Request.Type.Direction != XFER_NONE)) {
1536 status = -EINVAL;
1537 goto cleanup1;
1538 }
1539 /* Check kmalloc limits using all SGs */
1540 if (ioc->malloc_size > MAX_KMALLOC_SIZE) {
1541 status = -EINVAL;
1542 goto cleanup1;
1543 }
1544 if (ioc->buf_size > ioc->malloc_size * MAXSGENTRIES) {
1545 status = -EINVAL;
1546 goto cleanup1;
1547 }
1548 buff = kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL);
1549 if (!buff) {
1550 status = -ENOMEM;
1551 goto cleanup1;
1552 }
1553 buff_size = kmalloc(MAXSGENTRIES * sizeof(int), GFP_KERNEL);
1554 if (!buff_size) {
1555 status = -ENOMEM;
1556 goto cleanup1;
1557 }
1558 left = ioc->buf_size;
1559 data_ptr = ioc->buf;
1560 while (left) {
1561 sz = (left > ioc->malloc_size) ? ioc->malloc_size : left;
1562 buff_size[sg_used] = sz;
1563 buff[sg_used] = kmalloc(sz, GFP_KERNEL);
1564 if (buff[sg_used] == NULL) {
1565 status = -ENOMEM;
1566 goto cleanup1;
1567 }
1568 if (ioc->Request.Type.Direction == XFER_WRITE) {
1569 if (copy_from_user(buff[sg_used], data_ptr, sz)) {
1568 status = -EFAULT; 1570 status = -EFAULT;
1569 goto cleanup1; 1571 goto cleanup1;
1570 } 1572 }
1571 if ((ioc->buf_size < 1) && 1573 } else {
1572 (ioc->Request.Type.Direction != XFER_NONE)) { 1574 memset(buff[sg_used], 0, sz);
1573 status = -EINVAL; 1575 }
1574 goto cleanup1; 1576 left -= sz;
1575 } 1577 data_ptr += sz;
1576 /* Check kmalloc limits using all SGs */ 1578 sg_used++;
1577 if (ioc->malloc_size > MAX_KMALLOC_SIZE) { 1579 }
1578 status = -EINVAL; 1580 c = cmd_special_alloc(h);
1579 goto cleanup1; 1581 if (!c) {
1580 } 1582 status = -ENOMEM;
1581 if (ioc->buf_size > ioc->malloc_size * MAXSGENTRIES) { 1583 goto cleanup1;
1582 status = -EINVAL; 1584 }
1583 goto cleanup1; 1585 c->cmd_type = CMD_IOCTL_PEND;
1584 } 1586 c->Header.ReplyQueue = 0;
1585 buff = 1587 c->Header.SGList = sg_used;
1586 kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL); 1588 c->Header.SGTotal = sg_used;
1587 if (!buff) { 1589 c->Header.LUN = ioc->LUN_info;
1588 status = -ENOMEM; 1590 c->Header.Tag.lower = c->busaddr;
1589 goto cleanup1;
1590 }
1591 buff_size = kmalloc(MAXSGENTRIES * sizeof(int),
1592 GFP_KERNEL);
1593 if (!buff_size) {
1594 status = -ENOMEM;
1595 goto cleanup1;
1596 }
1597 left = ioc->buf_size;
1598 data_ptr = ioc->buf;
1599 while (left) {
1600 sz = (left >
1601 ioc->malloc_size) ? ioc->
1602 malloc_size : left;
1603 buff_size[sg_used] = sz;
1604 buff[sg_used] = kmalloc(sz, GFP_KERNEL);
1605 if (buff[sg_used] == NULL) {
1606 status = -ENOMEM;
1607 goto cleanup1;
1608 }
1609 if (ioc->Request.Type.Direction == XFER_WRITE) {
1610 if (copy_from_user
1611 (buff[sg_used], data_ptr, sz)) {
1612 status = -EFAULT;
1613 goto cleanup1;
1614 }
1615 } else {
1616 memset(buff[sg_used], 0, sz);
1617 }
1618 left -= sz;
1619 data_ptr += sz;
1620 sg_used++;
1621 }
1622 c = cmd_special_alloc(h);
1623 if (!c) {
1624 status = -ENOMEM;
1625 goto cleanup1;
1626 }
1627 c->cmd_type = CMD_IOCTL_PEND;
1628 c->Header.ReplyQueue = 0;
1629 1591
1630 if (ioc->buf_size > 0) { 1592 c->Request = ioc->Request;
1631 c->Header.SGList = sg_used; 1593 for (i = 0; i < sg_used; i++) {
1632 c->Header.SGTotal = sg_used; 1594 temp64.val = pci_map_single(h->pdev, buff[i], buff_size[i],
1633 } else { 1595 PCI_DMA_BIDIRECTIONAL);
1634 c->Header.SGList = 0; 1596 c->SG[i].Addr.lower = temp64.val32.lower;
1635 c->Header.SGTotal = 0; 1597 c->SG[i].Addr.upper = temp64.val32.upper;
1636 } 1598 c->SG[i].Len = buff_size[i];
1637 c->Header.LUN = ioc->LUN_info; 1599 c->SG[i].Ext = 0; /* we are not chaining */
1638 c->Header.Tag.lower = c->busaddr; 1600 }
1639 1601 c->waiting = &wait;
1640 c->Request = ioc->Request; 1602 enqueue_cmd_and_start_io(h, c);
1641 if (ioc->buf_size > 0) { 1603 wait_for_completion(&wait);
1642 for (i = 0; i < sg_used; i++) { 1604 /* unlock the buffers from DMA */
1643 temp64.val = 1605 for (i = 0; i < sg_used; i++) {
1644 pci_map_single(h->pdev, buff[i], 1606 temp64.val32.lower = c->SG[i].Addr.lower;
1645 buff_size[i], 1607 temp64.val32.upper = c->SG[i].Addr.upper;
1646 PCI_DMA_BIDIRECTIONAL); 1608 pci_unmap_single(h->pdev,
1647 c->SG[i].Addr.lower = 1609 (dma_addr_t) temp64.val, buff_size[i],
1648 temp64.val32.lower; 1610 PCI_DMA_BIDIRECTIONAL);
1649 c->SG[i].Addr.upper = 1611 }
1650 temp64.val32.upper; 1612 check_ioctl_unit_attention(h, c);
1651 c->SG[i].Len = buff_size[i]; 1613 /* Copy the error information out */
1652 c->SG[i].Ext = 0; /* we are not chaining */ 1614 ioc->error_info = *(c->err_info);
1653 } 1615 if (copy_to_user(argp, ioc, sizeof(*ioc))) {
1654 } 1616 cmd_special_free(h, c);
1655 c->waiting = &wait; 1617 status = -EFAULT;
1656 enqueue_cmd_and_start_io(h, c); 1618 goto cleanup1;
1657 wait_for_completion(&wait); 1619 }
1658 /* unlock the buffers from DMA */ 1620 if (ioc->Request.Type.Direction == XFER_READ) {
1659 for (i = 0; i < sg_used; i++) { 1621 /* Copy the data out of the buffer we created */
1660 temp64.val32.lower = c->SG[i].Addr.lower; 1622 BYTE __user *ptr = ioc->buf;
1661 temp64.val32.upper = c->SG[i].Addr.upper; 1623 for (i = 0; i < sg_used; i++) {
1662 pci_unmap_single(h->pdev, 1624 if (copy_to_user(ptr, buff[i], buff_size[i])) {
1663 (dma_addr_t) temp64.val, buff_size[i],
1664 PCI_DMA_BIDIRECTIONAL);
1665 }
1666 check_ioctl_unit_attention(h, c);
1667 /* Copy the error information out */
1668 ioc->error_info = *(c->err_info);
1669 if (copy_to_user(argp, ioc, sizeof(*ioc))) {
1670 cmd_special_free(h, c); 1625 cmd_special_free(h, c);
1671 status = -EFAULT; 1626 status = -EFAULT;
1672 goto cleanup1; 1627 goto cleanup1;
1673 } 1628 }
1674 if (ioc->Request.Type.Direction == XFER_READ) { 1629 ptr += buff_size[i];
1675 /* Copy the data out of the buffer we created */
1676 BYTE __user *ptr = ioc->buf;
1677 for (i = 0; i < sg_used; i++) {
1678 if (copy_to_user
1679 (ptr, buff[i], buff_size[i])) {
1680 cmd_special_free(h, c);
1681 status = -EFAULT;
1682 goto cleanup1;
1683 }
1684 ptr += buff_size[i];
1685 }
1686 }
1687 cmd_special_free(h, c);
1688 status = 0;
1689 cleanup1:
1690 if (buff) {
1691 for (i = 0; i < sg_used; i++)
1692 kfree(buff[i]);
1693 kfree(buff);
1694 }
1695 kfree(buff_size);
1696 kfree(ioc);
1697 return status;
1698 } 1630 }
1631 }
1632 cmd_special_free(h, c);
1633 status = 0;
1634cleanup1:
1635 if (buff) {
1636 for (i = 0; i < sg_used; i++)
1637 kfree(buff[i]);
1638 kfree(buff);
1639 }
1640 kfree(buff_size);
1641 kfree(ioc);
1642 return status;
1643}
1644
1645static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
1646 unsigned int cmd, unsigned long arg)
1647{
1648 struct gendisk *disk = bdev->bd_disk;
1649 ctlr_info_t *h = get_host(disk);
1650 void __user *argp = (void __user *)arg;
1651
1652 dev_dbg(&h->pdev->dev, "cciss_ioctl: Called with cmd=%x %lx\n",
1653 cmd, arg);
1654 switch (cmd) {
1655 case CCISS_GETPCIINFO:
1656 return cciss_getpciinfo(h, argp);
1657 case CCISS_GETINTINFO:
1658 return cciss_getintinfo(h, argp);
1659 case CCISS_SETINTINFO:
1660 return cciss_setintinfo(h, argp);
1661 case CCISS_GETNODENAME:
1662 return cciss_getnodename(h, argp);
1663 case CCISS_SETNODENAME:
1664 return cciss_setnodename(h, argp);
1665 case CCISS_GETHEARTBEAT:
1666 return cciss_getheartbeat(h, argp);
1667 case CCISS_GETBUSTYPES:
1668 return cciss_getbustypes(h, argp);
1669 case CCISS_GETFIRMVER:
1670 return cciss_getfirmver(h, argp);
1671 case CCISS_GETDRIVVER:
1672 return cciss_getdrivver(h, argp);
1673 case CCISS_DEREGDISK:
1674 case CCISS_REGNEWD:
1675 case CCISS_REVALIDVOLS:
1676 return rebuild_lun_table(h, 0, 1);
1677 case CCISS_GETLUNINFO:
1678 return cciss_getluninfo(h, disk, argp);
1679 case CCISS_PASSTHRU:
1680 return cciss_passthru(h, argp);
1681 case CCISS_BIG_PASSTHRU:
1682 return cciss_bigpassthru(h, argp);
1699 1683
1700 /* scsi_cmd_ioctl handles these, below, though some are not */ 1684 /* scsi_cmd_ioctl handles these, below, though some are not */
1701 /* very meaningful for cciss. SG_IO is the main one people want. */ 1685 /* very meaningful for cciss. SG_IO is the main one people want. */
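
[Note] The cciss change above unrolls one very long cciss_ioctl() switch into small helpers (cciss_getpciinfo(), cciss_setintinfo(), cciss_passthru(), ...) that each take the controller and the user pointer, so the entry point shrinks to a one-line dispatch per command. A minimal sketch of that dispatcher shape; the command codes and helper names below are made up for illustration and are not the real cciss ioctl ABI.

#include <stdio.h>
#include <string.h>

struct ctlr { char name[16]; int board_id; };

enum { GET_ID = 1, GET_NAME = 2 };       /* illustrative command codes only */

static int do_get_id(struct ctlr *h, void *argp)
{
    if (!argp)
        return -1;
    memcpy(argp, &h->board_id, sizeof(h->board_id));
    return 0;
}

static int do_get_name(struct ctlr *h, void *argp)
{
    if (!argp)
        return -1;
    memcpy(argp, h->name, sizeof(h->name));
    return 0;
}

/* The ioctl entry point becomes a plain dispatch over the helpers. */
static int ctlr_ioctl(struct ctlr *h, unsigned int cmd, void *argp)
{
    switch (cmd) {
    case GET_ID:
        return do_get_id(h, argp);
    case GET_NAME:
        return do_get_name(h, argp);
    default:
        return -1;
    }
}

int main(void)
{
    struct ctlr h = { "ctl0", 42 };
    int id = 0;

    if (ctlr_ioctl(&h, GET_ID, &id) == 0)
        printf("board id %d\n", id);
    return 0;
}

Each helper owns its own argument checking and error returns, which is what lets the original nested switch body collapse without changing behaviour.
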
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 9400845d602e..ac04ef97eac2 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -965,29 +965,30 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
965 * ok, (capacity & 7) != 0 sometimes, but who cares... 965 * ok, (capacity & 7) != 0 sometimes, but who cares...
966 * we count rs_{total,left} in bits, not sectors. 966 * we count rs_{total,left} in bits, not sectors.
967 */ 967 */
968 spin_lock_irqsave(&mdev->al_lock, flags);
969 count = drbd_bm_clear_bits(mdev, sbnr, ebnr); 968 count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
970 if (count) { 969 if (count && get_ldev(mdev)) {
971 /* we need the lock for drbd_try_clear_on_disk_bm */ 970 unsigned long now = jiffies;
972 if (jiffies - mdev->rs_mark_time > HZ*10) { 971 unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
973 /* should be rolling marks, 972 int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
974 * but we estimate only anyways. */ 973 if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
975 if (mdev->rs_mark_left != drbd_bm_total_weight(mdev) && 974 unsigned long tw = drbd_bm_total_weight(mdev);
975 if (mdev->rs_mark_left[mdev->rs_last_mark] != tw &&
976 mdev->state.conn != C_PAUSED_SYNC_T && 976 mdev->state.conn != C_PAUSED_SYNC_T &&
977 mdev->state.conn != C_PAUSED_SYNC_S) { 977 mdev->state.conn != C_PAUSED_SYNC_S) {
978 mdev->rs_mark_time = jiffies; 978 mdev->rs_mark_time[next] = now;
979 mdev->rs_mark_left = drbd_bm_total_weight(mdev); 979 mdev->rs_mark_left[next] = tw;
980 mdev->rs_last_mark = next;
980 } 981 }
981 } 982 }
982 if (get_ldev(mdev)) { 983 spin_lock_irqsave(&mdev->al_lock, flags);
983 drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE); 984 drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
984 put_ldev(mdev); 985 spin_unlock_irqrestore(&mdev->al_lock, flags);
985 } 986
986 /* just wake_up unconditional now, various lc_chaged(), 987 /* just wake_up unconditional now, various lc_chaged(),
987 * lc_put() in drbd_try_clear_on_disk_bm(). */ 988 * lc_put() in drbd_try_clear_on_disk_bm(). */
988 wake_up = 1; 989 wake_up = 1;
990 put_ldev(mdev);
989 } 991 }
990 spin_unlock_irqrestore(&mdev->al_lock, flags);
991 if (wake_up) 992 if (wake_up)
992 wake_up(&mdev->al_wait); 993 wake_up(&mdev->al_wait);
993} 994}
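
[Note] The hunk above replaces DRBD's single rs_mark_time/rs_mark_left pair with a small ring of DRBD_SYNC_MARKS samples, taken at most every DRBD_SYNC_MARK_STEP jiffies, so the resync progress estimate can look back over a sliding window instead of one ever-older mark. A self-contained sketch of that rolling-marks idea follows, using plain seconds instead of jiffies; the constants and helper names are stand-ins, and the real driver adds extra conditions (paused sync, unchanged weight) that are omitted here.

#include <stdio.h>

#define SYNC_MARKS  8        /* stand-in for DRBD_SYNC_MARKS */
#define MARK_STEP   3        /* stand-in for DRBD_SYNC_MARK_STEP, in seconds here */

static unsigned long mark_time[SYNC_MARKS];   /* when each sample was taken */
static unsigned long mark_left[SYNC_MARKS];   /* how much work was left then */
static int last_mark;

/* Record a new mark only if the previous one is at least MARK_STEP old. */
static void maybe_mark(unsigned long now, unsigned long left)
{
    int next = (last_mark + 1) % SYNC_MARKS;

    if (now >= mark_time[last_mark] + MARK_STEP) {
        mark_time[next] = now;
        mark_left[next] = left;
        last_mark = next;
    }
}

/* Estimate progress per second between the oldest and the newest mark. */
static unsigned long rate_estimate(void)
{
    int oldest = (last_mark + 1) % SYNC_MARKS;
    unsigned long dt = mark_time[last_mark] - mark_time[oldest];
    unsigned long dw = mark_left[oldest] - mark_left[last_mark];

    return dt ? dw / dt : 0;
}

int main(void)
{
    unsigned long t, left = 1000;

    for (t = 0; t < 30; t++, left -= 20)
        maybe_mark(t, left);
    printf("~%lu units/s\n", rate_estimate());   /* prints ~20 units/s */
    return 0;
}
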
@@ -1118,7 +1119,7 @@ static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
1118 * @mdev: DRBD device. 1119 * @mdev: DRBD device.
1119 * @sector: The sector number. 1120 * @sector: The sector number.
1120 * 1121 *
1121 * This functions sleeps on al_wait. Returns 1 on success, 0 if interrupted. 1122 * This functions sleeps on al_wait. Returns 0 on success, -EINTR if interrupted.
1122 */ 1123 */
1123int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector) 1124int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
1124{ 1125{
@@ -1129,10 +1130,10 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
1129 sig = wait_event_interruptible(mdev->al_wait, 1130 sig = wait_event_interruptible(mdev->al_wait,
1130 (bm_ext = _bme_get(mdev, enr))); 1131 (bm_ext = _bme_get(mdev, enr)));
1131 if (sig) 1132 if (sig)
1132 return 0; 1133 return -EINTR;
1133 1134
1134 if (test_bit(BME_LOCKED, &bm_ext->flags)) 1135 if (test_bit(BME_LOCKED, &bm_ext->flags))
1135 return 1; 1136 return 0;
1136 1137
1137 for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { 1138 for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
1138 sig = wait_event_interruptible(mdev->al_wait, 1139 sig = wait_event_interruptible(mdev->al_wait,
@@ -1145,13 +1146,11 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
1145 wake_up(&mdev->al_wait); 1146 wake_up(&mdev->al_wait);
1146 } 1147 }
1147 spin_unlock_irq(&mdev->al_lock); 1148 spin_unlock_irq(&mdev->al_lock);
1148 return 0; 1149 return -EINTR;
1149 } 1150 }
1150 } 1151 }
1151
1152 set_bit(BME_LOCKED, &bm_ext->flags); 1152 set_bit(BME_LOCKED, &bm_ext->flags);
1153 1153 return 0;
1154 return 1;
1155} 1154}
1156 1155
1157/** 1156/**
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index e3f88d6e1412..fd42832f785b 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -569,7 +569,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
569 * 569 *
570 * maybe bm_set should be atomic_t ? 570 * maybe bm_set should be atomic_t ?
571 */ 571 */
572static unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev) 572unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
573{ 573{
574 struct drbd_bitmap *b = mdev->bitmap; 574 struct drbd_bitmap *b = mdev->bitmap;
575 unsigned long s; 575 unsigned long s;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 352441b0f92f..c07c370c4c82 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -337,13 +337,25 @@ static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
337 * NOTE that the payload starts at a long aligned offset, 337 * NOTE that the payload starts at a long aligned offset,
338 * regardless of 32 or 64 bit arch! 338 * regardless of 32 or 64 bit arch!
339 */ 339 */
340struct p_header { 340struct p_header80 {
341 u32 magic; 341 u32 magic;
342 u16 command; 342 u16 command;
343 u16 length; /* bytes of data after this header */ 343 u16 length; /* bytes of data after this header */
344 u8 payload[0]; 344 u8 payload[0];
345} __packed; 345} __packed;
346/* 8 bytes. packet FIXED for the next century! */ 346
347/* Header for big packets, Used for data packets exceeding 64kB */
348struct p_header95 {
349 u16 magic; /* use DRBD_MAGIC_BIG here */
350 u16 command;
351 u32 length; /* Use only 24 bits of that. Ignore the highest 8 bit. */
352 u8 payload[0];
353} __packed;
354
355union p_header {
356 struct p_header80 h80;
357 struct p_header95 h95;
358};
347 359
348/* 360/*
349 * short commands, packets without payload, plain p_header: 361 * short commands, packets without payload, plain p_header:
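
[Note] From here on the old fixed 8-byte struct p_header becomes p_header80, and a second layout, p_header95, is added for data packets exceeding 64 kB: the length field grows to 32 bits but only the low 24 bits carry meaning. A hedged sketch of reading such a dual-layout header; the magic values below are illustrative placeholders, not DRBD's real wire constants, the layouts assume no struct padding (true for these field sizes on common ABIs), and byte order is ignored for brevity.

#include <stdint.h>
#include <stdio.h>

#define MAGIC_OLD  0x11111111u   /* placeholder, not DRBD_MAGIC */
#define MAGIC_BIG  0x2222u       /* placeholder, not DRBD_MAGIC_BIG */

struct hdr80 {                   /* classic 8-byte header, 16-bit length */
    uint32_t magic;
    uint16_t command;
    uint16_t length;
};

struct hdr95 {                   /* big-packet header, 24 usable length bits */
    uint16_t magic;
    uint16_t command;
    uint32_t length;
};

union hdr {
    struct hdr80 h80;
    struct hdr95 h95;
};

/* Pick the layout from the magic and return the payload length. */
static uint32_t payload_length(const union hdr *h)
{
    if (h->h80.magic == MAGIC_OLD)
        return h->h80.length;
    if (h->h95.magic == MAGIC_BIG)
        return h->h95.length & 0x00ffffffu;   /* highest 8 bits are ignored */
    return 0;
}

int main(void)
{
    union hdr h = { .h95 = { MAGIC_BIG, 7, 0xab123456u } };

    printf("len = %u\n", payload_length(&h));   /* prints 1193046, i.e. 0x123456 */
    return 0;
}
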
@@ -362,12 +374,16 @@ struct p_header {
362 */ 374 */
363 375
364/* these defines must not be changed without changing the protocol version */ 376/* these defines must not be changed without changing the protocol version */
365#define DP_HARDBARRIER 1 377#define DP_HARDBARRIER 1 /* depricated */
366#define DP_RW_SYNC 2 378#define DP_RW_SYNC 2 /* equals REQ_SYNC */
367#define DP_MAY_SET_IN_SYNC 4 379#define DP_MAY_SET_IN_SYNC 4
380#define DP_UNPLUG 8 /* equals REQ_UNPLUG */
381#define DP_FUA 16 /* equals REQ_FUA */
382#define DP_FLUSH 32 /* equals REQ_FLUSH */
383#define DP_DISCARD 64 /* equals REQ_DISCARD */
368 384
369struct p_data { 385struct p_data {
370 struct p_header head; 386 union p_header head;
371 u64 sector; /* 64 bits sector number */ 387 u64 sector; /* 64 bits sector number */
372 u64 block_id; /* to identify the request in protocol B&C */ 388 u64 block_id; /* to identify the request in protocol B&C */
373 u32 seq_num; 389 u32 seq_num;
@@ -383,7 +399,7 @@ struct p_data {
383 * P_DATA_REQUEST, P_RS_DATA_REQUEST 399 * P_DATA_REQUEST, P_RS_DATA_REQUEST
384 */ 400 */
385struct p_block_ack { 401struct p_block_ack {
386 struct p_header head; 402 struct p_header80 head;
387 u64 sector; 403 u64 sector;
388 u64 block_id; 404 u64 block_id;
389 u32 blksize; 405 u32 blksize;
@@ -392,7 +408,7 @@ struct p_block_ack {
392 408
393 409
394struct p_block_req { 410struct p_block_req {
395 struct p_header head; 411 struct p_header80 head;
396 u64 sector; 412 u64 sector;
397 u64 block_id; 413 u64 block_id;
398 u32 blksize; 414 u32 blksize;
@@ -409,7 +425,7 @@ struct p_block_req {
409 */ 425 */
410 426
411struct p_handshake { 427struct p_handshake {
412 struct p_header head; /* 8 bytes */ 428 struct p_header80 head; /* 8 bytes */
413 u32 protocol_min; 429 u32 protocol_min;
414 u32 feature_flags; 430 u32 feature_flags;
415 u32 protocol_max; 431 u32 protocol_max;
@@ -424,19 +440,19 @@ struct p_handshake {
424/* 80 bytes, FIXED for the next century */ 440/* 80 bytes, FIXED for the next century */
425 441
426struct p_barrier { 442struct p_barrier {
427 struct p_header head; 443 struct p_header80 head;
428 u32 barrier; /* barrier number _handle_ only */ 444 u32 barrier; /* barrier number _handle_ only */
429 u32 pad; /* to multiple of 8 Byte */ 445 u32 pad; /* to multiple of 8 Byte */
430} __packed; 446} __packed;
431 447
432struct p_barrier_ack { 448struct p_barrier_ack {
433 struct p_header head; 449 struct p_header80 head;
434 u32 barrier; 450 u32 barrier;
435 u32 set_size; 451 u32 set_size;
436} __packed; 452} __packed;
437 453
438struct p_rs_param { 454struct p_rs_param {
439 struct p_header head; 455 struct p_header80 head;
440 u32 rate; 456 u32 rate;
441 457
442 /* Since protocol version 88 and higher. */ 458 /* Since protocol version 88 and higher. */
@@ -444,20 +460,31 @@ struct p_rs_param {
444} __packed; 460} __packed;
445 461
446struct p_rs_param_89 { 462struct p_rs_param_89 {
447 struct p_header head; 463 struct p_header80 head;
448 u32 rate; 464 u32 rate;
449 /* protocol version 89: */ 465 /* protocol version 89: */
450 char verify_alg[SHARED_SECRET_MAX]; 466 char verify_alg[SHARED_SECRET_MAX];
451 char csums_alg[SHARED_SECRET_MAX]; 467 char csums_alg[SHARED_SECRET_MAX];
452} __packed; 468} __packed;
453 469
470struct p_rs_param_95 {
471 struct p_header80 head;
472 u32 rate;
473 char verify_alg[SHARED_SECRET_MAX];
474 char csums_alg[SHARED_SECRET_MAX];
475 u32 c_plan_ahead;
476 u32 c_delay_target;
477 u32 c_fill_target;
478 u32 c_max_rate;
479} __packed;
480
454enum drbd_conn_flags { 481enum drbd_conn_flags {
455 CF_WANT_LOSE = 1, 482 CF_WANT_LOSE = 1,
456 CF_DRY_RUN = 2, 483 CF_DRY_RUN = 2,
457}; 484};
458 485
459struct p_protocol { 486struct p_protocol {
460 struct p_header head; 487 struct p_header80 head;
461 u32 protocol; 488 u32 protocol;
462 u32 after_sb_0p; 489 u32 after_sb_0p;
463 u32 after_sb_1p; 490 u32 after_sb_1p;
@@ -471,17 +498,17 @@ struct p_protocol {
471} __packed; 498} __packed;
472 499
473struct p_uuids { 500struct p_uuids {
474 struct p_header head; 501 struct p_header80 head;
475 u64 uuid[UI_EXTENDED_SIZE]; 502 u64 uuid[UI_EXTENDED_SIZE];
476} __packed; 503} __packed;
477 504
478struct p_rs_uuid { 505struct p_rs_uuid {
479 struct p_header head; 506 struct p_header80 head;
480 u64 uuid; 507 u64 uuid;
481} __packed; 508} __packed;
482 509
483struct p_sizes { 510struct p_sizes {
484 struct p_header head; 511 struct p_header80 head;
485 u64 d_size; /* size of disk */ 512 u64 d_size; /* size of disk */
486 u64 u_size; /* user requested size */ 513 u64 u_size; /* user requested size */
487 u64 c_size; /* current exported size */ 514 u64 c_size; /* current exported size */
@@ -491,18 +518,18 @@ struct p_sizes {
491} __packed; 518} __packed;
492 519
493struct p_state { 520struct p_state {
494 struct p_header head; 521 struct p_header80 head;
495 u32 state; 522 u32 state;
496} __packed; 523} __packed;
497 524
498struct p_req_state { 525struct p_req_state {
499 struct p_header head; 526 struct p_header80 head;
500 u32 mask; 527 u32 mask;
501 u32 val; 528 u32 val;
502} __packed; 529} __packed;
503 530
504struct p_req_state_reply { 531struct p_req_state_reply {
505 struct p_header head; 532 struct p_header80 head;
506 u32 retcode; 533 u32 retcode;
507} __packed; 534} __packed;
508 535
@@ -517,7 +544,7 @@ struct p_drbd06_param {
517} __packed; 544} __packed;
518 545
519struct p_discard { 546struct p_discard {
520 struct p_header head; 547 struct p_header80 head;
521 u64 block_id; 548 u64 block_id;
522 u32 seq_num; 549 u32 seq_num;
523 u32 pad; 550 u32 pad;
@@ -533,7 +560,7 @@ enum drbd_bitmap_code {
533}; 560};
534 561
535struct p_compressed_bm { 562struct p_compressed_bm {
536 struct p_header head; 563 struct p_header80 head;
537 /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code 564 /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
538 * (encoding & 0x80): polarity (set/unset) of first runlength 565 * (encoding & 0x80): polarity (set/unset) of first runlength
539 * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits 566 * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
@@ -544,10 +571,10 @@ struct p_compressed_bm {
544 u8 code[0]; 571 u8 code[0];
545} __packed; 572} __packed;
546 573
547struct p_delay_probe { 574struct p_delay_probe93 {
548 struct p_header head; 575 struct p_header80 head;
549 u32 seq_num; /* sequence number to match the two probe packets */ 576 u32 seq_num; /* sequence number to match the two probe packets */
550 u32 offset; /* usecs the probe got sent after the reference time point */ 577 u32 offset; /* usecs the probe got sent after the reference time point */
551} __packed; 578} __packed;
552 579
553/* DCBP: Drbd Compressed Bitmap Packet ... */ 580/* DCBP: Drbd Compressed Bitmap Packet ... */
@@ -594,7 +621,7 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
594 * so we need to use the fixed size 4KiB page size 621 * so we need to use the fixed size 4KiB page size
595 * most architechtures have used for a long time. 622 * most architechtures have used for a long time.
596 */ 623 */
597#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header)) 624#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header80))
598#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long)) 625#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long))
599#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm)) 626#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm))
600#if (PAGE_SIZE < 4096) 627#if (PAGE_SIZE < 4096)
@@ -603,13 +630,14 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
603#endif 630#endif
604 631
605union p_polymorph { 632union p_polymorph {
606 struct p_header header; 633 union p_header header;
607 struct p_handshake handshake; 634 struct p_handshake handshake;
608 struct p_data data; 635 struct p_data data;
609 struct p_block_ack block_ack; 636 struct p_block_ack block_ack;
610 struct p_barrier barrier; 637 struct p_barrier barrier;
611 struct p_barrier_ack barrier_ack; 638 struct p_barrier_ack barrier_ack;
612 struct p_rs_param_89 rs_param_89; 639 struct p_rs_param_89 rs_param_89;
640 struct p_rs_param_95 rs_param_95;
613 struct p_protocol protocol; 641 struct p_protocol protocol;
614 struct p_sizes sizes; 642 struct p_sizes sizes;
615 struct p_uuids uuids; 643 struct p_uuids uuids;
@@ -617,6 +645,8 @@ union p_polymorph {
617 struct p_req_state req_state; 645 struct p_req_state req_state;
618 struct p_req_state_reply req_state_reply; 646 struct p_req_state_reply req_state_reply;
619 struct p_block_req block_req; 647 struct p_block_req block_req;
648 struct p_delay_probe93 delay_probe93;
649 struct p_rs_uuid rs_uuid;
620} __packed; 650} __packed;
621 651
622/**********************************************************************/ 652/**********************************************************************/
@@ -697,7 +727,7 @@ struct drbd_tl_epoch {
697 struct list_head requests; /* requests before */ 727 struct list_head requests; /* requests before */
698 struct drbd_tl_epoch *next; /* pointer to the next barrier */ 728 struct drbd_tl_epoch *next; /* pointer to the next barrier */
699 unsigned int br_number; /* the barriers identifier. */ 729 unsigned int br_number; /* the barriers identifier. */
700 int n_req; /* number of requests attached before this barrier */ 730 int n_writes; /* number of requests attached before this barrier */
701}; 731};
702 732
703struct drbd_request; 733struct drbd_request;
@@ -747,7 +777,7 @@ struct digest_info {
747struct drbd_epoch_entry { 777struct drbd_epoch_entry {
748 struct drbd_work w; 778 struct drbd_work w;
749 struct hlist_node colision; 779 struct hlist_node colision;
750 struct drbd_epoch *epoch; 780 struct drbd_epoch *epoch; /* for writes */
751 struct drbd_conf *mdev; 781 struct drbd_conf *mdev;
752 struct page *pages; 782 struct page *pages;
753 atomic_t pending_bios; 783 atomic_t pending_bios;
@@ -755,7 +785,10 @@ struct drbd_epoch_entry {
755 /* see comments on ee flag bits below */ 785 /* see comments on ee flag bits below */
756 unsigned long flags; 786 unsigned long flags;
757 sector_t sector; 787 sector_t sector;
758 u64 block_id; 788 union {
789 u64 block_id;
790 struct digest_info *digest;
791 };
759}; 792};
760 793
761/* ee flag bits. 794/* ee flag bits.
@@ -781,12 +814,16 @@ enum {
781 * if any of those fail, we set this flag atomically 814 * if any of those fail, we set this flag atomically
782 * from the endio callback */ 815 * from the endio callback */
783 __EE_WAS_ERROR, 816 __EE_WAS_ERROR,
817
818 /* This ee has a pointer to a digest instead of a block id */
819 __EE_HAS_DIGEST,
784}; 820};
785#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) 821#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
786#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) 822#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
787#define EE_IS_BARRIER (1<<__EE_IS_BARRIER) 823#define EE_IS_BARRIER (1<<__EE_IS_BARRIER)
788#define EE_RESUBMITTED (1<<__EE_RESUBMITTED) 824#define EE_RESUBMITTED (1<<__EE_RESUBMITTED)
789#define EE_WAS_ERROR (1<<__EE_WAS_ERROR) 825#define EE_WAS_ERROR (1<<__EE_WAS_ERROR)
826#define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST)
790 827
791/* global flag bits */ 828/* global flag bits */
792enum { 829enum {
@@ -794,7 +831,6 @@ enum {
794 SIGNAL_ASENDER, /* whether asender wants to be interrupted */ 831 SIGNAL_ASENDER, /* whether asender wants to be interrupted */
795 SEND_PING, /* whether asender should send a ping asap */ 832 SEND_PING, /* whether asender should send a ping asap */
796 833
797 STOP_SYNC_TIMER, /* tell timer to cancel itself */
798 UNPLUG_QUEUED, /* only relevant with kernel 2.4 */ 834 UNPLUG_QUEUED, /* only relevant with kernel 2.4 */
799 UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ 835 UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */
800 MD_DIRTY, /* current uuids and flags not yet on disk */ 836 MD_DIRTY, /* current uuids and flags not yet on disk */
@@ -816,6 +852,7 @@ enum {
816 BITMAP_IO, /* suspend application io; 852 BITMAP_IO, /* suspend application io;
817 once no more io in flight, start bitmap io */ 853 once no more io in flight, start bitmap io */
818 BITMAP_IO_QUEUED, /* Started bitmap IO */ 854 BITMAP_IO_QUEUED, /* Started bitmap IO */
855 GO_DISKLESS, /* Disk failed, local_cnt reached zero, we are going diskless */
819 RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ 856 RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
820 NET_CONGESTED, /* The data socket is congested */ 857 NET_CONGESTED, /* The data socket is congested */
821 858
@@ -829,6 +866,8 @@ enum {
829 * the peer, if it changed there as well. */ 866 * the peer, if it changed there as well. */
830 CONN_DRY_RUN, /* Expect disconnect after resync handshake. */ 867 CONN_DRY_RUN, /* Expect disconnect after resync handshake. */
831 GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */ 868 GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */
869 NEW_CUR_UUID, /* Create new current UUID when thawing IO */
870 AL_SUSPENDED, /* Activity logging is currently suspended. */
832}; 871};
833 872
834struct drbd_bitmap; /* opaque for drbd_conf */ 873struct drbd_bitmap; /* opaque for drbd_conf */
@@ -838,10 +877,6 @@ struct drbd_bitmap; /* opaque for drbd_conf */
838 877
839/* THINK maybe we actually want to use the default "event/%s" worker threads 878/* THINK maybe we actually want to use the default "event/%s" worker threads
840 * or similar in linux 2.6, which uses per cpu data and threads. 879 * or similar in linux 2.6, which uses per cpu data and threads.
841 *
842 * To be general, this might need a spin_lock member.
843 * For now, please use the mdev->req_lock to protect list_head,
844 * see drbd_queue_work below.
845 */ 880 */
846struct drbd_work_queue { 881struct drbd_work_queue {
847 struct list_head q; 882 struct list_head q;
@@ -915,6 +950,12 @@ enum write_ordering_e {
915 WO_bio_barrier 950 WO_bio_barrier
916}; 951};
917 952
953struct fifo_buffer {
954 int *values;
955 unsigned int head_index;
956 unsigned int size;
957};
958
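struct fifo_buffer carries the correction values for the resync-rate planner as a fixed-size ring addressed by head_index. A minimal userspace sketch of such a ring, assuming hypothetical fifo_push()/fifo_sum() helpers (the driver's own helpers are not part of this hunk):

/* Hedged sketch, not the driver's API: a fixed-size ring of int correction
 * values, mirroring struct fifo_buffer (values, head_index, size).
 * fifo_push() and fifo_sum() are illustrative names only. */
#include <stdio.h>
#include <stdlib.h>

struct fifo_sketch {
        int *values;
        unsigned int head_index;
        unsigned int size;
};

static int fifo_init(struct fifo_sketch *fb, unsigned int size)
{
        fb->values = calloc(size, sizeof(*fb->values));
        if (!fb->values)
                return -1;
        fb->head_index = 0;
        fb->size = size;
        return 0;
}

/* overwrite the oldest slot and advance the head */
static void fifo_push(struct fifo_sketch *fb, int value)
{
        fb->values[fb->head_index] = value;
        fb->head_index = (fb->head_index + 1) % fb->size;
}

/* sum of all corrections currently in the ring */
static int fifo_sum(const struct fifo_sketch *fb)
{
        int sum = 0;
        unsigned int i;

        for (i = 0; i < fb->size; i++)
                sum += fb->values[i];
        return sum;
}

int main(void)
{
        struct fifo_sketch plan;

        if (fifo_init(&plan, 4))
                return 1;
        fifo_push(&plan, 100);
        fifo_push(&plan, 80);
        printf("planned correction: %d\n", fifo_sum(&plan));
        free(plan.values);
        return 0;
}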
918struct drbd_conf { 959struct drbd_conf {
919 /* things that are stored as / read from meta data on disk */ 960 /* things that are stored as / read from meta data on disk */
920 unsigned long flags; 961 unsigned long flags;
@@ -936,9 +977,16 @@ struct drbd_conf {
936 unsigned int ko_count; 977 unsigned int ko_count;
937 struct drbd_work resync_work, 978 struct drbd_work resync_work,
938 unplug_work, 979 unplug_work,
980 go_diskless,
939 md_sync_work; 981 md_sync_work;
940 struct timer_list resync_timer; 982 struct timer_list resync_timer;
941 struct timer_list md_sync_timer; 983 struct timer_list md_sync_timer;
984#ifdef DRBD_DEBUG_MD_SYNC
985 struct {
986 unsigned int line;
987 const char* func;
988 } last_md_mark_dirty;
989#endif
942 990
943 /* Used after attach while negotiating new disk state. */ 991 /* Used after attach while negotiating new disk state. */
944 union drbd_state new_state_tmp; 992 union drbd_state new_state_tmp;
@@ -946,6 +994,7 @@ struct drbd_conf {
946 union drbd_state state; 994 union drbd_state state;
947 wait_queue_head_t misc_wait; 995 wait_queue_head_t misc_wait;
948 wait_queue_head_t state_wait; /* upon each state change. */ 996 wait_queue_head_t state_wait; /* upon each state change. */
997 wait_queue_head_t net_cnt_wait;
949 unsigned int send_cnt; 998 unsigned int send_cnt;
950 unsigned int recv_cnt; 999 unsigned int recv_cnt;
951 unsigned int read_cnt; 1000 unsigned int read_cnt;
@@ -974,12 +1023,16 @@ struct drbd_conf {
974 unsigned long rs_start; 1023 unsigned long rs_start;
975 /* cumulated time in PausedSyncX state [unit jiffies] */ 1024 /* cumulated time in PausedSyncX state [unit jiffies] */
976 unsigned long rs_paused; 1025 unsigned long rs_paused;
1026 /* skipped because csum was equal [unit BM_BLOCK_SIZE] */
1027 unsigned long rs_same_csum;
1028#define DRBD_SYNC_MARKS 8
1029#define DRBD_SYNC_MARK_STEP (3*HZ)
977 /* block not up-to-date at mark [unit BM_BLOCK_SIZE] */ 1030 /* block not up-to-date at mark [unit BM_BLOCK_SIZE] */
978 unsigned long rs_mark_left; 1031 unsigned long rs_mark_left[DRBD_SYNC_MARKS];
979 /* marks's time [unit jiffies] */ 1032 /* marks's time [unit jiffies] */
980 unsigned long rs_mark_time; 1033 unsigned long rs_mark_time[DRBD_SYNC_MARKS];
981 /* skipped because csum was equeal [unit BM_BLOCK_SIZE] */ 1034 /* current index into rs_mark_{left,time} */
982 unsigned long rs_same_csum; 1035 int rs_last_mark;
983 1036
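rs_mark_left/rs_mark_time grow from a single sample to a ring of DRBD_SYNC_MARKS samples, one every DRBD_SYNC_MARK_STEP, with rs_last_mark pointing at the newest. A hedged sketch of how a sync-speed estimate can be derived from the oldest and newest mark (seconds instead of jiffies, names illustrative):

/* Hedged illustration, not the driver's code: estimate resync speed from a
 * ring of marks, each recording "blocks still out of sync" plus a timestamp
 * (seconds here instead of jiffies). */
#include <stdio.h>

#define SYNC_MARKS 8

struct sync_marks {
        unsigned long left[SYNC_MARKS];   /* blocks not yet in sync at mark i */
        unsigned long time_s[SYNC_MARKS]; /* timestamp of mark i, in seconds */
        int last;                         /* index of the newest mark */
};

/* average blocks synced per second across the whole mark window */
static double sync_rate(const struct sync_marks *m)
{
        int oldest = (m->last + 1) % SYNC_MARKS;
        unsigned long blocks = m->left[oldest] - m->left[m->last];
        unsigned long secs = m->time_s[m->last] - m->time_s[oldest];

        return secs ? (double)blocks / secs : 0.0;
}

int main(void)
{
        struct sync_marks m = { .last = SYNC_MARKS - 1 };
        int i;

        /* pretend 1000 blocks complete every 3 seconds */
        for (i = 0; i < SYNC_MARKS; i++) {
                m.left[i] = 100000UL - 1000UL * i;
                m.time_s[i] = 3UL * i;
        }
        printf("~%.0f blocks/s\n", sync_rate(&m));
        return 0;
}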
984 /* where does the admin want us to start? (sector) */ 1037 /* where does the admin want us to start? (sector) */
985 sector_t ov_start_sector; 1038 sector_t ov_start_sector;
@@ -1012,10 +1065,10 @@ struct drbd_conf {
1012 spinlock_t epoch_lock; 1065 spinlock_t epoch_lock;
1013 unsigned int epochs; 1066 unsigned int epochs;
1014 enum write_ordering_e write_ordering; 1067 enum write_ordering_e write_ordering;
1015 struct list_head active_ee; /* IO in progress */ 1068 struct list_head active_ee; /* IO in progress (P_DATA gets written to disk) */
1016 struct list_head sync_ee; /* IO in progress */ 1069 struct list_head sync_ee; /* IO in progress (P_RS_DATA_REPLY gets written to disk) */
1017 struct list_head done_ee; /* send ack */ 1070 struct list_head done_ee; /* send ack */
1018 struct list_head read_ee; /* IO in progress */ 1071 struct list_head read_ee; /* IO in progress (any read) */
1019 struct list_head net_ee; /* zero-copy network send in progress */ 1072 struct list_head net_ee; /* zero-copy network send in progress */
1020 struct hlist_head *ee_hash; /* is proteced by req_lock! */ 1073 struct hlist_head *ee_hash; /* is proteced by req_lock! */
1021 unsigned int ee_hash_s; 1074 unsigned int ee_hash_s;
@@ -1026,7 +1079,8 @@ struct drbd_conf {
1026 int next_barrier_nr; 1079 int next_barrier_nr;
1027 struct hlist_head *app_reads_hash; /* is proteced by req_lock */ 1080 struct hlist_head *app_reads_hash; /* is proteced by req_lock */
1028 struct list_head resync_reads; 1081 struct list_head resync_reads;
1029 atomic_t pp_in_use; 1082 atomic_t pp_in_use; /* allocated from page pool */
1083 atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */
1030 wait_queue_head_t ee_wait; 1084 wait_queue_head_t ee_wait;
1031 struct page *md_io_page; /* one page buffer for md_io */ 1085 struct page *md_io_page; /* one page buffer for md_io */
1032 struct page *md_io_tmpp; /* for logical_block_size != 512 */ 1086 struct page *md_io_tmpp; /* for logical_block_size != 512 */
@@ -1054,6 +1108,15 @@ struct drbd_conf {
1054 u64 ed_uuid; /* UUID of the exposed data */ 1108 u64 ed_uuid; /* UUID of the exposed data */
1055 struct mutex state_mutex; 1109 struct mutex state_mutex;
1056 char congestion_reason; /* Why we where congested... */ 1110 char congestion_reason; /* Why we where congested... */
1111 atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */
1112 atomic_t rs_sect_ev; /* for submitted resync data rate, both */
1113 int rs_last_sect_ev; /* counter to compare with */
1114 int rs_last_events; /* counter of read or write "events" (unit sectors)
1115 * on the lower level device when we last looked. */
1116 int c_sync_rate; /* current resync rate after syncer throttle magic */
1117 struct fifo_buffer rs_plan_s; /* correction values of resync planner */
1118 int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
1119 int rs_planed; /* resync sectors already planned */
1057}; 1120};
1058 1121
1059static inline struct drbd_conf *minor_to_mdev(unsigned int minor) 1122static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1138,6 +1201,8 @@ extern void drbd_free_resources(struct drbd_conf *mdev);
1138extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, 1201extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
1139 unsigned int set_size); 1202 unsigned int set_size);
1140extern void tl_clear(struct drbd_conf *mdev); 1203extern void tl_clear(struct drbd_conf *mdev);
1204enum drbd_req_event;
1205extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
1141extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); 1206extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
1142extern void drbd_free_sock(struct drbd_conf *mdev); 1207extern void drbd_free_sock(struct drbd_conf *mdev);
1143extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, 1208extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
@@ -1150,12 +1215,12 @@ extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_f
1150extern int _drbd_send_state(struct drbd_conf *mdev); 1215extern int _drbd_send_state(struct drbd_conf *mdev);
1151extern int drbd_send_state(struct drbd_conf *mdev); 1216extern int drbd_send_state(struct drbd_conf *mdev);
1152extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, 1217extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
1153 enum drbd_packets cmd, struct p_header *h, 1218 enum drbd_packets cmd, struct p_header80 *h,
1154 size_t size, unsigned msg_flags); 1219 size_t size, unsigned msg_flags);
1155#define USE_DATA_SOCKET 1 1220#define USE_DATA_SOCKET 1
1156#define USE_META_SOCKET 0 1221#define USE_META_SOCKET 0
1157extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, 1222extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
1158 enum drbd_packets cmd, struct p_header *h, 1223 enum drbd_packets cmd, struct p_header80 *h,
1159 size_t size); 1224 size_t size);
1160extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, 1225extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd,
1161 char *data, size_t size); 1226 char *data, size_t size);
@@ -1167,7 +1232,7 @@ extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
1167extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, 1232extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
1168 struct p_block_req *rp); 1233 struct p_block_req *rp);
1169extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, 1234extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
1170 struct p_data *dp); 1235 struct p_data *dp, int data_size);
1171extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, 1236extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
1172 sector_t sector, int blksize, u64 block_id); 1237 sector_t sector, int blksize, u64 block_id);
1173extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, 1238extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
@@ -1201,7 +1266,13 @@ extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
1201extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local); 1266extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local);
1202extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local); 1267extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local);
1203extern int drbd_md_test_flag(struct drbd_backing_dev *, int); 1268extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
1269#ifndef DRBD_DEBUG_MD_SYNC
1204extern void drbd_md_mark_dirty(struct drbd_conf *mdev); 1270extern void drbd_md_mark_dirty(struct drbd_conf *mdev);
1271#else
1272#define drbd_md_mark_dirty(m) drbd_md_mark_dirty_(m, __LINE__ , __func__ )
1273extern void drbd_md_mark_dirty_(struct drbd_conf *mdev,
1274 unsigned int line, const char *func);
1275#endif
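Under DRBD_DEBUG_MD_SYNC the macro version forwards the caller's __LINE__ and __func__ so the last "mark dirty" call site can be reported later. A small standalone sketch of that wrapper pattern (names are illustrative, not the driver's):

/* Hedged sketch of the debug-wrapper pattern above; the macro captures the
 * call site of each mark so it can be printed when needed. */
#include <stdio.h>

static struct {
        unsigned int line;
        const char *func;
} last_md_mark_dirty;

static void md_mark_dirty_(unsigned int line, const char *func)
{
        last_md_mark_dirty.line = line;
        last_md_mark_dirty.func = func;
}
#define md_mark_dirty() md_mark_dirty_(__LINE__, __func__)

static void change_metadata(void)
{
        md_mark_dirty();        /* records this line and "change_metadata" */
}

int main(void)
{
        change_metadata();
        printf("md last marked dirty at %s:%u\n",
               last_md_mark_dirty.func, last_md_mark_dirty.line);
        return 0;
}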
1205extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, 1276extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
1206 int (*io_fn)(struct drbd_conf *), 1277 int (*io_fn)(struct drbd_conf *),
1207 void (*done)(struct drbd_conf *, int), 1278 void (*done)(struct drbd_conf *, int),
@@ -1209,6 +1280,7 @@ extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
1209extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); 1280extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
1210extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); 1281extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
1211extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why); 1282extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
1283extern void drbd_go_diskless(struct drbd_conf *mdev);
1212 1284
1213 1285
1214/* Meta data layout 1286/* Meta data layout
@@ -1264,6 +1336,8 @@ struct bm_extent {
1264 * Bit 1 ==> local node thinks this block needs to be synced. 1336 * Bit 1 ==> local node thinks this block needs to be synced.
1265 */ 1337 */
1266 1338
1339#define SLEEP_TIME (HZ/10)
1340
1267#define BM_BLOCK_SHIFT 12 /* 4k per bit */ 1341#define BM_BLOCK_SHIFT 12 /* 4k per bit */
1268#define BM_BLOCK_SIZE (1<<BM_BLOCK_SHIFT) 1342#define BM_BLOCK_SIZE (1<<BM_BLOCK_SHIFT)
1269/* (9+3) : 512 bytes @ 8 bits; representing 16M storage 1343/* (9+3) : 512 bytes @ 8 bits; representing 16M storage
@@ -1335,11 +1409,13 @@ struct bm_extent {
1335#endif 1409#endif
1336 1410
1337/* Sector shift value for the "hash" functions of tl_hash and ee_hash tables. 1411/* Sector shift value for the "hash" functions of tl_hash and ee_hash tables.
1338 * With a value of 6 all IO in one 32K block make it to the same slot of the 1412 * With a value of 8 all IO in one 128K block make it to the same slot of the
1339 * hash table. */ 1413 * hash table. */
1340#define HT_SHIFT 6 1414#define HT_SHIFT 8
1341#define DRBD_MAX_SEGMENT_SIZE (1U<<(9+HT_SHIFT)) 1415#define DRBD_MAX_SEGMENT_SIZE (1U<<(9+HT_SHIFT))
1342 1416
1417#define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32 KiB of data */
1418
1343/* Number of elements in the app_reads_hash */ 1419/* Number of elements in the app_reads_hash */
1344#define APP_R_HSIZE 15 1420#define APP_R_HSIZE 15
1345 1421
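The hash slot for a request follows from shifting the sector by HT_SHIFT, so all IO inside one (1 << (9 + HT_SHIFT))-byte window shares a slot; raising HT_SHIFT from 6 to 8 widens that window from 32K to 128K and raises DRBD_MAX_SEGMENT_SIZE with it. A hedged sketch of that mapping (tl_hash_fn() below is illustrative, not DRBD's hash function):

/* Hedged sketch of the sector-to-slot mapping implied by HT_SHIFT; with
 * 512-byte sectors all IO inside one (1 << (9 + HT_SHIFT))-byte window
 * shares a slot. */
#include <stdio.h>

#define HT_SHIFT 8
#define DRBD_MAX_SEGMENT_SIZE (1U << (9 + HT_SHIFT)) /* 128 KiB */

static unsigned int tl_hash_fn(unsigned long long sector, unsigned int slots)
{
        return (unsigned int)(sector >> HT_SHIFT) % slots;
}

int main(void)
{
        unsigned int slots = 64;

        printf("max segment: %u bytes\n", DRBD_MAX_SEGMENT_SIZE);
        printf("slot(sector 0)   = %u\n", tl_hash_fn(0, slots));
        printf("slot(sector 128) = %u\n", tl_hash_fn(128, slots)); /* +64 KiB, same slot */
        printf("slot(sector 256) = %u\n", tl_hash_fn(256, slots)); /* +128 KiB, next slot */
        return 0;
}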
@@ -1369,6 +1445,7 @@ extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_
1369/* bm_find_next variants for use while you hold drbd_bm_lock() */ 1445/* bm_find_next variants for use while you hold drbd_bm_lock() */
1370extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); 1446extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
1371extern unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo); 1447extern unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo);
1448extern unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev);
1372extern unsigned long drbd_bm_total_weight(struct drbd_conf *mdev); 1449extern unsigned long drbd_bm_total_weight(struct drbd_conf *mdev);
1373extern int drbd_bm_rs_done(struct drbd_conf *mdev); 1450extern int drbd_bm_rs_done(struct drbd_conf *mdev);
1374/* for receive_bitmap */ 1451/* for receive_bitmap */
@@ -1421,7 +1498,8 @@ extern void resync_after_online_grow(struct drbd_conf *);
1421extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); 1498extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
1422extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, 1499extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role,
1423 int force); 1500 int force);
1424enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev); 1501extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev);
1502extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev);
1425extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); 1503extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
1426 1504
1427/* drbd_worker.c */ 1505/* drbd_worker.c */
@@ -1467,10 +1545,12 @@ extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int);
1467extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int); 1545extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int);
1468extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int); 1546extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int);
1469extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int); 1547extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
1548extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int);
1470 1549
1471extern void resync_timer_fn(unsigned long data); 1550extern void resync_timer_fn(unsigned long data);
1472 1551
1473/* drbd_receiver.c */ 1552/* drbd_receiver.c */
1553extern int drbd_rs_should_slow_down(struct drbd_conf *mdev);
1474extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, 1554extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
1475 const unsigned rw, const int fault_type); 1555 const unsigned rw, const int fault_type);
1476extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); 1556extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
@@ -1479,7 +1559,10 @@ extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
1479 sector_t sector, 1559 sector_t sector,
1480 unsigned int data_size, 1560 unsigned int data_size,
1481 gfp_t gfp_mask) __must_hold(local); 1561 gfp_t gfp_mask) __must_hold(local);
1482extern void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e); 1562extern void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
1563 int is_net);
1564#define drbd_free_ee(m,e) drbd_free_some_ee(m, e, 0)
1565#define drbd_free_net_ee(m,e) drbd_free_some_ee(m, e, 1)
1483extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev, 1566extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
1484 struct list_head *head); 1567 struct list_head *head);
1485extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, 1568extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
@@ -1487,6 +1570,7 @@ extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
1487extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); 1570extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled);
1488extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); 1571extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed);
1489extern void drbd_flush_workqueue(struct drbd_conf *mdev); 1572extern void drbd_flush_workqueue(struct drbd_conf *mdev);
1573extern void drbd_free_tl_hash(struct drbd_conf *mdev);
1490 1574
1491/* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to 1575/* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to
1492 * mess with get_fs/set_fs, we know we are KERNEL_DS always. */ 1576 * mess with get_fs/set_fs, we know we are KERNEL_DS always. */
@@ -1600,6 +1684,8 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
1600#define susp_MASK 1 1684#define susp_MASK 1
1601#define user_isp_MASK 1 1685#define user_isp_MASK 1
1602#define aftr_isp_MASK 1 1686#define aftr_isp_MASK 1
1687#define susp_nod_MASK 1
1688#define susp_fen_MASK 1
1603 1689
1604#define NS(T, S) \ 1690#define NS(T, S) \
1605 ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \ 1691 ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
@@ -1856,13 +1942,6 @@ static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
1856} 1942}
1857 1943
1858static inline void 1944static inline void
1859_drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
1860{
1861 list_add_tail(&w->list, &q->q);
1862 up(&q->s);
1863}
1864
1865static inline void
1866drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w) 1945drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w)
1867{ 1946{
1868 unsigned long flags; 1947 unsigned long flags;
@@ -1899,19 +1978,19 @@ static inline void request_ping(struct drbd_conf *mdev)
1899static inline int drbd_send_short_cmd(struct drbd_conf *mdev, 1978static inline int drbd_send_short_cmd(struct drbd_conf *mdev,
1900 enum drbd_packets cmd) 1979 enum drbd_packets cmd)
1901{ 1980{
1902 struct p_header h; 1981 struct p_header80 h;
1903 return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); 1982 return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h));
1904} 1983}
1905 1984
1906static inline int drbd_send_ping(struct drbd_conf *mdev) 1985static inline int drbd_send_ping(struct drbd_conf *mdev)
1907{ 1986{
1908 struct p_header h; 1987 struct p_header80 h;
1909 return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h)); 1988 return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h));
1910} 1989}
1911 1990
1912static inline int drbd_send_ping_ack(struct drbd_conf *mdev) 1991static inline int drbd_send_ping_ack(struct drbd_conf *mdev)
1913{ 1992{
1914 struct p_header h; 1993 struct p_header80 h;
1915 return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); 1994 return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h));
1916} 1995}
1917 1996
@@ -2013,7 +2092,7 @@ static inline void inc_unacked(struct drbd_conf *mdev)
2013static inline void put_net_conf(struct drbd_conf *mdev) 2092static inline void put_net_conf(struct drbd_conf *mdev)
2014{ 2093{
2015 if (atomic_dec_and_test(&mdev->net_cnt)) 2094 if (atomic_dec_and_test(&mdev->net_cnt))
2016 wake_up(&mdev->misc_wait); 2095 wake_up(&mdev->net_cnt_wait);
2017} 2096}
2018 2097
2019/** 2098/**
@@ -2044,10 +2123,14 @@ static inline int get_net_conf(struct drbd_conf *mdev)
2044 2123
2045static inline void put_ldev(struct drbd_conf *mdev) 2124static inline void put_ldev(struct drbd_conf *mdev)
2046{ 2125{
2126 int i = atomic_dec_return(&mdev->local_cnt);
2047 __release(local); 2127 __release(local);
2048 if (atomic_dec_and_test(&mdev->local_cnt)) 2128 D_ASSERT(i >= 0);
2129 if (i == 0) {
2130 if (mdev->state.disk == D_FAILED)
2131 drbd_go_diskless(mdev);
2049 wake_up(&mdev->misc_wait); 2132 wake_up(&mdev->misc_wait);
2050 D_ASSERT(atomic_read(&mdev->local_cnt) >= 0); 2133 }
2051} 2134}
2052 2135
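put_ldev() now reads the decremented count once and lets only the final reference trigger drbd_go_diskless() when the disk has already failed. A userspace sketch of that last-reference pattern, using a plain int in place of atomic_t (names illustrative, not the driver's):

/* Hedged sketch: only the final put of a failed disk starts the diskless
 * transition; earlier puts leave the device alone. */
#include <assert.h>
#include <stdio.h>

enum disk_state_sketch { D_DISKLESS_S, D_FAILED_S, D_UP_TO_DATE_S };

struct dev_sketch {
        int local_cnt;
        enum disk_state_sketch disk;
};

static void go_diskless(struct dev_sketch *d)
{
        d->disk = D_DISKLESS_S;
        printf("going diskless\n");
}

static void put_ldev_sketch(struct dev_sketch *d)
{
        int i = --d->local_cnt;

        assert(i >= 0);
        if (i == 0 && d->disk == D_FAILED_S)
                go_diskless(d);         /* only the last reference acts */
}

int main(void)
{
        struct dev_sketch d = { .local_cnt = 2, .disk = D_FAILED_S };

        put_ldev_sketch(&d);    /* one user left: nothing happens yet */
        put_ldev_sketch(&d);    /* last user: diskless transition runs */
        return 0;
}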
2053#ifndef __CHECKER__ 2136#ifndef __CHECKER__
@@ -2179,11 +2262,16 @@ static inline int drbd_state_is_stable(union drbd_state s)
2179 return 1; 2262 return 1;
2180} 2263}
2181 2264
2265static inline int is_susp(union drbd_state s)
2266{
2267 return s.susp || s.susp_nod || s.susp_fen;
2268}
2269
2182static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) 2270static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
2183{ 2271{
2184 int mxb = drbd_get_max_buffers(mdev); 2272 int mxb = drbd_get_max_buffers(mdev);
2185 2273
2186 if (mdev->state.susp) 2274 if (is_susp(mdev->state))
2187 return 0; 2275 return 0;
2188 if (test_bit(SUSPEND_IO, &mdev->flags)) 2276 if (test_bit(SUSPEND_IO, &mdev->flags))
2189 return 0; 2277 return 0;
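Suspension is now split into three independent reasons (user, no data, fencing), and is_susp() folds them back into the single predicate that callers like __inc_ap_bio_cond() need. A hedged sketch of why any one of the bits freezes new application IO (the bit layout below is illustrative only):

/* Hedged sketch: three independent suspend reasons, any of which freezes
 * new application IO. */
#include <stdio.h>

union state_sketch {
        struct {
                unsigned susp:1;        /* user asked to suspend IO */
                unsigned susp_nod:1;    /* no accessible data */
                unsigned susp_fen:1;    /* fence-peer handler still running */
        };
        unsigned int i;
};

static int is_susp(union state_sketch s)
{
        return s.susp || s.susp_nod || s.susp_fen;
}

int main(void)
{
        union state_sketch s = { .i = 0 };

        printf("frozen? %d\n", is_susp(s));     /* 0: IO may proceed */
        s.susp_fen = 1;
        printf("frozen? %d\n", is_susp(s));     /* 1: wait for fencing result */
        return 0;
}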
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index e4b56119866e..c5dfe6486cf3 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -78,6 +78,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
78static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); 78static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
79static void md_sync_timer_fn(unsigned long data); 79static void md_sync_timer_fn(unsigned long data);
80static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); 80static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
81static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused);
81 82
82MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " 83MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
83 "Lars Ellenberg <lars@linbit.com>"); 84 "Lars Ellenberg <lars@linbit.com>");
@@ -200,7 +201,7 @@ static int tl_init(struct drbd_conf *mdev)
200 INIT_LIST_HEAD(&b->w.list); 201 INIT_LIST_HEAD(&b->w.list);
201 b->next = NULL; 202 b->next = NULL;
202 b->br_number = 4711; 203 b->br_number = 4711;
203 b->n_req = 0; 204 b->n_writes = 0;
204 b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ 205 b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
205 206
206 mdev->oldest_tle = b; 207 mdev->oldest_tle = b;
@@ -241,7 +242,7 @@ void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
241 INIT_LIST_HEAD(&new->w.list); 242 INIT_LIST_HEAD(&new->w.list);
242 new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ 243 new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
243 new->next = NULL; 244 new->next = NULL;
244 new->n_req = 0; 245 new->n_writes = 0;
245 246
246 newest_before = mdev->newest_tle; 247 newest_before = mdev->newest_tle;
247 /* never send a barrier number == 0, because that is special-cased 248 /* never send a barrier number == 0, because that is special-cased
@@ -285,9 +286,9 @@ void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
285 barrier_nr, b->br_number); 286 barrier_nr, b->br_number);
286 goto bail; 287 goto bail;
287 } 288 }
288 if (b->n_req != set_size) { 289 if (b->n_writes != set_size) {
289 dev_err(DEV, "BAD! BarrierAck #%u received with n_req=%u, expected n_req=%u!\n", 290 dev_err(DEV, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
290 barrier_nr, set_size, b->n_req); 291 barrier_nr, set_size, b->n_writes);
291 goto bail; 292 goto bail;
292 } 293 }
293 294
@@ -334,6 +335,82 @@ bail:
334 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); 335 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
335} 336}
336 337
338/**
339 * _tl_restart() - Walks the transfer log, and applies an action to all requests
340 * @mdev: DRBD device.
341 * @what: The action/event to perform with all request objects
342 *
343 * @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io,
344 * restart_frozen_disk_io.
345 */
346static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
347{
348 struct drbd_tl_epoch *b, *tmp, **pn;
349 struct list_head *le, *tle, carry_reads;
350 struct drbd_request *req;
351 int rv, n_writes, n_reads;
352
353 b = mdev->oldest_tle;
354 pn = &mdev->oldest_tle;
355 while (b) {
356 n_writes = 0;
357 n_reads = 0;
358 INIT_LIST_HEAD(&carry_reads);
359 list_for_each_safe(le, tle, &b->requests) {
360 req = list_entry(le, struct drbd_request, tl_requests);
361 rv = _req_mod(req, what);
362
363 n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
364 n_reads += (rv & MR_READ) >> MR_READ_SHIFT;
365 }
366 tmp = b->next;
367
368 if (n_writes) {
369 if (what == resend) {
370 b->n_writes = n_writes;
371 if (b->w.cb == NULL) {
372 b->w.cb = w_send_barrier;
373 inc_ap_pending(mdev);
374 set_bit(CREATE_BARRIER, &mdev->flags);
375 }
376
377 drbd_queue_work(&mdev->data.work, &b->w);
378 }
379 pn = &b->next;
380 } else {
381 if (n_reads)
382 list_add(&carry_reads, &b->requests);
383 /* there could still be requests on that ring list,
384 * in case local io is still pending */
385 list_del(&b->requests);
386
387 /* dec_ap_pending corresponding to queue_barrier.
388 * the newest barrier may not have been queued yet,
389 * in which case w.cb is still NULL. */
390 if (b->w.cb != NULL)
391 dec_ap_pending(mdev);
392
393 if (b == mdev->newest_tle) {
394 /* recycle, but reinit! */
395 D_ASSERT(tmp == NULL);
396 INIT_LIST_HEAD(&b->requests);
397 list_splice(&carry_reads, &b->requests);
398 INIT_LIST_HEAD(&b->w.list);
399 b->w.cb = NULL;
400 b->br_number = net_random();
401 b->n_writes = 0;
402
403 *pn = b;
404 break;
405 }
406 *pn = tmp;
407 kfree(b);
408 }
409 b = tmp;
410 list_splice(&carry_reads, &b->requests);
411 }
412}
413
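_req_mod() reports the kind of each request through MR_WRITE/MR_READ bits, and _tl_restart() tallies them per barrier to decide whether an epoch is requeued or dropped with its reads carried forward. A small standalone sketch of that counting idiom, with a stand-in for _req_mod():

/* Hedged sketch of the MR_WRITE/MR_READ counting idiom: each request
 * handler reports what it was via flag bits, and the caller tallies reads
 * and writes per epoch. req_mod_sketch() stands in for _req_mod(). */
#include <stdio.h>

#define MR_WRITE_SHIFT 0
#define MR_WRITE       (1 << MR_WRITE_SHIFT)
#define MR_READ_SHIFT  1
#define MR_READ        (1 << MR_READ_SHIFT)

/* pretend even-numbered requests were writes, odd-numbered ones reads */
static int req_mod_sketch(int req_nr)
{
        return (req_nr % 2 == 0) ? MR_WRITE : MR_READ;
}

int main(void)
{
        int n_writes = 0, n_reads = 0, rv, i;

        for (i = 0; i < 5; i++) {
                rv = req_mod_sketch(i);
                n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
                n_reads += (rv & MR_READ) >> MR_READ_SHIFT;
        }
        printf("writes=%d reads=%d\n", n_writes, n_reads);     /* writes=3 reads=2 */
        return 0;
}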
337 414
338/** 415/**
339 * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL 416 * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
@@ -345,48 +422,12 @@ bail:
345 */ 422 */
346void tl_clear(struct drbd_conf *mdev) 423void tl_clear(struct drbd_conf *mdev)
347{ 424{
348 struct drbd_tl_epoch *b, *tmp;
349 struct list_head *le, *tle; 425 struct list_head *le, *tle;
350 struct drbd_request *r; 426 struct drbd_request *r;
351 int new_initial_bnr = net_random();
352 427
353 spin_lock_irq(&mdev->req_lock); 428 spin_lock_irq(&mdev->req_lock);
354 429
355 b = mdev->oldest_tle; 430 _tl_restart(mdev, connection_lost_while_pending);
356 while (b) {
357 list_for_each_safe(le, tle, &b->requests) {
358 r = list_entry(le, struct drbd_request, tl_requests);
359 /* It would be nice to complete outside of spinlock.
360 * But this is easier for now. */
361 _req_mod(r, connection_lost_while_pending);
362 }
363 tmp = b->next;
364
365 /* there could still be requests on that ring list,
366 * in case local io is still pending */
367 list_del(&b->requests);
368
369 /* dec_ap_pending corresponding to queue_barrier.
370 * the newest barrier may not have been queued yet,
371 * in which case w.cb is still NULL. */
372 if (b->w.cb != NULL)
373 dec_ap_pending(mdev);
374
375 if (b == mdev->newest_tle) {
376 /* recycle, but reinit! */
377 D_ASSERT(tmp == NULL);
378 INIT_LIST_HEAD(&b->requests);
379 INIT_LIST_HEAD(&b->w.list);
380 b->w.cb = NULL;
381 b->br_number = new_initial_bnr;
382 b->n_req = 0;
383
384 mdev->oldest_tle = b;
385 break;
386 }
387 kfree(b);
388 b = tmp;
389 }
390 431
391 /* we expect this list to be empty. */ 432 /* we expect this list to be empty. */
392 D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); 433 D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
@@ -402,6 +443,15 @@ void tl_clear(struct drbd_conf *mdev)
402 /* ensure bit indicating barrier is required is clear */ 443 /* ensure bit indicating barrier is required is clear */
403 clear_bit(CREATE_BARRIER, &mdev->flags); 444 clear_bit(CREATE_BARRIER, &mdev->flags);
404 445
446 memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));
447
448 spin_unlock_irq(&mdev->req_lock);
449}
450
451void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
452{
453 spin_lock_irq(&mdev->req_lock);
454 _tl_restart(mdev, what);
405 spin_unlock_irq(&mdev->req_lock); 455 spin_unlock_irq(&mdev->req_lock);
406} 456}
407 457
@@ -456,7 +506,7 @@ static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns);
456static int is_valid_state_transition(struct drbd_conf *, 506static int is_valid_state_transition(struct drbd_conf *,
457 union drbd_state, union drbd_state); 507 union drbd_state, union drbd_state);
458static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, 508static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
459 union drbd_state ns, int *warn_sync_abort); 509 union drbd_state ns, const char **warn_sync_abort);
460int drbd_send_state_req(struct drbd_conf *, 510int drbd_send_state_req(struct drbd_conf *,
461 union drbd_state, union drbd_state); 511 union drbd_state, union drbd_state);
462 512
@@ -606,7 +656,7 @@ static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
606 drbd_role_str(ns.peer), 656 drbd_role_str(ns.peer),
607 drbd_disk_str(ns.disk), 657 drbd_disk_str(ns.disk),
608 drbd_disk_str(ns.pdsk), 658 drbd_disk_str(ns.pdsk),
609 ns.susp ? 's' : 'r', 659 is_susp(ns) ? 's' : 'r',
610 ns.aftr_isp ? 'a' : '-', 660 ns.aftr_isp ? 'a' : '-',
611 ns.peer_isp ? 'p' : '-', 661 ns.peer_isp ? 'p' : '-',
612 ns.user_isp ? 'u' : '-' 662 ns.user_isp ? 'u' : '-'
@@ -764,7 +814,7 @@ static int is_valid_state_transition(struct drbd_conf *mdev,
764 * to D_UNKNOWN. This rule and many more along those lines are in this function. 814 * to D_UNKNOWN. This rule and many more along those lines are in this function.
765 */ 815 */
766static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, 816static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
767 union drbd_state ns, int *warn_sync_abort) 817 union drbd_state ns, const char **warn_sync_abort)
768{ 818{
769 enum drbd_fencing_p fp; 819 enum drbd_fencing_p fp;
770 820
@@ -779,9 +829,10 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
779 os.conn <= C_DISCONNECTING) 829 os.conn <= C_DISCONNECTING)
780 ns.conn = os.conn; 830 ns.conn = os.conn;
781 831
782 /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow */ 832 /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow.
833 * If you try to go into some Sync* state, that shall fail (elsewhere). */
783 if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && 834 if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN &&
784 ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING) 835 ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_TEAR_DOWN)
785 ns.conn = os.conn; 836 ns.conn = os.conn;
786 837
787 /* After C_DISCONNECTING only C_STANDALONE may follow */ 838 /* After C_DISCONNECTING only C_STANDALONE may follow */
@@ -799,14 +850,13 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
799 if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY) 850 if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
800 ns.aftr_isp = 0; 851 ns.aftr_isp = 0;
801 852
802 if (ns.conn <= C_DISCONNECTING && ns.disk == D_DISKLESS)
803 ns.pdsk = D_UNKNOWN;
804
805 /* Abort resync if a disk fails/detaches */ 853 /* Abort resync if a disk fails/detaches */
806 if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && 854 if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&
807 (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { 855 (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
808 if (warn_sync_abort) 856 if (warn_sync_abort)
809 *warn_sync_abort = 1; 857 *warn_sync_abort =
858 os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ?
859 "Online-verify" : "Resync";
810 ns.conn = C_CONNECTED; 860 ns.conn = C_CONNECTED;
811 } 861 }
812 862
@@ -877,7 +927,12 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
877 if (fp == FP_STONITH && 927 if (fp == FP_STONITH &&
878 (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && 928 (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
879 !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) 929 !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
880 ns.susp = 1; 930 ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
931
932 if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO &&
933 (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
934 !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
935 ns.susp_nod = 1; /* Suspend IO while no up-to-date data is accessible, local or peer */
881 936
882 if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { 937 if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
883 if (ns.conn == C_SYNC_SOURCE) 938 if (ns.conn == C_SYNC_SOURCE)
@@ -913,6 +968,12 @@ static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
913 } 968 }
914} 969}
915 970
971static void drbd_resume_al(struct drbd_conf *mdev)
972{
973 if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags))
974 dev_info(DEV, "Resumed AL updates\n");
975}
976
916/** 977/**
917 * __drbd_set_state() - Set a new DRBD state 978 * __drbd_set_state() - Set a new DRBD state
918 * @mdev: DRBD device. 979 * @mdev: DRBD device.
@@ -928,7 +989,7 @@ int __drbd_set_state(struct drbd_conf *mdev,
928{ 989{
929 union drbd_state os; 990 union drbd_state os;
930 int rv = SS_SUCCESS; 991 int rv = SS_SUCCESS;
931 int warn_sync_abort = 0; 992 const char *warn_sync_abort = NULL;
932 struct after_state_chg_work *ascw; 993 struct after_state_chg_work *ascw;
933 994
934 os = mdev->state; 995 os = mdev->state;
@@ -947,14 +1008,8 @@ int __drbd_set_state(struct drbd_conf *mdev,
947 /* If the old state was illegal as well, then let 1008 /* If the old state was illegal as well, then let
948 this happen...*/ 1009 this happen...*/
949 1010
950 if (is_valid_state(mdev, os) == rv) { 1011 if (is_valid_state(mdev, os) == rv)
951 dev_err(DEV, "Considering state change from bad state. "
952 "Error would be: '%s'\n",
953 drbd_set_st_err_str(rv));
954 print_st(mdev, "old", os);
955 print_st(mdev, "new", ns);
956 rv = is_valid_state_transition(mdev, ns, os); 1012 rv = is_valid_state_transition(mdev, ns, os);
957 }
958 } else 1013 } else
959 rv = is_valid_state_transition(mdev, ns, os); 1014 rv = is_valid_state_transition(mdev, ns, os);
960 } 1015 }
@@ -966,7 +1021,7 @@ int __drbd_set_state(struct drbd_conf *mdev,
966 } 1021 }
967 1022
968 if (warn_sync_abort) 1023 if (warn_sync_abort)
969 dev_warn(DEV, "Resync aborted.\n"); 1024 dev_warn(DEV, "%s aborted.\n", warn_sync_abort);
970 1025
971 { 1026 {
972 char *pbp, pb[300]; 1027 char *pbp, pb[300];
@@ -977,7 +1032,10 @@ int __drbd_set_state(struct drbd_conf *mdev,
977 PSC(conn); 1032 PSC(conn);
978 PSC(disk); 1033 PSC(disk);
979 PSC(pdsk); 1034 PSC(pdsk);
980 PSC(susp); 1035 if (is_susp(ns) != is_susp(os))
1036 pbp += sprintf(pbp, "susp( %s -> %s ) ",
1037 drbd_susp_str(is_susp(os)),
1038 drbd_susp_str(is_susp(ns)));
981 PSC(aftr_isp); 1039 PSC(aftr_isp);
982 PSC(peer_isp); 1040 PSC(peer_isp);
983 PSC(user_isp); 1041 PSC(user_isp);
@@ -1002,12 +1060,6 @@ int __drbd_set_state(struct drbd_conf *mdev,
1002 wake_up(&mdev->misc_wait); 1060 wake_up(&mdev->misc_wait);
1003 wake_up(&mdev->state_wait); 1061 wake_up(&mdev->state_wait);
1004 1062
1005 /* post-state-change actions */
1006 if (os.conn >= C_SYNC_SOURCE && ns.conn <= C_CONNECTED) {
1007 set_bit(STOP_SYNC_TIMER, &mdev->flags);
1008 mod_timer(&mdev->resync_timer, jiffies);
1009 }
1010
1011 /* aborted verify run. log the last position */ 1063 /* aborted verify run. log the last position */
1012 if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && 1064 if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
1013 ns.conn < C_CONNECTED) { 1065 ns.conn < C_CONNECTED) {
@@ -1020,41 +1072,42 @@ int __drbd_set_state(struct drbd_conf *mdev,
1020 if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && 1072 if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
1021 (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) { 1073 (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) {
1022 dev_info(DEV, "Syncer continues.\n"); 1074 dev_info(DEV, "Syncer continues.\n");
1023 mdev->rs_paused += (long)jiffies-(long)mdev->rs_mark_time; 1075 mdev->rs_paused += (long)jiffies
1024 if (ns.conn == C_SYNC_TARGET) { 1076 -(long)mdev->rs_mark_time[mdev->rs_last_mark];
1025 if (!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags)) 1077 if (ns.conn == C_SYNC_TARGET)
1026 mod_timer(&mdev->resync_timer, jiffies); 1078 mod_timer(&mdev->resync_timer, jiffies);
1027 /* This if (!test_bit) is only needed for the case
1028 that a device that has ceased to used its timer,
1029 i.e. it is already in drbd_resync_finished() gets
1030 paused and resumed. */
1031 }
1032 } 1079 }
1033 1080
1034 if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) && 1081 if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) &&
1035 (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) { 1082 (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
1036 dev_info(DEV, "Resync suspended\n"); 1083 dev_info(DEV, "Resync suspended\n");
1037 mdev->rs_mark_time = jiffies; 1084 mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
1038 if (ns.conn == C_PAUSED_SYNC_T)
1039 set_bit(STOP_SYNC_TIMER, &mdev->flags);
1040 } 1085 }
1041 1086
1042 if (os.conn == C_CONNECTED && 1087 if (os.conn == C_CONNECTED &&
1043 (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) { 1088 (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
1089 unsigned long now = jiffies;
1090 int i;
1091
1044 mdev->ov_position = 0; 1092 mdev->ov_position = 0;
1045 mdev->rs_total = 1093 mdev->rs_total = drbd_bm_bits(mdev);
1046 mdev->rs_mark_left = drbd_bm_bits(mdev);
1047 if (mdev->agreed_pro_version >= 90) 1094 if (mdev->agreed_pro_version >= 90)
1048 set_ov_position(mdev, ns.conn); 1095 set_ov_position(mdev, ns.conn);
1049 else 1096 else
1050 mdev->ov_start_sector = 0; 1097 mdev->ov_start_sector = 0;
1051 mdev->ov_left = mdev->rs_total 1098 mdev->ov_left = mdev->rs_total
1052 - BM_SECT_TO_BIT(mdev->ov_position); 1099 - BM_SECT_TO_BIT(mdev->ov_position);
1053 mdev->rs_start = 1100 mdev->rs_start = now;
1054 mdev->rs_mark_time = jiffies; 1101 mdev->rs_last_events = 0;
1102 mdev->rs_last_sect_ev = 0;
1055 mdev->ov_last_oos_size = 0; 1103 mdev->ov_last_oos_size = 0;
1056 mdev->ov_last_oos_start = 0; 1104 mdev->ov_last_oos_start = 0;
1057 1105
1106 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1107 mdev->rs_mark_left[i] = mdev->rs_total;
1108 mdev->rs_mark_time[i] = now;
1109 }
1110
1058 if (ns.conn == C_VERIFY_S) { 1111 if (ns.conn == C_VERIFY_S) {
1059 dev_info(DEV, "Starting Online Verify from sector %llu\n", 1112 dev_info(DEV, "Starting Online Verify from sector %llu\n",
1060 (unsigned long long)mdev->ov_position); 1113 (unsigned long long)mdev->ov_position);
@@ -1107,6 +1160,10 @@ int __drbd_set_state(struct drbd_conf *mdev,
1107 ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) 1160 ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
1108 drbd_thread_restart_nowait(&mdev->receiver); 1161 drbd_thread_restart_nowait(&mdev->receiver);
1109 1162
1163 /* Resume AL writing if we get a connection */
1164 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
1165 drbd_resume_al(mdev);
1166
1110 ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); 1167 ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
1111 if (ascw) { 1168 if (ascw) {
1112 ascw->os = os; 1169 ascw->os = os;
@@ -1165,6 +1222,8 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1165 union drbd_state ns, enum chg_state_flags flags) 1222 union drbd_state ns, enum chg_state_flags flags)
1166{ 1223{
1167 enum drbd_fencing_p fp; 1224 enum drbd_fencing_p fp;
1225 enum drbd_req_event what = nothing;
1226 union drbd_state nsm = (union drbd_state){ .i = -1 };
1168 1227
1169 if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { 1228 if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
1170 clear_bit(CRASHED_PRIMARY, &mdev->flags); 1229 clear_bit(CRASHED_PRIMARY, &mdev->flags);
@@ -1188,17 +1247,49 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1188 /* Here we have the actions that are performed after a 1247 /* Here we have the actions that are performed after a
1189 state change. This function might sleep */ 1248 state change. This function might sleep */
1190 1249
1191 if (fp == FP_STONITH && ns.susp) { 1250 nsm.i = -1;
1192 /* case1: The outdate peer handler is successful: 1251 if (ns.susp_nod) {
1193 * case2: The connection was established again: */ 1252 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
1194 if ((os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) || 1253 if (ns.conn == C_CONNECTED)
1195 (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)) { 1254 what = resend, nsm.susp_nod = 0;
1255 else /* ns.conn > C_CONNECTED */
1256 dev_err(DEV, "Unexpected Resynd going on!\n");
1257 }
1258
1259 if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
1260 what = restart_frozen_disk_io, nsm.susp_nod = 0;
1261
1262 }
1263
1264 if (ns.susp_fen) {
1265 /* case1: The outdate peer handler is successful: */
1266 if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) {
1196 tl_clear(mdev); 1267 tl_clear(mdev);
1268 if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
1269 drbd_uuid_new_current(mdev);
1270 clear_bit(NEW_CUR_UUID, &mdev->flags);
1271 drbd_md_sync(mdev);
1272 }
1197 spin_lock_irq(&mdev->req_lock); 1273 spin_lock_irq(&mdev->req_lock);
1198 _drbd_set_state(_NS(mdev, susp, 0), CS_VERBOSE, NULL); 1274 _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
1199 spin_unlock_irq(&mdev->req_lock); 1275 spin_unlock_irq(&mdev->req_lock);
1200 } 1276 }
1277 /* case2: The connection was established again: */
1278 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
1279 clear_bit(NEW_CUR_UUID, &mdev->flags);
1280 what = resend;
1281 nsm.susp_fen = 0;
1282 }
1283 }
1284
1285 if (what != nothing) {
1286 spin_lock_irq(&mdev->req_lock);
1287 _tl_restart(mdev, what);
1288 nsm.i &= mdev->state.i;
1289 _drbd_set_state(mdev, nsm, CS_VERBOSE, NULL);
1290 spin_unlock_irq(&mdev->req_lock);
1201 } 1291 }
1292
1202 /* Do not change the order of the if above and the two below... */ 1293 /* Do not change the order of the if above and the two below... */
1203 if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ 1294 if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */
1204 drbd_send_uuids(mdev); 1295 drbd_send_uuids(mdev);
@@ -1217,16 +1308,22 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1217 if (get_ldev(mdev)) { 1308 if (get_ldev(mdev)) {
1218 if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && 1309 if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
1219 mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { 1310 mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
1220 drbd_uuid_new_current(mdev); 1311 if (is_susp(mdev->state)) {
1221 drbd_send_uuids(mdev); 1312 set_bit(NEW_CUR_UUID, &mdev->flags);
1313 } else {
1314 drbd_uuid_new_current(mdev);
1315 drbd_send_uuids(mdev);
1316 }
1222 } 1317 }
1223 put_ldev(mdev); 1318 put_ldev(mdev);
1224 } 1319 }
1225 } 1320 }
1226 1321
1227 if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { 1322 if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
1228 if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) 1323 if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) {
1229 drbd_uuid_new_current(mdev); 1324 drbd_uuid_new_current(mdev);
1325 drbd_send_uuids(mdev);
1326 }
1230 1327
1231 /* D_DISKLESS Peer becomes secondary */ 1328 /* D_DISKLESS Peer becomes secondary */
1232 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) 1329 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
@@ -1268,42 +1365,51 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1268 os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) 1365 os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
1269 drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate"); 1366 drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate");
1270 1367
1368 /* first half of local IO error */
1271 if (os.disk > D_FAILED && ns.disk == D_FAILED) { 1369 if (os.disk > D_FAILED && ns.disk == D_FAILED) {
1272 enum drbd_io_error_p eh; 1370 enum drbd_io_error_p eh = EP_PASS_ON;
1371
1372 if (drbd_send_state(mdev))
1373 dev_warn(DEV, "Notified peer that my disk is broken.\n");
1374 else
1375 dev_err(DEV, "Sending state for drbd_io_error() failed\n");
1376
1377 drbd_rs_cancel_all(mdev);
1273 1378
1274 eh = EP_PASS_ON;
1275 if (get_ldev_if_state(mdev, D_FAILED)) { 1379 if (get_ldev_if_state(mdev, D_FAILED)) {
1276 eh = mdev->ldev->dc.on_io_error; 1380 eh = mdev->ldev->dc.on_io_error;
1277 put_ldev(mdev); 1381 put_ldev(mdev);
1278 } 1382 }
1383 if (eh == EP_CALL_HELPER)
1384 drbd_khelper(mdev, "local-io-error");
1385 }
1279 1386
1280 drbd_rs_cancel_all(mdev); 1387
1281 /* since get_ldev() only works as long as disk>=D_INCONSISTENT, 1388 /* second half of local IO error handling,
1282 and it is D_DISKLESS here, local_cnt can only go down, it can 1389 * after local_cnt references have reached zero: */
1283 not increase... It will reach zero */ 1390 if (os.disk == D_FAILED && ns.disk == D_DISKLESS) {
1284 wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
1285 mdev->rs_total = 0; 1391 mdev->rs_total = 0;
1286 mdev->rs_failed = 0; 1392 mdev->rs_failed = 0;
1287 atomic_set(&mdev->rs_pending_cnt, 0); 1393 atomic_set(&mdev->rs_pending_cnt, 0);
1288
1289 spin_lock_irq(&mdev->req_lock);
1290 _drbd_set_state(_NS(mdev, disk, D_DISKLESS), CS_HARD, NULL);
1291 spin_unlock_irq(&mdev->req_lock);
1292
1293 if (eh == EP_CALL_HELPER)
1294 drbd_khelper(mdev, "local-io-error");
1295 } 1394 }
1296 1395
1297 if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) { 1396 if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) {
1397 /* We must still be diskless,
1398 * re-attach has to be serialized with this! */
1399 if (mdev->state.disk != D_DISKLESS)
1400 dev_err(DEV,
1401 "ASSERT FAILED: disk is %s while going diskless\n",
1402 drbd_disk_str(mdev->state.disk));
1403
1404 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state
1405 * will inc/dec it frequently. Since we became D_DISKLESS, no
1406 * one has touched the protected members anymore, though, so we
1407 * are safe to free them here. */
1408 if (drbd_send_state(mdev))
1409 dev_warn(DEV, "Notified peer that I detached my disk.\n");
1410 else
1411 dev_err(DEV, "Sending state for detach failed\n");
1298 1412
1299 if (os.disk == D_FAILED) /* && ns.disk == D_DISKLESS*/ {
1300 if (drbd_send_state(mdev))
1301 dev_warn(DEV, "Notified peer that my disk is broken.\n");
1302 else
1303 dev_err(DEV, "Sending state in drbd_io_error() failed\n");
1304 }
1305
1306 wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
1307 lc_destroy(mdev->resync); 1413 lc_destroy(mdev->resync);
1308 mdev->resync = NULL; 1414 mdev->resync = NULL;
1309 lc_destroy(mdev->act_log); 1415 lc_destroy(mdev->act_log);
@@ -1312,8 +1418,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1312 drbd_free_bc(mdev->ldev); 1418 drbd_free_bc(mdev->ldev);
1313 mdev->ldev = NULL;); 1419 mdev->ldev = NULL;);
1314 1420
1315 if (mdev->md_io_tmpp) 1421 if (mdev->md_io_tmpp) {
1316 __free_page(mdev->md_io_tmpp); 1422 __free_page(mdev->md_io_tmpp);
1423 mdev->md_io_tmpp = NULL;
1424 }
1317 } 1425 }
1318 1426
1319 /* Disks got bigger while they were detached */ 1427 /* Disks got bigger while they were detached */
@@ -1329,6 +1437,15 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1329 (os.user_isp && !ns.user_isp)) 1437 (os.user_isp && !ns.user_isp))
1330 resume_next_sg(mdev); 1438 resume_next_sg(mdev);
1331 1439
1440 /* sync target done with resync. Explicitly notify peer, even though
1441 * it should (at least for non-empty resyncs) already know itself. */
1442 if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
1443 drbd_send_state(mdev);
1444
1445 /* free tl_hash if we got thawed and are C_STANDALONE */
1446 if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash)
1447 drbd_free_tl_hash(mdev);
1448
1332 /* Upon network connection, we need to start the receiver */ 1449 /* Upon network connection, we need to start the receiver */
1333 if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) 1450 if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED)
1334 drbd_thread_start(&mdev->receiver); 1451 drbd_thread_start(&mdev->receiver);
@@ -1555,7 +1672,7 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev)
1555 1672
1556/* the appropriate socket mutex must be held already */ 1673/* the appropriate socket mutex must be held already */
1557int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, 1674int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
1558 enum drbd_packets cmd, struct p_header *h, 1675 enum drbd_packets cmd, struct p_header80 *h,
1559 size_t size, unsigned msg_flags) 1676 size_t size, unsigned msg_flags)
1560{ 1677{
1561 int sent, ok; 1678 int sent, ok;
@@ -1565,7 +1682,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
1565 1682
1566 h->magic = BE_DRBD_MAGIC; 1683 h->magic = BE_DRBD_MAGIC;
1567 h->command = cpu_to_be16(cmd); 1684 h->command = cpu_to_be16(cmd);
1568 h->length = cpu_to_be16(size-sizeof(struct p_header)); 1685 h->length = cpu_to_be16(size-sizeof(struct p_header80));
1569 1686
1570 sent = drbd_send(mdev, sock, h, size, msg_flags); 1687 sent = drbd_send(mdev, sock, h, size, msg_flags);
1571 1688
@@ -1580,7 +1697,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
1580 * when we hold the appropriate socket mutex. 1697 * when we hold the appropriate socket mutex.
1581 */ 1698 */
1582int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, 1699int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
1583 enum drbd_packets cmd, struct p_header *h, size_t size) 1700 enum drbd_packets cmd, struct p_header80 *h, size_t size)
1584{ 1701{
1585 int ok = 0; 1702 int ok = 0;
1586 struct socket *sock; 1703 struct socket *sock;
@@ -1608,7 +1725,7 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
1608int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, 1725int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data,
1609 size_t size) 1726 size_t size)
1610{ 1727{
1611 struct p_header h; 1728 struct p_header80 h;
1612 int ok; 1729 int ok;
1613 1730
1614 h.magic = BE_DRBD_MAGIC; 1731 h.magic = BE_DRBD_MAGIC;
@@ -1630,7 +1747,7 @@ int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data,
1630 1747
1631int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) 1748int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
1632{ 1749{
1633 struct p_rs_param_89 *p; 1750 struct p_rs_param_95 *p;
1634 struct socket *sock; 1751 struct socket *sock;
1635 int size, rv; 1752 int size, rv;
1636 const int apv = mdev->agreed_pro_version; 1753 const int apv = mdev->agreed_pro_version;
@@ -1638,7 +1755,8 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
1638 size = apv <= 87 ? sizeof(struct p_rs_param) 1755 size = apv <= 87 ? sizeof(struct p_rs_param)
1639 : apv == 88 ? sizeof(struct p_rs_param) 1756 : apv == 88 ? sizeof(struct p_rs_param)
1640 + strlen(mdev->sync_conf.verify_alg) + 1 1757 + strlen(mdev->sync_conf.verify_alg) + 1
1641 : /* 89 */ sizeof(struct p_rs_param_89); 1758 : apv <= 94 ? sizeof(struct p_rs_param_89)
1759 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
1642 1760
1643 /* used from admin command context and receiver/worker context. 1761 /* used from admin command context and receiver/worker context.
1644 * to avoid kmalloc, grab the socket right here, 1762 * to avoid kmalloc, grab the socket right here,
@@ -1649,12 +1767,16 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
1649 if (likely(sock != NULL)) { 1767 if (likely(sock != NULL)) {
1650 enum drbd_packets cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM; 1768 enum drbd_packets cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;
1651 1769
1652 p = &mdev->data.sbuf.rs_param_89; 1770 p = &mdev->data.sbuf.rs_param_95;
1653 1771
1654 /* initialize verify_alg and csums_alg */ 1772 /* initialize verify_alg and csums_alg */
1655 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); 1773 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
1656 1774
1657 p->rate = cpu_to_be32(sc->rate); 1775 p->rate = cpu_to_be32(sc->rate);
1776 p->c_plan_ahead = cpu_to_be32(sc->c_plan_ahead);
1777 p->c_delay_target = cpu_to_be32(sc->c_delay_target);
1778 p->c_fill_target = cpu_to_be32(sc->c_fill_target);
1779 p->c_max_rate = cpu_to_be32(sc->c_max_rate);
1658 1780
1659 if (apv >= 88) 1781 if (apv >= 88)
1660 strcpy(p->verify_alg, mdev->sync_conf.verify_alg); 1782 strcpy(p->verify_alg, mdev->sync_conf.verify_alg);
@@ -1710,7 +1832,7 @@ int drbd_send_protocol(struct drbd_conf *mdev)
1710 strcpy(p->integrity_alg, mdev->net_conf->integrity_alg); 1832 strcpy(p->integrity_alg, mdev->net_conf->integrity_alg);
1711 1833
1712 rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL, 1834 rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL,
1713 (struct p_header *)p, size); 1835 (struct p_header80 *)p, size);
1714 kfree(p); 1836 kfree(p);
1715 return rv; 1837 return rv;
1716} 1838}
@@ -1736,7 +1858,7 @@ int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
1736 put_ldev(mdev); 1858 put_ldev(mdev);
1737 1859
1738 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS, 1860 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS,
1739 (struct p_header *)&p, sizeof(p)); 1861 (struct p_header80 *)&p, sizeof(p));
1740} 1862}
1741 1863
1742int drbd_send_uuids(struct drbd_conf *mdev) 1864int drbd_send_uuids(struct drbd_conf *mdev)
@@ -1757,7 +1879,7 @@ int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val)
1757 p.uuid = cpu_to_be64(val); 1879 p.uuid = cpu_to_be64(val);
1758 1880
1759 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID, 1881 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
1760 (struct p_header *)&p, sizeof(p)); 1882 (struct p_header80 *)&p, sizeof(p));
1761} 1883}
1762 1884
1763int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags) 1885int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
@@ -1787,7 +1909,7 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags fl
1787 p.dds_flags = cpu_to_be16(flags); 1909 p.dds_flags = cpu_to_be16(flags);
1788 1910
1789 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, 1911 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES,
1790 (struct p_header *)&p, sizeof(p)); 1912 (struct p_header80 *)&p, sizeof(p));
1791 return ok; 1913 return ok;
1792} 1914}
1793 1915
@@ -1812,7 +1934,7 @@ int drbd_send_state(struct drbd_conf *mdev)
1812 1934
1813 if (likely(sock != NULL)) { 1935 if (likely(sock != NULL)) {
1814 ok = _drbd_send_cmd(mdev, sock, P_STATE, 1936 ok = _drbd_send_cmd(mdev, sock, P_STATE,
1815 (struct p_header *)&p, sizeof(p), 0); 1937 (struct p_header80 *)&p, sizeof(p), 0);
1816 } 1938 }
1817 1939
1818 mutex_unlock(&mdev->data.mutex); 1940 mutex_unlock(&mdev->data.mutex);
@@ -1830,7 +1952,7 @@ int drbd_send_state_req(struct drbd_conf *mdev,
1830 p.val = cpu_to_be32(val.i); 1952 p.val = cpu_to_be32(val.i);
1831 1953
1832 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ, 1954 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ,
1833 (struct p_header *)&p, sizeof(p)); 1955 (struct p_header80 *)&p, sizeof(p));
1834} 1956}
1835 1957
1836int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode) 1958int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode)
@@ -1840,7 +1962,7 @@ int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode)
1840 p.retcode = cpu_to_be32(retcode); 1962 p.retcode = cpu_to_be32(retcode);
1841 1963
1842 return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY, 1964 return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY,
1843 (struct p_header *)&p, sizeof(p)); 1965 (struct p_header80 *)&p, sizeof(p));
1844} 1966}
1845 1967
1846int fill_bitmap_rle_bits(struct drbd_conf *mdev, 1968int fill_bitmap_rle_bits(struct drbd_conf *mdev,
@@ -1939,7 +2061,7 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev,
1939 2061
1940enum { OK, FAILED, DONE } 2062enum { OK, FAILED, DONE }
1941send_bitmap_rle_or_plain(struct drbd_conf *mdev, 2063send_bitmap_rle_or_plain(struct drbd_conf *mdev,
1942 struct p_header *h, struct bm_xfer_ctx *c) 2064 struct p_header80 *h, struct bm_xfer_ctx *c)
1943{ 2065{
1944 struct p_compressed_bm *p = (void*)h; 2066 struct p_compressed_bm *p = (void*)h;
1945 unsigned long num_words; 2067 unsigned long num_words;
@@ -1969,12 +2091,12 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev,
1969 if (len) 2091 if (len)
1970 drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload); 2092 drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload);
1971 ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP, 2093 ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP,
1972 h, sizeof(struct p_header) + len, 0); 2094 h, sizeof(struct p_header80) + len, 0);
1973 c->word_offset += num_words; 2095 c->word_offset += num_words;
1974 c->bit_offset = c->word_offset * BITS_PER_LONG; 2096 c->bit_offset = c->word_offset * BITS_PER_LONG;
1975 2097
1976 c->packets[1]++; 2098 c->packets[1]++;
1977 c->bytes[1] += sizeof(struct p_header) + len; 2099 c->bytes[1] += sizeof(struct p_header80) + len;
1978 2100
1979 if (c->bit_offset > c->bm_bits) 2101 if (c->bit_offset > c->bm_bits)
1980 c->bit_offset = c->bm_bits; 2102 c->bit_offset = c->bm_bits;
@@ -1990,14 +2112,14 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev,
1990int _drbd_send_bitmap(struct drbd_conf *mdev) 2112int _drbd_send_bitmap(struct drbd_conf *mdev)
1991{ 2113{
1992 struct bm_xfer_ctx c; 2114 struct bm_xfer_ctx c;
1993 struct p_header *p; 2115 struct p_header80 *p;
1994 int ret; 2116 int ret;
1995 2117
1996 ERR_IF(!mdev->bitmap) return FALSE; 2118 ERR_IF(!mdev->bitmap) return FALSE;
1997 2119
1998 /* maybe we should use some per thread scratch page, 2120 /* maybe we should use some per thread scratch page,
1999 * and allocate that during initial device creation? */ 2121 * and allocate that during initial device creation? */
2000 p = (struct p_header *) __get_free_page(GFP_NOIO); 2122 p = (struct p_header80 *) __get_free_page(GFP_NOIO);
2001 if (!p) { 2123 if (!p) {
2002 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); 2124 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
2003 return FALSE; 2125 return FALSE;
@@ -2055,7 +2177,7 @@ int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
2055 if (mdev->state.conn < C_CONNECTED) 2177 if (mdev->state.conn < C_CONNECTED)
2056 return FALSE; 2178 return FALSE;
2057 ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, 2179 ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK,
2058 (struct p_header *)&p, sizeof(p)); 2180 (struct p_header80 *)&p, sizeof(p));
2059 return ok; 2181 return ok;
2060} 2182}
2061 2183
@@ -2083,17 +2205,18 @@ static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
2083 if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED) 2205 if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED)
2084 return FALSE; 2206 return FALSE;
2085 ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, 2207 ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd,
2086 (struct p_header *)&p, sizeof(p)); 2208 (struct p_header80 *)&p, sizeof(p));
2087 return ok; 2209 return ok;
2088} 2210}
2089 2211
2212/* dp->sector and dp->block_id already/still in network byte order,
2213 * data_size is payload size according to dp->head,
2214 * and may need to be corrected for digest size. */
2090int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, 2215int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
2091 struct p_data *dp) 2216 struct p_data *dp, int data_size)
2092{ 2217{
2093 const int header_size = sizeof(struct p_data) 2218 data_size -= (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ?
2094 - sizeof(struct p_header); 2219 crypto_hash_digestsize(mdev->integrity_r_tfm) : 0;
2095 int data_size = ((struct p_header *)dp)->length - header_size;
2096
2097 return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), 2220 return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
2098 dp->block_id); 2221 dp->block_id);
2099} 2222}
@@ -2141,7 +2264,7 @@ int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
2141 p.blksize = cpu_to_be32(size); 2264 p.blksize = cpu_to_be32(size);
2142 2265
2143 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, 2266 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd,
2144 (struct p_header *)&p, sizeof(p)); 2267 (struct p_header80 *)&p, sizeof(p));
2145 return ok; 2268 return ok;
2146} 2269}
2147 2270
@@ -2159,7 +2282,7 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev,
2159 2282
2160 p.head.magic = BE_DRBD_MAGIC; 2283 p.head.magic = BE_DRBD_MAGIC;
2161 p.head.command = cpu_to_be16(cmd); 2284 p.head.command = cpu_to_be16(cmd);
2162 p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header) + digest_size); 2285 p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size);
2163 2286
2164 mutex_lock(&mdev->data.mutex); 2287 mutex_lock(&mdev->data.mutex);
2165 2288
@@ -2181,7 +2304,7 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
2181 p.blksize = cpu_to_be32(size); 2304 p.blksize = cpu_to_be32(size);
2182 2305
2183 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST, 2306 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST,
2184 (struct p_header *)&p, sizeof(p)); 2307 (struct p_header80 *)&p, sizeof(p));
2185 return ok; 2308 return ok;
2186} 2309}
2187 2310
@@ -2333,6 +2456,18 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
2333 return 1; 2456 return 1;
2334} 2457}
2335 2458
2459static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
2460{
2461 if (mdev->agreed_pro_version >= 95)
2462 return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
2463 (bi_rw & REQ_UNPLUG ? DP_UNPLUG : 0) |
2464 (bi_rw & REQ_FUA ? DP_FUA : 0) |
2465 (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
2466 (bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
2467 else
2468 return bi_rw & (REQ_SYNC | REQ_UNPLUG) ? DP_RW_SYNC : 0;
2469}
2470
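The new bio_flags_to_wire() keeps the per-flag translation for protocol 95 peers and collapses everything to the old combined SYNC hint for older peers. A self-contained sketch of that mapping pattern; the REQ_ and DP_ bit values below are made-up stand-ins, only the structure mirrors the hunk:

    #include <stdint.h>
    #include <stdio.h>

    #define REQ_SYNC    (1u << 0)
    #define REQ_UNPLUG  (1u << 1)
    #define REQ_FUA     (1u << 2)
    #define REQ_FLUSH   (1u << 3)
    #define REQ_DISCARD (1u << 4)

    #define DP_RW_SYNC  (1u << 0)
    #define DP_FUA      (1u << 1)
    #define DP_FLUSH    (1u << 2)
    #define DP_DISCARD  (1u << 3)
    #define DP_UNPLUG   (1u << 4)

    static uint32_t flags_to_wire(int agreed_pro_version, unsigned long bi_rw)
    {
        if (agreed_pro_version >= 95)
            return (bi_rw & REQ_SYNC    ? DP_RW_SYNC : 0) |
                   (bi_rw & REQ_UNPLUG  ? DP_UNPLUG  : 0) |
                   (bi_rw & REQ_FUA     ? DP_FUA     : 0) |
                   (bi_rw & REQ_FLUSH   ? DP_FLUSH   : 0) |
                   (bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
        /* older peers only understand the combined SYNC/UNPLUG hint */
        return bi_rw & (REQ_SYNC | REQ_UNPLUG) ? DP_RW_SYNC : 0;
    }

    int main(void)
    {
        printf("apv 95: 0x%x\n", flags_to_wire(95, REQ_SYNC | REQ_FUA));
        printf("apv 90: 0x%x\n", flags_to_wire(90, REQ_SYNC | REQ_FUA));
        return 0;
    }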
2336/* Used to send write requests 2471/* Used to send write requests
2337 * R_PRIMARY -> Peer (P_DATA) 2472 * R_PRIMARY -> Peer (P_DATA)
2338 */ 2473 */
@@ -2350,30 +2485,25 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
2350 dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? 2485 dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
2351 crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; 2486 crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
2352 2487
2353 p.head.magic = BE_DRBD_MAGIC; 2488 if (req->size <= DRBD_MAX_SIZE_H80_PACKET) {
2354 p.head.command = cpu_to_be16(P_DATA); 2489 p.head.h80.magic = BE_DRBD_MAGIC;
2355 p.head.length = 2490 p.head.h80.command = cpu_to_be16(P_DATA);
2356 cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->size); 2491 p.head.h80.length =
2492 cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->size);
2493 } else {
2494 p.head.h95.magic = BE_DRBD_MAGIC_BIG;
2495 p.head.h95.command = cpu_to_be16(P_DATA);
2496 p.head.h95.length =
2497 cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->size);
2498 }
2357 2499
2358 p.sector = cpu_to_be64(req->sector); 2500 p.sector = cpu_to_be64(req->sector);
2359 p.block_id = (unsigned long)req; 2501 p.block_id = (unsigned long)req;
2360 p.seq_num = cpu_to_be32(req->seq_num = 2502 p.seq_num = cpu_to_be32(req->seq_num =
2361 atomic_add_return(1, &mdev->packet_seq)); 2503 atomic_add_return(1, &mdev->packet_seq));
2362 dp_flags = 0;
2363 2504
2364 /* NOTE: no need to check if barriers supported here as we would 2505 dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);
2365 * not pass the test in make_request_common in that case 2506
2366 */
2367 if (req->master_bio->bi_rw & REQ_HARDBARRIER) {
2368 dev_err(DEV, "ASSERT FAILED would have set DP_HARDBARRIER\n");
2369 /* dp_flags |= DP_HARDBARRIER; */
2370 }
2371 if (req->master_bio->bi_rw & REQ_SYNC)
2372 dp_flags |= DP_RW_SYNC;
2373 /* for now handle SYNCIO and UNPLUG
2374 * as if they still were one and the same flag */
2375 if (req->master_bio->bi_rw & REQ_UNPLUG)
2376 dp_flags |= DP_RW_SYNC;
2377 if (mdev->state.conn >= C_SYNC_SOURCE && 2507 if (mdev->state.conn >= C_SYNC_SOURCE &&
2378 mdev->state.conn <= C_PAUSED_SYNC_T) 2508 mdev->state.conn <= C_PAUSED_SYNC_T)
2379 dp_flags |= DP_MAY_SET_IN_SYNC; 2509 dp_flags |= DP_MAY_SET_IN_SYNC;
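drbd_send_dblock() now chooses between the compact header, whose 16-bit length field only covers small payloads, and the newer header with a 32-bit length. A rough stand-alone model of that selection; field layout, magics, and the size limit are illustrative assumptions rather than the exact wire format:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <arpa/inet.h>   /* htons/htonl for big-endian wire fields */

    #define MAX_SIZE_H80_PACKET 0xffff

    struct header80 { uint32_t magic; uint16_t command; uint16_t length; };
    struct header95 { uint16_t magic; uint16_t command; uint32_t length; };
    union  header   { struct header80 h80; struct header95 h95; };

    static void fill_header(union header *h, uint16_t cmd, uint32_t payload_len)
    {
        memset(h, 0, sizeof(*h));
        if (payload_len <= MAX_SIZE_H80_PACKET) {
            h->h80.magic   = htonl(0x83740267);   /* stand-in magic */
            h->h80.command = htons(cmd);
            h->h80.length  = htons((uint16_t)payload_len);
        } else {
            h->h95.magic   = htons(0x835a);       /* stand-in "big" magic */
            h->h95.command = htons(cmd);
            h->h95.length  = htonl(payload_len);
        }
    }

    int main(void)
    {
        union header h;
        fill_header(&h, 0 /* P_DATA */, 4096);
        printf("4 KiB payload -> h80 length %u\n", ntohs(h.h80.length));
        fill_header(&h, 0, 1u << 20);
        printf("1 MiB payload -> h95 length %u\n", ntohl(h.h95.length));
        return 0;
    }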
@@ -2414,10 +2544,17 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
2414 dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? 2544 dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
2415 crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; 2545 crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
2416 2546
2417 p.head.magic = BE_DRBD_MAGIC; 2547 if (e->size <= DRBD_MAX_SIZE_H80_PACKET) {
2418 p.head.command = cpu_to_be16(cmd); 2548 p.head.h80.magic = BE_DRBD_MAGIC;
2419 p.head.length = 2549 p.head.h80.command = cpu_to_be16(cmd);
2420 cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + e->size); 2550 p.head.h80.length =
2551 cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->size);
2552 } else {
2553 p.head.h95.magic = BE_DRBD_MAGIC_BIG;
2554 p.head.h95.command = cpu_to_be16(cmd);
2555 p.head.h95.length =
2556 cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->size);
2557 }
2421 2558
2422 p.sector = cpu_to_be64(e->sector); 2559 p.sector = cpu_to_be64(e->sector);
2423 p.block_id = e->block_id; 2560 p.block_id = e->block_id;
@@ -2430,8 +2567,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
2430 if (!drbd_get_data_sock(mdev)) 2567 if (!drbd_get_data_sock(mdev))
2431 return 0; 2568 return 0;
2432 2569
2433 ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, 2570 ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0);
2434 sizeof(p), dgs ? MSG_MORE : 0);
2435 if (ok && dgs) { 2571 if (ok && dgs) {
2436 dgb = mdev->int_dig_out; 2572 dgb = mdev->int_dig_out;
2437 drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); 2573 drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
@@ -2606,7 +2742,13 @@ static void drbd_set_defaults(struct drbd_conf *mdev)
2606 /* .verify_alg = */ {}, 0, 2742 /* .verify_alg = */ {}, 0,
2607 /* .cpu_mask = */ {}, 0, 2743 /* .cpu_mask = */ {}, 0,
2608 /* .csums_alg = */ {}, 0, 2744 /* .csums_alg = */ {}, 0,
2609 /* .use_rle = */ 0 2745 /* .use_rle = */ 0,
2746 /* .on_no_data = */ DRBD_ON_NO_DATA_DEF,
2747 /* .c_plan_ahead = */ DRBD_C_PLAN_AHEAD_DEF,
2748 /* .c_delay_target = */ DRBD_C_DELAY_TARGET_DEF,
2749 /* .c_fill_target = */ DRBD_C_FILL_TARGET_DEF,
2750 /* .c_max_rate = */ DRBD_C_MAX_RATE_DEF,
2751 /* .c_min_rate = */ DRBD_C_MIN_RATE_DEF
2610 }; 2752 };
2611 2753
2612 /* Have to use that way, because the layout differs between 2754 /* Have to use that way, because the layout differs between
@@ -2617,7 +2759,9 @@ static void drbd_set_defaults(struct drbd_conf *mdev)
2617 .conn = C_STANDALONE, 2759 .conn = C_STANDALONE,
2618 .disk = D_DISKLESS, 2760 .disk = D_DISKLESS,
2619 .pdsk = D_UNKNOWN, 2761 .pdsk = D_UNKNOWN,
2620 .susp = 0 2762 .susp = 0,
2763 .susp_nod = 0,
2764 .susp_fen = 0
2621 } }; 2765 } };
2622} 2766}
2623 2767
@@ -2641,6 +2785,9 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
2641 atomic_set(&mdev->net_cnt, 0); 2785 atomic_set(&mdev->net_cnt, 0);
2642 atomic_set(&mdev->packet_seq, 0); 2786 atomic_set(&mdev->packet_seq, 0);
2643 atomic_set(&mdev->pp_in_use, 0); 2787 atomic_set(&mdev->pp_in_use, 0);
2788 atomic_set(&mdev->pp_in_use_by_net, 0);
2789 atomic_set(&mdev->rs_sect_in, 0);
2790 atomic_set(&mdev->rs_sect_ev, 0);
2644 2791
2645 mutex_init(&mdev->md_io_mutex); 2792 mutex_init(&mdev->md_io_mutex);
2646 mutex_init(&mdev->data.mutex); 2793 mutex_init(&mdev->data.mutex);
@@ -2667,11 +2814,13 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
2667 INIT_LIST_HEAD(&mdev->meta.work.q); 2814 INIT_LIST_HEAD(&mdev->meta.work.q);
2668 INIT_LIST_HEAD(&mdev->resync_work.list); 2815 INIT_LIST_HEAD(&mdev->resync_work.list);
2669 INIT_LIST_HEAD(&mdev->unplug_work.list); 2816 INIT_LIST_HEAD(&mdev->unplug_work.list);
2817 INIT_LIST_HEAD(&mdev->go_diskless.list);
2670 INIT_LIST_HEAD(&mdev->md_sync_work.list); 2818 INIT_LIST_HEAD(&mdev->md_sync_work.list);
2671 INIT_LIST_HEAD(&mdev->bm_io_work.w.list); 2819 INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
2672 2820
2673 mdev->resync_work.cb = w_resync_inactive; 2821 mdev->resync_work.cb = w_resync_inactive;
2674 mdev->unplug_work.cb = w_send_write_hint; 2822 mdev->unplug_work.cb = w_send_write_hint;
2823 mdev->go_diskless.cb = w_go_diskless;
2675 mdev->md_sync_work.cb = w_md_sync; 2824 mdev->md_sync_work.cb = w_md_sync;
2676 mdev->bm_io_work.w.cb = w_bitmap_io; 2825 mdev->bm_io_work.w.cb = w_bitmap_io;
2677 init_timer(&mdev->resync_timer); 2826 init_timer(&mdev->resync_timer);
@@ -2683,6 +2832,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
2683 2832
2684 init_waitqueue_head(&mdev->misc_wait); 2833 init_waitqueue_head(&mdev->misc_wait);
2685 init_waitqueue_head(&mdev->state_wait); 2834 init_waitqueue_head(&mdev->state_wait);
2835 init_waitqueue_head(&mdev->net_cnt_wait);
2686 init_waitqueue_head(&mdev->ee_wait); 2836 init_waitqueue_head(&mdev->ee_wait);
2687 init_waitqueue_head(&mdev->al_wait); 2837 init_waitqueue_head(&mdev->al_wait);
2688 init_waitqueue_head(&mdev->seq_wait); 2838 init_waitqueue_head(&mdev->seq_wait);
@@ -2698,6 +2848,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
2698 2848
2699void drbd_mdev_cleanup(struct drbd_conf *mdev) 2849void drbd_mdev_cleanup(struct drbd_conf *mdev)
2700{ 2850{
2851 int i;
2701 if (mdev->receiver.t_state != None) 2852 if (mdev->receiver.t_state != None)
2702 dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n", 2853 dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
2703 mdev->receiver.t_state); 2854 mdev->receiver.t_state);
@@ -2714,9 +2865,13 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
2714 mdev->p_size = 2865 mdev->p_size =
2715 mdev->rs_start = 2866 mdev->rs_start =
2716 mdev->rs_total = 2867 mdev->rs_total =
2717 mdev->rs_failed = 2868 mdev->rs_failed = 0;
2718 mdev->rs_mark_left = 2869 mdev->rs_last_events = 0;
2719 mdev->rs_mark_time = 0; 2870 mdev->rs_last_sect_ev = 0;
2871 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2872 mdev->rs_mark_left[i] = 0;
2873 mdev->rs_mark_time[i] = 0;
2874 }
2720 D_ASSERT(mdev->net_conf == NULL); 2875 D_ASSERT(mdev->net_conf == NULL);
2721 2876
2722 drbd_set_my_capacity(mdev, 0); 2877 drbd_set_my_capacity(mdev, 0);
@@ -2727,6 +2882,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
2727 } 2882 }
2728 2883
2729 drbd_free_resources(mdev); 2884 drbd_free_resources(mdev);
2885 clear_bit(AL_SUSPENDED, &mdev->flags);
2730 2886
2731 /* 2887 /*
2732 * currently we drbd_init_ee only on module load, so 2888 * currently we drbd_init_ee only on module load, so
@@ -2742,6 +2898,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
2742 D_ASSERT(list_empty(&mdev->meta.work.q)); 2898 D_ASSERT(list_empty(&mdev->meta.work.q));
2743 D_ASSERT(list_empty(&mdev->resync_work.list)); 2899 D_ASSERT(list_empty(&mdev->resync_work.list));
2744 D_ASSERT(list_empty(&mdev->unplug_work.list)); 2900 D_ASSERT(list_empty(&mdev->unplug_work.list));
2901 D_ASSERT(list_empty(&mdev->go_diskless.list));
2745 2902
2746} 2903}
2747 2904
@@ -3281,9 +3438,10 @@ void drbd_md_sync(struct drbd_conf *mdev)
3281 sector_t sector; 3438 sector_t sector;
3282 int i; 3439 int i;
3283 3440
3441 del_timer(&mdev->md_sync_timer);
3442 /* timer may be rearmed by drbd_md_mark_dirty() now. */
3284 if (!test_and_clear_bit(MD_DIRTY, &mdev->flags)) 3443 if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
3285 return; 3444 return;
3286 del_timer(&mdev->md_sync_timer);
3287 3445
3288 /* We use here D_FAILED and not D_ATTACHING because we try to write 3446 /* We use here D_FAILED and not D_ATTACHING because we try to write
3289 * metadata even if we detach due to a disk failure! */ 3447 * metadata even if we detach due to a disk failure! */
@@ -3311,12 +3469,9 @@ void drbd_md_sync(struct drbd_conf *mdev)
3311 D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset); 3469 D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
3312 sector = mdev->ldev->md.md_offset; 3470 sector = mdev->ldev->md.md_offset;
3313 3471
3314 if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { 3472 if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
3315 clear_bit(MD_DIRTY, &mdev->flags);
3316 } else {
3317 /* this was a try anyways ... */ 3473 /* this was a try anyways ... */
3318 dev_err(DEV, "meta data update failed!\n"); 3474 dev_err(DEV, "meta data update failed!\n");
3319
3320 drbd_chk_io_error(mdev, 1, TRUE); 3475 drbd_chk_io_error(mdev, 1, TRUE);
3321 } 3476 }
3322 3477
@@ -3403,6 +3558,28 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
3403 return rv; 3558 return rv;
3404} 3559}
3405 3560
3561static void debug_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index)
3562{
3563 static char *uuid_str[UI_EXTENDED_SIZE] = {
3564 [UI_CURRENT] = "CURRENT",
3565 [UI_BITMAP] = "BITMAP",
3566 [UI_HISTORY_START] = "HISTORY_START",
3567 [UI_HISTORY_END] = "HISTORY_END",
3568 [UI_SIZE] = "SIZE",
3569 [UI_FLAGS] = "FLAGS",
3570 };
3571
3572 if (index >= UI_EXTENDED_SIZE) {
3573 dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n");
3574 return;
3575 }
3576
3577 dynamic_dev_dbg(DEV, " uuid[%s] now %016llX\n",
3578 uuid_str[index],
3579 (unsigned long long)mdev->ldev->md.uuid[index]);
3580}
3581
3582
3406/** 3583/**
3407 * drbd_md_mark_dirty() - Mark meta data super block as dirty 3584 * drbd_md_mark_dirty() - Mark meta data super block as dirty
3408 * @mdev: DRBD device. 3585 * @mdev: DRBD device.
@@ -3411,19 +3588,31 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
3411 * the meta-data super block. This function sets MD_DIRTY, and starts a 3588 * the meta-data super block. This function sets MD_DIRTY, and starts a
3412 * timer that ensures that within five seconds you have to call drbd_md_sync(). 3589 * timer that ensures that within five seconds you have to call drbd_md_sync().
3413 */ 3590 */
3591#ifdef DEBUG
3592void drbd_md_mark_dirty_(struct drbd_conf *mdev, unsigned int line, const char *func)
3593{
3594 if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) {
3595 mod_timer(&mdev->md_sync_timer, jiffies + HZ);
3596 mdev->last_md_mark_dirty.line = line;
3597 mdev->last_md_mark_dirty.func = func;
3598 }
3599}
3600#else
3414void drbd_md_mark_dirty(struct drbd_conf *mdev) 3601void drbd_md_mark_dirty(struct drbd_conf *mdev)
3415{ 3602{
3416 set_bit(MD_DIRTY, &mdev->flags); 3603 if (!test_and_set_bit(MD_DIRTY, &mdev->flags))
3417 mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ); 3604 mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ);
3418} 3605}
3419 3606#endif
3420 3607
3421static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) 3608static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
3422{ 3609{
3423 int i; 3610 int i;
3424 3611
3425 for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) 3612 for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) {
3426 mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i]; 3613 mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
3614 debug_drbd_uuid(mdev, i+1);
3615 }
3427} 3616}
3428 3617
3429void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) 3618void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
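The reworked drbd_md_mark_dirty() above only arms the flush timer on the clean-to-dirty transition, and drbd_md_sync() clears the flag before writing so a racing marker re-arms its own timer. A user-space sketch of that pattern with C11 atomics standing in for test_and_set_bit()/mod_timer():

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_bool md_dirty;

    static void arm_flush_timer(void)  { puts("timer armed"); }        /* stand-in */
    static void write_superblock(void) { puts("superblock written"); } /* stand-in */

    static void mark_dirty(void)
    {
        /* only the transition clean -> dirty arms the timer */
        if (!atomic_exchange(&md_dirty, true))
            arm_flush_timer();
    }

    static void flush_if_dirty(void)
    {
        /* clear first: anything dirtied after this point gets its own timer */
        if (!atomic_exchange(&md_dirty, false))
            return;
        write_superblock();
    }

    int main(void)
    {
        mark_dirty();     /* arms the timer */
        mark_dirty();     /* no-op: already dirty */
        flush_if_dirty(); /* writes once */
        flush_if_dirty(); /* no-op: already clean */
        return 0;
    }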
@@ -3438,6 +3627,7 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
3438 } 3627 }
3439 3628
3440 mdev->ldev->md.uuid[idx] = val; 3629 mdev->ldev->md.uuid[idx] = val;
3630 debug_drbd_uuid(mdev, idx);
3441 drbd_md_mark_dirty(mdev); 3631 drbd_md_mark_dirty(mdev);
3442} 3632}
3443 3633
@@ -3447,6 +3637,7 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
3447 if (mdev->ldev->md.uuid[idx]) { 3637 if (mdev->ldev->md.uuid[idx]) {
3448 drbd_uuid_move_history(mdev); 3638 drbd_uuid_move_history(mdev);
3449 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx]; 3639 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
3640 debug_drbd_uuid(mdev, UI_HISTORY_START);
3450 } 3641 }
3451 _drbd_uuid_set(mdev, idx, val); 3642 _drbd_uuid_set(mdev, idx, val);
3452} 3643}
@@ -3465,6 +3656,7 @@ void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
3465 dev_info(DEV, "Creating new current UUID\n"); 3656 dev_info(DEV, "Creating new current UUID\n");
3466 D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0); 3657 D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0);
3467 mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT]; 3658 mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
3659 debug_drbd_uuid(mdev, UI_BITMAP);
3468 3660
3469 get_random_bytes(&val, sizeof(u64)); 3661 get_random_bytes(&val, sizeof(u64));
3470 _drbd_uuid_set(mdev, UI_CURRENT, val); 3662 _drbd_uuid_set(mdev, UI_CURRENT, val);
@@ -3479,6 +3671,8 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
3479 drbd_uuid_move_history(mdev); 3671 drbd_uuid_move_history(mdev);
3480 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; 3672 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
3481 mdev->ldev->md.uuid[UI_BITMAP] = 0; 3673 mdev->ldev->md.uuid[UI_BITMAP] = 0;
3674 debug_drbd_uuid(mdev, UI_HISTORY_START);
3675 debug_drbd_uuid(mdev, UI_BITMAP);
3482 } else { 3676 } else {
3483 if (mdev->ldev->md.uuid[UI_BITMAP]) 3677 if (mdev->ldev->md.uuid[UI_BITMAP])
3484 dev_warn(DEV, "bm UUID already set"); 3678 dev_warn(DEV, "bm UUID already set");
@@ -3486,6 +3680,7 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
3486 mdev->ldev->md.uuid[UI_BITMAP] = val; 3680 mdev->ldev->md.uuid[UI_BITMAP] = val;
3487 mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1); 3681 mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1);
3488 3682
3683 debug_drbd_uuid(mdev, UI_BITMAP);
3489 } 3684 }
3490 drbd_md_mark_dirty(mdev); 3685 drbd_md_mark_dirty(mdev);
3491} 3686}
@@ -3528,6 +3723,7 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
3528{ 3723{
3529 int rv = -EIO; 3724 int rv = -EIO;
3530 3725
3726 drbd_resume_al(mdev);
3531 if (get_ldev_if_state(mdev, D_ATTACHING)) { 3727 if (get_ldev_if_state(mdev, D_ATTACHING)) {
3532 drbd_bm_clear_all(mdev); 3728 drbd_bm_clear_all(mdev);
3533 rv = drbd_bm_write(mdev); 3729 rv = drbd_bm_write(mdev);
@@ -3560,6 +3756,32 @@ static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
3560 return 1; 3756 return 1;
3561} 3757}
3562 3758
3759static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused)
3760{
3761 D_ASSERT(mdev->state.disk == D_FAILED);
3762 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
3763 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
3764 * the protected members anymore, though, so in the after_state_ch work
3765 * it will be safe to free them. */
3766 drbd_force_state(mdev, NS(disk, D_DISKLESS));
3767 /* We need to wait for return of references checked out while we still
3768 * have been D_FAILED, though (drbd_md_sync, bitmap io). */
3769 wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
3770
3771 clear_bit(GO_DISKLESS, &mdev->flags);
3772 return 1;
3773}
3774
3775void drbd_go_diskless(struct drbd_conf *mdev)
3776{
3777 D_ASSERT(mdev->state.disk == D_FAILED);
3778 if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
3779 drbd_queue_work(&mdev->data.work, &mdev->go_diskless);
3780 /* don't drbd_queue_work_front,
3781 * we need to serialize with the after_state_ch work
3782 * of the -> D_FAILED transition. */
3783}
3784
3563/** 3785/**
3564 * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap 3786 * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
3565 * @mdev: DRBD device. 3787 * @mdev: DRBD device.
@@ -3656,8 +3878,11 @@ static void md_sync_timer_fn(unsigned long data)
3656static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused) 3878static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused)
3657{ 3879{
3658 dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n"); 3880 dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
3881#ifdef DEBUG
3882 dev_warn(DEV, "last md_mark_dirty: %s:%u\n",
3883 mdev->last_md_mark_dirty.func, mdev->last_md_mark_dirty.line);
3884#endif
3659 drbd_md_sync(mdev); 3885 drbd_md_sync(mdev);
3660
3661 return 1; 3886 return 1;
3662} 3887}
3663 3888
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 73131c5ae339..87925e97e613 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -33,10 +33,13 @@
33#include <linux/blkpg.h> 33#include <linux/blkpg.h>
34#include <linux/cpumask.h> 34#include <linux/cpumask.h>
35#include "drbd_int.h" 35#include "drbd_int.h"
36#include "drbd_req.h"
36#include "drbd_wrappers.h" 37#include "drbd_wrappers.h"
37#include <asm/unaligned.h> 38#include <asm/unaligned.h>
38#include <linux/drbd_tag_magic.h> 39#include <linux/drbd_tag_magic.h>
39#include <linux/drbd_limits.h> 40#include <linux/drbd_limits.h>
41#include <linux/compiler.h>
42#include <linux/kthread.h>
40 43
41static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int); 44static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int);
42static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *); 45static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *);
@@ -169,6 +172,10 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
169 put_net_conf(mdev); 172 put_net_conf(mdev);
170 } 173 }
171 174
175 /* The helper may take some time.
176 * write out any unsynced meta data changes now */
177 drbd_md_sync(mdev);
178
172 dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb); 179 dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
173 180
174 drbd_bcast_ev_helper(mdev, cmd); 181 drbd_bcast_ev_helper(mdev, cmd);
@@ -202,12 +209,10 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
202 put_ldev(mdev); 209 put_ldev(mdev);
203 } else { 210 } else {
204 dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n"); 211 dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
205 return mdev->state.pdsk; 212 nps = mdev->state.pdsk;
213 goto out;
206 } 214 }
207 215
208 if (fp == FP_STONITH)
209 _drbd_request_state(mdev, NS(susp, 1), CS_WAIT_COMPLETE);
210
211 r = drbd_khelper(mdev, "fence-peer"); 216 r = drbd_khelper(mdev, "fence-peer");
212 217
213 switch ((r>>8) & 0xff) { 218 switch ((r>>8) & 0xff) {
@@ -252,9 +257,36 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
252 257
253 dev_info(DEV, "fence-peer helper returned %d (%s)\n", 258 dev_info(DEV, "fence-peer helper returned %d (%s)\n",
254 (r>>8) & 0xff, ex_to_string); 259 (r>>8) & 0xff, ex_to_string);
260
261out:
262 if (mdev->state.susp_fen && nps >= D_UNKNOWN) {
263 /* The handler was not successful... unfreeze here, the
264 state engine can not unfreeze... */
265 _drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE);
266 }
267
255 return nps; 268 return nps;
256} 269}
257 270
271static int _try_outdate_peer_async(void *data)
272{
273 struct drbd_conf *mdev = (struct drbd_conf *)data;
274 enum drbd_disk_state nps;
275
276 nps = drbd_try_outdate_peer(mdev);
277 drbd_request_state(mdev, NS(pdsk, nps));
278
279 return 0;
280}
281
282void drbd_try_outdate_peer_async(struct drbd_conf *mdev)
283{
284 struct task_struct *opa;
285
286 opa = kthread_run(_try_outdate_peer_async, mdev, "drbd%d_a_helper", mdev_to_minor(mdev));
287 if (IS_ERR(opa))
288 dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n");
289}
258 290
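drbd_try_outdate_peer_async() pushes the potentially slow fence-peer handler onto its own kernel thread so the caller is not blocked, checking kthread_run() with IS_ERR(). A rough user-space analogue of that fire-and-forget shape using a detached pthread; the device struct and fencing logic are placeholders:

    #include <pthread.h>
    #include <stdio.h>

    struct device { int minor; };

    static void *outdate_peer(void *data)
    {
        struct device *dev = data;
        /* ... run the (slow) fence-peer helper, then apply the resulting state ... */
        printf("drbd%d: peer outdated\n", dev->minor);
        return NULL;
    }

    static void outdate_peer_async(struct device *dev)
    {
        pthread_t t;
        pthread_attr_t attr;

        pthread_attr_init(&attr);
        pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
        if (pthread_create(&t, &attr, outdate_peer, dev))
            fprintf(stderr, "failed to start fence-peer helper\n");
        pthread_attr_destroy(&attr);
    }

    int main(void)
    {
        struct device dev = { .minor = 0 };
        outdate_peer_async(&dev);
        pthread_exit(NULL);   /* let the detached helper finish */
    }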
259int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) 291int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
260{ 292{
@@ -394,6 +426,39 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
394 return r; 426 return r;
395} 427}
396 428
429static struct drbd_conf *ensure_mdev(int minor, int create)
430{
431 struct drbd_conf *mdev;
432
433 if (minor >= minor_count)
434 return NULL;
435
436 mdev = minor_to_mdev(minor);
437
438 if (!mdev && create) {
439 struct gendisk *disk = NULL;
440 mdev = drbd_new_device(minor);
441
442 spin_lock_irq(&drbd_pp_lock);
443 if (minor_table[minor] == NULL) {
444 minor_table[minor] = mdev;
445 disk = mdev->vdisk;
446 mdev = NULL;
447 } /* else: we lost the race */
448 spin_unlock_irq(&drbd_pp_lock);
449
450 if (disk) /* we won the race above */
451 /* in case we ever add a drbd_delete_device(),
452 * don't forget the del_gendisk! */
453 add_disk(disk);
454 else /* we lost the race above */
455 drbd_free_mdev(mdev);
456
457 mdev = minor_to_mdev(minor);
458 }
459
460 return mdev;
461}
397 462
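ensure_mdev() allocates the new device outside the lock, publishes it into the minor table under the lock, and frees its copy if another configurer won the race. A condensed sketch of that publish-or-discard pattern with a plain mutex and placeholder allocation:

    #include <pthread.h>
    #include <stdlib.h>

    #define MINOR_COUNT 16

    struct device { int minor; };

    static struct device *minor_table[MINOR_COUNT];
    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

    static struct device *new_device(int minor)
    {
        struct device *d = calloc(1, sizeof(*d));
        if (d)
            d->minor = minor;
        return d;
    }

    static struct device *ensure_device(int minor, int create)
    {
        struct device *d, *fresh;

        if (minor < 0 || minor >= MINOR_COUNT)
            return NULL;

        d = minor_table[minor];
        if (d || !create)
            return d;

        fresh = new_device(minor);      /* allocate without holding the lock */
        if (!fresh)
            return NULL;

        pthread_mutex_lock(&table_lock);
        if (!minor_table[minor]) {
            minor_table[minor] = fresh; /* we won the race: publish */
            fresh = NULL;
        }
        pthread_mutex_unlock(&table_lock);

        free(fresh);                    /* loser frees its unused copy */
        return minor_table[minor];
    }

    int main(void)
    {
        return ensure_device(3, 1) ? 0 : 1;
    }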
398static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 463static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
399 struct drbd_nl_cfg_reply *reply) 464 struct drbd_nl_cfg_reply *reply)
@@ -494,6 +559,8 @@ char *ppsize(char *buf, unsigned long long size)
494void drbd_suspend_io(struct drbd_conf *mdev) 559void drbd_suspend_io(struct drbd_conf *mdev)
495{ 560{
496 set_bit(SUSPEND_IO, &mdev->flags); 561 set_bit(SUSPEND_IO, &mdev->flags);
562 if (is_susp(mdev->state))
563 return;
497 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); 564 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
498} 565}
499 566
@@ -713,9 +780,6 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu
713 blk_queue_segment_boundary(q, PAGE_SIZE-1); 780 blk_queue_segment_boundary(q, PAGE_SIZE-1);
714 blk_stack_limits(&q->limits, &b->limits, 0); 781 blk_stack_limits(&q->limits, &b->limits, 0);
715 782
716 if (b->merge_bvec_fn)
717 dev_warn(DEV, "Backing device's merge_bvec_fn() = %p\n",
718 b->merge_bvec_fn);
719 dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q)); 783 dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q));
720 784
721 if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { 785 if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
@@ -729,14 +793,16 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu
729/* serialize deconfig (worker exiting, doing cleanup) 793/* serialize deconfig (worker exiting, doing cleanup)
730 * and reconfig (drbdsetup disk, drbdsetup net) 794 * and reconfig (drbdsetup disk, drbdsetup net)
731 * 795 *
732 * wait for a potentially exiting worker, then restart it, 796 * Wait for a potentially exiting worker, then restart it,
733 * or start a new one. 797 * or start a new one. Flush any pending work, there may still be an
798 * after_state_change queued.
734 */ 799 */
735static void drbd_reconfig_start(struct drbd_conf *mdev) 800static void drbd_reconfig_start(struct drbd_conf *mdev)
736{ 801{
737 wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags)); 802 wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags));
738 wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags)); 803 wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags));
739 drbd_thread_start(&mdev->worker); 804 drbd_thread_start(&mdev->worker);
805 drbd_flush_workqueue(mdev);
740} 806}
741 807
742/* if still unconfigured, stops worker again. 808/* if still unconfigured, stops worker again.
@@ -756,6 +822,29 @@ static void drbd_reconfig_done(struct drbd_conf *mdev)
756 wake_up(&mdev->state_wait); 822 wake_up(&mdev->state_wait);
757} 823}
758 824
825/* Make sure IO is suspended before calling this function(). */
826static void drbd_suspend_al(struct drbd_conf *mdev)
827{
828 int s = 0;
829
830 if (lc_try_lock(mdev->act_log)) {
831 drbd_al_shrink(mdev);
832 lc_unlock(mdev->act_log);
833 } else {
834 dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n");
835 return;
836 }
837
838 spin_lock_irq(&mdev->req_lock);
839 if (mdev->state.conn < C_CONNECTED)
840 s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags);
841
842 spin_unlock_irq(&mdev->req_lock);
843
844 if (s)
845 dev_info(DEV, "Suspended AL updates\n");
846}
847
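drbd_suspend_al() only shrinks the activity log if it can take the LRU lock without blocking; when the log is busy it warns and gives up rather than waiting. A minimal trylock sketch of that shape, with the activity-log internals reduced to placeholders:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t al_lock = PTHREAD_MUTEX_INITIALIZER;

    static void al_shrink(void) { puts("activity log shrunk"); } /* stand-in */

    static int suspend_al(void)
    {
        if (pthread_mutex_trylock(&al_lock) != 0) {
            fprintf(stderr, "activity log busy, not suspending AL updates\n");
            return -1;
        }
        al_shrink();
        pthread_mutex_unlock(&al_lock);
        return 0;
    }

    int main(void)
    {
        return suspend_al();
    }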
759/* does always return 0; 848/* does always return 0;
760 * interesting return code is in reply->ret_code */ 849 * interesting return code is in reply->ret_code */
761static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 850static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
@@ -769,6 +858,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
769 struct inode *inode, *inode2; 858 struct inode *inode, *inode2;
770 struct lru_cache *resync_lru = NULL; 859 struct lru_cache *resync_lru = NULL;
771 union drbd_state ns, os; 860 union drbd_state ns, os;
861 unsigned int max_seg_s;
772 int rv; 862 int rv;
773 int cp_discovered = 0; 863 int cp_discovered = 0;
774 int logical_block_size; 864 int logical_block_size;
@@ -803,6 +893,15 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
803 goto fail; 893 goto fail;
804 } 894 }
805 895
896 if (get_net_conf(mdev)) {
897 int prot = mdev->net_conf->wire_protocol;
898 put_net_conf(mdev);
899 if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) {
900 retcode = ERR_STONITH_AND_PROT_A;
901 goto fail;
902 }
903 }
904
806 nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0); 905 nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0);
807 if (IS_ERR(nbc->lo_file)) { 906 if (IS_ERR(nbc->lo_file)) {
808 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, 907 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
@@ -924,7 +1023,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
924 1023
925 drbd_suspend_io(mdev); 1024 drbd_suspend_io(mdev);
926 /* also wait for the last barrier ack. */ 1025 /* also wait for the last barrier ack. */
927 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt)); 1026 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state));
928 /* and for any other previously queued work */ 1027 /* and for any other previously queued work */
929 drbd_flush_workqueue(mdev); 1028 drbd_flush_workqueue(mdev);
930 1029
@@ -1021,7 +1120,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1021 else 1120 else
1022 clear_bit(CRASHED_PRIMARY, &mdev->flags); 1121 clear_bit(CRASHED_PRIMARY, &mdev->flags);
1023 1122
1024 if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND)) { 1123 if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
1124 !(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) {
1025 set_bit(CRASHED_PRIMARY, &mdev->flags); 1125 set_bit(CRASHED_PRIMARY, &mdev->flags);
1026 cp_discovered = 1; 1126 cp_discovered = 1;
1027 } 1127 }
@@ -1031,7 +1131,20 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1031 mdev->read_cnt = 0; 1131 mdev->read_cnt = 0;
1032 mdev->writ_cnt = 0; 1132 mdev->writ_cnt = 0;
1033 1133
1034 drbd_setup_queue_param(mdev, DRBD_MAX_SEGMENT_SIZE); 1134 max_seg_s = DRBD_MAX_SEGMENT_SIZE;
1135 if (mdev->state.conn == C_CONNECTED) {
1136 /* We are Primary, Connected, and now attach a new local
1137 * backing store. We must not increase the user visible maximum
1138 * bio size on this device to something the peer may not be
1139 * able to handle. */
1140 if (mdev->agreed_pro_version < 94)
1141 max_seg_s = queue_max_segment_size(mdev->rq_queue);
1142 else if (mdev->agreed_pro_version == 94)
1143 max_seg_s = DRBD_MAX_SIZE_H80_PACKET;
1144 /* else: drbd 8.3.9 and later, stay with default */
1145 }
1146
1147 drbd_setup_queue_param(mdev, max_seg_s);
1035 1148
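The attach path above caps the request size advertised to upper layers while connected to an older peer: pre-94 peers keep whatever is currently configured, protocol 94 is limited to the old 16-bit packet payload, and newer peers get the full default. A compact sketch of that decision; the numeric limits are assumptions for illustration:

    #include <stdio.h>

    #define MAX_SEGMENT_SIZE    (1 << 17)  /* assumed driver default, 128 KiB */
    #define MAX_SIZE_H80_PACKET 0xffff     /* payload limit of the old header */

    static unsigned int attach_max_seg(int connected, int agreed_pro_version,
                                       unsigned int current_queue_limit)
    {
        unsigned int max_seg_s = MAX_SEGMENT_SIZE;

        if (connected) {
            if (agreed_pro_version < 94)
                max_seg_s = current_queue_limit;   /* do not grow it */
            else if (agreed_pro_version == 94)
                max_seg_s = MAX_SIZE_H80_PACKET;
            /* else: 95 and later handle the full default */
        }
        return max_seg_s;
    }

    int main(void)
    {
        printf("standalone:        %u\n", attach_max_seg(0,  0, 32768));
        printf("connected, apv 93: %u\n", attach_max_seg(1, 93, 32768));
        printf("connected, apv 94: %u\n", attach_max_seg(1, 94, 32768));
        printf("connected, apv 95: %u\n", attach_max_seg(1, 95, 32768));
        return 0;
    }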
1036 /* If I am currently not R_PRIMARY, 1149 /* If I am currently not R_PRIMARY,
1037 * but meta data primary indicator is set, 1150 * but meta data primary indicator is set,
@@ -1079,6 +1192,9 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1079 drbd_al_to_on_disk_bm(mdev); 1192 drbd_al_to_on_disk_bm(mdev);
1080 } 1193 }
1081 1194
1195 if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
1196 drbd_suspend_al(mdev); /* IO is still suspended here... */
1197
1082 spin_lock_irq(&mdev->req_lock); 1198 spin_lock_irq(&mdev->req_lock);
1083 os = mdev->state; 1199 os = mdev->state;
1084 ns.i = os.i; 1200 ns.i = os.i;
@@ -1235,7 +1351,16 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1235 && (new_conf->wire_protocol != DRBD_PROT_C)) { 1351 && (new_conf->wire_protocol != DRBD_PROT_C)) {
1236 retcode = ERR_NOT_PROTO_C; 1352 retcode = ERR_NOT_PROTO_C;
1237 goto fail; 1353 goto fail;
1238 }; 1354 }
1355
1356 if (get_ldev(mdev)) {
1357 enum drbd_fencing_p fp = mdev->ldev->dc.fencing;
1358 put_ldev(mdev);
1359 if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) {
1360 retcode = ERR_STONITH_AND_PROT_A;
1361 goto fail;
1362 }
1363 }
1239 1364
1240 if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { 1365 if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
1241 retcode = ERR_DISCARD; 1366 retcode = ERR_DISCARD;
@@ -1350,6 +1475,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1350 } 1475 }
1351 } 1476 }
1352 1477
1478 drbd_flush_workqueue(mdev);
1353 spin_lock_irq(&mdev->req_lock); 1479 spin_lock_irq(&mdev->req_lock);
1354 if (mdev->net_conf != NULL) { 1480 if (mdev->net_conf != NULL) {
1355 retcode = ERR_NET_CONFIGURED; 1481 retcode = ERR_NET_CONFIGURED;
@@ -1388,10 +1514,9 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1388 mdev->int_dig_out=int_dig_out; 1514 mdev->int_dig_out=int_dig_out;
1389 mdev->int_dig_in=int_dig_in; 1515 mdev->int_dig_in=int_dig_in;
1390 mdev->int_dig_vv=int_dig_vv; 1516 mdev->int_dig_vv=int_dig_vv;
1517 retcode = _drbd_set_state(_NS(mdev, conn, C_UNCONNECTED), CS_VERBOSE, NULL);
1391 spin_unlock_irq(&mdev->req_lock); 1518 spin_unlock_irq(&mdev->req_lock);
1392 1519
1393 retcode = _drbd_request_state(mdev, NS(conn, C_UNCONNECTED), CS_VERBOSE);
1394
1395 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); 1520 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
1396 reply->ret_code = retcode; 1521 reply->ret_code = retcode;
1397 drbd_reconfig_done(mdev); 1522 drbd_reconfig_done(mdev);
@@ -1546,6 +1671,8 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
1546 struct crypto_hash *csums_tfm = NULL; 1671 struct crypto_hash *csums_tfm = NULL;
1547 struct syncer_conf sc; 1672 struct syncer_conf sc;
1548 cpumask_var_t new_cpu_mask; 1673 cpumask_var_t new_cpu_mask;
1674 int *rs_plan_s = NULL;
1675 int fifo_size;
1549 1676
1550 if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) { 1677 if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
1551 retcode = ERR_NOMEM; 1678 retcode = ERR_NOMEM;
@@ -1557,6 +1684,12 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
1557 sc.rate = DRBD_RATE_DEF; 1684 sc.rate = DRBD_RATE_DEF;
1558 sc.after = DRBD_AFTER_DEF; 1685 sc.after = DRBD_AFTER_DEF;
1559 sc.al_extents = DRBD_AL_EXTENTS_DEF; 1686 sc.al_extents = DRBD_AL_EXTENTS_DEF;
1687 sc.on_no_data = DRBD_ON_NO_DATA_DEF;
1688 sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF;
1689 sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF;
1690 sc.c_fill_target = DRBD_C_FILL_TARGET_DEF;
1691 sc.c_max_rate = DRBD_C_MAX_RATE_DEF;
1692 sc.c_min_rate = DRBD_C_MIN_RATE_DEF;
1560 } else 1693 } else
1561 memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); 1694 memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
1562 1695
@@ -1634,6 +1767,12 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
1634 } 1767 }
1635#undef AL_MAX 1768#undef AL_MAX
1636 1769
1770 /* to avoid spurious errors when configuring minors before configuring
1771 * the minors they depend on: if necessary, first create the minor we
1772 * depend on */
1773 if (sc.after >= 0)
1774 ensure_mdev(sc.after, 1);
1775
1637 /* most sanity checks done, try to assign the new sync-after 1776 /* most sanity checks done, try to assign the new sync-after
1638 * dependency. need to hold the global lock in there, 1777 * dependency. need to hold the global lock in there,
1639 * to avoid a race in the dependency loop check. */ 1778 * to avoid a race in the dependency loop check. */
@@ -1641,6 +1780,16 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
1641 if (retcode != NO_ERROR) 1780 if (retcode != NO_ERROR)
1642 goto fail; 1781 goto fail;
1643 1782
1783 fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1784 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
1785 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
1786 if (!rs_plan_s) {
1787 dev_err(DEV, "kmalloc of fifo_buffer failed");
1788 retcode = ERR_NOMEM;
1789 goto fail;
1790 }
1791 }
1792
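The rs_plan_s sizing above derives the FIFO length from the configured plan-ahead time. Assuming (not shown in this hunk) that c_plan_ahead is given in tenths of a second and SLEEP_TIME is one controller step of HZ/10 jiffies, the FIFO ends up with one slot per controller step inside the plan-ahead window; a small worked check:

    #include <stdio.h>

    #define HZ         250        /* assumed kernel tick rate */
    #define SLEEP_TIME (HZ / 10)  /* assumed controller step: 100 ms */

    int main(void)
    {
        int c_plan_ahead;   /* in 0.1 s units, e.g. 20 == 2 seconds */

        for (c_plan_ahead = 0; c_plan_ahead <= 40; c_plan_ahead += 20) {
            int fifo_size = (c_plan_ahead * 10 * SLEEP_TIME) / HZ;
            printf("c_plan_ahead=%2d (%.1f s) -> %d FIFO slots\n",
                   c_plan_ahead, c_plan_ahead / 10.0, fifo_size);
        }
        return 0;
    }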
1644 /* ok, assign the rest of it as well. 1793 /* ok, assign the rest of it as well.
1645 * lock against receive_SyncParam() */ 1794 * lock against receive_SyncParam() */
1646 spin_lock(&mdev->peer_seq_lock); 1795 spin_lock(&mdev->peer_seq_lock);
@@ -1657,6 +1806,15 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
1657 mdev->verify_tfm = verify_tfm; 1806 mdev->verify_tfm = verify_tfm;
1658 verify_tfm = NULL; 1807 verify_tfm = NULL;
1659 } 1808 }
1809
1810 if (fifo_size != mdev->rs_plan_s.size) {
1811 kfree(mdev->rs_plan_s.values);
1812 mdev->rs_plan_s.values = rs_plan_s;
1813 mdev->rs_plan_s.size = fifo_size;
1814 mdev->rs_planed = 0;
1815 rs_plan_s = NULL;
1816 }
1817
1660 spin_unlock(&mdev->peer_seq_lock); 1818 spin_unlock(&mdev->peer_seq_lock);
1661 1819
1662 if (get_ldev(mdev)) { 1820 if (get_ldev(mdev)) {
@@ -1688,6 +1846,7 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
1688 1846
1689 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE); 1847 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
1690fail: 1848fail:
1849 kfree(rs_plan_s);
1691 free_cpumask_var(new_cpu_mask); 1850 free_cpumask_var(new_cpu_mask);
1692 crypto_free_hash(csums_tfm); 1851 crypto_free_hash(csums_tfm);
1693 crypto_free_hash(verify_tfm); 1852 crypto_free_hash(verify_tfm);
@@ -1721,12 +1880,38 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
1721 return 0; 1880 return 0;
1722} 1881}
1723 1882
1883static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
1884{
1885 int rv;
1886
1887 rv = drbd_bmio_set_n_write(mdev);
1888 drbd_suspend_al(mdev);
1889 return rv;
1890}
1891
1724static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 1892static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1725 struct drbd_nl_cfg_reply *reply) 1893 struct drbd_nl_cfg_reply *reply)
1726{ 1894{
1895 int retcode;
1727 1896
1728 reply->ret_code = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); 1897 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
1898
1899 if (retcode < SS_SUCCESS) {
1900 if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) {
1901 /* The peer will get a resync upon connect anyways. Just make that
1902 into a full resync. */
1903 retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
1904 if (retcode >= SS_SUCCESS) {
1905 /* open coded drbd_bitmap_io() */
1906 if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
1907 "set_n_write from invalidate_peer"))
1908 retcode = ERR_IO_MD_DISK;
1909 }
1910 } else
1911 retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
1912 }
1729 1913
1914 reply->ret_code = retcode;
1730 return 0; 1915 return 0;
1731} 1916}
1732 1917
@@ -1765,7 +1950,21 @@ static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
1765static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, 1950static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1766 struct drbd_nl_cfg_reply *reply) 1951 struct drbd_nl_cfg_reply *reply)
1767{ 1952{
1768 reply->ret_code = drbd_request_state(mdev, NS(susp, 0)); 1953 if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
1954 drbd_uuid_new_current(mdev);
1955 clear_bit(NEW_CUR_UUID, &mdev->flags);
1956 drbd_md_sync(mdev);
1957 }
1958 drbd_suspend_io(mdev);
1959 reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
1960 if (reply->ret_code == SS_SUCCESS) {
1961 if (mdev->state.conn < C_CONNECTED)
1962 tl_clear(mdev);
1963 if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED)
1964 tl_restart(mdev, fail_frozen_disk_io);
1965 }
1966 drbd_resume_io(mdev);
1967
1769 return 0; 1968 return 0;
1770} 1969}
1771 1970
@@ -1941,40 +2140,6 @@ out:
1941 return 0; 2140 return 0;
1942} 2141}
1943 2142
1944static struct drbd_conf *ensure_mdev(struct drbd_nl_cfg_req *nlp)
1945{
1946 struct drbd_conf *mdev;
1947
1948 if (nlp->drbd_minor >= minor_count)
1949 return NULL;
1950
1951 mdev = minor_to_mdev(nlp->drbd_minor);
1952
1953 if (!mdev && (nlp->flags & DRBD_NL_CREATE_DEVICE)) {
1954 struct gendisk *disk = NULL;
1955 mdev = drbd_new_device(nlp->drbd_minor);
1956
1957 spin_lock_irq(&drbd_pp_lock);
1958 if (minor_table[nlp->drbd_minor] == NULL) {
1959 minor_table[nlp->drbd_minor] = mdev;
1960 disk = mdev->vdisk;
1961 mdev = NULL;
1962 } /* else: we lost the race */
1963 spin_unlock_irq(&drbd_pp_lock);
1964
1965 if (disk) /* we won the race above */
1966 /* in case we ever add a drbd_delete_device(),
1967 * don't forget the del_gendisk! */
1968 add_disk(disk);
1969 else /* we lost the race above */
1970 drbd_free_mdev(mdev);
1971
1972 mdev = minor_to_mdev(nlp->drbd_minor);
1973 }
1974
1975 return mdev;
1976}
1977
1978struct cn_handler_struct { 2143struct cn_handler_struct {
1979 int (*function)(struct drbd_conf *, 2144 int (*function)(struct drbd_conf *,
1980 struct drbd_nl_cfg_req *, 2145 struct drbd_nl_cfg_req *,
@@ -2035,7 +2200,8 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
2035 goto fail; 2200 goto fail;
2036 } 2201 }
2037 2202
2038 mdev = ensure_mdev(nlp); 2203 mdev = ensure_mdev(nlp->drbd_minor,
2204 (nlp->flags & DRBD_NL_CREATE_DEVICE));
2039 if (!mdev) { 2205 if (!mdev) {
2040 retcode = ERR_MINOR_INVALID; 2206 retcode = ERR_MINOR_INVALID;
2041 goto fail; 2207 goto fail;
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index be3374b68460..ad325c5d0ce1 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -57,6 +57,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
57 unsigned long db, dt, dbdt, rt, rs_left; 57 unsigned long db, dt, dbdt, rt, rs_left;
58 unsigned int res; 58 unsigned int res;
59 int i, x, y; 59 int i, x, y;
60 int stalled = 0;
60 61
61 drbd_get_syncer_progress(mdev, &rs_left, &res); 62 drbd_get_syncer_progress(mdev, &rs_left, &res);
62 63
@@ -90,18 +91,17 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
90 * db: blocks written from mark until now 91 * db: blocks written from mark until now
91 * rt: remaining time 92 * rt: remaining time
92 */ 93 */
93 dt = (jiffies - mdev->rs_mark_time) / HZ; 94 /* Rolling marks. last_mark+1 may just now be modified. last_mark+2 is
94 95 * at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at
95 if (dt > 20) { 96 * least DRBD_SYNC_MARK_STEP time before it will be modified. */
96 /* if we made no update to rs_mark_time for too long, 97 i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS;
97 * we are stalled. show that. */ 98 dt = (jiffies - mdev->rs_mark_time[i]) / HZ;
98 seq_printf(seq, "stalled\n"); 99 if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS))
99 return; 100 stalled = 1;
100 }
101 101
102 if (!dt) 102 if (!dt)
103 dt++; 103 dt++;
104 db = mdev->rs_mark_left - rs_left; 104 db = mdev->rs_mark_left[i] - rs_left;
105 rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */ 105 rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */
106 106
107 seq_printf(seq, "finish: %lu:%02lu:%02lu", 107 seq_printf(seq, "finish: %lu:%02lu:%02lu",
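The hunk above replaces the single (mark_left, mark_time) pair with a ring of DRBD_SYNC_MARKS rolling marks; progress is measured against a mark old enough to be stable but recent enough to track the current rate, and "stalled" becomes a flag instead of an early return. A small self-contained model of that estimate, with the mark count and step interval as assumed constants:

    #include <stdio.h>

    #define SYNC_MARKS      8
    #define SYNC_MARK_STEP  3    /* seconds between marks (assumed) */

    struct marks {
        unsigned long left[SYNC_MARKS]; /* blocks still to sync at mark time */
        unsigned long time[SYNC_MARKS]; /* seconds at mark time */
        int last;                       /* index of the newest mark */
    };

    static void estimate(const struct marks *m, unsigned long now,
                         unsigned long rs_left)
    {
        /* last+1 may be mid-update; last+2 is the oldest safe mark */
        int i = (m->last + 2) % SYNC_MARKS;
        unsigned long dt = now - m->time[i];
        unsigned long db = m->left[i] - rs_left;
        int stalled = dt > SYNC_MARK_STEP * SYNC_MARKS;

        if (!dt)
            dt = 1;
        printf("synced %lu blocks in %lus, ETA %lus%s\n",
               db, dt, dt * (rs_left / (db / 100 + 1)) / 100,
               stalled ? " (stalled)" : "");
    }

    int main(void)
    {
        struct marks m = { .left = { 100000, 90000, 80000, 70000,
                                     60000, 50000, 40000, 30000 },
                           .time = { 0, 3, 6, 9, 12, 15, 18, 21 },
                           .last = 7 };
        estimate(&m, 24, 20000);
        return 0;
    }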
@@ -118,7 +118,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
118 /* mean speed since syncer started 118 /* mean speed since syncer started
119 * we do account for PausedSync periods */ 119 * we do account for PausedSync periods */
120 dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; 120 dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
121 if (dt <= 0) 121 if (dt == 0)
122 dt = 1; 122 dt = 1;
123 db = mdev->rs_total - rs_left; 123 db = mdev->rs_total - rs_left;
124 dbdt = Bit2KB(db/dt); 124 dbdt = Bit2KB(db/dt);
@@ -128,7 +128,14 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
128 else 128 else
129 seq_printf(seq, " (%ld)", dbdt); 129 seq_printf(seq, " (%ld)", dbdt);
130 130
131 seq_printf(seq, " K/sec\n"); 131 if (mdev->state.conn == C_SYNC_TARGET) {
132 if (mdev->c_sync_rate > 1000)
133 seq_printf(seq, " want: %d,%03d",
134 mdev->c_sync_rate / 1000, mdev->c_sync_rate % 1000);
135 else
136 seq_printf(seq, " want: %d", mdev->c_sync_rate);
137 }
138 seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : "");
132} 139}
133 140
134static void resync_dump_detail(struct seq_file *seq, struct lc_element *e) 141static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
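The new "want:" field prints the throttled target rate with a thousands separator by splitting on 1000, so a target of 12345 is rendered as "12,345". A tiny stand-alone check of that formatting:

    #include <stdio.h>

    static void print_want(int c_sync_rate)
    {
        if (c_sync_rate > 1000)
            printf(" want: %d,%03d\n", c_sync_rate / 1000, c_sync_rate % 1000);
        else
            printf(" want: %d\n", c_sync_rate);
    }

    int main(void)
    {
        print_want(250);    /* " want: 250"    */
        print_want(12345);  /* " want: 12,345" */
        return 0;
    }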
@@ -196,7 +203,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
196 seq_printf(seq, "%2d: cs:Unconfigured\n", i); 203 seq_printf(seq, "%2d: cs:Unconfigured\n", i);
197 } else { 204 } else {
198 seq_printf(seq, 205 seq_printf(seq,
199 "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c\n" 206 "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
200 " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u " 207 " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
201 "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c", 208 "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
202 i, sn, 209 i, sn,
@@ -206,11 +213,12 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
206 drbd_disk_str(mdev->state.pdsk), 213 drbd_disk_str(mdev->state.pdsk),
207 (mdev->net_conf == NULL ? ' ' : 214 (mdev->net_conf == NULL ? ' ' :
208 (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')), 215 (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')),
209 mdev->state.susp ? 's' : 'r', 216 is_susp(mdev->state) ? 's' : 'r',
210 mdev->state.aftr_isp ? 'a' : '-', 217 mdev->state.aftr_isp ? 'a' : '-',
211 mdev->state.peer_isp ? 'p' : '-', 218 mdev->state.peer_isp ? 'p' : '-',
212 mdev->state.user_isp ? 'u' : '-', 219 mdev->state.user_isp ? 'u' : '-',
213 mdev->congestion_reason ?: '-', 220 mdev->congestion_reason ?: '-',
221 test_bit(AL_SUSPENDED, &mdev->flags) ? 's' : '-',
214 mdev->send_cnt/2, 222 mdev->send_cnt/2,
215 mdev->recv_cnt/2, 223 mdev->recv_cnt/2,
216 mdev->writ_cnt/2, 224 mdev->writ_cnt/2,
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 484ecbb6b772..760ae0df9251 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -241,7 +241,7 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
241 spin_unlock_irq(&mdev->req_lock); 241 spin_unlock_irq(&mdev->req_lock);
242 242
243 list_for_each_entry_safe(e, t, &reclaimed, w.list) 243 list_for_each_entry_safe(e, t, &reclaimed, w.list)
244 drbd_free_ee(mdev, e); 244 drbd_free_net_ee(mdev, e);
245} 245}
246 246
247/** 247/**
@@ -298,9 +298,11 @@ static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool
298 * Is also used from inside an other spin_lock_irq(&mdev->req_lock); 298 * Is also used from inside an other spin_lock_irq(&mdev->req_lock);
299 * Either links the page chain back to the global pool, 299 * Either links the page chain back to the global pool,
300 * or returns all pages to the system. */ 300 * or returns all pages to the system. */
301static void drbd_pp_free(struct drbd_conf *mdev, struct page *page) 301static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
302{ 302{
303 atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
303 int i; 304 int i;
305
304 if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) 306 if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count)
305 i = page_chain_free(page); 307 i = page_chain_free(page);
306 else { 308 else {
@@ -311,10 +313,10 @@ static void drbd_pp_free(struct drbd_conf *mdev, struct page *page)
311 drbd_pp_vacant += i; 313 drbd_pp_vacant += i;
312 spin_unlock(&drbd_pp_lock); 314 spin_unlock(&drbd_pp_lock);
313 } 315 }
314 atomic_sub(i, &mdev->pp_in_use); 316 i = atomic_sub_return(i, a);
315 i = atomic_read(&mdev->pp_in_use);
316 if (i < 0) 317 if (i < 0)
317 dev_warn(DEV, "ASSERTION FAILED: pp_in_use: %d < 0\n", i); 318 dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
319 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
318 wake_up(&drbd_pp_wait); 320 wake_up(&drbd_pp_wait);
319} 321}
320 322
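The drbd_pp_free() change closes a small race: subtracting and then re-reading the counter could observe other CPUs' updates in between, so the underflow check now uses the value returned by the atomic subtraction itself. The same idea expressed with C11 atomics, where atomic_fetch_sub returns the value before the update:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int pp_in_use;

    static void pp_free(int npages)
    {
        /* value after the subtraction, without a separate racy read */
        int remaining = atomic_fetch_sub(&pp_in_use, npages) - npages;

        if (remaining < 0)
            fprintf(stderr, "ASSERTION FAILED: pp_in_use: %d < 0\n", remaining);
    }

    int main(void)
    {
        atomic_store(&pp_in_use, 4);
        pp_free(3);  /* fine: 1 page still accounted for */
        pp_free(3);  /* reports -2: more freed than allocated */
        return 0;
    }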
@@ -365,7 +367,6 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
365 e->size = data_size; 367 e->size = data_size;
366 e->flags = 0; 368 e->flags = 0;
367 e->sector = sector; 369 e->sector = sector;
368 e->sector = sector;
369 e->block_id = id; 370 e->block_id = id;
370 371
371 return e; 372 return e;
@@ -375,9 +376,11 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
375 return NULL; 376 return NULL;
376} 377}
377 378
378void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) 379void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int is_net)
379{ 380{
380 drbd_pp_free(mdev, e->pages); 381 if (e->flags & EE_HAS_DIGEST)
382 kfree(e->digest);
383 drbd_pp_free(mdev, e->pages, is_net);
381 D_ASSERT(atomic_read(&e->pending_bios) == 0); 384 D_ASSERT(atomic_read(&e->pending_bios) == 0);
382 D_ASSERT(hlist_unhashed(&e->colision)); 385 D_ASSERT(hlist_unhashed(&e->colision));
383 mempool_free(e, drbd_ee_mempool); 386 mempool_free(e, drbd_ee_mempool);
@@ -388,13 +391,14 @@ int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
388 LIST_HEAD(work_list); 391 LIST_HEAD(work_list);
389 struct drbd_epoch_entry *e, *t; 392 struct drbd_epoch_entry *e, *t;
390 int count = 0; 393 int count = 0;
394 int is_net = list == &mdev->net_ee;
391 395
392 spin_lock_irq(&mdev->req_lock); 396 spin_lock_irq(&mdev->req_lock);
393 list_splice_init(list, &work_list); 397 list_splice_init(list, &work_list);
394 spin_unlock_irq(&mdev->req_lock); 398 spin_unlock_irq(&mdev->req_lock);
395 399
396 list_for_each_entry_safe(e, t, &work_list, w.list) { 400 list_for_each_entry_safe(e, t, &work_list, w.list) {
397 drbd_free_ee(mdev, e); 401 drbd_free_some_ee(mdev, e, is_net);
398 count++; 402 count++;
399 } 403 }
400 return count; 404 return count;
@@ -423,7 +427,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev)
423 spin_unlock_irq(&mdev->req_lock); 427 spin_unlock_irq(&mdev->req_lock);
424 428
425 list_for_each_entry_safe(e, t, &reclaimed, w.list) 429 list_for_each_entry_safe(e, t, &reclaimed, w.list)
426 drbd_free_ee(mdev, e); 430 drbd_free_net_ee(mdev, e);
427 431
428 /* possible callbacks here: 432 /* possible callbacks here:
429 * e_end_block, and e_end_resync_block, e_send_discard_ack. 433 * e_end_block, and e_end_resync_block, e_send_discard_ack.
@@ -719,14 +723,14 @@ out:
719static int drbd_send_fp(struct drbd_conf *mdev, 723static int drbd_send_fp(struct drbd_conf *mdev,
720 struct socket *sock, enum drbd_packets cmd) 724 struct socket *sock, enum drbd_packets cmd)
721{ 725{
722 struct p_header *h = (struct p_header *) &mdev->data.sbuf.header; 726 struct p_header80 *h = &mdev->data.sbuf.header.h80;
723 727
724 return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); 728 return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0);
725} 729}
726 730
727static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) 731static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock)
728{ 732{
729 struct p_header *h = (struct p_header *) &mdev->data.sbuf.header; 733 struct p_header80 *h = &mdev->data.rbuf.header.h80;
730 int rr; 734 int rr;
731 735
732 rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0); 736 rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0);
@@ -776,9 +780,6 @@ static int drbd_connect(struct drbd_conf *mdev)
776 780
777 D_ASSERT(!mdev->data.socket); 781 D_ASSERT(!mdev->data.socket);
778 782
779 if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags))
780 dev_err(DEV, "CREATE_BARRIER flag was set in drbd_connect - now cleared!\n");
781
782 if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) 783 if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
783 return -2; 784 return -2;
784 785
@@ -927,6 +928,11 @@ retry:
927 928
928 drbd_thread_start(&mdev->asender); 929 drbd_thread_start(&mdev->asender);
929 930
931 if (mdev->agreed_pro_version < 95 && get_ldev(mdev)) {
932 drbd_setup_queue_param(mdev, DRBD_MAX_SIZE_H80_PACKET);
933 put_ldev(mdev);
934 }
935
930 if (!drbd_send_protocol(mdev)) 936 if (!drbd_send_protocol(mdev))
931 return -1; 937 return -1;
932 drbd_send_sync_param(mdev, &mdev->sync_conf); 938 drbd_send_sync_param(mdev, &mdev->sync_conf);
@@ -946,22 +952,28 @@ out_release_sockets:
946 return -1; 952 return -1;
947} 953}
948 954
949static int drbd_recv_header(struct drbd_conf *mdev, struct p_header *h) 955static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size)
950{ 956{
957 union p_header *h = &mdev->data.rbuf.header;
951 int r; 958 int r;
952 959
953 r = drbd_recv(mdev, h, sizeof(*h)); 960 r = drbd_recv(mdev, h, sizeof(*h));
954
955 if (unlikely(r != sizeof(*h))) { 961 if (unlikely(r != sizeof(*h))) {
956 dev_err(DEV, "short read expecting header on sock: r=%d\n", r); 962 dev_err(DEV, "short read expecting header on sock: r=%d\n", r);
957 return FALSE; 963 return FALSE;
958 }; 964 }
959 h->command = be16_to_cpu(h->command); 965
960 h->length = be16_to_cpu(h->length); 966 if (likely(h->h80.magic == BE_DRBD_MAGIC)) {
961 if (unlikely(h->magic != BE_DRBD_MAGIC)) { 967 *cmd = be16_to_cpu(h->h80.command);
962 dev_err(DEV, "magic?? on data m: 0x%lx c: %d l: %d\n", 968 *packet_size = be16_to_cpu(h->h80.length);
963 (long)be32_to_cpu(h->magic), 969 } else if (h->h95.magic == BE_DRBD_MAGIC_BIG) {
964 h->command, h->length); 970 *cmd = be16_to_cpu(h->h95.command);
971 *packet_size = be32_to_cpu(h->h95.length);
972 } else {
973 dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n",
974 be32_to_cpu(h->h80.magic),
975 be16_to_cpu(h->h80.command),
976 be16_to_cpu(h->h80.length));
965 return FALSE; 977 return FALSE;
966 } 978 }
967 mdev->last_received = jiffies; 979 mdev->last_received = jiffies;
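drbd_recv_header() now accepts either wire header and tells them apart by the magic that arrives first. The field layout implied by the conversions above (be16 command and length for the old header, be16 command plus be32 length for the new one) would look roughly like this; the authoritative definitions live in drbd_int.h and this is only a sketch:

	/* sketch of the two on-wire header variants (see drbd_int.h) */
	struct p_header80 {		/* peers up to protocol 94 */
		u32 magic;		/* BE_DRBD_MAGIC */
		u16 command;
		u16 length;		/* payload bytes after this header */
		u8  payload[0];
	} __packed;

	struct p_header95 {		/* protocol 95 and later, larger payloads */
		u16 magic;		/* BE_DRBD_MAGIC_BIG */
		u16 command;
		u32 length;
		u8  payload[0];
	} __packed;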
@@ -1268,17 +1280,12 @@ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __relea
1268 return 1; 1280 return 1;
1269} 1281}
1270 1282
1271static int receive_Barrier(struct drbd_conf *mdev, struct p_header *h) 1283static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
1272{ 1284{
1273 int rv, issue_flush; 1285 int rv, issue_flush;
1274 struct p_barrier *p = (struct p_barrier *)h; 1286 struct p_barrier *p = &mdev->data.rbuf.barrier;
1275 struct drbd_epoch *epoch; 1287 struct drbd_epoch *epoch;
1276 1288
1277 ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
1278
1279 rv = drbd_recv(mdev, h->payload, h->length);
1280 ERR_IF(rv != h->length) return FALSE;
1281
1282 inc_unacked(mdev); 1289 inc_unacked(mdev);
1283 1290
1284 if (mdev->net_conf->wire_protocol != DRBD_PROT_C) 1291 if (mdev->net_conf->wire_protocol != DRBD_PROT_C)
@@ -1457,7 +1464,7 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1457 data_size -= rr; 1464 data_size -= rr;
1458 } 1465 }
1459 kunmap(page); 1466 kunmap(page);
1460 drbd_pp_free(mdev, page); 1467 drbd_pp_free(mdev, page, 0);
1461 return rv; 1468 return rv;
1462} 1469}
1463 1470
@@ -1562,30 +1569,29 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si
1562 list_add(&e->w.list, &mdev->sync_ee); 1569 list_add(&e->w.list, &mdev->sync_ee);
1563 spin_unlock_irq(&mdev->req_lock); 1570 spin_unlock_irq(&mdev->req_lock);
1564 1571
1572 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
1565 if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) 1573 if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0)
1566 return TRUE; 1574 return TRUE;
1567 1575
1576 /* drbd_submit_ee currently fails for one reason only:
1577 * not being able to allocate enough bios.
1578 * Is dropping the connection going to help? */
1579 spin_lock_irq(&mdev->req_lock);
1580 list_del(&e->w.list);
1581 spin_unlock_irq(&mdev->req_lock);
1582
1568 drbd_free_ee(mdev, e); 1583 drbd_free_ee(mdev, e);
1569fail: 1584fail:
1570 put_ldev(mdev); 1585 put_ldev(mdev);
1571 return FALSE; 1586 return FALSE;
1572} 1587}
1573 1588
1574static int receive_DataReply(struct drbd_conf *mdev, struct p_header *h) 1589static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
1575{ 1590{
1576 struct drbd_request *req; 1591 struct drbd_request *req;
1577 sector_t sector; 1592 sector_t sector;
1578 unsigned int header_size, data_size;
1579 int ok; 1593 int ok;
1580 struct p_data *p = (struct p_data *)h; 1594 struct p_data *p = &mdev->data.rbuf.data;
1581
1582 header_size = sizeof(*p) - sizeof(*h);
1583 data_size = h->length - header_size;
1584
1585 ERR_IF(data_size == 0) return FALSE;
1586
1587 if (drbd_recv(mdev, h->payload, header_size) != header_size)
1588 return FALSE;
1589 1595
1590 sector = be64_to_cpu(p->sector); 1596 sector = be64_to_cpu(p->sector);
1591 1597
@@ -1611,20 +1617,11 @@ static int receive_DataReply(struct drbd_conf *mdev, struct p_header *h)
1611 return ok; 1617 return ok;
1612} 1618}
1613 1619
1614static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h) 1620static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
1615{ 1621{
1616 sector_t sector; 1622 sector_t sector;
1617 unsigned int header_size, data_size;
1618 int ok; 1623 int ok;
1619 struct p_data *p = (struct p_data *)h; 1624 struct p_data *p = &mdev->data.rbuf.data;
1620
1621 header_size = sizeof(*p) - sizeof(*h);
1622 data_size = h->length - header_size;
1623
1624 ERR_IF(data_size == 0) return FALSE;
1625
1626 if (drbd_recv(mdev, h->payload, header_size) != header_size)
1627 return FALSE;
1628 1625
1629 sector = be64_to_cpu(p->sector); 1626 sector = be64_to_cpu(p->sector);
1630 D_ASSERT(p->block_id == ID_SYNCER); 1627 D_ASSERT(p->block_id == ID_SYNCER);
@@ -1640,9 +1637,11 @@ static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h)
1640 1637
1641 ok = drbd_drain_block(mdev, data_size); 1638 ok = drbd_drain_block(mdev, data_size);
1642 1639
1643 drbd_send_ack_dp(mdev, P_NEG_ACK, p); 1640 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
1644 } 1641 }
1645 1642
1643 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1644
1646 return ok; 1645 return ok;
1647} 1646}
1648 1647
@@ -1765,24 +1764,27 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
1765 return ret; 1764 return ret;
1766} 1765}
1767 1766
1767static unsigned long write_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
1768{
1769 if (mdev->agreed_pro_version >= 95)
1770 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1771 (dpf & DP_UNPLUG ? REQ_UNPLUG : 0) |
1772 (dpf & DP_FUA ? REQ_FUA : 0) |
1773 (dpf & DP_FLUSH ? REQ_FUA : 0) |
1774 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
1775 else
1776 return dpf & DP_RW_SYNC ? (REQ_SYNC | REQ_UNPLUG) : 0;
1777}
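write_flags_to_bio() is the single place where the peer's packet flags become bio flags, and the mapping depends on what the peer can express: protocol 95 peers send the fine-grained DP_* set, older peers only DP_RW_SYNC. A small illustration with a made-up flag combination:

	/* hypothetical input: the peer marked this write sync + FUA */
	u32 dp_flags = DP_RW_SYNC | DP_FUA;
	int rw = WRITE | write_flags_to_bio(mdev, dp_flags);

	/* agreed_pro_version >= 95: rw == WRITE | REQ_SYNC | REQ_FUA
	 * agreed_pro_version <  95: rw == WRITE | REQ_SYNC | REQ_UNPLUG
	 * (for pre-95 peers only DP_RW_SYNC is consulted, per the function above) */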
1778
1768/* mirrored write */ 1779/* mirrored write */
1769static int receive_Data(struct drbd_conf *mdev, struct p_header *h) 1780static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
1770{ 1781{
1771 sector_t sector; 1782 sector_t sector;
1772 struct drbd_epoch_entry *e; 1783 struct drbd_epoch_entry *e;
1773 struct p_data *p = (struct p_data *)h; 1784 struct p_data *p = &mdev->data.rbuf.data;
1774 int header_size, data_size;
1775 int rw = WRITE; 1785 int rw = WRITE;
1776 u32 dp_flags; 1786 u32 dp_flags;
1777 1787
1778 header_size = sizeof(*p) - sizeof(*h);
1779 data_size = h->length - header_size;
1780
1781 ERR_IF(data_size == 0) return FALSE;
1782
1783 if (drbd_recv(mdev, h->payload, header_size) != header_size)
1784 return FALSE;
1785
1786 if (!get_ldev(mdev)) { 1788 if (!get_ldev(mdev)) {
1787 if (__ratelimit(&drbd_ratelimit_state)) 1789 if (__ratelimit(&drbd_ratelimit_state))
1788 dev_err(DEV, "Can not write mirrored data block " 1790 dev_err(DEV, "Can not write mirrored data block "
@@ -1792,7 +1794,7 @@ static int receive_Data(struct drbd_conf *mdev, struct p_header *h)
1792 mdev->peer_seq++; 1794 mdev->peer_seq++;
1793 spin_unlock(&mdev->peer_seq_lock); 1795 spin_unlock(&mdev->peer_seq_lock);
1794 1796
1795 drbd_send_ack_dp(mdev, P_NEG_ACK, p); 1797 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
1796 atomic_inc(&mdev->current_epoch->epoch_size); 1798 atomic_inc(&mdev->current_epoch->epoch_size);
1797 return drbd_drain_block(mdev, data_size); 1799 return drbd_drain_block(mdev, data_size);
1798 } 1800 }
@@ -1839,12 +1841,8 @@ static int receive_Data(struct drbd_conf *mdev, struct p_header *h)
1839 spin_unlock(&mdev->epoch_lock); 1841 spin_unlock(&mdev->epoch_lock);
1840 1842
1841 dp_flags = be32_to_cpu(p->dp_flags); 1843 dp_flags = be32_to_cpu(p->dp_flags);
1842 if (dp_flags & DP_HARDBARRIER) { 1844 rw |= write_flags_to_bio(mdev, dp_flags);
1843 dev_err(DEV, "ASSERT FAILED would have submitted barrier request\n"); 1845
1844 /* rw |= REQ_HARDBARRIER; */
1845 }
1846 if (dp_flags & DP_RW_SYNC)
1847 rw |= REQ_SYNC | REQ_UNPLUG;
1848 if (dp_flags & DP_MAY_SET_IN_SYNC) 1846 if (dp_flags & DP_MAY_SET_IN_SYNC)
1849 e->flags |= EE_MAY_SET_IN_SYNC; 1847 e->flags |= EE_MAY_SET_IN_SYNC;
1850 1848
@@ -2007,6 +2005,16 @@ static int receive_Data(struct drbd_conf *mdev, struct p_header *h)
2007 if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) 2005 if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0)
2008 return TRUE; 2006 return TRUE;
2009 2007
2008 /* drbd_submit_ee currently fails for one reason only:
2009 * not being able to allocate enough bios.
2010 * Is dropping the connection going to help? */
2011 spin_lock_irq(&mdev->req_lock);
2012 list_del(&e->w.list);
2013 hlist_del_init(&e->colision);
2014 spin_unlock_irq(&mdev->req_lock);
2015 if (e->flags & EE_CALL_AL_COMPLETE_IO)
2016 drbd_al_complete_io(mdev, e->sector);
2017
2010out_interrupted: 2018out_interrupted:
2011 /* yes, the epoch_size now is imbalanced. 2019 /* yes, the epoch_size now is imbalanced.
2012 * but we drop the connection anyways, so we don't have a chance to 2020 * but we drop the connection anyways, so we don't have a chance to
@@ -2016,20 +2024,64 @@ out_interrupted:
2016 return FALSE; 2024 return FALSE;
2017} 2025}
2018 2026
2019static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) 2027/* We may throttle resync, if the lower device seems to be busy,
2028 * and current sync rate is above c_min_rate.
2029 *
2030 * To decide whether or not the lower device is busy, we use a scheme similar
2031 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2032 * (more than 64 sectors) of activity we cannot account for with our own resync
2033 * activity, it obviously is "busy".
2034 *
2035 * The current sync rate used here uses only the most recent two step marks,
2036 * to have a short time average so we can react faster.
2037 */
2038int drbd_rs_should_slow_down(struct drbd_conf *mdev)
2039{
2040 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
2041 unsigned long db, dt, dbdt;
2042 int curr_events;
2043 int throttle = 0;
2044
2045 /* feature disabled? */
2046 if (mdev->sync_conf.c_min_rate == 0)
2047 return 0;
2048
2049 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2050 (int)part_stat_read(&disk->part0, sectors[1]) -
2051 atomic_read(&mdev->rs_sect_ev);
2052 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
2053 unsigned long rs_left;
2054 int i;
2055
2056 mdev->rs_last_events = curr_events;
2057
2058 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2059 * approx. */
2060 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-2) % DRBD_SYNC_MARKS;
2061 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
2062
2063 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2064 if (!dt)
2065 dt++;
2066 db = mdev->rs_mark_left[i] - rs_left;
2067 dbdt = Bit2KB(db/dt);
2068
2069 if (dbdt > mdev->sync_conf.c_min_rate)
2070 throttle = 1;
2071 }
2072 return throttle;
2073}
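To make the throttle criterion concrete, here is a hedged walk-through with made-up numbers (assuming, as usual for DRBD, that one bitmap bit covers 4 KiB, i.e. Bit2KB(x) == x << 2):

	/* hypothetical sample: since the chosen sync mark, about 3 s ago,
	 * 30000 bitmap bits went from out-of-sync to in-sync */
	db   = 30000;			/* rs_mark_left[i] - rs_left   */
	dt   = 3;			/* seconds since that mark     */
	dbdt = Bit2KB(db / dt);		/* 10000 << 2 == 40000 KiB/s   */

	/* with c_min_rate set to e.g. 4000 KiB/s, 40000 > 4000, so
	 * drbd_rs_should_slow_down() returns 1 -- provided the backing
	 * device also showed more than 64 sectors of activity that our
	 * own resync cannot account for (or this is the first sample) */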
2074
2075
2076static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int digest_size)
2020{ 2077{
2021 sector_t sector; 2078 sector_t sector;
2022 const sector_t capacity = drbd_get_capacity(mdev->this_bdev); 2079 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
2023 struct drbd_epoch_entry *e; 2080 struct drbd_epoch_entry *e;
2024 struct digest_info *di = NULL; 2081 struct digest_info *di = NULL;
2025 int size, digest_size; 2082 int size, verb;
2026 unsigned int fault_type; 2083 unsigned int fault_type;
2027 struct p_block_req *p = 2084 struct p_block_req *p = &mdev->data.rbuf.block_req;
2028 (struct p_block_req *)h;
2029 const int brps = sizeof(*p)-sizeof(*h);
2030
2031 if (drbd_recv(mdev, h->payload, brps) != brps)
2032 return FALSE;
2033 2085
2034 sector = be64_to_cpu(p->sector); 2086 sector = be64_to_cpu(p->sector);
2035 size = be32_to_cpu(p->blksize); 2087 size = be32_to_cpu(p->blksize);
@@ -2046,12 +2098,31 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
2046 } 2098 }
2047 2099
2048 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) { 2100 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
2049 if (__ratelimit(&drbd_ratelimit_state)) 2101 verb = 1;
2102 switch (cmd) {
2103 case P_DATA_REQUEST:
2104 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2105 break;
2106 case P_RS_DATA_REQUEST:
2107 case P_CSUM_RS_REQUEST:
2108 case P_OV_REQUEST:
2109 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2110 break;
2111 case P_OV_REPLY:
2112 verb = 0;
2113 dec_rs_pending(mdev);
2114 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2115 break;
2116 default:
2117 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
2118 cmdname(cmd));
2119 }
2120 if (verb && __ratelimit(&drbd_ratelimit_state))
2050 dev_err(DEV, "Can not satisfy peer's read request, " 2121 dev_err(DEV, "Can not satisfy peer's read request, "
2051 "no local data.\n"); 2122 "no local data.\n");
2052 drbd_send_ack_rp(mdev, h->command == P_DATA_REQUEST ? P_NEG_DREPLY : 2123
2053 P_NEG_RS_DREPLY , p); 2124 /* drain possible payload */
2054 return drbd_drain_block(mdev, h->length - brps); 2125 return drbd_drain_block(mdev, digest_size);
2055 } 2126 }
2056 2127
2057 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD 2128 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
@@ -2063,31 +2134,21 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
2063 return FALSE; 2134 return FALSE;
2064 } 2135 }
2065 2136
2066 switch (h->command) { 2137 switch (cmd) {
2067 case P_DATA_REQUEST: 2138 case P_DATA_REQUEST:
2068 e->w.cb = w_e_end_data_req; 2139 e->w.cb = w_e_end_data_req;
2069 fault_type = DRBD_FAULT_DT_RD; 2140 fault_type = DRBD_FAULT_DT_RD;
2070 break; 2141 /* application IO, don't drbd_rs_begin_io */
2142 goto submit;
2143
2071 case P_RS_DATA_REQUEST: 2144 case P_RS_DATA_REQUEST:
2072 e->w.cb = w_e_end_rsdata_req; 2145 e->w.cb = w_e_end_rsdata_req;
2073 fault_type = DRBD_FAULT_RS_RD; 2146 fault_type = DRBD_FAULT_RS_RD;
2074 /* Eventually this should become asynchronously. Currently it
2075 * blocks the whole receiver just to delay the reading of a
2076 * resync data block.
2077 * the drbd_work_queue mechanism is made for this...
2078 */
2079 if (!drbd_rs_begin_io(mdev, sector)) {
2080 /* we have been interrupted,
2081 * probably connection lost! */
2082 D_ASSERT(signal_pending(current));
2083 goto out_free_e;
2084 }
2085 break; 2147 break;
2086 2148
2087 case P_OV_REPLY: 2149 case P_OV_REPLY:
2088 case P_CSUM_RS_REQUEST: 2150 case P_CSUM_RS_REQUEST:
2089 fault_type = DRBD_FAULT_RS_RD; 2151 fault_type = DRBD_FAULT_RS_RD;
2090 digest_size = h->length - brps ;
2091 di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO); 2152 di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
2092 if (!di) 2153 if (!di)
2093 goto out_free_e; 2154 goto out_free_e;
@@ -2095,31 +2156,25 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
2095 di->digest_size = digest_size; 2156 di->digest_size = digest_size;
2096 di->digest = (((char *)di)+sizeof(struct digest_info)); 2157 di->digest = (((char *)di)+sizeof(struct digest_info));
2097 2158
2159 e->digest = di;
2160 e->flags |= EE_HAS_DIGEST;
2161
2098 if (drbd_recv(mdev, di->digest, digest_size) != digest_size) 2162 if (drbd_recv(mdev, di->digest, digest_size) != digest_size)
2099 goto out_free_e; 2163 goto out_free_e;
2100 2164
2101 e->block_id = (u64)(unsigned long)di; 2165 if (cmd == P_CSUM_RS_REQUEST) {
2102 if (h->command == P_CSUM_RS_REQUEST) {
2103 D_ASSERT(mdev->agreed_pro_version >= 89); 2166 D_ASSERT(mdev->agreed_pro_version >= 89);
2104 e->w.cb = w_e_end_csum_rs_req; 2167 e->w.cb = w_e_end_csum_rs_req;
2105 } else if (h->command == P_OV_REPLY) { 2168 } else if (cmd == P_OV_REPLY) {
2106 e->w.cb = w_e_end_ov_reply; 2169 e->w.cb = w_e_end_ov_reply;
2107 dec_rs_pending(mdev); 2170 dec_rs_pending(mdev);
2108 break; 2171 /* drbd_rs_begin_io done when we sent this request,
2109 } 2172 * but accounting still needs to be done. */
2110 2173 goto submit_for_resync;
2111 if (!drbd_rs_begin_io(mdev, sector)) {
2112 /* we have been interrupted, probably connection lost! */
2113 D_ASSERT(signal_pending(current));
2114 goto out_free_e;
2115 } 2174 }
2116 break; 2175 break;
2117 2176
2118 case P_OV_REQUEST: 2177 case P_OV_REQUEST:
2119 if (mdev->state.conn >= C_CONNECTED &&
2120 mdev->state.conn != C_VERIFY_T)
2121 dev_warn(DEV, "ASSERT FAILED: got P_OV_REQUEST while being %s\n",
2122 drbd_conn_str(mdev->state.conn));
2123 if (mdev->ov_start_sector == ~(sector_t)0 && 2178 if (mdev->ov_start_sector == ~(sector_t)0 &&
2124 mdev->agreed_pro_version >= 90) { 2179 mdev->agreed_pro_version >= 90) {
2125 mdev->ov_start_sector = sector; 2180 mdev->ov_start_sector = sector;
@@ -2130,37 +2185,63 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h)
2130 } 2185 }
2131 e->w.cb = w_e_end_ov_req; 2186 e->w.cb = w_e_end_ov_req;
2132 fault_type = DRBD_FAULT_RS_RD; 2187 fault_type = DRBD_FAULT_RS_RD;
2133 /* Eventually this should become asynchronous. Currently it
2134 * blocks the whole receiver just to delay the reading of a
2135 * resync data block.
2136 * the drbd_work_queue mechanism is made for this...
2137 */
2138 if (!drbd_rs_begin_io(mdev, sector)) {
2139 /* we have been interrupted,
2140 * probably connection lost! */
2141 D_ASSERT(signal_pending(current));
2142 goto out_free_e;
2143 }
2144 break; 2188 break;
2145 2189
2146
2147 default: 2190 default:
2148 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n", 2191 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
2149 cmdname(h->command)); 2192 cmdname(cmd));
2150 fault_type = DRBD_FAULT_MAX; 2193 fault_type = DRBD_FAULT_MAX;
2194 goto out_free_e;
2151 } 2195 }
2152 2196
2153 spin_lock_irq(&mdev->req_lock); 2197 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2154 list_add(&e->w.list, &mdev->read_ee); 2198 * wrt the receiver, but it is not as straightforward as it may seem.
2155 spin_unlock_irq(&mdev->req_lock); 2199 * Various places in the resync start and stop logic assume resync
2200 * requests are processed in order, requeuing this on the worker thread
2201 * introduces a bunch of new code for synchronization between threads.
2202 *
2203 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2204 * "forever", throttling after drbd_rs_begin_io will lock that extent
2205 * for application writes for the same time. For now, just throttle
2206 * here, where the rest of the code expects the receiver to sleep for
2207 * a while, anyways.
2208 */
2209
2210 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2211 * this defers syncer requests for some time, before letting at least
2212 * one request through. The resync controller on the receiving side
2213 * will adapt to the incoming rate accordingly.
2214 *
2215 * We cannot throttle here if remote is Primary/SyncTarget:
2216 * we would also throttle its application reads.
2217 * In that case, throttling is done on the SyncTarget only.
2218 */
2219 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev))
2220 msleep(100);
2221 if (drbd_rs_begin_io(mdev, e->sector))
2222 goto out_free_e;
2156 2223
2224submit_for_resync:
2225 atomic_add(size >> 9, &mdev->rs_sect_ev);
2226
2227submit:
2157 inc_unacked(mdev); 2228 inc_unacked(mdev);
2229 spin_lock_irq(&mdev->req_lock);
2230 list_add_tail(&e->w.list, &mdev->read_ee);
2231 spin_unlock_irq(&mdev->req_lock);
2158 2232
2159 if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) 2233 if (drbd_submit_ee(mdev, e, READ, fault_type) == 0)
2160 return TRUE; 2234 return TRUE;
2161 2235
2236 /* drbd_submit_ee currently fails for one reason only:
2237 * not being able to allocate enough bios.
2238 * Is dropping the connection going to help? */
2239 spin_lock_irq(&mdev->req_lock);
2240 list_del(&e->w.list);
2241 spin_unlock_irq(&mdev->req_lock);
2242 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2243
2162out_free_e: 2244out_free_e:
2163 kfree(di);
2164 put_ldev(mdev); 2245 put_ldev(mdev);
2165 drbd_free_ee(mdev, e); 2246 drbd_free_ee(mdev, e);
2166 return FALSE; 2247 return FALSE;
@@ -2699,20 +2780,13 @@ static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2699 return 1; 2780 return 1;
2700} 2781}
2701 2782
2702static int receive_protocol(struct drbd_conf *mdev, struct p_header *h) 2783static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
2703{ 2784{
2704 struct p_protocol *p = (struct p_protocol *)h; 2785 struct p_protocol *p = &mdev->data.rbuf.protocol;
2705 int header_size, data_size;
2706 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; 2786 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
2707 int p_want_lose, p_two_primaries, cf; 2787 int p_want_lose, p_two_primaries, cf;
2708 char p_integrity_alg[SHARED_SECRET_MAX] = ""; 2788 char p_integrity_alg[SHARED_SECRET_MAX] = "";
2709 2789
2710 header_size = sizeof(*p) - sizeof(*h);
2711 data_size = h->length - header_size;
2712
2713 if (drbd_recv(mdev, h->payload, header_size) != header_size)
2714 return FALSE;
2715
2716 p_proto = be32_to_cpu(p->protocol); 2790 p_proto = be32_to_cpu(p->protocol);
2717 p_after_sb_0p = be32_to_cpu(p->after_sb_0p); 2791 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
2718 p_after_sb_1p = be32_to_cpu(p->after_sb_1p); 2792 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
@@ -2805,39 +2879,46 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2805 return tfm; 2879 return tfm;
2806} 2880}
2807 2881
2808static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) 2882static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size)
2809{ 2883{
2810 int ok = TRUE; 2884 int ok = TRUE;
2811 struct p_rs_param_89 *p = (struct p_rs_param_89 *)h; 2885 struct p_rs_param_95 *p = &mdev->data.rbuf.rs_param_95;
2812 unsigned int header_size, data_size, exp_max_sz; 2886 unsigned int header_size, data_size, exp_max_sz;
2813 struct crypto_hash *verify_tfm = NULL; 2887 struct crypto_hash *verify_tfm = NULL;
2814 struct crypto_hash *csums_tfm = NULL; 2888 struct crypto_hash *csums_tfm = NULL;
2815 const int apv = mdev->agreed_pro_version; 2889 const int apv = mdev->agreed_pro_version;
2890 int *rs_plan_s = NULL;
2891 int fifo_size = 0;
2816 2892
2817 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) 2893 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
2818 : apv == 88 ? sizeof(struct p_rs_param) 2894 : apv == 88 ? sizeof(struct p_rs_param)
2819 + SHARED_SECRET_MAX 2895 + SHARED_SECRET_MAX
2820 : /* 89 */ sizeof(struct p_rs_param_89); 2896 : apv <= 94 ? sizeof(struct p_rs_param_89)
2897 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
2821 2898
2822 if (h->length > exp_max_sz) { 2899 if (packet_size > exp_max_sz) {
2823 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", 2900 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
2824 h->length, exp_max_sz); 2901 packet_size, exp_max_sz);
2825 return FALSE; 2902 return FALSE;
2826 } 2903 }
2827 2904
2828 if (apv <= 88) { 2905 if (apv <= 88) {
2829 header_size = sizeof(struct p_rs_param) - sizeof(*h); 2906 header_size = sizeof(struct p_rs_param) - sizeof(struct p_header80);
2830 data_size = h->length - header_size; 2907 data_size = packet_size - header_size;
2831 } else /* apv >= 89 */ { 2908 } else if (apv <= 94) {
2832 header_size = sizeof(struct p_rs_param_89) - sizeof(*h); 2909 header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header80);
2833 data_size = h->length - header_size; 2910 data_size = packet_size - header_size;
2911 D_ASSERT(data_size == 0);
2912 } else {
2913 header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header80);
2914 data_size = packet_size - header_size;
2834 D_ASSERT(data_size == 0); 2915 D_ASSERT(data_size == 0);
2835 } 2916 }
2836 2917
2837 /* initialize verify_alg and csums_alg */ 2918 /* initialize verify_alg and csums_alg */
2838 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); 2919 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
2839 2920
2840 if (drbd_recv(mdev, h->payload, header_size) != header_size) 2921 if (drbd_recv(mdev, &p->head.payload, header_size) != header_size)
2841 return FALSE; 2922 return FALSE;
2842 2923
2843 mdev->sync_conf.rate = be32_to_cpu(p->rate); 2924 mdev->sync_conf.rate = be32_to_cpu(p->rate);
@@ -2896,6 +2977,22 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
2896 } 2977 }
2897 } 2978 }
2898 2979
2980 if (apv > 94) {
2981 mdev->sync_conf.rate = be32_to_cpu(p->rate);
2982 mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
2983 mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
2984 mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
2985 mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
2986
2987 fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
2988 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
2989 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
2990 if (!rs_plan_s) {
2991 dev_err(DEV, "kzalloc of fifo_buffer failed\n");
2992 goto disconnect;
2993 }
2994 }
2995 }
2899 2996
2900 spin_lock(&mdev->peer_seq_lock); 2997 spin_lock(&mdev->peer_seq_lock);
2901 /* lock against drbd_nl_syncer_conf() */ 2998 /* lock against drbd_nl_syncer_conf() */
@@ -2913,6 +3010,12 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h)
2913 mdev->csums_tfm = csums_tfm; 3010 mdev->csums_tfm = csums_tfm;
2914 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); 3011 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
2915 } 3012 }
3013 if (fifo_size != mdev->rs_plan_s.size) {
3014 kfree(mdev->rs_plan_s.values);
3015 mdev->rs_plan_s.values = rs_plan_s;
3016 mdev->rs_plan_s.size = fifo_size;
3017 mdev->rs_planed = 0;
3018 }
2916 spin_unlock(&mdev->peer_seq_lock); 3019 spin_unlock(&mdev->peer_seq_lock);
2917 } 3020 }
2918 3021
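The fifo sizing above ties the plan-ahead window to the syncer's wakeup period. Assuming SLEEP_TIME is HZ/10 (one syncer iteration per 100 ms, as used in drbd_worker.c) and c_plan_ahead is given in 0.1 s units, the expression collapses to one fifo slot per planning step:

	/* hedged sketch of the arithmetic, not literal code */
	fifo_size = (c_plan_ahead * 10 * SLEEP_TIME) / HZ;
	/* with SLEEP_TIME == HZ/10:
	 *   fifo_size == c_plan_ahead
	 * e.g. c_plan_ahead = 20 (plan 2 s ahead) -> a 20-entry int ring.
	 * The buffer is swapped in further below, under peer_seq_lock, and
	 * only if the size actually changed, so a failed allocation simply
	 * falls through to "disconnect" without corrupting the old plan. */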
@@ -2946,19 +3049,15 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev,
2946 (unsigned long long)a, (unsigned long long)b); 3049 (unsigned long long)a, (unsigned long long)b);
2947} 3050}
2948 3051
2949static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) 3052static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
2950{ 3053{
2951 struct p_sizes *p = (struct p_sizes *)h; 3054 struct p_sizes *p = &mdev->data.rbuf.sizes;
2952 enum determine_dev_size dd = unchanged; 3055 enum determine_dev_size dd = unchanged;
2953 unsigned int max_seg_s; 3056 unsigned int max_seg_s;
2954 sector_t p_size, p_usize, my_usize; 3057 sector_t p_size, p_usize, my_usize;
2955 int ldsc = 0; /* local disk size changed */ 3058 int ldsc = 0; /* local disk size changed */
2956 enum dds_flags ddsf; 3059 enum dds_flags ddsf;
2957 3060
2958 ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
2959 if (drbd_recv(mdev, h->payload, h->length) != h->length)
2960 return FALSE;
2961
2962 p_size = be64_to_cpu(p->d_size); 3061 p_size = be64_to_cpu(p->d_size);
2963 p_usize = be64_to_cpu(p->u_size); 3062 p_usize = be64_to_cpu(p->u_size);
2964 3063
@@ -3028,6 +3127,8 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h)
3028 3127
3029 if (mdev->agreed_pro_version < 94) 3128 if (mdev->agreed_pro_version < 94)
3030 max_seg_s = be32_to_cpu(p->max_segment_size); 3129 max_seg_s = be32_to_cpu(p->max_segment_size);
3130 else if (mdev->agreed_pro_version == 94)
3131 max_seg_s = DRBD_MAX_SIZE_H80_PACKET;
3031 else /* drbd 8.3.8 onwards */ 3132 else /* drbd 8.3.8 onwards */
3032 max_seg_s = DRBD_MAX_SEGMENT_SIZE; 3133 max_seg_s = DRBD_MAX_SEGMENT_SIZE;
3033 3134
@@ -3061,16 +3162,12 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h)
3061 return TRUE; 3162 return TRUE;
3062} 3163}
3063 3164
3064static int receive_uuids(struct drbd_conf *mdev, struct p_header *h) 3165static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
3065{ 3166{
3066 struct p_uuids *p = (struct p_uuids *)h; 3167 struct p_uuids *p = &mdev->data.rbuf.uuids;
3067 u64 *p_uuid; 3168 u64 *p_uuid;
3068 int i; 3169 int i;
3069 3170
3070 ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
3071 if (drbd_recv(mdev, h->payload, h->length) != h->length)
3072 return FALSE;
3073
3074 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); 3171 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3075 3172
3076 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++) 3173 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
@@ -3106,6 +3203,11 @@ static int receive_uuids(struct drbd_conf *mdev, struct p_header *h)
3106 drbd_md_sync(mdev); 3203 drbd_md_sync(mdev);
3107 } 3204 }
3108 put_ldev(mdev); 3205 put_ldev(mdev);
3206 } else if (mdev->state.disk < D_INCONSISTENT &&
3207 mdev->state.role == R_PRIMARY) {
3208 /* I am a diskless primary, the peer just created a new current UUID
3209 for me. */
3210 drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3109 } 3211 }
3110 3212
3111 /* Before we test for the disk state, we should wait until an eventually 3213 /* Before we test for the disk state, we should wait until an eventually
@@ -3149,16 +3251,12 @@ static union drbd_state convert_state(union drbd_state ps)
3149 return ms; 3251 return ms;
3150} 3252}
3151 3253
3152static int receive_req_state(struct drbd_conf *mdev, struct p_header *h) 3254static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
3153{ 3255{
3154 struct p_req_state *p = (struct p_req_state *)h; 3256 struct p_req_state *p = &mdev->data.rbuf.req_state;
3155 union drbd_state mask, val; 3257 union drbd_state mask, val;
3156 int rv; 3258 int rv;
3157 3259
3158 ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
3159 if (drbd_recv(mdev, h->payload, h->length) != h->length)
3160 return FALSE;
3161
3162 mask.i = be32_to_cpu(p->mask); 3260 mask.i = be32_to_cpu(p->mask);
3163 val.i = be32_to_cpu(p->val); 3261 val.i = be32_to_cpu(p->val);
3164 3262
@@ -3179,20 +3277,14 @@ static int receive_req_state(struct drbd_conf *mdev, struct p_header *h)
3179 return TRUE; 3277 return TRUE;
3180} 3278}
3181 3279
3182static int receive_state(struct drbd_conf *mdev, struct p_header *h) 3280static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
3183{ 3281{
3184 struct p_state *p = (struct p_state *)h; 3282 struct p_state *p = &mdev->data.rbuf.state;
3185 enum drbd_conns nconn, oconn; 3283 union drbd_state os, ns, peer_state;
3186 union drbd_state ns, peer_state;
3187 enum drbd_disk_state real_peer_disk; 3284 enum drbd_disk_state real_peer_disk;
3285 enum chg_state_flags cs_flags;
3188 int rv; 3286 int rv;
3189 3287
3190 ERR_IF(h->length != (sizeof(*p)-sizeof(*h)))
3191 return FALSE;
3192
3193 if (drbd_recv(mdev, h->payload, h->length) != h->length)
3194 return FALSE;
3195
3196 peer_state.i = be32_to_cpu(p->state); 3288 peer_state.i = be32_to_cpu(p->state);
3197 3289
3198 real_peer_disk = peer_state.disk; 3290 real_peer_disk = peer_state.disk;
@@ -3203,38 +3295,72 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h)
3203 3295
3204 spin_lock_irq(&mdev->req_lock); 3296 spin_lock_irq(&mdev->req_lock);
3205 retry: 3297 retry:
3206 oconn = nconn = mdev->state.conn; 3298 os = ns = mdev->state;
3207 spin_unlock_irq(&mdev->req_lock); 3299 spin_unlock_irq(&mdev->req_lock);
3208 3300
3209 if (nconn == C_WF_REPORT_PARAMS) 3301 /* peer says his disk is uptodate, while we think it is inconsistent,
3210 nconn = C_CONNECTED; 3302 * and this happens while we think we have a sync going on. */
3303 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
3304 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3305 /* If we are (becoming) SyncSource, but peer is still in sync
3306 * preparation, ignore its uptodate-ness to avoid flapping, it
3307 * will change to inconsistent once the peer reaches active
3308 * syncing states.
3309 * It may have changed syncer-paused flags, however, so we
3310 * cannot ignore this completely. */
3311 if (peer_state.conn > C_CONNECTED &&
3312 peer_state.conn < C_SYNC_SOURCE)
3313 real_peer_disk = D_INCONSISTENT;
3314
3315 /* if peer_state changes to connected at the same time,
3316 * it explicitly notifies us that it finished resync.
3317 * Maybe we should finish it up, too? */
3318 else if (os.conn >= C_SYNC_SOURCE &&
3319 peer_state.conn == C_CONNECTED) {
3320 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3321 drbd_resync_finished(mdev);
3322 return TRUE;
3323 }
3324 }
3325
3326 /* peer says his disk is inconsistent, while we think it is uptodate,
3327 * and this happens while the peer still thinks we have a sync going on,
3328 * but we think we are already done with the sync.
3329 * We ignore this to avoid flapping pdsk.
3330 * This should not happen, if the peer is a recent version of drbd. */
3331 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3332 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3333 real_peer_disk = D_UP_TO_DATE;
3334
3335 if (ns.conn == C_WF_REPORT_PARAMS)
3336 ns.conn = C_CONNECTED;
3211 3337
3212 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING && 3338 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3213 get_ldev_if_state(mdev, D_NEGOTIATING)) { 3339 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3214 int cr; /* consider resync */ 3340 int cr; /* consider resync */
3215 3341
3216 /* if we established a new connection */ 3342 /* if we established a new connection */
3217 cr = (oconn < C_CONNECTED); 3343 cr = (os.conn < C_CONNECTED);
3218 /* if we had an established connection 3344 /* if we had an established connection
3219 * and one of the nodes newly attaches a disk */ 3345 * and one of the nodes newly attaches a disk */
3220 cr |= (oconn == C_CONNECTED && 3346 cr |= (os.conn == C_CONNECTED &&
3221 (peer_state.disk == D_NEGOTIATING || 3347 (peer_state.disk == D_NEGOTIATING ||
3222 mdev->state.disk == D_NEGOTIATING)); 3348 os.disk == D_NEGOTIATING));
3223 /* if we have both been inconsistent, and the peer has been 3349 /* if we have both been inconsistent, and the peer has been
3224 * forced to be UpToDate with --overwrite-data */ 3350 * forced to be UpToDate with --overwrite-data */
3225 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags); 3351 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3226 /* if we had been plain connected, and the admin requested to 3352 /* if we had been plain connected, and the admin requested to
3227 * start a sync by "invalidate" or "invalidate-remote" */ 3353 * start a sync by "invalidate" or "invalidate-remote" */
3228 cr |= (oconn == C_CONNECTED && 3354 cr |= (os.conn == C_CONNECTED &&
3229 (peer_state.conn >= C_STARTING_SYNC_S && 3355 (peer_state.conn >= C_STARTING_SYNC_S &&
3230 peer_state.conn <= C_WF_BITMAP_T)); 3356 peer_state.conn <= C_WF_BITMAP_T));
3231 3357
3232 if (cr) 3358 if (cr)
3233 nconn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk); 3359 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
3234 3360
3235 put_ldev(mdev); 3361 put_ldev(mdev);
3236 if (nconn == C_MASK) { 3362 if (ns.conn == C_MASK) {
3237 nconn = C_CONNECTED; 3363 ns.conn = C_CONNECTED;
3238 if (mdev->state.disk == D_NEGOTIATING) { 3364 if (mdev->state.disk == D_NEGOTIATING) {
3239 drbd_force_state(mdev, NS(disk, D_DISKLESS)); 3365 drbd_force_state(mdev, NS(disk, D_DISKLESS));
3240 } else if (peer_state.disk == D_NEGOTIATING) { 3366 } else if (peer_state.disk == D_NEGOTIATING) {
@@ -3244,7 +3370,7 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h)
3244 } else { 3370 } else {
3245 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) 3371 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
3246 return FALSE; 3372 return FALSE;
3247 D_ASSERT(oconn == C_WF_REPORT_PARAMS); 3373 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
3248 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 3374 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
3249 return FALSE; 3375 return FALSE;
3250 } 3376 }
@@ -3252,18 +3378,28 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h)
3252 } 3378 }
3253 3379
3254 spin_lock_irq(&mdev->req_lock); 3380 spin_lock_irq(&mdev->req_lock);
3255 if (mdev->state.conn != oconn) 3381 if (mdev->state.i != os.i)
3256 goto retry; 3382 goto retry;
3257 clear_bit(CONSIDER_RESYNC, &mdev->flags); 3383 clear_bit(CONSIDER_RESYNC, &mdev->flags);
3258 ns.i = mdev->state.i;
3259 ns.conn = nconn;
3260 ns.peer = peer_state.role; 3384 ns.peer = peer_state.role;
3261 ns.pdsk = real_peer_disk; 3385 ns.pdsk = real_peer_disk;
3262 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp); 3386 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
3263 if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) 3387 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
3264 ns.disk = mdev->new_state_tmp.disk; 3388 ns.disk = mdev->new_state_tmp.disk;
3265 3389 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
3266 rv = _drbd_set_state(mdev, ns, CS_VERBOSE | CS_HARD, NULL); 3390 if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
3391 test_bit(NEW_CUR_UUID, &mdev->flags)) {
3392 /* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this
3393 for temporal network outages! */
3394 spin_unlock_irq(&mdev->req_lock);
3395 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
3396 tl_clear(mdev);
3397 drbd_uuid_new_current(mdev);
3398 clear_bit(NEW_CUR_UUID, &mdev->flags);
3399 drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
3400 return FALSE;
3401 }
3402 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
3267 ns = mdev->state; 3403 ns = mdev->state;
3268 spin_unlock_irq(&mdev->req_lock); 3404 spin_unlock_irq(&mdev->req_lock);
3269 3405
@@ -3272,8 +3408,8 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h)
3272 return FALSE; 3408 return FALSE;
3273 } 3409 }
3274 3410
3275 if (oconn > C_WF_REPORT_PARAMS) { 3411 if (os.conn > C_WF_REPORT_PARAMS) {
3276 if (nconn > C_CONNECTED && peer_state.conn <= C_CONNECTED && 3412 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
3277 peer_state.disk != D_NEGOTIATING ) { 3413 peer_state.disk != D_NEGOTIATING ) {
3278 /* we want resync, peer has not yet decided to sync... */ 3414 /* we want resync, peer has not yet decided to sync... */
3279 /* Nowadays only used when forcing a node into primary role and 3415 /* Nowadays only used when forcing a node into primary role and
@@ -3290,9 +3426,9 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h)
3290 return TRUE; 3426 return TRUE;
3291} 3427}
3292 3428
3293static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h) 3429static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
3294{ 3430{
3295 struct p_rs_uuid *p = (struct p_rs_uuid *)h; 3431 struct p_rs_uuid *p = &mdev->data.rbuf.rs_uuid;
3296 3432
3297 wait_event(mdev->misc_wait, 3433 wait_event(mdev->misc_wait,
3298 mdev->state.conn == C_WF_SYNC_UUID || 3434 mdev->state.conn == C_WF_SYNC_UUID ||
@@ -3301,10 +3437,6 @@ static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h)
3301 3437
3302 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */ 3438 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3303 3439
3304 ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
3305 if (drbd_recv(mdev, h->payload, h->length) != h->length)
3306 return FALSE;
3307
3308 /* Here the _drbd_uuid_ functions are right, current should 3440 /* Here the _drbd_uuid_ functions are right, current should
3309 _not_ be rotated into the history */ 3441 _not_ be rotated into the history */
3310 if (get_ldev_if_state(mdev, D_NEGOTIATING)) { 3442 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
@@ -3323,14 +3455,14 @@ static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h)
3323enum receive_bitmap_ret { OK, DONE, FAILED }; 3455enum receive_bitmap_ret { OK, DONE, FAILED };
3324 3456
3325static enum receive_bitmap_ret 3457static enum receive_bitmap_ret
3326receive_bitmap_plain(struct drbd_conf *mdev, struct p_header *h, 3458receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3327 unsigned long *buffer, struct bm_xfer_ctx *c) 3459 unsigned long *buffer, struct bm_xfer_ctx *c)
3328{ 3460{
3329 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); 3461 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3330 unsigned want = num_words * sizeof(long); 3462 unsigned want = num_words * sizeof(long);
3331 3463
3332 if (want != h->length) { 3464 if (want != data_size) {
3333 dev_err(DEV, "%s:want (%u) != h->length (%u)\n", __func__, want, h->length); 3465 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
3334 return FAILED; 3466 return FAILED;
3335 } 3467 }
3336 if (want == 0) 3468 if (want == 0)
@@ -3359,7 +3491,7 @@ recv_bm_rle_bits(struct drbd_conf *mdev,
3359 u64 tmp; 3491 u64 tmp;
3360 unsigned long s = c->bit_offset; 3492 unsigned long s = c->bit_offset;
3361 unsigned long e; 3493 unsigned long e;
3362 int len = p->head.length - (sizeof(*p) - sizeof(p->head)); 3494 int len = be16_to_cpu(p->head.length) - (sizeof(*p) - sizeof(p->head));
3363 int toggle = DCBP_get_start(p); 3495 int toggle = DCBP_get_start(p);
3364 int have; 3496 int have;
3365 int bits; 3497 int bits;
@@ -3428,7 +3560,7 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3428 const char *direction, struct bm_xfer_ctx *c) 3560 const char *direction, struct bm_xfer_ctx *c)
3429{ 3561{
3430 /* what would it take to transfer it "plaintext" */ 3562 /* what would it take to transfer it "plaintext" */
3431 unsigned plain = sizeof(struct p_header) * 3563 unsigned plain = sizeof(struct p_header80) *
3432 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1) 3564 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3433 + c->bm_words * sizeof(long); 3565 + c->bm_words * sizeof(long);
3434 unsigned total = c->bytes[0] + c->bytes[1]; 3566 unsigned total = c->bytes[0] + c->bytes[1];
@@ -3466,12 +3598,13 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3466 in order to be agnostic to the 32 vs 64 bits issue. 3598 in order to be agnostic to the 32 vs 64 bits issue.
3467 3599
3468 returns 0 on failure, 1 if we successfully received it. */ 3600 returns 0 on failure, 1 if we successfully received it. */
3469static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) 3601static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
3470{ 3602{
3471 struct bm_xfer_ctx c; 3603 struct bm_xfer_ctx c;
3472 void *buffer; 3604 void *buffer;
3473 enum receive_bitmap_ret ret; 3605 enum receive_bitmap_ret ret;
3474 int ok = FALSE; 3606 int ok = FALSE;
3607 struct p_header80 *h = &mdev->data.rbuf.header.h80;
3475 3608
3476 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); 3609 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
3477 3610
@@ -3491,39 +3624,39 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h)
3491 }; 3624 };
3492 3625
3493 do { 3626 do {
3494 if (h->command == P_BITMAP) { 3627 if (cmd == P_BITMAP) {
3495 ret = receive_bitmap_plain(mdev, h, buffer, &c); 3628 ret = receive_bitmap_plain(mdev, data_size, buffer, &c);
3496 } else if (h->command == P_COMPRESSED_BITMAP) { 3629 } else if (cmd == P_COMPRESSED_BITMAP) {
3497 /* MAYBE: sanity check that we speak proto >= 90, 3630 /* MAYBE: sanity check that we speak proto >= 90,
3498 * and the feature is enabled! */ 3631 * and the feature is enabled! */
3499 struct p_compressed_bm *p; 3632 struct p_compressed_bm *p;
3500 3633
3501 if (h->length > BM_PACKET_PAYLOAD_BYTES) { 3634 if (data_size > BM_PACKET_PAYLOAD_BYTES) {
3502 dev_err(DEV, "ReportCBitmap packet too large\n"); 3635 dev_err(DEV, "ReportCBitmap packet too large\n");
3503 goto out; 3636 goto out;
3504 } 3637 }
3505 /* use the page buff */ 3638 /* use the page buff */
3506 p = buffer; 3639 p = buffer;
3507 memcpy(p, h, sizeof(*h)); 3640 memcpy(p, h, sizeof(*h));
3508 if (drbd_recv(mdev, p->head.payload, h->length) != h->length) 3641 if (drbd_recv(mdev, p->head.payload, data_size) != data_size)
3509 goto out; 3642 goto out;
3510 if (p->head.length <= (sizeof(*p) - sizeof(p->head))) { 3643 if (data_size <= (sizeof(*p) - sizeof(p->head))) {
3511 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", p->head.length); 3644 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
3512 return FAILED; 3645 return FAILED;
3513 } 3646 }
3514 ret = decode_bitmap_c(mdev, p, &c); 3647 ret = decode_bitmap_c(mdev, p, &c);
3515 } else { 3648 } else {
3516 dev_warn(DEV, "receive_bitmap: h->command neither ReportBitMap nor ReportCBitMap (is 0x%x)", h->command); 3649 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
3517 goto out; 3650 goto out;
3518 } 3651 }
3519 3652
3520 c.packets[h->command == P_BITMAP]++; 3653 c.packets[cmd == P_BITMAP]++;
3521 c.bytes[h->command == P_BITMAP] += sizeof(struct p_header) + h->length; 3654 c.bytes[cmd == P_BITMAP] += sizeof(struct p_header80) + data_size;
3522 3655
3523 if (ret != OK) 3656 if (ret != OK)
3524 break; 3657 break;
3525 3658
3526 if (!drbd_recv_header(mdev, h)) 3659 if (!drbd_recv_header(mdev, &cmd, &data_size))
3527 goto out; 3660 goto out;
3528 } while (ret == OK); 3661 } while (ret == OK);
3529 if (ret == FAILED) 3662 if (ret == FAILED)
@@ -3554,17 +3687,16 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h)
3554 return ok; 3687 return ok;
3555} 3688}
3556 3689
3557static int receive_skip_(struct drbd_conf *mdev, struct p_header *h, int silent) 3690static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
3558{ 3691{
3559 /* TODO zero copy sink :) */ 3692 /* TODO zero copy sink :) */
3560 static char sink[128]; 3693 static char sink[128];
3561 int size, want, r; 3694 int size, want, r;
3562 3695
3563 if (!silent) 3696 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3564 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n", 3697 cmd, data_size);
3565 h->command, h->length);
3566 3698
3567 size = h->length; 3699 size = data_size;
3568 while (size > 0) { 3700 while (size > 0) {
3569 want = min_t(int, size, sizeof(sink)); 3701 want = min_t(int, size, sizeof(sink));
3570 r = drbd_recv(mdev, sink, want); 3702 r = drbd_recv(mdev, sink, want);
@@ -3574,17 +3706,7 @@ static int receive_skip_(struct drbd_conf *mdev, struct p_header *h, int silent)
3574 return size == 0; 3706 return size == 0;
3575} 3707}
3576 3708
3577static int receive_skip(struct drbd_conf *mdev, struct p_header *h) 3709static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
3578{
3579 return receive_skip_(mdev, h, 0);
3580}
3581
3582static int receive_skip_silent(struct drbd_conf *mdev, struct p_header *h)
3583{
3584 return receive_skip_(mdev, h, 1);
3585}
3586
3587static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h)
3588{ 3710{
3589 if (mdev->state.disk >= D_INCONSISTENT) 3711 if (mdev->state.disk >= D_INCONSISTENT)
3590 drbd_kick_lo(mdev); 3712 drbd_kick_lo(mdev);
@@ -3596,108 +3718,94 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h)
3596 return TRUE; 3718 return TRUE;
3597} 3719}
3598 3720
3599typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header *); 3721typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive);
3600 3722
3601static drbd_cmd_handler_f drbd_default_handler[] = { 3723struct data_cmd {
3602 [P_DATA] = receive_Data, 3724 int expect_payload;
3603 [P_DATA_REPLY] = receive_DataReply, 3725 size_t pkt_size;
3604 [P_RS_DATA_REPLY] = receive_RSDataReply, 3726 drbd_cmd_handler_f function;
3605 [P_BARRIER] = receive_Barrier, 3727};
3606 [P_BITMAP] = receive_bitmap, 3728
3607 [P_COMPRESSED_BITMAP] = receive_bitmap, 3729static struct data_cmd drbd_cmd_handler[] = {
3608 [P_UNPLUG_REMOTE] = receive_UnplugRemote, 3730 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
3609 [P_DATA_REQUEST] = receive_DataRequest, 3731 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
3610 [P_RS_DATA_REQUEST] = receive_DataRequest, 3732 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
3611 [P_SYNC_PARAM] = receive_SyncParam, 3733 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
3612 [P_SYNC_PARAM89] = receive_SyncParam, 3734 [P_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap } ,
3613 [P_PROTOCOL] = receive_protocol, 3735 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap } ,
3614 [P_UUIDS] = receive_uuids, 3736 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header80), receive_UnplugRemote },
3615 [P_SIZES] = receive_sizes, 3737 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3616 [P_STATE] = receive_state, 3738 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3617 [P_STATE_CHG_REQ] = receive_req_state, 3739 [P_SYNC_PARAM] = { 1, sizeof(struct p_header80), receive_SyncParam },
3618 [P_SYNC_UUID] = receive_sync_uuid, 3740 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header80), receive_SyncParam },
3619 [P_OV_REQUEST] = receive_DataRequest, 3741 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
3620 [P_OV_REPLY] = receive_DataRequest, 3742 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
3621 [P_CSUM_RS_REQUEST] = receive_DataRequest, 3743 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
3622 [P_DELAY_PROBE] = receive_skip_silent, 3744 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
3745 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
3746 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
3747 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3748 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3749 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3750 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
3623 /* anything missing from this table is in 3751 /* anything missing from this table is in
3624 * the asender_tbl, see get_asender_cmd */ 3752 * the asender_tbl, see get_asender_cmd */
3625 [P_MAX_CMD] = NULL, 3753 [P_MAX_CMD] = { 0, 0, NULL },
3626}; 3754};
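The table above replaces the old flat handler arrays: besides the callback, each entry tells drbdd() how many sub-header bytes to pull in before dispatch (pkt_size) and whether any bytes may follow that sub-header (expect_payload). As a purely hypothetical illustration, a new packet type would need an entry like:

	/* hypothetical -- P_EXAMPLE and receive_Example do not exist in this patch */
	[P_EXAMPLE] = { 0, sizeof(struct p_header80), receive_Example },
	/* expect_payload == 0: drbdd() rejects any trailing payload bytes
	 * pkt_size:            drbdd() first reads pkt_size - sizeof(union p_header)
	 * function:            then called as function(mdev, cmd, packet_size - shs) */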
3627 3755
3628static drbd_cmd_handler_f *drbd_cmd_handler = drbd_default_handler; 3756/* All handler functions that expect a sub-header get that sub-header in
3629static drbd_cmd_handler_f *drbd_opt_cmd_handler; 3757 mdev->data.rbuf.header.head.payload.
3758
3759 Usually in mdev->data.rbuf.header.head the callback can find the usual
3760 p_header, but they may not rely on that, since there is also p_header95!
3761 */
3630 3762
3631static void drbdd(struct drbd_conf *mdev) 3763static void drbdd(struct drbd_conf *mdev)
3632{ 3764{
3633 drbd_cmd_handler_f handler; 3765 union p_header *header = &mdev->data.rbuf.header;
3634 struct p_header *header = &mdev->data.rbuf.header; 3766 unsigned int packet_size;
3767 enum drbd_packets cmd;
3768 size_t shs; /* sub header size */
3769 int rv;
3635 3770
3636 while (get_t_state(&mdev->receiver) == Running) { 3771 while (get_t_state(&mdev->receiver) == Running) {
3637 drbd_thread_current_set_cpu(mdev); 3772 drbd_thread_current_set_cpu(mdev);
3638 if (!drbd_recv_header(mdev, header)) { 3773 if (!drbd_recv_header(mdev, &cmd, &packet_size))
3639 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); 3774 goto err_out;
3640 break;
3641 }
3642 3775
3643 if (header->command < P_MAX_CMD) 3776 if (unlikely(cmd >= P_MAX_CMD || !drbd_cmd_handler[cmd].function)) {
3644 handler = drbd_cmd_handler[header->command]; 3777 dev_err(DEV, "unknown packet type %d, l: %d!\n", cmd, packet_size);
3645 else if (P_MAY_IGNORE < header->command 3778 goto err_out;
3646 && header->command < P_MAX_OPT_CMD) 3779 }
3647 handler = drbd_opt_cmd_handler[header->command-P_MAY_IGNORE];
3648 else if (header->command > P_MAX_OPT_CMD)
3649 handler = receive_skip;
3650 else
3651 handler = NULL;
3652 3780
3653 if (unlikely(!handler)) { 3781 shs = drbd_cmd_handler[cmd].pkt_size - sizeof(union p_header);
3654 dev_err(DEV, "unknown packet type %d, l: %d!\n", 3782 rv = drbd_recv(mdev, &header->h80.payload, shs);
3655 header->command, header->length); 3783 if (unlikely(rv != shs)) {
3656 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); 3784 dev_err(DEV, "short read while reading sub header: rv=%d\n", rv);
3657 break; 3785 goto err_out;
3658 } 3786 }
3659 if (unlikely(!handler(mdev, header))) { 3787
3660 dev_err(DEV, "error receiving %s, l: %d!\n", 3788 if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) {
3661 cmdname(header->command), header->length); 3789 dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size);
3662 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); 3790 goto err_out;
3663 break;
3664 } 3791 }
3665 }
3666}
3667 3792
3668static void drbd_fail_pending_reads(struct drbd_conf *mdev) 3793 rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs);
3669{
3670 struct hlist_head *slot;
3671 struct hlist_node *pos;
3672 struct hlist_node *tmp;
3673 struct drbd_request *req;
3674 int i;
3675 3794
3676 /* 3795 if (unlikely(!rv)) {
3677 * Application READ requests 3796 dev_err(DEV, "error receiving %s, l: %d!\n",
3678 */ 3797 cmdname(cmd), packet_size);
3679 spin_lock_irq(&mdev->req_lock); 3798 goto err_out;
3680 for (i = 0; i < APP_R_HSIZE; i++) {
3681 slot = mdev->app_reads_hash+i;
3682 hlist_for_each_entry_safe(req, pos, tmp, slot, colision) {
3683 /* it may (but should not any longer!)
3684 * be on the work queue; if that assert triggers,
3685 * we need to also grab the
3686 * spin_lock_irq(&mdev->data.work.q_lock);
3687 * and list_del_init here. */
3688 D_ASSERT(list_empty(&req->w.list));
3689 /* It would be nice to complete outside of spinlock.
3690 * But this is easier for now. */
3691 _req_mod(req, connection_lost_while_pending);
3692 } 3799 }
3693 } 3800 }
3694 for (i = 0; i < APP_R_HSIZE; i++)
3695 if (!hlist_empty(mdev->app_reads_hash+i))
3696 dev_warn(DEV, "ASSERT FAILED: app_reads_hash[%d].first: "
3697 "%p, should be NULL\n", i, mdev->app_reads_hash[i].first);
3698 3801
3699 memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *)); 3802 if (0) {
3700 spin_unlock_irq(&mdev->req_lock); 3803 err_out:
3804 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
3805 }
3806 /* If we leave here, we probably want to update at least the
3807 * "Connected" indicator on stable storage. Do so explicitly here. */
3808 drbd_md_sync(mdev);
3701} 3809}
3702 3810
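
The reworked drbdd() above is fully table driven: read a header, bounds-check the command against drbd_cmd_handler[], pull in that command's fixed-size sub-header, reject a trailing payload unless the table entry allows one, and only then call the per-packet handler with the remaining payload length. A minimal sketch of that pattern, with illustrative names (pkt_handler, recv_exact) standing in for the DRBD types:

struct pkt_handler {
	int expect_payload;              /* may a variable-length payload follow? */
	size_t pkt_size;                 /* wire header plus fixed sub-header */
	int (*fn)(void *ctx, unsigned int cmd, unsigned int payload_size);
};

static int dispatch_one(void *ctx, const struct pkt_handler *tbl, unsigned int max_cmd,
			unsigned int cmd, unsigned int packet_size, size_t hdr_size,
			void *sub_hdr, int (*recv_exact)(void *ctx, void *buf, size_t len))
{
	size_t shs;

	if (cmd >= max_cmd || !tbl[cmd].fn)
		return -1;                                 /* unknown packet type */
	shs = tbl[cmd].pkt_size - hdr_size;                /* fixed sub-header size */
	if (recv_exact(ctx, sub_hdr, shs))
		return -1;                                 /* short read */
	if (packet_size > shs && !tbl[cmd].expect_payload)
		return -1;                                 /* payload not allowed here */
	return tbl[cmd].fn(ctx, cmd, packet_size - shs);   /* handler consumes the rest */
}
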
3703void drbd_flush_workqueue(struct drbd_conf *mdev) 3811void drbd_flush_workqueue(struct drbd_conf *mdev)
@@ -3710,6 +3818,36 @@ void drbd_flush_workqueue(struct drbd_conf *mdev)
3710 wait_for_completion(&barr.done); 3818 wait_for_completion(&barr.done);
3711} 3819}
3712 3820
3821void drbd_free_tl_hash(struct drbd_conf *mdev)
3822{
3823 struct hlist_head *h;
3824
3825 spin_lock_irq(&mdev->req_lock);
3826
3827 if (!mdev->tl_hash || mdev->state.conn != C_STANDALONE) {
3828 spin_unlock_irq(&mdev->req_lock);
3829 return;
3830 }
3831 /* paranoia code */
3832 for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++)
3833 if (h->first)
3834 dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n",
3835 (int)(h - mdev->ee_hash), h->first);
3836 kfree(mdev->ee_hash);
3837 mdev->ee_hash = NULL;
3838 mdev->ee_hash_s = 0;
3839
3840 /* paranoia code */
3841 for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++)
3842 if (h->first)
3843 dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n",
3844 (int)(h - mdev->tl_hash), h->first);
3845 kfree(mdev->tl_hash);
3846 mdev->tl_hash = NULL;
3847 mdev->tl_hash_s = 0;
3848 spin_unlock_irq(&mdev->req_lock);
3849}
3850
3713static void drbd_disconnect(struct drbd_conf *mdev) 3851static void drbd_disconnect(struct drbd_conf *mdev)
3714{ 3852{
3715 enum drbd_fencing_p fp; 3853 enum drbd_fencing_p fp;
@@ -3727,6 +3865,7 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3727 drbd_thread_stop(&mdev->asender); 3865 drbd_thread_stop(&mdev->asender);
3728 drbd_free_sock(mdev); 3866 drbd_free_sock(mdev);
3729 3867
3868 /* wait for current activity to cease. */
3730 spin_lock_irq(&mdev->req_lock); 3869 spin_lock_irq(&mdev->req_lock);
3731 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee); 3870 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
3732 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee); 3871 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
@@ -3751,7 +3890,6 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3751 3890
3752 /* make sure syncer is stopped and w_resume_next_sg queued */ 3891 /* make sure syncer is stopped and w_resume_next_sg queued */
3753 del_timer_sync(&mdev->resync_timer); 3892 del_timer_sync(&mdev->resync_timer);
3754 set_bit(STOP_SYNC_TIMER, &mdev->flags);
3755 resync_timer_fn((unsigned long)mdev); 3893 resync_timer_fn((unsigned long)mdev);
3756 3894
3757 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier, 3895 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
@@ -3766,11 +3904,9 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3766 kfree(mdev->p_uuid); 3904 kfree(mdev->p_uuid);
3767 mdev->p_uuid = NULL; 3905 mdev->p_uuid = NULL;
3768 3906
3769 if (!mdev->state.susp) 3907 if (!is_susp(mdev->state))
3770 tl_clear(mdev); 3908 tl_clear(mdev);
3771 3909
3772 drbd_fail_pending_reads(mdev);
3773
3774 dev_info(DEV, "Connection closed\n"); 3910 dev_info(DEV, "Connection closed\n");
3775 3911
3776 drbd_md_sync(mdev); 3912 drbd_md_sync(mdev);
@@ -3781,12 +3917,8 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3781 put_ldev(mdev); 3917 put_ldev(mdev);
3782 } 3918 }
3783 3919
3784 if (mdev->state.role == R_PRIMARY) { 3920 if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
3785 if (fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN) { 3921 drbd_try_outdate_peer_async(mdev);
3786 enum drbd_disk_state nps = drbd_try_outdate_peer(mdev);
3787 drbd_request_state(mdev, NS(pdsk, nps));
3788 }
3789 }
3790 3922
3791 spin_lock_irq(&mdev->req_lock); 3923 spin_lock_irq(&mdev->req_lock);
3792 os = mdev->state; 3924 os = mdev->state;
@@ -3799,32 +3931,14 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3799 spin_unlock_irq(&mdev->req_lock); 3931 spin_unlock_irq(&mdev->req_lock);
3800 3932
3801 if (os.conn == C_DISCONNECTING) { 3933 if (os.conn == C_DISCONNECTING) {
3802 struct hlist_head *h; 3934 wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0);
3803 wait_event(mdev->misc_wait, atomic_read(&mdev->net_cnt) == 0);
3804 3935
3805 /* we must not free the tl_hash 3936 if (!is_susp(mdev->state)) {
3806 * while application io is still on the fly */ 3937 /* we must not free the tl_hash
3807 wait_event(mdev->misc_wait, atomic_read(&mdev->ap_bio_cnt) == 0); 3938 * while application io is still on the fly */
3808 3939 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
3809 spin_lock_irq(&mdev->req_lock); 3940 drbd_free_tl_hash(mdev);
3810 /* paranoia code */ 3941 }
3811 for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++)
3812 if (h->first)
3813 dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n",
3814 (int)(h - mdev->ee_hash), h->first);
3815 kfree(mdev->ee_hash);
3816 mdev->ee_hash = NULL;
3817 mdev->ee_hash_s = 0;
3818
3819 /* paranoia code */
3820 for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++)
3821 if (h->first)
3822 dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n",
3823 (int)(h - mdev->tl_hash), h->first);
3824 kfree(mdev->tl_hash);
3825 mdev->tl_hash = NULL;
3826 mdev->tl_hash_s = 0;
3827 spin_unlock_irq(&mdev->req_lock);
3828 3942
3829 crypto_free_hash(mdev->cram_hmac_tfm); 3943 crypto_free_hash(mdev->cram_hmac_tfm);
3830 mdev->cram_hmac_tfm = NULL; 3944 mdev->cram_hmac_tfm = NULL;
@@ -3844,6 +3958,9 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3844 i = drbd_release_ee(mdev, &mdev->net_ee); 3958 i = drbd_release_ee(mdev, &mdev->net_ee);
3845 if (i) 3959 if (i)
3846 dev_info(DEV, "net_ee not empty, killed %u entries\n", i); 3960 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
3961 i = atomic_read(&mdev->pp_in_use_by_net);
3962 if (i)
3963 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
3847 i = atomic_read(&mdev->pp_in_use); 3964 i = atomic_read(&mdev->pp_in_use);
3848 if (i) 3965 if (i)
3849 dev_info(DEV, "pp_in_use = %d, expected 0\n", i); 3966 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
@@ -3887,7 +4004,7 @@ static int drbd_send_handshake(struct drbd_conf *mdev)
3887 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); 4004 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
3888 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); 4005 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
3889 ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE, 4006 ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE,
3890 (struct p_header *)p, sizeof(*p), 0 ); 4007 (struct p_header80 *)p, sizeof(*p), 0 );
3891 mutex_unlock(&mdev->data.mutex); 4008 mutex_unlock(&mdev->data.mutex);
3892 return ok; 4009 return ok;
3893} 4010}
@@ -3903,27 +4020,28 @@ static int drbd_do_handshake(struct drbd_conf *mdev)
3903{ 4020{
3904 /* ASSERT current == mdev->receiver ... */ 4021 /* ASSERT current == mdev->receiver ... */
3905 struct p_handshake *p = &mdev->data.rbuf.handshake; 4022 struct p_handshake *p = &mdev->data.rbuf.handshake;
3906 const int expect = sizeof(struct p_handshake) 4023 const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
3907 -sizeof(struct p_header); 4024 unsigned int length;
4025 enum drbd_packets cmd;
3908 int rv; 4026 int rv;
3909 4027
3910 rv = drbd_send_handshake(mdev); 4028 rv = drbd_send_handshake(mdev);
3911 if (!rv) 4029 if (!rv)
3912 return 0; 4030 return 0;
3913 4031
3914 rv = drbd_recv_header(mdev, &p->head); 4032 rv = drbd_recv_header(mdev, &cmd, &length);
3915 if (!rv) 4033 if (!rv)
3916 return 0; 4034 return 0;
3917 4035
3918 if (p->head.command != P_HAND_SHAKE) { 4036 if (cmd != P_HAND_SHAKE) {
3919 dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n", 4037 dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n",
3920 cmdname(p->head.command), p->head.command); 4038 cmdname(cmd), cmd);
3921 return -1; 4039 return -1;
3922 } 4040 }
3923 4041
3924 if (p->head.length != expect) { 4042 if (length != expect) {
3925 dev_err(DEV, "expected HandShake length: %u, received: %u\n", 4043 dev_err(DEV, "expected HandShake length: %u, received: %u\n",
3926 expect, p->head.length); 4044 expect, length);
3927 return -1; 4045 return -1;
3928 } 4046 }
3929 4047
@@ -3981,10 +4099,11 @@ static int drbd_do_auth(struct drbd_conf *mdev)
3981 char *response = NULL; 4099 char *response = NULL;
3982 char *right_response = NULL; 4100 char *right_response = NULL;
3983 char *peers_ch = NULL; 4101 char *peers_ch = NULL;
3984 struct p_header p;
3985 unsigned int key_len = strlen(mdev->net_conf->shared_secret); 4102 unsigned int key_len = strlen(mdev->net_conf->shared_secret);
3986 unsigned int resp_size; 4103 unsigned int resp_size;
3987 struct hash_desc desc; 4104 struct hash_desc desc;
4105 enum drbd_packets cmd;
4106 unsigned int length;
3988 int rv; 4107 int rv;
3989 4108
3990 desc.tfm = mdev->cram_hmac_tfm; 4109 desc.tfm = mdev->cram_hmac_tfm;
@@ -4004,33 +4123,33 @@ static int drbd_do_auth(struct drbd_conf *mdev)
4004 if (!rv) 4123 if (!rv)
4005 goto fail; 4124 goto fail;
4006 4125
4007 rv = drbd_recv_header(mdev, &p); 4126 rv = drbd_recv_header(mdev, &cmd, &length);
4008 if (!rv) 4127 if (!rv)
4009 goto fail; 4128 goto fail;
4010 4129
4011 if (p.command != P_AUTH_CHALLENGE) { 4130 if (cmd != P_AUTH_CHALLENGE) {
4012 dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n", 4131 dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n",
4013 cmdname(p.command), p.command); 4132 cmdname(cmd), cmd);
4014 rv = 0; 4133 rv = 0;
4015 goto fail; 4134 goto fail;
4016 } 4135 }
4017 4136
4018 if (p.length > CHALLENGE_LEN*2) { 4137 if (length > CHALLENGE_LEN * 2) {
4019 dev_err(DEV, "expected AuthChallenge payload too big.\n"); 4138 dev_err(DEV, "expected AuthChallenge payload too big.\n");
4020 rv = -1; 4139 rv = -1;
4021 goto fail; 4140 goto fail;
4022 } 4141 }
4023 4142
4024 peers_ch = kmalloc(p.length, GFP_NOIO); 4143 peers_ch = kmalloc(length, GFP_NOIO);
4025 if (peers_ch == NULL) { 4144 if (peers_ch == NULL) {
4026 dev_err(DEV, "kmalloc of peers_ch failed\n"); 4145 dev_err(DEV, "kmalloc of peers_ch failed\n");
4027 rv = -1; 4146 rv = -1;
4028 goto fail; 4147 goto fail;
4029 } 4148 }
4030 4149
4031 rv = drbd_recv(mdev, peers_ch, p.length); 4150 rv = drbd_recv(mdev, peers_ch, length);
4032 4151
4033 if (rv != p.length) { 4152 if (rv != length) {
4034 dev_err(DEV, "short read AuthChallenge: l=%u\n", rv); 4153 dev_err(DEV, "short read AuthChallenge: l=%u\n", rv);
4035 rv = 0; 4154 rv = 0;
4036 goto fail; 4155 goto fail;
@@ -4045,7 +4164,7 @@ static int drbd_do_auth(struct drbd_conf *mdev)
4045 } 4164 }
4046 4165
4047 sg_init_table(&sg, 1); 4166 sg_init_table(&sg, 1);
4048 sg_set_buf(&sg, peers_ch, p.length); 4167 sg_set_buf(&sg, peers_ch, length);
4049 4168
4050 rv = crypto_hash_digest(&desc, &sg, sg.length, response); 4169 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4051 if (rv) { 4170 if (rv) {
@@ -4058,18 +4177,18 @@ static int drbd_do_auth(struct drbd_conf *mdev)
4058 if (!rv) 4177 if (!rv)
4059 goto fail; 4178 goto fail;
4060 4179
4061 rv = drbd_recv_header(mdev, &p); 4180 rv = drbd_recv_header(mdev, &cmd, &length);
4062 if (!rv) 4181 if (!rv)
4063 goto fail; 4182 goto fail;
4064 4183
4065 if (p.command != P_AUTH_RESPONSE) { 4184 if (cmd != P_AUTH_RESPONSE) {
4066 dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n", 4185 dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n",
4067 cmdname(p.command), p.command); 4186 cmdname(cmd), cmd);
4068 rv = 0; 4187 rv = 0;
4069 goto fail; 4188 goto fail;
4070 } 4189 }
4071 4190
4072 if (p.length != resp_size) { 4191 if (length != resp_size) {
4073 dev_err(DEV, "expected AuthResponse payload of wrong size\n"); 4192 dev_err(DEV, "expected AuthResponse payload of wrong size\n");
4074 rv = 0; 4193 rv = 0;
4075 goto fail; 4194 goto fail;
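
Both sides of drbd_do_auth() run the same challenge/response exchange over the meta socket: send a random challenge, receive the peer's challenge, answer it with an HMAC keyed by the shared secret, and finally check that the peer's answer matches the HMAC of our own challenge. The comparison step, sketched with a hypothetical hmac() callback in place of the crypto_hash_digest() calls above:

/* Illustrative only; hmac() stands in for crypto_hash_digest() keyed with the
 * shared secret, and resp_size is that hash's digest size. */
static int cram_hmac_check(const void *my_challenge, size_t my_len,
			   const void *peers_response, size_t resp_size,
			   int (*hmac)(const void *msg, size_t len, void *digest))
{
	char right_response[64];

	if (resp_size > sizeof(right_response))
		return -1;
	if (hmac(my_challenge, my_len, right_response))
		return -1;                 /* digest computation failed */
	/* authenticated only if the peer knew the secret for *our* challenge */
	return memcmp(right_response, peers_response, resp_size) == 0;
}
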
@@ -4154,7 +4273,7 @@ int drbdd_init(struct drbd_thread *thi)
4154 4273
4155/* ********* acknowledge sender ******** */ 4274/* ********* acknowledge sender ******** */
4156 4275
4157static int got_RqSReply(struct drbd_conf *mdev, struct p_header *h) 4276static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h)
4158{ 4277{
4159 struct p_req_state_reply *p = (struct p_req_state_reply *)h; 4278 struct p_req_state_reply *p = (struct p_req_state_reply *)h;
4160 4279
@@ -4172,13 +4291,13 @@ static int got_RqSReply(struct drbd_conf *mdev, struct p_header *h)
4172 return TRUE; 4291 return TRUE;
4173} 4292}
4174 4293
4175static int got_Ping(struct drbd_conf *mdev, struct p_header *h) 4294static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h)
4176{ 4295{
4177 return drbd_send_ping_ack(mdev); 4296 return drbd_send_ping_ack(mdev);
4178 4297
4179} 4298}
4180 4299
4181static int got_PingAck(struct drbd_conf *mdev, struct p_header *h) 4300static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h)
4182{ 4301{
4183 /* restore idle timeout */ 4302 /* restore idle timeout */
4184 mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; 4303 mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ;
@@ -4188,7 +4307,7 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header *h)
4188 return TRUE; 4307 return TRUE;
4189} 4308}
4190 4309
4191static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h) 4310static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h)
4192{ 4311{
4193 struct p_block_ack *p = (struct p_block_ack *)h; 4312 struct p_block_ack *p = (struct p_block_ack *)h;
4194 sector_t sector = be64_to_cpu(p->sector); 4313 sector_t sector = be64_to_cpu(p->sector);
@@ -4198,11 +4317,15 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h)
4198 4317
4199 update_peer_seq(mdev, be32_to_cpu(p->seq_num)); 4318 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4200 4319
4201 drbd_rs_complete_io(mdev, sector); 4320 if (get_ldev(mdev)) {
4202 drbd_set_in_sync(mdev, sector, blksize); 4321 drbd_rs_complete_io(mdev, sector);
4203 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ 4322 drbd_set_in_sync(mdev, sector, blksize);
4204 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); 4323 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4324 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4325 put_ldev(mdev);
4326 }
4205 dec_rs_pending(mdev); 4327 dec_rs_pending(mdev);
4328 atomic_add(blksize >> 9, &mdev->rs_sect_in);
4206 4329
4207 return TRUE; 4330 return TRUE;
4208} 4331}
@@ -4258,7 +4381,7 @@ static int validate_req_change_req_state(struct drbd_conf *mdev,
4258 return TRUE; 4381 return TRUE;
4259} 4382}
4260 4383
4261static int got_BlockAck(struct drbd_conf *mdev, struct p_header *h) 4384static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h)
4262{ 4385{
4263 struct p_block_ack *p = (struct p_block_ack *)h; 4386 struct p_block_ack *p = (struct p_block_ack *)h;
4264 sector_t sector = be64_to_cpu(p->sector); 4387 sector_t sector = be64_to_cpu(p->sector);
@@ -4298,7 +4421,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header *h)
4298 _ack_id_to_req, __func__ , what); 4421 _ack_id_to_req, __func__ , what);
4299} 4422}
4300 4423
4301static int got_NegAck(struct drbd_conf *mdev, struct p_header *h) 4424static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h)
4302{ 4425{
4303 struct p_block_ack *p = (struct p_block_ack *)h; 4426 struct p_block_ack *p = (struct p_block_ack *)h;
4304 sector_t sector = be64_to_cpu(p->sector); 4427 sector_t sector = be64_to_cpu(p->sector);
@@ -4318,7 +4441,7 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header *h)
4318 _ack_id_to_req, __func__ , neg_acked); 4441 _ack_id_to_req, __func__ , neg_acked);
4319} 4442}
4320 4443
4321static int got_NegDReply(struct drbd_conf *mdev, struct p_header *h) 4444static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h)
4322{ 4445{
4323 struct p_block_ack *p = (struct p_block_ack *)h; 4446 struct p_block_ack *p = (struct p_block_ack *)h;
4324 sector_t sector = be64_to_cpu(p->sector); 4447 sector_t sector = be64_to_cpu(p->sector);
@@ -4331,7 +4454,7 @@ static int got_NegDReply(struct drbd_conf *mdev, struct p_header *h)
4331 _ar_id_to_req, __func__ , neg_acked); 4454 _ar_id_to_req, __func__ , neg_acked);
4332} 4455}
4333 4456
4334static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h) 4457static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h)
4335{ 4458{
4336 sector_t sector; 4459 sector_t sector;
4337 int size; 4460 int size;
@@ -4353,7 +4476,7 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h)
4353 return TRUE; 4476 return TRUE;
4354} 4477}
4355 4478
4356static int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h) 4479static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h)
4357{ 4480{
4358 struct p_barrier_ack *p = (struct p_barrier_ack *)h; 4481 struct p_barrier_ack *p = (struct p_barrier_ack *)h;
4359 4482
@@ -4362,7 +4485,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h)
4362 return TRUE; 4485 return TRUE;
4363} 4486}
4364 4487
4365static int got_OVResult(struct drbd_conf *mdev, struct p_header *h) 4488static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h)
4366{ 4489{
4367 struct p_block_ack *p = (struct p_block_ack *)h; 4490 struct p_block_ack *p = (struct p_block_ack *)h;
4368 struct drbd_work *w; 4491 struct drbd_work *w;
@@ -4379,6 +4502,9 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header *h)
4379 else 4502 else
4380 ov_oos_print(mdev); 4503 ov_oos_print(mdev);
4381 4504
4505 if (!get_ldev(mdev))
4506 return TRUE;
4507
4382 drbd_rs_complete_io(mdev, sector); 4508 drbd_rs_complete_io(mdev, sector);
4383 dec_rs_pending(mdev); 4509 dec_rs_pending(mdev);
4384 4510
@@ -4393,18 +4519,18 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header *h)
4393 drbd_resync_finished(mdev); 4519 drbd_resync_finished(mdev);
4394 } 4520 }
4395 } 4521 }
4522 put_ldev(mdev);
4396 return TRUE; 4523 return TRUE;
4397} 4524}
4398 4525
4399static int got_something_to_ignore_m(struct drbd_conf *mdev, struct p_header *h) 4526static int got_skip(struct drbd_conf *mdev, struct p_header80 *h)
4400{ 4527{
4401 /* IGNORE */
4402 return TRUE; 4528 return TRUE;
4403} 4529}
4404 4530
4405struct asender_cmd { 4531struct asender_cmd {
4406 size_t pkt_size; 4532 size_t pkt_size;
4407 int (*process)(struct drbd_conf *mdev, struct p_header *h); 4533 int (*process)(struct drbd_conf *mdev, struct p_header80 *h);
4408}; 4534};
4409 4535
4410static struct asender_cmd *get_asender_cmd(int cmd) 4536static struct asender_cmd *get_asender_cmd(int cmd)
@@ -4413,8 +4539,8 @@ static struct asender_cmd *get_asender_cmd(int cmd)
4413 /* anything missing from this table is in 4539 /* anything missing from this table is in
4414 * the drbd_cmd_handler (drbd_default_handler) table, 4540 * the drbd_cmd_handler (drbd_default_handler) table,
4415 * see the beginning of drbdd() */ 4541 * see the beginning of drbdd() */
4416 [P_PING] = { sizeof(struct p_header), got_Ping }, 4542 [P_PING] = { sizeof(struct p_header80), got_Ping },
4417 [P_PING_ACK] = { sizeof(struct p_header), got_PingAck }, 4543 [P_PING_ACK] = { sizeof(struct p_header80), got_PingAck },
4418 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 4544 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4419 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 4545 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4420 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, 4546 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
@@ -4426,7 +4552,7 @@ static struct asender_cmd *get_asender_cmd(int cmd)
4426 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, 4552 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4427 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, 4553 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4428 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, 4554 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
4429 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe), got_something_to_ignore_m }, 4555 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
4430 [P_MAX_CMD] = { 0, NULL }, 4556 [P_MAX_CMD] = { 0, NULL },
4431 }; 4557 };
4432 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) 4558 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
@@ -4437,13 +4563,13 @@ static struct asender_cmd *get_asender_cmd(int cmd)
4437int drbd_asender(struct drbd_thread *thi) 4563int drbd_asender(struct drbd_thread *thi)
4438{ 4564{
4439 struct drbd_conf *mdev = thi->mdev; 4565 struct drbd_conf *mdev = thi->mdev;
4440 struct p_header *h = &mdev->meta.rbuf.header; 4566 struct p_header80 *h = &mdev->meta.rbuf.header.h80;
4441 struct asender_cmd *cmd = NULL; 4567 struct asender_cmd *cmd = NULL;
4442 4568
4443 int rv, len; 4569 int rv, len;
4444 void *buf = h; 4570 void *buf = h;
4445 int received = 0; 4571 int received = 0;
4446 int expect = sizeof(struct p_header); 4572 int expect = sizeof(struct p_header80);
4447 int empty; 4573 int empty;
4448 4574
4449 sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); 4575 sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev));
@@ -4467,10 +4593,8 @@ int drbd_asender(struct drbd_thread *thi)
4467 while (1) { 4593 while (1) {
4468 clear_bit(SIGNAL_ASENDER, &mdev->flags); 4594 clear_bit(SIGNAL_ASENDER, &mdev->flags);
4469 flush_signals(current); 4595 flush_signals(current);
4470 if (!drbd_process_done_ee(mdev)) { 4596 if (!drbd_process_done_ee(mdev))
4471 dev_err(DEV, "process_done_ee() = NOT_OK\n");
4472 goto reconnect; 4597 goto reconnect;
4473 }
4474 /* to avoid race with newly queued ACKs */ 4598 /* to avoid race with newly queued ACKs */
4475 set_bit(SIGNAL_ASENDER, &mdev->flags); 4599 set_bit(SIGNAL_ASENDER, &mdev->flags);
4476 spin_lock_irq(&mdev->req_lock); 4600 spin_lock_irq(&mdev->req_lock);
@@ -4529,21 +4653,23 @@ int drbd_asender(struct drbd_thread *thi)
4529 4653
4530 if (received == expect && cmd == NULL) { 4654 if (received == expect && cmd == NULL) {
4531 if (unlikely(h->magic != BE_DRBD_MAGIC)) { 4655 if (unlikely(h->magic != BE_DRBD_MAGIC)) {
4532 dev_err(DEV, "magic?? on meta m: 0x%lx c: %d l: %d\n", 4656 dev_err(DEV, "magic?? on meta m: 0x%08x c: %d l: %d\n",
4533 (long)be32_to_cpu(h->magic), 4657 be32_to_cpu(h->magic),
4534 h->command, h->length); 4658 be16_to_cpu(h->command),
4659 be16_to_cpu(h->length));
4535 goto reconnect; 4660 goto reconnect;
4536 } 4661 }
4537 cmd = get_asender_cmd(be16_to_cpu(h->command)); 4662 cmd = get_asender_cmd(be16_to_cpu(h->command));
4538 len = be16_to_cpu(h->length); 4663 len = be16_to_cpu(h->length);
4539 if (unlikely(cmd == NULL)) { 4664 if (unlikely(cmd == NULL)) {
4540 dev_err(DEV, "unknown command?? on meta m: 0x%lx c: %d l: %d\n", 4665 dev_err(DEV, "unknown command?? on meta m: 0x%08x c: %d l: %d\n",
4541 (long)be32_to_cpu(h->magic), 4666 be32_to_cpu(h->magic),
4542 h->command, h->length); 4667 be16_to_cpu(h->command),
4668 be16_to_cpu(h->length));
4543 goto disconnect; 4669 goto disconnect;
4544 } 4670 }
4545 expect = cmd->pkt_size; 4671 expect = cmd->pkt_size;
4546 ERR_IF(len != expect-sizeof(struct p_header)) 4672 ERR_IF(len != expect-sizeof(struct p_header80))
4547 goto reconnect; 4673 goto reconnect;
4548 } 4674 }
4549 if (received == expect) { 4675 if (received == expect) {
@@ -4553,7 +4679,7 @@ int drbd_asender(struct drbd_thread *thi)
4553 4679
4554 buf = h; 4680 buf = h;
4555 received = 0; 4681 received = 0;
4556 expect = sizeof(struct p_header); 4682 expect = sizeof(struct p_header80);
4557 cmd = NULL; 4683 cmd = NULL;
4558 } 4684 }
4559 } 4685 }
@@ -4561,10 +4687,12 @@ int drbd_asender(struct drbd_thread *thi)
4561 if (0) { 4687 if (0) {
4562reconnect: 4688reconnect:
4563 drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); 4689 drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
4690 drbd_md_sync(mdev);
4564 } 4691 }
4565 if (0) { 4692 if (0) {
4566disconnect: 4693disconnect:
4567 drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); 4694 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
4695 drbd_md_sync(mdev);
4568 } 4696 }
4569 clear_bit(SIGNAL_ASENDER, &mdev->flags); 4697 clear_bit(SIGNAL_ASENDER, &mdev->flags);
4570 4698
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index f761d98a4e90..9e91a2545fc8 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -59,17 +59,19 @@ static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req)
59static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw) 59static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw)
60{ 60{
61 const unsigned long s = req->rq_state; 61 const unsigned long s = req->rq_state;
62
63 /* remove it from the transfer log.
64 * well, only if it had been there in the first
65 * place... if it had not (local only or conflicting
66 * and never sent), it should still be "empty" as
67 * initialized in drbd_req_new(), so we can list_del() it
68 * here unconditionally */
69 list_del(&req->tl_requests);
70
62 /* if it was a write, we may have to set the corresponding 71 /* if it was a write, we may have to set the corresponding
63 * bit(s) out-of-sync first. If it had a local part, we need to 72 * bit(s) out-of-sync first. If it had a local part, we need to
64 * release the reference to the activity log. */ 73 * release the reference to the activity log. */
65 if (rw == WRITE) { 74 if (rw == WRITE) {
66 /* remove it from the transfer log.
67 * well, only if it had been there in the first
68 * place... if it had not (local only or conflicting
69 * and never sent), it should still be "empty" as
70 * initialized in drbd_req_new(), so we can list_del() it
71 * here unconditionally */
72 list_del(&req->tl_requests);
73 /* Set out-of-sync unless both OK flags are set 75 /* Set out-of-sync unless both OK flags are set
74 * (local only or remote failed). 76 * (local only or remote failed).
75 * Other places where we set out-of-sync: 77 * Other places where we set out-of-sync:
@@ -92,7 +94,8 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const
92 */ 94 */
93 if (s & RQ_LOCAL_MASK) { 95 if (s & RQ_LOCAL_MASK) {
94 if (get_ldev_if_state(mdev, D_FAILED)) { 96 if (get_ldev_if_state(mdev, D_FAILED)) {
95 drbd_al_complete_io(mdev, req->sector); 97 if (s & RQ_IN_ACT_LOG)
98 drbd_al_complete_io(mdev, req->sector);
96 put_ldev(mdev); 99 put_ldev(mdev);
97 } else if (__ratelimit(&drbd_ratelimit_state)) { 100 } else if (__ratelimit(&drbd_ratelimit_state)) {
98 dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), " 101 dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), "
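
Two hunks of this patch belong together: drbd_make_request_common() (in the hunk further below) now only calls drbd_al_begin_io() when the activity log is not suspended, and records that decision in RQ_IN_ACT_LOG, so the completion path above drops the activity-log reference exactly when one was taken. The pairing, condensed from both hunks:

/* Submission side (drbd_make_request_common): take the AL reference only if allowed. */
if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) {
	req->rq_state |= RQ_IN_ACT_LOG;
	drbd_al_begin_io(mdev, sector);
}

/* Completion side (_req_is_done): drop it only if it was taken. */
if ((s & RQ_LOCAL_MASK) && get_ldev_if_state(mdev, D_FAILED)) {
	if (s & RQ_IN_ACT_LOG)
		drbd_al_complete_io(mdev, req->sector);
	put_ldev(mdev);
}
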
@@ -280,6 +283,14 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
280 * protocol A or B, barrier ack still pending... */ 283 * protocol A or B, barrier ack still pending... */
281} 284}
282 285
286static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_error *m)
287{
288 struct drbd_conf *mdev = req->mdev;
289
290 if (!is_susp(mdev->state))
291 _req_may_be_done(req, m);
292}
293
283/* 294/*
284 * checks whether there was an overlapping request 295 * checks whether there was an overlapping request
285 * or ee already registered. 296 * or ee already registered.
@@ -380,10 +391,11 @@ out_conflict:
380 * and it enforces that we have to think in a very structured manner 391 * and it enforces that we have to think in a very structured manner
381 * about the "events" that may happen to a request during its life time ... 392 * about the "events" that may happen to a request during its life time ...
382 */ 393 */
383void __req_mod(struct drbd_request *req, enum drbd_req_event what, 394int __req_mod(struct drbd_request *req, enum drbd_req_event what,
384 struct bio_and_error *m) 395 struct bio_and_error *m)
385{ 396{
386 struct drbd_conf *mdev = req->mdev; 397 struct drbd_conf *mdev = req->mdev;
398 int rv = 0;
387 m->bio = NULL; 399 m->bio = NULL;
388 400
389 switch (what) { 401 switch (what) {
@@ -420,7 +432,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
420 req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK); 432 req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
421 req->rq_state &= ~RQ_LOCAL_PENDING; 433 req->rq_state &= ~RQ_LOCAL_PENDING;
422 434
423 _req_may_be_done(req, m); 435 _req_may_be_done_not_susp(req, m);
424 put_ldev(mdev); 436 put_ldev(mdev);
425 break; 437 break;
426 438
@@ -429,7 +441,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
429 req->rq_state &= ~RQ_LOCAL_PENDING; 441 req->rq_state &= ~RQ_LOCAL_PENDING;
430 442
431 __drbd_chk_io_error(mdev, FALSE); 443 __drbd_chk_io_error(mdev, FALSE);
432 _req_may_be_done(req, m); 444 _req_may_be_done_not_susp(req, m);
433 put_ldev(mdev); 445 put_ldev(mdev);
434 break; 446 break;
435 447
@@ -437,7 +449,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
437 /* it is legal to fail READA */ 449 /* it is legal to fail READA */
438 req->rq_state |= RQ_LOCAL_COMPLETED; 450 req->rq_state |= RQ_LOCAL_COMPLETED;
439 req->rq_state &= ~RQ_LOCAL_PENDING; 451 req->rq_state &= ~RQ_LOCAL_PENDING;
440 _req_may_be_done(req, m); 452 _req_may_be_done_not_susp(req, m);
441 put_ldev(mdev); 453 put_ldev(mdev);
442 break; 454 break;
443 455
@@ -455,7 +467,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
455 /* no point in retrying if there is no good remote data, 467 /* no point in retrying if there is no good remote data,
456 * or we have no connection. */ 468 * or we have no connection. */
457 if (mdev->state.pdsk != D_UP_TO_DATE) { 469 if (mdev->state.pdsk != D_UP_TO_DATE) {
458 _req_may_be_done(req, m); 470 _req_may_be_done_not_susp(req, m);
459 break; 471 break;
460 } 472 }
461 473
@@ -517,11 +529,9 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
517 D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0); 529 D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0);
518 530
519 req->epoch = mdev->newest_tle->br_number; 531 req->epoch = mdev->newest_tle->br_number;
520 list_add_tail(&req->tl_requests,
521 &mdev->newest_tle->requests);
522 532
523 /* increment size of current epoch */ 533 /* increment size of current epoch */
524 mdev->newest_tle->n_req++; 534 mdev->newest_tle->n_writes++;
525 535
526 /* queue work item to send data */ 536 /* queue work item to send data */
527 D_ASSERT(req->rq_state & RQ_NET_PENDING); 537 D_ASSERT(req->rq_state & RQ_NET_PENDING);
@@ -530,7 +540,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
530 drbd_queue_work(&mdev->data.work, &req->w); 540 drbd_queue_work(&mdev->data.work, &req->w);
531 541
532 /* close the epoch, in case it outgrew the limit */ 542 /* close the epoch, in case it outgrew the limit */
533 if (mdev->newest_tle->n_req >= mdev->net_conf->max_epoch_size) 543 if (mdev->newest_tle->n_writes >= mdev->net_conf->max_epoch_size)
534 queue_barrier(mdev); 544 queue_barrier(mdev);
535 545
536 break; 546 break;
@@ -543,7 +553,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
543 req->rq_state &= ~RQ_NET_QUEUED; 553 req->rq_state &= ~RQ_NET_QUEUED;
544 /* if we did it right, tl_clear should be scheduled only after 554 /* if we did it right, tl_clear should be scheduled only after
545 * this, so this should not be necessary! */ 555 * this, so this should not be necessary! */
546 _req_may_be_done(req, m); 556 _req_may_be_done_not_susp(req, m);
547 break; 557 break;
548 558
549 case handed_over_to_network: 559 case handed_over_to_network:
@@ -568,7 +578,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
568 * "completed_ok" events came in, once we return from 578 * "completed_ok" events came in, once we return from
569 * _drbd_send_zc_bio (drbd_send_dblock), we have to check 579 * _drbd_send_zc_bio (drbd_send_dblock), we have to check
570 * whether it is done already, and end it. */ 580 * whether it is done already, and end it. */
571 _req_may_be_done(req, m); 581 _req_may_be_done_not_susp(req, m);
572 break; 582 break;
573 583
574 case read_retry_remote_canceled: 584 case read_retry_remote_canceled:
@@ -584,7 +594,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
584 /* if it is still queued, we may not complete it here. 594 /* if it is still queued, we may not complete it here.
585 * it will be canceled soon. */ 595 * it will be canceled soon. */
586 if (!(req->rq_state & RQ_NET_QUEUED)) 596 if (!(req->rq_state & RQ_NET_QUEUED))
587 _req_may_be_done(req, m); 597 _req_may_be_done(req, m); /* Allowed while state.susp */
588 break; 598 break;
589 599
590 case write_acked_by_peer_and_sis: 600 case write_acked_by_peer_and_sis:
@@ -619,7 +629,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
619 D_ASSERT(req->rq_state & RQ_NET_PENDING); 629 D_ASSERT(req->rq_state & RQ_NET_PENDING);
620 dec_ap_pending(mdev); 630 dec_ap_pending(mdev);
621 req->rq_state &= ~RQ_NET_PENDING; 631 req->rq_state &= ~RQ_NET_PENDING;
622 _req_may_be_done(req, m); 632 _req_may_be_done_not_susp(req, m);
623 break; 633 break;
624 634
625 case neg_acked: 635 case neg_acked:
@@ -629,11 +639,50 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
629 req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); 639 req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
630 640
631 req->rq_state |= RQ_NET_DONE; 641 req->rq_state |= RQ_NET_DONE;
632 _req_may_be_done(req, m); 642 _req_may_be_done_not_susp(req, m);
633 /* else: done by handed_over_to_network */ 643 /* else: done by handed_over_to_network */
634 break; 644 break;
635 645
646 case fail_frozen_disk_io:
647 if (!(req->rq_state & RQ_LOCAL_COMPLETED))
648 break;
649
650 _req_may_be_done(req, m); /* Allowed while state.susp */
651 break;
652
653 case restart_frozen_disk_io:
654 if (!(req->rq_state & RQ_LOCAL_COMPLETED))
655 break;
656
657 req->rq_state &= ~RQ_LOCAL_COMPLETED;
658
659 rv = MR_READ;
660 if (bio_data_dir(req->master_bio) == WRITE)
661 rv = MR_WRITE;
662
663 get_ldev(mdev);
664 req->w.cb = w_restart_disk_io;
665 drbd_queue_work(&mdev->data.work, &req->w);
666 break;
667
668 case resend:
669 /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
670 before the connection loss (B&C only); only P_BARRIER_ACK was missing.
 671 Throwing them out of the TL here by pretending we got a BARRIER_ACK;
 672 we ensure that the peer was not rebooted. */
673 if (!(req->rq_state & RQ_NET_OK)) {
674 if (req->w.cb) {
675 drbd_queue_work(&mdev->data.work, &req->w);
676 rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
677 }
678 break;
679 }
680 /* else, fall through to barrier_acked */
681
636 case barrier_acked: 682 case barrier_acked:
683 if (!(req->rq_state & RQ_WRITE))
684 break;
685
637 if (req->rq_state & RQ_NET_PENDING) { 686 if (req->rq_state & RQ_NET_PENDING) {
638 /* barrier came in before all requests have been acked. 687 /* barrier came in before all requests have been acked.
639 * this is bad, because if the connection is lost now, 688 * this is bad, because if the connection is lost now,
@@ -643,7 +692,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
643 } 692 }
644 D_ASSERT(req->rq_state & RQ_NET_SENT); 693 D_ASSERT(req->rq_state & RQ_NET_SENT);
645 req->rq_state |= RQ_NET_DONE; 694 req->rq_state |= RQ_NET_DONE;
646 _req_may_be_done(req, m); 695 _req_may_be_done(req, m); /* Allowed while state.susp */
647 break; 696 break;
648 697
649 case data_received: 698 case data_received:
@@ -651,9 +700,11 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
651 dec_ap_pending(mdev); 700 dec_ap_pending(mdev);
652 req->rq_state &= ~RQ_NET_PENDING; 701 req->rq_state &= ~RQ_NET_PENDING;
653 req->rq_state |= (RQ_NET_OK|RQ_NET_DONE); 702 req->rq_state |= (RQ_NET_OK|RQ_NET_DONE);
654 _req_may_be_done(req, m); 703 _req_may_be_done_not_susp(req, m);
655 break; 704 break;
656 }; 705 };
706
707 return rv;
657} 708}
658 709
659/* we may do a local read if: 710/* we may do a local read if:
@@ -752,14 +803,16 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
752 * resync extent to finish, and, if necessary, pulls in the target 803 * resync extent to finish, and, if necessary, pulls in the target
753 * extent into the activity log, which involves further disk io because 804 * extent into the activity log, which involves further disk io because
754 * of transactional on-disk meta data updates. */ 805 * of transactional on-disk meta data updates. */
755 if (rw == WRITE && local) 806 if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) {
807 req->rq_state |= RQ_IN_ACT_LOG;
756 drbd_al_begin_io(mdev, sector); 808 drbd_al_begin_io(mdev, sector);
809 }
757 810
758 remote = remote && (mdev->state.pdsk == D_UP_TO_DATE || 811 remote = remote && (mdev->state.pdsk == D_UP_TO_DATE ||
759 (mdev->state.pdsk == D_INCONSISTENT && 812 (mdev->state.pdsk == D_INCONSISTENT &&
760 mdev->state.conn >= C_CONNECTED)); 813 mdev->state.conn >= C_CONNECTED));
761 814
762 if (!(local || remote) && !mdev->state.susp) { 815 if (!(local || remote) && !is_susp(mdev->state)) {
763 dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); 816 dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
764 goto fail_free_complete; 817 goto fail_free_complete;
765 } 818 }
@@ -785,7 +838,7 @@ allocate_barrier:
785 /* GOOD, everything prepared, grab the spin_lock */ 838 /* GOOD, everything prepared, grab the spin_lock */
786 spin_lock_irq(&mdev->req_lock); 839 spin_lock_irq(&mdev->req_lock);
787 840
788 if (mdev->state.susp) { 841 if (is_susp(mdev->state)) {
789 /* If we got suspended, use the retry mechanism of 842 /* If we got suspended, use the retry mechanism of
790 generic_make_request() to restart processing of this 843 generic_make_request() to restart processing of this
791 bio. In the next call to drbd_make_request_26 844 bio. In the next call to drbd_make_request_26
@@ -867,30 +920,10 @@ allocate_barrier:
867 /* check this request on the collision detection hash tables. 920 /* check this request on the collision detection hash tables.
868 * if we have a conflict, just complete it here. 921 * if we have a conflict, just complete it here.
869 * THINK do we want to check reads, too? (I don't think so...) */ 922 * THINK do we want to check reads, too? (I don't think so...) */
870 if (rw == WRITE && _req_conflicts(req)) { 923 if (rw == WRITE && _req_conflicts(req))
871 /* this is a conflicting request. 924 goto fail_conflicting;
872 * even though it may have been only _partially_ 925
873 * overlapping with one of the currently pending requests, 926 list_add_tail(&req->tl_requests, &mdev->newest_tle->requests);
874 * without even submitting or sending it, we will
875 * pretend that it was successfully served right now.
876 */
877 if (local) {
878 bio_put(req->private_bio);
879 req->private_bio = NULL;
880 drbd_al_complete_io(mdev, req->sector);
881 put_ldev(mdev);
882 local = 0;
883 }
884 if (remote)
885 dec_ap_pending(mdev);
886 _drbd_end_io_acct(mdev, req);
887 /* THINK: do we want to fail it (-EIO), or pretend success? */
888 bio_endio(req->master_bio, 0);
889 req->master_bio = NULL;
890 dec_ap_bio(mdev);
891 drbd_req_free(req);
892 remote = 0;
893 }
894 927
895 /* NOTE remote first: to get the concurrent write detection right, 928 /* NOTE remote first: to get the concurrent write detection right,
896 * we must register the request before start of local IO. */ 929 * we must register the request before start of local IO. */
@@ -923,6 +956,21 @@ allocate_barrier:
923 956
924 return 0; 957 return 0;
925 958
959fail_conflicting:
960 /* this is a conflicting request.
961 * even though it may have been only _partially_
962 * overlapping with one of the currently pending requests,
963 * without even submitting or sending it, we will
964 * pretend that it was successfully served right now.
965 */
966 _drbd_end_io_acct(mdev, req);
967 spin_unlock_irq(&mdev->req_lock);
968 if (remote)
969 dec_ap_pending(mdev);
970 /* THINK: do we want to fail it (-EIO), or pretend success?
971 * this pretends success. */
972 err = 0;
973
926fail_free_complete: 974fail_free_complete:
927 if (rw == WRITE && local) 975 if (rw == WRITE && local)
928 drbd_al_complete_io(mdev, sector); 976 drbd_al_complete_io(mdev, sector);
@@ -961,21 +1009,6 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write)
961 return 1; 1009 return 1;
962 } 1010 }
963 1011
964 /*
965 * Paranoia: we might have been primary, but sync target, or
966 * even diskless, then lost the connection.
967 * This should have been handled (panic? suspend?) somewhere
968 * else. But maybe it was not, so check again here.
969 * Caution: as long as we do not have a read/write lock on mdev,
970 * to serialize state changes, this is racy, since we may lose
971 * the connection *after* we test for the cstate.
972 */
973 if (mdev->state.disk < D_UP_TO_DATE && mdev->state.pdsk < D_UP_TO_DATE) {
974 if (__ratelimit(&drbd_ratelimit_state))
975 dev_err(DEV, "Sorry, I have no access to good data anymore.\n");
976 return 1;
977 }
978
979 return 0; 1012 return 0;
980} 1013}
981 1014
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 02d575d24518..181ea0364822 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -104,6 +104,9 @@ enum drbd_req_event {
104 read_ahead_completed_with_error, 104 read_ahead_completed_with_error,
105 write_completed_with_error, 105 write_completed_with_error,
106 completed_ok, 106 completed_ok,
107 resend,
108 fail_frozen_disk_io,
109 restart_frozen_disk_io,
107 nothing, /* for tracing only */ 110 nothing, /* for tracing only */
108}; 111};
109 112
@@ -183,6 +186,12 @@ enum drbd_req_state_bits {
183 186
184 /* keep this last, its for the RQ_NET_MASK */ 187 /* keep this last, its for the RQ_NET_MASK */
185 __RQ_NET_MAX, 188 __RQ_NET_MAX,
189
190 /* Set when this is a write, clear for a read */
191 __RQ_WRITE,
192
193 /* Should call drbd_al_complete_io() for this request... */
194 __RQ_IN_ACT_LOG,
186}; 195};
187 196
188#define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) 197#define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING)
@@ -201,6 +210,16 @@ enum drbd_req_state_bits {
201/* 0x1f8 */ 210/* 0x1f8 */
202#define RQ_NET_MASK (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK) 211#define RQ_NET_MASK (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)
203 212
213#define RQ_WRITE (1UL << __RQ_WRITE)
214#define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG)
215
 216/* For waking up the frozen transfer log, mod_req() has to return whether the request
 217 should be counted in the epoch object */
218#define MR_WRITE_SHIFT 0
219#define MR_WRITE (1 << MR_WRITE_SHIFT)
220#define MR_READ_SHIFT 1
221#define MR_READ (1 << MR_READ_SHIFT)
222
204/* epoch entries */ 223/* epoch entries */
205static inline 224static inline
206struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector) 225struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
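
MR_WRITE and MR_READ are the values __req_mod() can now hand back (see the resend and restart_frozen_disk_io cases above in drbd_req.c), so that whoever thaws a frozen transfer log can recount the requests it just requeued. That consumer is not part of this hunk; a hypothetical caller would do something like:

/* Hypothetical accounting step; the real consumer lives in the transfer-log
 * restart code, outside this file. "b" stands for the epoch (barrier) object
 * currently being refilled. */
int rv = _req_mod(req, resend);          /* MR_WRITE, MR_READ, or 0 */

if (rv & MR_WRITE)
	b->n_writes++;                   /* counts toward the current epoch again */
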
@@ -244,30 +263,36 @@ static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
244 return NULL; 263 return NULL;
245} 264}
246 265
266static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
267{
268 struct bio *bio;
269 bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
270
271 req->private_bio = bio;
272
273 bio->bi_private = req;
274 bio->bi_end_io = drbd_endio_pri;
275 bio->bi_next = NULL;
276}
277
247static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, 278static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
248 struct bio *bio_src) 279 struct bio *bio_src)
249{ 280{
250 struct bio *bio;
251 struct drbd_request *req = 281 struct drbd_request *req =
252 mempool_alloc(drbd_request_mempool, GFP_NOIO); 282 mempool_alloc(drbd_request_mempool, GFP_NOIO);
253 if (likely(req)) { 283 if (likely(req)) {
254 bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */ 284 drbd_req_make_private_bio(req, bio_src);
255 285
256 req->rq_state = 0; 286 req->rq_state = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0;
257 req->mdev = mdev; 287 req->mdev = mdev;
258 req->master_bio = bio_src; 288 req->master_bio = bio_src;
259 req->private_bio = bio;
260 req->epoch = 0; 289 req->epoch = 0;
261 req->sector = bio->bi_sector; 290 req->sector = bio_src->bi_sector;
262 req->size = bio->bi_size; 291 req->size = bio_src->bi_size;
263 req->start_time = jiffies; 292 req->start_time = jiffies;
264 INIT_HLIST_NODE(&req->colision); 293 INIT_HLIST_NODE(&req->colision);
265 INIT_LIST_HEAD(&req->tl_requests); 294 INIT_LIST_HEAD(&req->tl_requests);
266 INIT_LIST_HEAD(&req->w.list); 295 INIT_LIST_HEAD(&req->w.list);
267
268 bio->bi_private = req;
269 bio->bi_end_io = drbd_endio_pri;
270 bio->bi_next = NULL;
271 } 296 }
272 return req; 297 return req;
273} 298}
@@ -292,36 +317,43 @@ struct bio_and_error {
292 317
293extern void _req_may_be_done(struct drbd_request *req, 318extern void _req_may_be_done(struct drbd_request *req,
294 struct bio_and_error *m); 319 struct bio_and_error *m);
295extern void __req_mod(struct drbd_request *req, enum drbd_req_event what, 320extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
296 struct bio_and_error *m); 321 struct bio_and_error *m);
297extern void complete_master_bio(struct drbd_conf *mdev, 322extern void complete_master_bio(struct drbd_conf *mdev,
298 struct bio_and_error *m); 323 struct bio_and_error *m);
299 324
300/* use this if you don't want to deal with calling complete_master_bio() 325/* use this if you don't want to deal with calling complete_master_bio()
301 * outside the spinlock, e.g. when walking some list on cleanup. */ 326 * outside the spinlock, e.g. when walking some list on cleanup. */
302static inline void _req_mod(struct drbd_request *req, enum drbd_req_event what) 327static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
303{ 328{
304 struct drbd_conf *mdev = req->mdev; 329 struct drbd_conf *mdev = req->mdev;
305 struct bio_and_error m; 330 struct bio_and_error m;
331 int rv;
306 332
307 /* __req_mod possibly frees req, do not touch req after that! */ 333 /* __req_mod possibly frees req, do not touch req after that! */
308 __req_mod(req, what, &m); 334 rv = __req_mod(req, what, &m);
309 if (m.bio) 335 if (m.bio)
310 complete_master_bio(mdev, &m); 336 complete_master_bio(mdev, &m);
337
338 return rv;
311} 339}
312 340
313/* completion of master bio is outside of spinlock. 341/* completion of master bio is outside of spinlock.
 314 * If you need it irqsave, do it yourself! */ 342 * If you need it irqsave, do it yourself! */
315static inline void req_mod(struct drbd_request *req, 343static inline int req_mod(struct drbd_request *req,
316 enum drbd_req_event what) 344 enum drbd_req_event what)
317{ 345{
318 struct drbd_conf *mdev = req->mdev; 346 struct drbd_conf *mdev = req->mdev;
319 struct bio_and_error m; 347 struct bio_and_error m;
348 int rv;
349
320 spin_lock_irq(&mdev->req_lock); 350 spin_lock_irq(&mdev->req_lock);
321 __req_mod(req, what, &m); 351 rv = __req_mod(req, what, &m);
322 spin_unlock_irq(&mdev->req_lock); 352 spin_unlock_irq(&mdev->req_lock);
323 353
324 if (m.bio) 354 if (m.bio)
325 complete_master_bio(mdev, &m); 355 complete_master_bio(mdev, &m);
356
357 return rv;
326} 358}
327#endif 359#endif
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index ca4a16cea2d8..108d58015cd1 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -39,8 +39,6 @@
39#include "drbd_int.h" 39#include "drbd_int.h"
40#include "drbd_req.h" 40#include "drbd_req.h"
41 41
42#define SLEEP_TIME (HZ/10)
43
44static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel); 42static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
45 43
46 44
@@ -217,10 +215,8 @@ void drbd_endio_sec(struct bio *bio, int error)
217 */ 215 */
218void drbd_endio_pri(struct bio *bio, int error) 216void drbd_endio_pri(struct bio *bio, int error)
219{ 217{
220 unsigned long flags;
221 struct drbd_request *req = bio->bi_private; 218 struct drbd_request *req = bio->bi_private;
222 struct drbd_conf *mdev = req->mdev; 219 struct drbd_conf *mdev = req->mdev;
223 struct bio_and_error m;
224 enum drbd_req_event what; 220 enum drbd_req_event what;
225 int uptodate = bio_flagged(bio, BIO_UPTODATE); 221 int uptodate = bio_flagged(bio, BIO_UPTODATE);
226 222
@@ -246,12 +242,7 @@ void drbd_endio_pri(struct bio *bio, int error)
246 bio_put(req->private_bio); 242 bio_put(req->private_bio);
247 req->private_bio = ERR_PTR(error); 243 req->private_bio = ERR_PTR(error);
248 244
249 spin_lock_irqsave(&mdev->req_lock, flags); 245 req_mod(req, what);
250 __req_mod(req, what, &m);
251 spin_unlock_irqrestore(&mdev->req_lock, flags);
252
253 if (m.bio)
254 complete_master_bio(mdev, &m);
255} 246}
256 247
257int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 248int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
@@ -376,54 +367,145 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
376 struct drbd_epoch_entry *e; 367 struct drbd_epoch_entry *e;
377 368
378 if (!get_ldev(mdev)) 369 if (!get_ldev(mdev))
379 return 0; 370 return -EIO;
371
372 if (drbd_rs_should_slow_down(mdev))
373 goto defer;
380 374
381 /* GFP_TRY, because if there is no memory available right now, this may 375 /* GFP_TRY, because if there is no memory available right now, this may
382 * be rescheduled for later. It is "only" background resync, after all. */ 376 * be rescheduled for later. It is "only" background resync, after all. */
383 e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY); 377 e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY);
384 if (!e) 378 if (!e)
385 goto fail; 379 goto defer;
386 380
381 e->w.cb = w_e_send_csum;
387 spin_lock_irq(&mdev->req_lock); 382 spin_lock_irq(&mdev->req_lock);
388 list_add(&e->w.list, &mdev->read_ee); 383 list_add(&e->w.list, &mdev->read_ee);
389 spin_unlock_irq(&mdev->req_lock); 384 spin_unlock_irq(&mdev->req_lock);
390 385
391 e->w.cb = w_e_send_csum; 386 atomic_add(size >> 9, &mdev->rs_sect_ev);
392 if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0) 387 if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0)
393 return 1; 388 return 0;
389
390 /* drbd_submit_ee currently fails for one reason only:
391 * not being able to allocate enough bios.
392 * Is dropping the connection going to help? */
393 spin_lock_irq(&mdev->req_lock);
394 list_del(&e->w.list);
395 spin_unlock_irq(&mdev->req_lock);
394 396
395 drbd_free_ee(mdev, e); 397 drbd_free_ee(mdev, e);
396fail: 398defer:
397 put_ldev(mdev); 399 put_ldev(mdev);
398 return 2; 400 return -EAGAIN;
399} 401}
400 402
401void resync_timer_fn(unsigned long data) 403void resync_timer_fn(unsigned long data)
402{ 404{
403 unsigned long flags;
404 struct drbd_conf *mdev = (struct drbd_conf *) data; 405 struct drbd_conf *mdev = (struct drbd_conf *) data;
405 int queue; 406 int queue;
406 407
407 spin_lock_irqsave(&mdev->req_lock, flags); 408 queue = 1;
408 409 switch (mdev->state.conn) {
409 if (likely(!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags))) { 410 case C_VERIFY_S:
410 queue = 1; 411 mdev->resync_work.cb = w_make_ov_request;
411 if (mdev->state.conn == C_VERIFY_S) 412 break;
412 mdev->resync_work.cb = w_make_ov_request; 413 case C_SYNC_TARGET:
413 else 414 mdev->resync_work.cb = w_make_resync_request;
414 mdev->resync_work.cb = w_make_resync_request; 415 break;
415 } else { 416 default:
416 queue = 0; 417 queue = 0;
417 mdev->resync_work.cb = w_resync_inactive; 418 mdev->resync_work.cb = w_resync_inactive;
418 } 419 }
419 420
420 spin_unlock_irqrestore(&mdev->req_lock, flags);
421
422 /* harmless race: list_empty outside data.work.q_lock */ 421 /* harmless race: list_empty outside data.work.q_lock */
423 if (list_empty(&mdev->resync_work.list) && queue) 422 if (list_empty(&mdev->resync_work.list) && queue)
424 drbd_queue_work(&mdev->data.work, &mdev->resync_work); 423 drbd_queue_work(&mdev->data.work, &mdev->resync_work);
425} 424}
426 425
426static void fifo_set(struct fifo_buffer *fb, int value)
427{
428 int i;
429
430 for (i = 0; i < fb->size; i++)
431 fb->values[i] = value;
432}
433
434static int fifo_push(struct fifo_buffer *fb, int value)
435{
436 int ov;
437
438 ov = fb->values[fb->head_index];
439 fb->values[fb->head_index++] = value;
440
441 if (fb->head_index >= fb->size)
442 fb->head_index = 0;
443
444 return ov;
445}
446
447static void fifo_add_val(struct fifo_buffer *fb, int value)
448{
449 int i;
450
451 for (i = 0; i < fb->size; i++)
452 fb->values[i] += value;
453}
454
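
fifo_push() above implements the plan buffer as a fixed-size ring: writing the new correction at head_index returns the value that is now due, i.e. the one planned fb->size invocations ago. A toy trace, assuming an already allocated 3-slot fifo_buffer that starts out zeroed:

fifo_set(fb, 0);     /* plan is {0, 0, 0} */
fifo_push(fb, 5);    /* returns 0, plan is {5, 0, 0} */
fifo_push(fb, 7);    /* returns 0, plan is {5, 7, 0} */
fifo_push(fb, 2);    /* returns 0, plan is {5, 7, 2} */
fifo_push(fb, 0);    /* returns 5: the correction planned fb->size steps ago comes due */
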
455int drbd_rs_controller(struct drbd_conf *mdev)
456{
457 unsigned int sect_in; /* Number of sectors that came in since the last turn */
458 unsigned int want; /* The number of sectors we want in the proxy */
459 int req_sect; /* Number of sectors to request in this turn */
460 int correction; /* Number of sectors more we need in the proxy*/
461 int cps; /* correction per invocation of drbd_rs_controller() */
462 int steps; /* Number of time steps to plan ahead */
463 int curr_corr;
464 int max_sect;
465
466 sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
467 mdev->rs_in_flight -= sect_in;
468
469 spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */
470
471 steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
472
473 if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
474 want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps;
475 } else { /* normal path */
476 want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target :
477 sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10);
478 }
479
480 correction = want - mdev->rs_in_flight - mdev->rs_planed;
481
482 /* Plan ahead */
483 cps = correction / steps;
484 fifo_add_val(&mdev->rs_plan_s, cps);
485 mdev->rs_planed += cps * steps;
486
487 /* What we do in this step */
488 curr_corr = fifo_push(&mdev->rs_plan_s, 0);
489 spin_unlock(&mdev->peer_seq_lock);
490 mdev->rs_planed -= curr_corr;
491
492 req_sect = sect_in + curr_corr;
493 if (req_sect < 0)
494 req_sect = 0;
495
496 max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ;
497 if (req_sect > max_sect)
498 req_sect = max_sect;
499
500 /*
501 dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
502 sect_in, mdev->rs_in_flight, want, correction,
503 steps, cps, mdev->rs_planed, curr_corr, req_sect);
504 */
505
506 return req_sect;
507}
508
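
drbd_rs_controller() is a simple fill-level regulator: it computes how many sectors it wants in flight (either the configured c_fill_target, or enough to meet c_delay_target given the drain rate sect_in observed since the last turn), spreads the shortfall evenly over the next steps invocations through the plan FIFO, and requests sect_in plus whatever correction comes due this turn. A back-of-the-envelope run with invented numbers:

/* Invented numbers, not from a real configuration: steps = 10 plan slots,
 * c_fill_target = 1000 sectors, 600 sectors were in flight before this call,
 * 300 sectors are already planned, and 200 sectors drained since last time. */
int sect_in    = 200;
int in_flight  = 600 - sect_in;           /* 400 still in flight after the drain */
int want       = 1000;                    /* c_fill_target */
int correction = want - in_flight - 300;  /* 300 sectors still missing */
int cps        = correction / 10;         /* 30 sectors added to every plan slot */
/* curr_corr is whatever now falls out of the plan FIFO, say 50, so this turn
 * requests req_sect = sect_in + curr_corr = 250 sectors. */
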
427int w_make_resync_request(struct drbd_conf *mdev, 509int w_make_resync_request(struct drbd_conf *mdev,
428 struct drbd_work *w, int cancel) 510 struct drbd_work *w, int cancel)
429{ 511{
@@ -431,8 +513,9 @@ int w_make_resync_request(struct drbd_conf *mdev,
431 sector_t sector; 513 sector_t sector;
432 const sector_t capacity = drbd_get_capacity(mdev->this_bdev); 514 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
433 int max_segment_size; 515 int max_segment_size;
434 int number, i, size, pe, mx; 516 int number, rollback_i, size, pe, mx;
435 int align, queued, sndbuf; 517 int align, queued, sndbuf;
518 int i = 0;
436 519
437 if (unlikely(cancel)) 520 if (unlikely(cancel))
438 return 1; 521 return 1;
@@ -446,6 +529,12 @@ int w_make_resync_request(struct drbd_conf *mdev,
446 dev_err(DEV, "%s in w_make_resync_request\n", 529 dev_err(DEV, "%s in w_make_resync_request\n",
447 drbd_conn_str(mdev->state.conn)); 530 drbd_conn_str(mdev->state.conn));
448 531
532 if (mdev->rs_total == 0) {
533 /* empty resync? */
534 drbd_resync_finished(mdev);
535 return 1;
536 }
537
449 if (!get_ldev(mdev)) { 538 if (!get_ldev(mdev)) {
450 /* Since we only need to access mdev->rsync a 539 /* Since we only need to access mdev->rsync a
451 get_ldev_if_state(mdev,D_FAILED) would be sufficient, but 540 get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
@@ -458,11 +547,25 @@ int w_make_resync_request(struct drbd_conf *mdev,
458 547
459 /* starting with drbd 8.3.8, we can handle multi-bio EEs, 548 /* starting with drbd 8.3.8, we can handle multi-bio EEs,
460 * if it should be necessary */ 549 * if it should be necessary */
461 max_segment_size = mdev->agreed_pro_version < 94 ? 550 max_segment_size =
462 queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE; 551 mdev->agreed_pro_version < 94 ? queue_max_segment_size(mdev->rq_queue) :
552 mdev->agreed_pro_version < 95 ? DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_SEGMENT_SIZE;
463 553
464 number = SLEEP_TIME * mdev->sync_conf.rate / ((BM_BLOCK_SIZE / 1024) * HZ); 554 if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
465 pe = atomic_read(&mdev->rs_pending_cnt); 555 number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
556 mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
557 } else {
558 mdev->c_sync_rate = mdev->sync_conf.rate;
559 number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
560 }
561
562 /* Throttle resync on lower level disk activity, which may also be
563 * caused by application IO on Primary/SyncTarget.
564	 * Keep this after the call to drbd_rs_controller, as that assumes
565	 * it is called as precisely as possible every SLEEP_TIME,
566 * and would be confused otherwise. */
567 if (drbd_rs_should_slow_down(mdev))
568 goto requeue;
466 569
467 mutex_lock(&mdev->data.mutex); 570 mutex_lock(&mdev->data.mutex);
468 if (mdev->data.socket) 571 if (mdev->data.socket)
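Worked example (not part of the patch) of the unit conversion used above, assuming BM_BLOCK_SIZE of 4 KiB (BM_BLOCK_SHIFT == 12) and 512-byte sectors; the HZ and SLEEP_TIME values are illustrative only. The controller returns sectors, the request loop counts bitmap blocks, and c_sync_rate is reported in KiB/s:

#include <stdio.h>

#define HZ		100			/* illustrative tick rate */
#define SLEEP_TIME	(HZ / 10)		/* resync timer period, 100 ms */
#define BM_BLOCK_SHIFT	12			/* assumed: 4 KiB per bitmap bit */
#define BM_BLOCK_SIZE	(1 << BM_BLOCK_SHIFT)

int main(void)
{
	int req_sect = 2048;	/* 512-byte sectors suggested by the controller */
	int number = req_sect >> (BM_BLOCK_SHIFT - 9);	/* sectors -> bitmap blocks */
	int c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;

	printf("%d sectors -> %d bitmap blocks -> %d KiB/s\n",
	       req_sect, number, c_sync_rate);
	return 0;
}

With these numbers, 2048 sectors per tick become 256 bitmap blocks and an effective rate of 10240 KiB/s.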
@@ -476,6 +579,7 @@ int w_make_resync_request(struct drbd_conf *mdev,
476 mx = number; 579 mx = number;
477 580
478 /* Limit the number of pending RS requests to no more than the peer's receive buffer */ 581 /* Limit the number of pending RS requests to no more than the peer's receive buffer */
582 pe = atomic_read(&mdev->rs_pending_cnt);
479 if ((pe + number) > mx) { 583 if ((pe + number) > mx) {
480 number = mx - pe; 584 number = mx - pe;
481 } 585 }
@@ -526,6 +630,7 @@ next_sector:
526 * be prepared for all stripe sizes of software RAIDs. 630 * be prepared for all stripe sizes of software RAIDs.
527 */ 631 */
528 align = 1; 632 align = 1;
633 rollback_i = i;
529 for (;;) { 634 for (;;) {
530 if (size + BM_BLOCK_SIZE > max_segment_size) 635 if (size + BM_BLOCK_SIZE > max_segment_size)
531 break; 636 break;
@@ -561,14 +666,19 @@ next_sector:
561 size = (capacity-sector)<<9; 666 size = (capacity-sector)<<9;
562 if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) { 667 if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) {
563 switch (read_for_csum(mdev, sector, size)) { 668 switch (read_for_csum(mdev, sector, size)) {
564 case 0: /* Disk failure*/ 669 case -EIO: /* Disk failure */
565 put_ldev(mdev); 670 put_ldev(mdev);
566 return 0; 671 return 0;
567 case 2: /* Allocation failed */ 672 case -EAGAIN: /* allocation failed, or ldev busy */
568 drbd_rs_complete_io(mdev, sector); 673 drbd_rs_complete_io(mdev, sector);
569 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); 674 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
675 i = rollback_i;
570 goto requeue; 676 goto requeue;
571 /* case 1: everything ok */ 677 case 0:
678 /* everything ok */
679 break;
680 default:
681 BUG();
572 } 682 }
573 } else { 683 } else {
574 inc_rs_pending(mdev); 684 inc_rs_pending(mdev);
@@ -595,6 +705,7 @@ next_sector:
595 } 705 }
596 706
597 requeue: 707 requeue:
708 mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
598 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); 709 mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
599 put_ldev(mdev); 710 put_ldev(mdev);
600 return 1; 711 return 1;
@@ -670,6 +781,14 @@ static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int ca
670 return 1; 781 return 1;
671} 782}
672 783
784static void ping_peer(struct drbd_conf *mdev)
785{
786 clear_bit(GOT_PING_ACK, &mdev->flags);
787 request_ping(mdev);
788 wait_event(mdev->misc_wait,
789 test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED);
790}
791
673int drbd_resync_finished(struct drbd_conf *mdev) 792int drbd_resync_finished(struct drbd_conf *mdev)
674{ 793{
675 unsigned long db, dt, dbdt; 794 unsigned long db, dt, dbdt;
@@ -709,6 +828,8 @@ int drbd_resync_finished(struct drbd_conf *mdev)
709 if (!get_ldev(mdev)) 828 if (!get_ldev(mdev))
710 goto out; 829 goto out;
711 830
831 ping_peer(mdev);
832
712 spin_lock_irq(&mdev->req_lock); 833 spin_lock_irq(&mdev->req_lock);
713 os = mdev->state; 834 os = mdev->state;
714 835
@@ -801,6 +922,8 @@ out:
801 mdev->rs_paused = 0; 922 mdev->rs_paused = 0;
802 mdev->ov_start_sector = 0; 923 mdev->ov_start_sector = 0;
803 924
925 drbd_md_sync(mdev);
926
804 if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) { 927 if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
805 dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n"); 928 dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n");
806 drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished"); 929 drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
@@ -817,9 +940,13 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_ent
817{ 940{
818 if (drbd_ee_has_active_page(e)) { 941 if (drbd_ee_has_active_page(e)) {
819 /* This might happen if sendpage() has not finished */ 942 /* This might happen if sendpage() has not finished */
943 int i = (e->size + PAGE_SIZE -1) >> PAGE_SHIFT;
944 atomic_add(i, &mdev->pp_in_use_by_net);
945 atomic_sub(i, &mdev->pp_in_use);
820 spin_lock_irq(&mdev->req_lock); 946 spin_lock_irq(&mdev->req_lock);
821 list_add_tail(&e->w.list, &mdev->net_ee); 947 list_add_tail(&e->w.list, &mdev->net_ee);
822 spin_unlock_irq(&mdev->req_lock); 948 spin_unlock_irq(&mdev->req_lock);
949 wake_up(&drbd_pp_wait);
823 } else 950 } else
824 drbd_free_ee(mdev, e); 951 drbd_free_ee(mdev, e);
825} 952}
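Side note on the page accounting added above: (e->size + PAGE_SIZE - 1) >> PAGE_SHIFT is the usual round-up division to whole pages. A tiny userspace check (not part of the patch; PAGE_SHIFT of 12 assumed, sizes chosen arbitrarily):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
	unsigned long sizes[] = { 512, 4096, 4097, 32768 };
	int i;

	for (i = 0; i < 4; i++) {
		int pages = (sizes[i] + PAGE_SIZE - 1) >> PAGE_SHIFT;	/* round up */
		printf("%lu bytes -> %d page(s)\n", sizes[i], pages);
	}
	return 0;
}

This prints 1, 1, 2 and 8 pages respectively, matching what gets moved between pp_in_use and pp_in_use_by_net.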
@@ -926,9 +1053,12 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
926 return 1; 1053 return 1;
927 } 1054 }
928 1055
929 drbd_rs_complete_io(mdev, e->sector); 1056 if (get_ldev(mdev)) {
1057 drbd_rs_complete_io(mdev, e->sector);
1058 put_ldev(mdev);
1059 }
930 1060
931 di = (struct digest_info *)(unsigned long)e->block_id; 1061 di = e->digest;
932 1062
933 if (likely((e->flags & EE_WAS_ERROR) == 0)) { 1063 if (likely((e->flags & EE_WAS_ERROR) == 0)) {
934 /* quick hack to try to avoid a race against reconfiguration. 1064 /* quick hack to try to avoid a race against reconfiguration.
@@ -952,7 +1082,9 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
952 ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e); 1082 ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e);
953 } else { 1083 } else {
954 inc_rs_pending(mdev); 1084 inc_rs_pending(mdev);
955 e->block_id = ID_SYNCER; 1085 e->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
1086 e->flags &= ~EE_HAS_DIGEST; /* This e no longer has a digest pointer */
1087 kfree(di);
956 ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); 1088 ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
957 } 1089 }
958 } else { 1090 } else {
@@ -962,9 +1094,6 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
962 } 1094 }
963 1095
964 dec_unacked(mdev); 1096 dec_unacked(mdev);
965
966 kfree(di);
967
968 move_to_net_ee_or_free(mdev, e); 1097 move_to_net_ee_or_free(mdev, e);
969 1098
970 if (unlikely(!ok)) 1099 if (unlikely(!ok))
@@ -1034,9 +1163,12 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1034 1163
1035 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all 1164 /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
1036 * the resync lru has been cleaned up already */ 1165 * the resync lru has been cleaned up already */
1037 drbd_rs_complete_io(mdev, e->sector); 1166 if (get_ldev(mdev)) {
1167 drbd_rs_complete_io(mdev, e->sector);
1168 put_ldev(mdev);
1169 }
1038 1170
1039 di = (struct digest_info *)(unsigned long)e->block_id; 1171 di = e->digest;
1040 1172
1041 if (likely((e->flags & EE_WAS_ERROR) == 0)) { 1173 if (likely((e->flags & EE_WAS_ERROR) == 0)) {
1042 digest_size = crypto_hash_digestsize(mdev->verify_tfm); 1174 digest_size = crypto_hash_digestsize(mdev->verify_tfm);
@@ -1055,9 +1187,6 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1055 } 1187 }
1056 1188
1057 dec_unacked(mdev); 1189 dec_unacked(mdev);
1058
1059 kfree(di);
1060
1061 if (!eq) 1190 if (!eq)
1062 drbd_ov_oos_found(mdev, e->sector, e->size); 1191 drbd_ov_oos_found(mdev, e->sector, e->size);
1063 else 1192 else
@@ -1108,7 +1237,7 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1108 * dec_ap_pending will be done in got_BarrierAck 1237 * dec_ap_pending will be done in got_BarrierAck
1109 * or (on connection loss) in w_clear_epoch. */ 1238 * or (on connection loss) in w_clear_epoch. */
1110 ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER, 1239 ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER,
1111 (struct p_header *)p, sizeof(*p), 0); 1240 (struct p_header80 *)p, sizeof(*p), 0);
1112 drbd_put_data_sock(mdev); 1241 drbd_put_data_sock(mdev);
1113 1242
1114 return ok; 1243 return ok;
@@ -1173,6 +1302,24 @@ int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1173 return ok; 1302 return ok;
1174} 1303}
1175 1304
1305int w_restart_disk_io(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1306{
1307 struct drbd_request *req = container_of(w, struct drbd_request, w);
1308
1309 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
1310 drbd_al_begin_io(mdev, req->sector);
1311	/* Calling drbd_al_begin_io() out of the worker might deadlock
1312	   theoretically. Practically it cannot deadlock, since this is
1313 only used when unfreezing IOs. All the extents of the requests
1314 that made it into the TL are already active */
1315
1316 drbd_req_make_private_bio(req, req->master_bio);
1317 req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
1318 generic_make_request(req->private_bio);
1319
1320 return 1;
1321}
1322
1176static int _drbd_may_sync_now(struct drbd_conf *mdev) 1323static int _drbd_may_sync_now(struct drbd_conf *mdev)
1177{ 1324{
1178 struct drbd_conf *odev = mdev; 1325 struct drbd_conf *odev = mdev;
@@ -1298,14 +1445,6 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na)
1298 return retcode; 1445 return retcode;
1299} 1446}
1300 1447
1301static void ping_peer(struct drbd_conf *mdev)
1302{
1303 clear_bit(GOT_PING_ACK, &mdev->flags);
1304 request_ping(mdev);
1305 wait_event(mdev->misc_wait,
1306 test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED);
1307}
1308
1309/** 1448/**
1310 * drbd_start_resync() - Start the resync process 1449 * drbd_start_resync() - Start the resync process
1311 * @mdev: DRBD device. 1450 * @mdev: DRBD device.
@@ -1379,13 +1518,21 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1379 r = SS_UNKNOWN_ERROR; 1518 r = SS_UNKNOWN_ERROR;
1380 1519
1381 if (r == SS_SUCCESS) { 1520 if (r == SS_SUCCESS) {
1382 mdev->rs_total = 1521 unsigned long tw = drbd_bm_total_weight(mdev);
1383 mdev->rs_mark_left = drbd_bm_total_weight(mdev); 1522 unsigned long now = jiffies;
1523 int i;
1524
1384 mdev->rs_failed = 0; 1525 mdev->rs_failed = 0;
1385 mdev->rs_paused = 0; 1526 mdev->rs_paused = 0;
1386 mdev->rs_start =
1387 mdev->rs_mark_time = jiffies;
1388 mdev->rs_same_csum = 0; 1527 mdev->rs_same_csum = 0;
1528 mdev->rs_last_events = 0;
1529 mdev->rs_last_sect_ev = 0;
1530 mdev->rs_total = tw;
1531 mdev->rs_start = now;
1532 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
1533 mdev->rs_mark_left[i] = tw;
1534 mdev->rs_mark_time[i] = now;
1535 }
1389 _drbd_pause_after(mdev); 1536 _drbd_pause_after(mdev);
1390 } 1537 }
1391 write_unlock_irq(&global_state_lock); 1538 write_unlock_irq(&global_state_lock);
@@ -1397,12 +1544,31 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
1397 (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10), 1544 (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
1398 (unsigned long) mdev->rs_total); 1545 (unsigned long) mdev->rs_total);
1399 1546
1400 if (mdev->rs_total == 0) { 1547 if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) {
1401 /* Peer still reachable? Beware of failing before-resync-target handlers! */ 1548 /* This still has a race (about when exactly the peers
1402 ping_peer(mdev); 1549 * detect connection loss) that can lead to a full sync
1550 * on next handshake. In 8.3.9 we fixed this with explicit
1551 * resync-finished notifications, but the fix
1552 * introduces a protocol change. Sleeping for some
1553 * time longer than the ping interval + timeout on the
1554 * SyncSource, to give the SyncTarget the chance to
1555 * detect connection loss, then waiting for a ping
1556 * response (implicit in drbd_resync_finished) reduces
1557 * the race considerably, but does not solve it. */
1558 if (side == C_SYNC_SOURCE)
1559 schedule_timeout_interruptible(
1560 mdev->net_conf->ping_int * HZ +
1561 mdev->net_conf->ping_timeo*HZ/9);
1403 drbd_resync_finished(mdev); 1562 drbd_resync_finished(mdev);
1404 } 1563 }
1405 1564
1565 atomic_set(&mdev->rs_sect_in, 0);
1566 atomic_set(&mdev->rs_sect_ev, 0);
1567 mdev->rs_in_flight = 0;
1568 mdev->rs_planed = 0;
1569 spin_lock(&mdev->peer_seq_lock);
1570 fifo_set(&mdev->rs_plan_s, 0);
1571 spin_unlock(&mdev->peer_seq_lock);
1406 /* ns.conn may already be != mdev->state.conn, 1572 /* ns.conn may already be != mdev->state.conn,
1407 * we may have been paused in between, or become paused until 1573 * we may have been paused in between, or become paused until
1408 * the timer triggers. 1574 * the timer triggers.
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 3b57459bb745..767107cce982 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -258,8 +258,8 @@ static int irqdma_allocated;
258#include <linux/completion.h> 258#include <linux/completion.h>
259 259
260static struct request *current_req; 260static struct request *current_req;
261static struct request_queue *floppy_queue;
262static void do_fd_request(struct request_queue *q); 261static void do_fd_request(struct request_queue *q);
262static int set_next_request(void);
263 263
264#ifndef fd_get_dma_residue 264#ifndef fd_get_dma_residue
265#define fd_get_dma_residue() get_dma_residue(FLOPPY_DMA) 265#define fd_get_dma_residue() get_dma_residue(FLOPPY_DMA)
@@ -413,6 +413,7 @@ static struct gendisk *disks[N_DRIVE];
413static struct block_device *opened_bdev[N_DRIVE]; 413static struct block_device *opened_bdev[N_DRIVE];
414static DEFINE_MUTEX(open_lock); 414static DEFINE_MUTEX(open_lock);
415static struct floppy_raw_cmd *raw_cmd, default_raw_cmd; 415static struct floppy_raw_cmd *raw_cmd, default_raw_cmd;
416static int fdc_queue;
416 417
417/* 418/*
418 * This struct defines the different floppy types. 419 * This struct defines the different floppy types.
@@ -890,8 +891,8 @@ static void unlock_fdc(void)
890 del_timer(&fd_timeout); 891 del_timer(&fd_timeout);
891 cont = NULL; 892 cont = NULL;
892 clear_bit(0, &fdc_busy); 893 clear_bit(0, &fdc_busy);
893 if (current_req || blk_peek_request(floppy_queue)) 894 if (current_req || set_next_request())
894 do_fd_request(floppy_queue); 895 do_fd_request(current_req->q);
895 spin_unlock_irqrestore(&floppy_lock, flags); 896 spin_unlock_irqrestore(&floppy_lock, flags);
896 wake_up(&fdc_wait); 897 wake_up(&fdc_wait);
897} 898}
@@ -2243,8 +2244,8 @@ static void floppy_end_request(struct request *req, int error)
2243 * logical buffer */ 2244 * logical buffer */
2244static void request_done(int uptodate) 2245static void request_done(int uptodate)
2245{ 2246{
2246 struct request_queue *q = floppy_queue;
2247 struct request *req = current_req; 2247 struct request *req = current_req;
2248 struct request_queue *q;
2248 unsigned long flags; 2249 unsigned long flags;
2249 int block; 2250 int block;
2250 char msg[sizeof("request done ") + sizeof(int) * 3]; 2251 char msg[sizeof("request done ") + sizeof(int) * 3];
@@ -2258,6 +2259,8 @@ static void request_done(int uptodate)
2258 return; 2259 return;
2259 } 2260 }
2260 2261
2262 q = req->q;
2263
2261 if (uptodate) { 2264 if (uptodate) {
2262 /* maintain values for invalidation on geometry 2265 /* maintain values for invalidation on geometry
2263 * change */ 2266 * change */
@@ -2811,6 +2814,28 @@ static int make_raw_rw_request(void)
2811 return 2; 2814 return 2;
2812} 2815}
2813 2816
2817/*
2818 * Round-robin between our available drives, doing one request from each
2819 */
2820static int set_next_request(void)
2821{
2822 struct request_queue *q;
2823 int old_pos = fdc_queue;
2824
2825 do {
2826 q = disks[fdc_queue]->queue;
2827 if (++fdc_queue == N_DRIVE)
2828 fdc_queue = 0;
2829 if (q) {
2830 current_req = blk_fetch_request(q);
2831 if (current_req)
2832 break;
2833 }
2834 } while (fdc_queue != old_pos);
2835
2836 return current_req != NULL;
2837}
2838
2814static void redo_fd_request(void) 2839static void redo_fd_request(void)
2815{ 2840{
2816 int drive; 2841 int drive;
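The round-robin dispatch added above, reduced to a standalone userspace sketch (not part of the patch): pending[] and N_DRIVE stand in for per-disk request queues, the cursor persists across calls, at most one request is taken per call, and the search gives up after one full pass.

#include <stdio.h>

#define N_DRIVE 4

static int pending[N_DRIVE] = { 0, 2, 0, 1 };	/* requests waiting per drive */
static int fdc_queue;				/* persistent round-robin cursor */

static int set_next_request(void)
{
	int old_pos = fdc_queue;
	int picked = -1;

	do {
		int q = fdc_queue;

		if (++fdc_queue == N_DRIVE)
			fdc_queue = 0;
		if (pending[q]) {
			pending[q]--;		/* "fetch" one request */
			picked = q;
			break;
		}
	} while (fdc_queue != old_pos);		/* stop after one full pass */

	return picked;
}

int main(void)
{
	int drive;

	while ((drive = set_next_request()) >= 0)
		printf("servicing drive %d\n", drive);
	return 0;
}

Successive calls alternate between the drives that still have work queued, so one busy drive can no longer starve the others.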
@@ -2822,17 +2847,17 @@ static void redo_fd_request(void)
2822 2847
2823do_request: 2848do_request:
2824 if (!current_req) { 2849 if (!current_req) {
2825 struct request *req; 2850 int pending;
2851
2852 spin_lock_irq(&floppy_lock);
2853 pending = set_next_request();
2854 spin_unlock_irq(&floppy_lock);
2826 2855
2827 spin_lock_irq(floppy_queue->queue_lock); 2856 if (!pending) {
2828 req = blk_fetch_request(floppy_queue);
2829 spin_unlock_irq(floppy_queue->queue_lock);
2830 if (!req) {
2831 do_floppy = NULL; 2857 do_floppy = NULL;
2832 unlock_fdc(); 2858 unlock_fdc();
2833 return; 2859 return;
2834 } 2860 }
2835 current_req = req;
2836 } 2861 }
2837 drive = (long)current_req->rq_disk->private_data; 2862 drive = (long)current_req->rq_disk->private_data;
2838 set_fdc(drive); 2863 set_fdc(drive);
@@ -4165,6 +4190,13 @@ static int __init floppy_init(void)
4165 goto out_put_disk; 4190 goto out_put_disk;
4166 } 4191 }
4167 4192
4193 disks[dr]->queue = blk_init_queue(do_fd_request, &floppy_lock);
4194 if (!disks[dr]->queue) {
4195 err = -ENOMEM;
4196 goto out_put_disk;
4197 }
4198
4199 blk_queue_max_hw_sectors(disks[dr]->queue, 64);
4168 disks[dr]->major = FLOPPY_MAJOR; 4200 disks[dr]->major = FLOPPY_MAJOR;
4169 disks[dr]->first_minor = TOMINOR(dr); 4201 disks[dr]->first_minor = TOMINOR(dr);
4170 disks[dr]->fops = &floppy_fops; 4202 disks[dr]->fops = &floppy_fops;
@@ -4183,13 +4215,6 @@ static int __init floppy_init(void)
4183 if (err) 4215 if (err)
4184 goto out_unreg_blkdev; 4216 goto out_unreg_blkdev;
4185 4217
4186 floppy_queue = blk_init_queue(do_fd_request, &floppy_lock);
4187 if (!floppy_queue) {
4188 err = -ENOMEM;
4189 goto out_unreg_driver;
4190 }
4191 blk_queue_max_hw_sectors(floppy_queue, 64);
4192
4193 blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, 4218 blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE,
4194 floppy_find, NULL, NULL); 4219 floppy_find, NULL, NULL);
4195 4220
@@ -4317,7 +4342,6 @@ static int __init floppy_init(void)
4317 4342
4318 /* to be cleaned up... */ 4343 /* to be cleaned up... */
4319 disks[drive]->private_data = (void *)(long)drive; 4344 disks[drive]->private_data = (void *)(long)drive;
4320 disks[drive]->queue = floppy_queue;
4321 disks[drive]->flags |= GENHD_FL_REMOVABLE; 4345 disks[drive]->flags |= GENHD_FL_REMOVABLE;
4322 disks[drive]->driverfs_dev = &floppy_device[drive].dev; 4346 disks[drive]->driverfs_dev = &floppy_device[drive].dev;
4323 add_disk(disks[drive]); 4347 add_disk(disks[drive]);
@@ -4333,8 +4357,6 @@ out_flush_work:
4333 floppy_release_irq_and_dma(); 4357 floppy_release_irq_and_dma();
4334out_unreg_region: 4358out_unreg_region:
4335 blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); 4359 blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
4336 blk_cleanup_queue(floppy_queue);
4337out_unreg_driver:
4338 platform_driver_unregister(&floppy_driver); 4360 platform_driver_unregister(&floppy_driver);
4339out_unreg_blkdev: 4361out_unreg_blkdev:
4340 unregister_blkdev(FLOPPY_MAJOR, "fd"); 4362 unregister_blkdev(FLOPPY_MAJOR, "fd");
@@ -4342,6 +4364,8 @@ out_put_disk:
4342 while (dr--) { 4364 while (dr--) {
4343 del_timer(&motor_off_timer[dr]); 4365 del_timer(&motor_off_timer[dr]);
4344 put_disk(disks[dr]); 4366 put_disk(disks[dr]);
4367 if (disks[dr]->queue)
4368 blk_cleanup_queue(disks[dr]->queue);
4345 } 4369 }
4346 return err; 4370 return err;
4347} 4371}
@@ -4550,11 +4574,11 @@ static void __exit floppy_module_exit(void)
4550 platform_device_unregister(&floppy_device[drive]); 4574 platform_device_unregister(&floppy_device[drive]);
4551 } 4575 }
4552 put_disk(disks[drive]); 4576 put_disk(disks[drive]);
4577 blk_cleanup_queue(disks[drive]->queue);
4553 } 4578 }
4554 4579
4555 del_timer_sync(&fd_timeout); 4580 del_timer_sync(&fd_timeout);
4556 del_timer_sync(&fd_timer); 4581 del_timer_sync(&fd_timer);
4557 blk_cleanup_queue(floppy_queue);
4558 4582
4559 if (atomic_read(&usage_count)) 4583 if (atomic_read(&usage_count))
4560 floppy_release_irq_and_dma(); 4584 floppy_release_irq_and_dma();
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index a10c8c9b6b78..de3083b0a4f5 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -74,6 +74,7 @@
74#include <linux/highmem.h> 74#include <linux/highmem.h>
75#include <linux/kthread.h> 75#include <linux/kthread.h>
76#include <linux/splice.h> 76#include <linux/splice.h>
77#include <linux/sysfs.h>
77 78
78#include <asm/uaccess.h> 79#include <asm/uaccess.h>
79 80
@@ -738,6 +739,103 @@ static inline int is_loop_device(struct file *file)
738 return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR; 739 return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
739} 740}
740 741
742/* loop sysfs attributes */
743
744static ssize_t loop_attr_show(struct device *dev, char *page,
745 ssize_t (*callback)(struct loop_device *, char *))
746{
747 struct loop_device *l, *lo = NULL;
748
749 mutex_lock(&loop_devices_mutex);
750 list_for_each_entry(l, &loop_devices, lo_list)
751 if (disk_to_dev(l->lo_disk) == dev) {
752 lo = l;
753 break;
754 }
755 mutex_unlock(&loop_devices_mutex);
756
757 return lo ? callback(lo, page) : -EIO;
758}
759
760#define LOOP_ATTR_RO(_name) \
761static ssize_t loop_attr_##_name##_show(struct loop_device *, char *); \
762static ssize_t loop_attr_do_show_##_name(struct device *d, \
763 struct device_attribute *attr, char *b) \
764{ \
765 return loop_attr_show(d, b, loop_attr_##_name##_show); \
766} \
767static struct device_attribute loop_attr_##_name = \
768 __ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL);
769
770static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
771{
772 ssize_t ret;
773 char *p = NULL;
774
775 mutex_lock(&lo->lo_ctl_mutex);
776 if (lo->lo_backing_file)
777 p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
778 mutex_unlock(&lo->lo_ctl_mutex);
779
780 if (IS_ERR_OR_NULL(p))
781 ret = PTR_ERR(p);
782 else {
783 ret = strlen(p);
784 memmove(buf, p, ret);
785 buf[ret++] = '\n';
786 buf[ret] = 0;
787 }
788
789 return ret;
790}
791
792static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
793{
794 return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
795}
796
797static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
798{
799 return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
800}
801
802static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
803{
804 int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);
805
806 return sprintf(buf, "%s\n", autoclear ? "1" : "0");
807}
808
809LOOP_ATTR_RO(backing_file);
810LOOP_ATTR_RO(offset);
811LOOP_ATTR_RO(sizelimit);
812LOOP_ATTR_RO(autoclear);
813
814static struct attribute *loop_attrs[] = {
815 &loop_attr_backing_file.attr,
816 &loop_attr_offset.attr,
817 &loop_attr_sizelimit.attr,
818 &loop_attr_autoclear.attr,
819 NULL,
820};
821
822static struct attribute_group loop_attribute_group = {
823 .name = "loop",
824	.attrs = loop_attrs,
825};
826
827static int loop_sysfs_init(struct loop_device *lo)
828{
829 return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
830 &loop_attribute_group);
831}
832
833static void loop_sysfs_exit(struct loop_device *lo)
834{
835 sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
836 &loop_attribute_group);
837}
838
741static int loop_set_fd(struct loop_device *lo, fmode_t mode, 839static int loop_set_fd(struct loop_device *lo, fmode_t mode,
742 struct block_device *bdev, unsigned int arg) 840 struct block_device *bdev, unsigned int arg)
743{ 841{
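With the attribute group above in place, the new read-only files appear under /sys/block/loopN/loop/. A minimal userspace read of one of them (not part of the patch; the device name and backing file are illustrative, e.g. after "losetup /dev/loop0 disk.img"):

#include <stdio.h>

int main(void)
{
	char buf[256];
	FILE *f = fopen("/sys/block/loop0/loop/backing_file", "r");

	if (!f) {
		perror("open backing_file");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("loop0 is backed by: %s", buf);	/* attribute already ends in '\n' */
	fclose(f);
	return 0;
}

The offset, sizelimit and autoclear attributes can be read the same way, each returning a single line.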
@@ -837,6 +935,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
837 935
838 set_capacity(lo->lo_disk, size); 936 set_capacity(lo->lo_disk, size);
839 bd_set_size(bdev, size << 9); 937 bd_set_size(bdev, size << 9);
938 loop_sysfs_init(lo);
840 /* let user-space know about the new size */ 939 /* let user-space know about the new size */
841 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 940 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
842 941
@@ -855,6 +954,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
855 return 0; 954 return 0;
856 955
857out_clr: 956out_clr:
957 loop_sysfs_exit(lo);
858 lo->lo_thread = NULL; 958 lo->lo_thread = NULL;
859 lo->lo_device = NULL; 959 lo->lo_device = NULL;
860 lo->lo_backing_file = NULL; 960 lo->lo_backing_file = NULL;
@@ -951,6 +1051,7 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
951 set_capacity(lo->lo_disk, 0); 1051 set_capacity(lo->lo_disk, 0);
952 if (bdev) { 1052 if (bdev) {
953 bd_set_size(bdev, 0); 1053 bd_set_size(bdev, 0);
1054 loop_sysfs_exit(lo);
954 /* let user-space know about this change */ 1055 /* let user-space know about this change */
955 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 1056 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
956 } 1057 }