aboutsummaryrefslogtreecommitdiffstats
path: root/fs/cifs/cifssmb.c
diff options
context:
space:
mode:
authorJeff Layton <jlayton@redhat.com>2011-10-19 15:30:07 -0400
committerJeff Layton <jlayton@redhat.com>2011-10-19 15:30:07 -0400
commite28bc5b1fdbd6e850488234d6072e6b66fc46146 (patch)
tree7d5292bb0389b1153fd11f738fe8644cdfb040d1 /fs/cifs/cifssmb.c
parent2ab2593f4b8953ff951f5531e695e487dfe0b51f (diff)
cifs: add cifs_async_readv
...which will allow cifs to do an asynchronous read call to the server. The caller will allocate and set up cifs_readdata for each READ_AND_X call that should be issued on the wire. The pages passed in are added to the pagecache, but not placed on the LRU list yet (as we need the page->lru to keep the pages on the list in the readdata).

When cifsd identifies the mid, it will see that there is a special receive handler for the call, and use that to receive the rest of the frame. cifs_readv_receive will then marshal up a kvec array with kmapped pages from the pagecache, which eliminates one copy of the data. Once the data is received, the pages are added to the LRU list, set uptodate, and unlocked.

Reviewed-and-Tested-by: Pavel Shilovsky <piastry@etersoft.ru>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Diffstat (limited to 'fs/cifs/cifssmb.c')
-rw-r--r--fs/cifs/cifssmb.c359
1 files changed, 359 insertions, 0 deletions
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 0613df4d8e74..aaad4ce6e6c5 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -33,6 +33,8 @@
33#include <linux/slab.h> 33#include <linux/slab.h>
34#include <linux/posix_acl_xattr.h> 34#include <linux/posix_acl_xattr.h>
35#include <linux/pagemap.h> 35#include <linux/pagemap.h>
36#include <linux/swap.h>
37#include <linux/task_io_accounting_ops.h>
36#include <asm/uaccess.h> 38#include <asm/uaccess.h>
37#include "cifspdu.h" 39#include "cifspdu.h"
38#include "cifsglob.h" 40#include "cifsglob.h"
@@ -40,6 +42,7 @@
40#include "cifsproto.h" 42#include "cifsproto.h"
41#include "cifs_unicode.h" 43#include "cifs_unicode.h"
42#include "cifs_debug.h" 44#include "cifs_debug.h"
45#include "fscache.h"
43 46
44#ifdef CONFIG_CIFS_POSIX 47#ifdef CONFIG_CIFS_POSIX
45static struct { 48static struct {
@@ -83,6 +86,9 @@ static struct {
83#endif /* CONFIG_CIFS_WEAK_PW_HASH */ 86#endif /* CONFIG_CIFS_WEAK_PW_HASH */
84#endif /* CIFS_POSIX */ 87#endif /* CIFS_POSIX */
85 88
89/* Forward declarations */
90static void cifs_readv_complete(struct work_struct *work);
91
86/* Mark as invalid, all open files on tree connections since they 92/* Mark as invalid, all open files on tree connections since they
87 were closed when session to server was lost */ 93 were closed when session to server was lost */
88static void mark_open_files_invalid(struct cifs_tcon *pTcon) 94static void mark_open_files_invalid(struct cifs_tcon *pTcon)
@@ -1375,6 +1381,359 @@ openRetry:
1375 return rc; 1381 return rc;
1376} 1382}
1377 1383
1384struct cifs_readdata *
1385cifs_readdata_alloc(unsigned int nr_pages)
1386{
1387 struct cifs_readdata *rdata;
1388
1389 /* readdata + 1 kvec for each page */
1390 rdata = kzalloc(sizeof(*rdata) +
1391 sizeof(struct kvec) * nr_pages, GFP_KERNEL);
1392 if (rdata != NULL) {
1393 INIT_WORK(&rdata->work, cifs_readv_complete);
1394 INIT_LIST_HEAD(&rdata->pages);
1395 }
1396 return rdata;
1397}
1398
1399void
1400cifs_readdata_free(struct cifs_readdata *rdata)
1401{
1402 cifsFileInfo_put(rdata->cfile);
1403 kfree(rdata);
1404}
1405
1406/*
1407 * Discard any remaining data in the current SMB. To do this, we borrow the
1408 * current bigbuf.
1409 */
1410static int
1411cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1412{
1413 READ_RSP *rsp = (READ_RSP *)server->smallbuf;
1414 unsigned int rfclen = be32_to_cpu(rsp->hdr.smb_buf_length);
1415 int remaining = rfclen + 4 - server->total_read;
1416 struct cifs_readdata *rdata = mid->callback_data;
1417
1418 while (remaining > 0) {
1419 int length;
1420
1421 length = cifs_read_from_socket(server, server->bigbuf,
1422 min_t(unsigned int, remaining,
1423 CIFSMaxBufSize + MAX_CIFS_HDR_SIZE));
1424 if (length < 0)
1425 return length;
1426 server->total_read += length;
1427 remaining -= length;
1428 }
1429
1430 dequeue_mid(mid, rdata->result);
1431 return 0;
1432}
1433
1434static int
1435cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1436{
1437 int length, len;
1438 unsigned int data_offset, remaining, data_len;
1439 struct cifs_readdata *rdata = mid->callback_data;
1440 READ_RSP *rsp = (READ_RSP *)server->smallbuf;
1441 unsigned int rfclen = be32_to_cpu(rsp->hdr.smb_buf_length) + 4;
1442 u64 eof;
1443 pgoff_t eof_index;
1444 struct page *page, *tpage;
1445
1446 cFYI(1, "%s: mid=%u offset=%llu bytes=%u", __func__,
1447 mid->mid, rdata->offset, rdata->bytes);
1448
1449 /*
1450 * read the rest of READ_RSP header (sans Data array), or whatever we
1451 * can if there's not enough data. At this point, we've read down to
1452 * the Mid.
1453 */
1454 len = min_t(unsigned int, rfclen, sizeof(*rsp)) -
1455 sizeof(struct smb_hdr) + 1;
1456
1457 rdata->iov[0].iov_base = server->smallbuf + sizeof(struct smb_hdr) - 1;
1458 rdata->iov[0].iov_len = len;
1459
1460 length = cifs_readv_from_socket(server, rdata->iov, 1, len);
1461 if (length < 0)
1462 return length;
1463 server->total_read += length;
1464
1465 /* Was the SMB read successful? */
1466 rdata->result = map_smb_to_linux_error(&rsp->hdr, false);
1467 if (rdata->result != 0) {
1468 cFYI(1, "%s: server returned error %d", __func__,
1469 rdata->result);
1470 return cifs_readv_discard(server, mid);
1471 }
1472
1473 /* Is there enough to get to the rest of the READ_RSP header? */
1474 if (server->total_read < sizeof(READ_RSP)) {
1475 cFYI(1, "%s: server returned short header. got=%u expected=%zu",
1476 __func__, server->total_read, sizeof(READ_RSP));
1477 rdata->result = -EIO;
1478 return cifs_readv_discard(server, mid);
1479 }
1480
1481 data_offset = le16_to_cpu(rsp->DataOffset) + 4;
1482 if (data_offset < server->total_read) {
1483 /*
1484 * win2k8 sometimes sends an offset of 0 when the read
1485 * is beyond the EOF. Treat it as if the data starts just after
1486 * the header.
1487 */
1488 cFYI(1, "%s: data offset (%u) inside read response header",
1489 __func__, data_offset);
1490 data_offset = server->total_read;
1491 } else if (data_offset > MAX_CIFS_SMALL_BUFFER_SIZE) {
1492 /* data_offset is beyond the end of smallbuf */
1493 cFYI(1, "%s: data offset (%u) beyond end of smallbuf",
1494 __func__, data_offset);
1495 rdata->result = -EIO;
1496 return cifs_readv_discard(server, mid);
1497 }
1498
1499 cFYI(1, "%s: total_read=%u data_offset=%u", __func__,
1500 server->total_read, data_offset);
1501
1502 len = data_offset - server->total_read;
1503 if (len > 0) {
1504 /* read any junk before data into the rest of smallbuf */
1505 rdata->iov[0].iov_base = server->smallbuf + server->total_read;
1506 rdata->iov[0].iov_len = len;
1507 length = cifs_readv_from_socket(server, rdata->iov, 1, len);
1508 if (length < 0)
1509 return length;
1510 server->total_read += length;
1511 }
1512
1513 /* set up first iov for signature check */
1514 rdata->iov[0].iov_base = server->smallbuf;
1515 rdata->iov[0].iov_len = server->total_read;
1516 cFYI(1, "0: iov_base=%p iov_len=%zu",
1517 rdata->iov[0].iov_base, rdata->iov[0].iov_len);
1518
1519 /* how much data is in the response? */
1520 data_len = le16_to_cpu(rsp->DataLengthHigh) << 16;
1521 data_len += le16_to_cpu(rsp->DataLength);
1522 if (data_offset + data_len > rfclen) {
1523 /* data_len is corrupt -- discard frame */
1524 rdata->result = -EIO;
1525 return cifs_readv_discard(server, mid);
1526 }
1527
1528 /* marshal up the page array */
1529 len = 0;
1530 remaining = data_len;
1531 rdata->nr_iov = 1;
1532
1533 /* determine the eof that the server (probably) has */
1534 eof = CIFS_I(rdata->mapping->host)->server_eof;
1535 eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
1536 cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
1537
1538 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
1539 if (remaining >= PAGE_CACHE_SIZE) {
1540 /* enough data to fill the page */
1541 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
1542 rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE;
1543 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
1544 rdata->nr_iov, page->index,
1545 rdata->iov[rdata->nr_iov].iov_base,
1546 rdata->iov[rdata->nr_iov].iov_len);
1547 ++rdata->nr_iov;
1548 len += PAGE_CACHE_SIZE;
1549 remaining -= PAGE_CACHE_SIZE;
1550 } else if (remaining > 0) {
1551 /* enough for partial page, fill and zero the rest */
1552 rdata->iov[rdata->nr_iov].iov_base = kmap(page);
1553 rdata->iov[rdata->nr_iov].iov_len = remaining;
1554 cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
1555 rdata->nr_iov, page->index,
1556 rdata->iov[rdata->nr_iov].iov_base,
1557 rdata->iov[rdata->nr_iov].iov_len);
1558 memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
1559 '\0', PAGE_CACHE_SIZE - remaining);
1560 ++rdata->nr_iov;
1561 len += remaining;
1562 remaining = 0;
1563 } else if (page->index > eof_index) {
1564 /*
1565 * The VFS will not try to do readahead past the
1566 * i_size, but it's possible that we have outstanding
1567 * writes with gaps in the middle and the i_size hasn't
1568 * caught up yet. Populate those with zeroed out pages
1569 * to prevent the VFS from repeatedly attempting to
1570 * fill them until the writes are flushed.
1571 */
1572 zero_user(page, 0, PAGE_CACHE_SIZE);
1573 list_del(&page->lru);
1574 lru_cache_add_file(page);
1575 flush_dcache_page(page);
1576 SetPageUptodate(page);
1577 unlock_page(page);
1578 page_cache_release(page);
1579 } else {
1580 /* no need to hold page hostage */
1581 list_del(&page->lru);
1582 lru_cache_add_file(page);
1583 unlock_page(page);
1584 page_cache_release(page);
1585 }
1586 }
1587
1588 /* issue the read if we have any iovecs left to fill */
1589 if (rdata->nr_iov > 1) {
1590 length = cifs_readv_from_socket(server, &rdata->iov[1],
1591 rdata->nr_iov - 1, len);
1592 if (length < 0)
1593 return length;
1594 server->total_read += length;
1595 } else {
1596 length = 0;
1597 }
1598
1599 rdata->bytes = length;
1600
1601 cFYI(1, "total_read=%u rfclen=%u remaining=%u", server->total_read,
1602 rfclen, remaining);
1603
1604 /* discard anything left over */
1605 if (server->total_read < rfclen)
1606 return cifs_readv_discard(server, mid);
1607
1608 dequeue_mid(mid, false);
1609 return length;
1610}
1611
1612static void
1613cifs_readv_complete(struct work_struct *work)
1614{
1615 struct cifs_readdata *rdata = container_of(work,
1616 struct cifs_readdata, work);
1617 struct page *page, *tpage;
1618
1619 list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
1620 list_del(&page->lru);
1621 lru_cache_add_file(page);
1622 kunmap(page);
1623
1624 if (rdata->result == 0) {
1625 flush_dcache_page(page);
1626 SetPageUptodate(page);
1627 }
1628
1629 unlock_page(page);
1630
1631 if (rdata->result == 0)
1632 cifs_readpage_to_fscache(rdata->mapping->host, page);
1633
1634 page_cache_release(page);
1635 }
1636 cifs_readdata_free(rdata);
1637}
1638
1639static void
1640cifs_readv_callback(struct mid_q_entry *mid)
1641{
1642 struct cifs_readdata *rdata = mid->callback_data;
1643 struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
1644 struct TCP_Server_Info *server = tcon->ses->server;
1645
1646 cFYI(1, "%s: mid=%u state=%d result=%d bytes=%u", __func__,
1647 mid->mid, mid->midState, rdata->result, rdata->bytes);
1648
1649 switch (mid->midState) {
1650 case MID_RESPONSE_RECEIVED:
1651 /* result already set, check signature */
1652 if (server->sec_mode &
1653 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
1654 if (cifs_verify_signature(rdata->iov, rdata->nr_iov,
1655 server, mid->sequence_number + 1))
1656 cERROR(1, "Unexpected SMB signature");
1657 }
1658 /* FIXME: should this be counted toward the initiating task? */
1659 task_io_account_read(rdata->bytes);
1660 cifs_stats_bytes_read(tcon, rdata->bytes);
1661 break;
1662 case MID_REQUEST_SUBMITTED:
1663 case MID_RETRY_NEEDED:
1664 rdata->result = -EAGAIN;
1665 break;
1666 default:
1667 rdata->result = -EIO;
1668 }
1669
1670 queue_work(system_nrt_wq, &rdata->work);
1671 DeleteMidQEntry(mid);
1672 atomic_dec(&server->inFlight);
1673 wake_up(&server->request_q);
1674}
1675
1676/* cifs_async_readv - send an async write, and set up mid to handle result */
1677int
1678cifs_async_readv(struct cifs_readdata *rdata)
1679{
1680 int rc;
1681 READ_REQ *smb = NULL;
1682 int wct;
1683 struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
1684
1685 cFYI(1, "%s: offset=%llu bytes=%u", __func__,
1686 rdata->offset, rdata->bytes);
1687
1688 if (tcon->ses->capabilities & CAP_LARGE_FILES)
1689 wct = 12;
1690 else {
1691 wct = 10; /* old style read */
1692 if ((rdata->offset >> 32) > 0) {
1693 /* can not handle this big offset for old */
1694 return -EIO;
1695 }
1696 }
1697
1698 rc = small_smb_init(SMB_COM_READ_ANDX, wct, tcon, (void **)&smb);
1699 if (rc)
1700 return rc;
1701
1702 smb->hdr.Pid = cpu_to_le16((__u16)rdata->pid);
1703 smb->hdr.PidHigh = cpu_to_le16((__u16)(rdata->pid >> 16));
1704
1705 smb->AndXCommand = 0xFF; /* none */
1706 smb->Fid = rdata->cfile->netfid;
1707 smb->OffsetLow = cpu_to_le32(rdata->offset & 0xFFFFFFFF);
1708 if (wct == 12)
1709 smb->OffsetHigh = cpu_to_le32(rdata->offset >> 32);
1710 smb->Remaining = 0;
1711 smb->MaxCount = cpu_to_le16(rdata->bytes & 0xFFFF);
1712 smb->MaxCountHigh = cpu_to_le32(rdata->bytes >> 16);
1713 if (wct == 12)
1714 smb->ByteCount = 0;
1715 else {
1716 /* old style read */
1717 struct smb_com_readx_req *smbr =
1718 (struct smb_com_readx_req *)smb;
1719 smbr->ByteCount = 0;
1720 }
1721
1722 /* 4 for RFC1001 length + 1 for BCC */
1723 rdata->iov[0].iov_base = smb;
1724 rdata->iov[0].iov_len = be32_to_cpu(smb->hdr.smb_buf_length) + 4;
1725
1726 rc = cifs_call_async(tcon->ses->server, rdata->iov, 1,
1727 cifs_readv_receive, cifs_readv_callback,
1728 rdata, false);
1729
1730 if (rc == 0)
1731 cifs_stats_inc(&tcon->num_reads);
1732
1733 cifs_small_buf_release(smb);
1734 return rc;
1735}
1736
1378int 1737int
1379CIFSSMBRead(const int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes, 1738CIFSSMBRead(const int xid, struct cifs_io_parms *io_parms, unsigned int *nbytes,
1380 char **buf, int *pbuf_type) 1739 char **buf, int *pbuf_type)