author     Rob Rice <rob.rice@broadcom.com>          2016-11-14 13:25:58 -0500
committer  Jassi Brar <jaswinder.singh@linaro.org>   2016-12-19 09:40:19 -0500
commit     ab8d1b2d564f6649547b97e65806556c42f93a26 (patch)
tree       f32f34cb719bdcba8bf7c8f7a9e5ae810497cbeb /drivers/mailbox
parent     9310f1ded44067b2da61fa0471ca5b7768dd28ae (diff)
mailbox: bcm-pdc: Convert from interrupts to poll for tx done
The PDC driver is a mailbox controller. A mailbox controller can report that a
mailbox message has been "transmitted" either when a tx interrupt fires or by
having the mailbox framework poll. This commit converts the PDC driver to the
poll method.

We found that the tx interrupt happens when the descriptors are read by the SPU
hw. Thus, the interrupt method does not allow more than one tx message in the
PDC tx DMA ring at a time. To keep the SPU hw busy, we would like to keep the
tx ring full under heavy load. With the poll method, the PDC driver responds
that the previous message has been transmitted if the tx ring has space for
another message. SPU request messages take a variable number of descriptors.
If 15 descriptors are available, there is a good chance another message will
fit. Also increased the ring size from 128 to 512 descriptors.

With this change, I found the PDC driver hangs on its spinlock under heavy
load. The PDC spinlock is not required, so I removed it. Calls to
pdc_send_data() are already synchronized because of the channel spinlock in
the mailbox framework. Other references to ring indexes should not require
locking because they are only written on either the tx or rx side.

Signed-off-by: Rob Rice <rob.rice@broadcom.com>
Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
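For readers less familiar with the mailbox framework, the poll conversion described above maps onto three pieces of the controller API: a last_tx_done() callback in the channel ops, plus the txdone_poll and txpoll_period fields of struct mbox_controller. The sketch below is illustrative only; the my_* names and the simple ring-space counter are invented for the example and are not the driver's code (the actual changes are in the diff that follows).

#include <linux/atomic.h>
#include <linux/mailbox_controller.h>

/* Hypothetical per-controller state; the real driver tracks DMA ring indexes. */
struct my_state {
	atomic_t ring_space;		/* free descriptor slots */
};

#define MY_RING_SPACE_MIN 15		/* mirrors PDC_RING_SPACE_MIN */

/* Poll callback: "previous tx done" means the hw ring can take another message. */
static bool my_last_tx_done(struct mbox_chan *chan)
{
	struct my_state *st = chan->con_priv;

	return atomic_read(&st->ring_space) >= MY_RING_SPACE_MIN;
}

static int my_send_data(struct mbox_chan *chan, void *data)
{
	/* Write descriptors here; return -ENOSPC if the ring cannot take them. */
	return 0;
}

static const struct mbox_chan_ops my_chan_ops = {
	.send_data    = my_send_data,
	.last_tx_done = my_last_tx_done,
};

static void my_mbox_use_polling(struct mbox_controller *mbc)
{
	mbc->ops = &my_chan_ops;
	mbc->txdone_irq = false;	/* tx done no longer signalled by an IRQ */
	mbc->txdone_poll = true;	/* framework polls last_tx_done() instead */
	mbc->txpoll_period = 1;		/* poll period in ms */
}

With txdone_poll set, the framework calls last_tx_done() from a timer every txpoll_period milliseconds and releases the channel for the next message only when it returns true, which is what allows several requests to sit in the DMA ring at once.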
Diffstat (limited to 'drivers/mailbox')
-rw-r--r--   drivers/mailbox/bcm-pdc-mailbox.c   207
1 file changed, 145 insertions(+), 62 deletions(-)
diff --git a/drivers/mailbox/bcm-pdc-mailbox.c b/drivers/mailbox/bcm-pdc-mailbox.c
index c9434a756bf3..fa3f484d3771 100644
--- a/drivers/mailbox/bcm-pdc-mailbox.c
+++ b/drivers/mailbox/bcm-pdc-mailbox.c
@@ -60,7 +60,13 @@
 #define RING_ENTRY_SIZE   sizeof(struct dma64dd)
 
 /* # entries in PDC dma ring */
-#define PDC_RING_ENTRIES  128
+#define PDC_RING_ENTRIES  512
+/*
+ * Minimum number of ring descriptor entries that must be free to tell mailbox
+ * framework that it can submit another request
+ */
+#define PDC_RING_SPACE_MIN  15
+
 #define PDC_RING_SIZE    (PDC_RING_ENTRIES * RING_ENTRY_SIZE)
 /* Rings are 8k aligned */
 #define RING_ALIGN_ORDER  13
@@ -93,11 +99,9 @@
  * Interrupt mask and status definitions. Enable interrupts for tx and rx on
  * ring 0
  */
-#define PDC_XMTINT_0         (24 + PDC_RINGSET)
 #define PDC_RCVINT_0         (16 + PDC_RINGSET)
-#define PDC_XMTINTEN_0       BIT(PDC_XMTINT_0)
 #define PDC_RCVINTEN_0       BIT(PDC_RCVINT_0)
-#define PDC_INTMASK          (PDC_XMTINTEN_0 | PDC_RCVINTEN_0)
+#define PDC_INTMASK          (PDC_RCVINTEN_0)
 #define PDC_LAZY_FRAMECOUNT  1
 #define PDC_LAZY_TIMEOUT     10000
 #define PDC_LAZY_INT  (PDC_LAZY_TIMEOUT | (PDC_LAZY_FRAMECOUNT << 24))
@@ -258,9 +262,6 @@ struct pdc_ring_alloc {
 
 /* PDC state structure */
 struct pdc_state {
-	/* synchronize access to this PDC state structure */
-	spinlock_t pdc_lock;
-
 	/* Index of the PDC whose state is in this structure instance */
 	u8 pdc_idx;
 
@@ -401,11 +402,14 @@ struct pdc_state {
 	struct dentry *debugfs_stats;  /* debug FS stats file for this PDC */
 
 	/* counters */
 	u32  pdc_requests;     /* number of request messages submitted */
 	u32  pdc_replies;      /* number of reply messages received */
-	u32  txnobuf;          /* count of tx ring full */
-	u32  rxnobuf;          /* count of rx ring full */
-	u32  rx_oflow;         /* count of rx overflows */
+	u32  last_tx_not_done; /* too few tx descriptors to indicate done */
+	u32  tx_ring_full;     /* unable to accept msg because tx ring full */
+	u32  rx_ring_full;     /* unable to accept msg because rx ring full */
+	u32  txnobuf;          /* unable to create tx descriptor */
+	u32  rxnobuf;          /* unable to create rx descriptor */
+	u32  rx_oflow;         /* count of rx overflows */
 };
 
 /* Global variables */
@@ -438,20 +442,33 @@ static ssize_t pdc_debugfs_read(struct file *filp, char __user *ubuf,
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
 			       "SPU %u stats:\n", pdcs->pdc_idx);
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
-			       "PDC requests............%u\n",
+			       "PDC requests....................%u\n",
 			       pdcs->pdc_requests);
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
-			       "PDC responses...........%u\n",
+			       "PDC responses...................%u\n",
 			       pdcs->pdc_replies);
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
-			       "Tx err ring full........%u\n",
+			       "Tx not done.....................%u\n",
+			       pdcs->last_tx_not_done);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "Tx ring full....................%u\n",
+			       pdcs->tx_ring_full);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "Rx ring full....................%u\n",
+			       pdcs->rx_ring_full);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "Tx desc write fail. Ring full...%u\n",
 			       pdcs->txnobuf);
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
-			       "Rx err ring full........%u\n",
+			       "Rx desc write fail. Ring full...%u\n",
 			       pdcs->rxnobuf);
 	out_offset += snprintf(buf + out_offset, out_count - out_offset,
-			       "Receive overflow........%u\n",
+			       "Receive overflow................%u\n",
 			       pdcs->rx_oflow);
+	out_offset += snprintf(buf + out_offset, out_count - out_offset,
+			       "Num frags in rx ring............%u\n",
+			       NRXDACTIVE(pdcs->rxin, pdcs->last_rx_curr,
+					  pdcs->nrxpost));
 
 	if (out_offset > out_count)
 		out_offset = out_count;
@@ -582,8 +599,6 @@ pdc_receive(struct pdc_state *pdcs, struct brcm_message *mssg)
 	u32 rx_idx;               /* ring index of start of receive frame */
 	dma_addr_t resp_hdr_daddr;
 
-	spin_lock(&pdcs->pdc_lock);
-
 	/*
 	 * return if a complete response message is not yet ready.
 	 * rxin_numd[rxin] is the number of fragments in the next msg
@@ -600,7 +615,6 @@ pdc_receive(struct pdc_state *pdcs, struct brcm_message *mssg)
 	if ((frags_rdy == 0) ||
 	    (frags_rdy < pdcs->rxin_numd[pdcs->rxin])) {
 		/* No response ready */
-		spin_unlock(&pdcs->pdc_lock);
 		return -EAGAIN;
 	}
 	/* can't read descriptors/data until write index is read */
@@ -630,8 +644,6 @@ pdc_receive(struct pdc_state *pdcs, struct brcm_message *mssg)
 	for (i = 0; i < num_frags; i++)
 		pdcs->rxin = NEXTRXD(pdcs->rxin, pdcs->nrxpost);
 
-	spin_unlock(&pdcs->pdc_lock);
-
 	dev_dbg(dev, "PDC %u reclaimed %d rx descriptors",
 		pdcs->pdc_idx, num_frags);
 
@@ -920,8 +932,6 @@ static irqreturn_t pdc_irq_handler(int irq, void *cookie)
 	struct pdc_state *pdcs = cookie;
 	u32 intstatus = ioread32(pdcs->pdc_reg_vbase + PDC_INTSTATUS_OFFSET);
 
-	if (intstatus & PDC_XMTINTEN_0)
-		set_bit(PDC_XMTINT_0, &pdcs->intstatus);
 	if (intstatus & PDC_RCVINTEN_0)
 		set_bit(PDC_RCVINT_0, &pdcs->intstatus);
 
@@ -953,45 +963,35 @@ static irqreturn_t pdc_irq_thread(int irq, void *cookie)
 	struct pdc_state *pdcs = cookie;
 	struct mbox_controller *mbc;
 	struct mbox_chan *chan;
-	bool tx_int;
 	bool rx_int;
 	int rx_status;
 	struct brcm_message mssg;
 
-	tx_int = test_and_clear_bit(PDC_XMTINT_0, &pdcs->intstatus);
 	rx_int = test_and_clear_bit(PDC_RCVINT_0, &pdcs->intstatus);
 
-	if (pdcs && (tx_int || rx_int)) {
+	if (pdcs && rx_int) {
 		dev_dbg(&pdcs->pdev->dev,
-			"%s() got irq %d with tx_int %s, rx_int %s",
-			__func__, irq,
-			tx_int ? "set" : "clear", rx_int ? "set" : "clear");
+			"%s() got irq %d with rx_int %s",
+			__func__, irq, rx_int ? "set" : "clear");
 
 		mbc = &pdcs->mbc;
 		chan = &mbc->chans[0];
 
-		if (tx_int) {
-			dev_dbg(&pdcs->pdev->dev, "%s(): tx done", __func__);
-			/* only one frame in flight at a time */
-			mbox_chan_txdone(chan, PDC_SUCCESS);
-		}
-		if (rx_int) {
-			while (1) {
-				/* Could be many frames ready */
-				memset(&mssg, 0, sizeof(mssg));
-				mssg.type = BRCM_MESSAGE_SPU;
-				rx_status = pdc_receive(pdcs, &mssg);
-				if (rx_status >= 0) {
-					dev_dbg(&pdcs->pdev->dev,
-						"%s(): invoking client rx cb",
-						__func__);
-					mbox_chan_received_data(chan, &mssg);
-				} else {
-					dev_dbg(&pdcs->pdev->dev,
-						"%s(): no SPU response available",
-						__func__);
-					break;
-				}
+		while (1) {
+			/* Could be many frames ready */
+			memset(&mssg, 0, sizeof(mssg));
+			mssg.type = BRCM_MESSAGE_SPU;
+			rx_status = pdc_receive(pdcs, &mssg);
+			if (rx_status >= 0) {
+				dev_dbg(&pdcs->pdev->dev,
+					"%s(): invoking client rx cb",
+					__func__);
+				mbox_chan_received_data(chan, &mssg);
+			} else {
+				dev_dbg(&pdcs->pdev->dev,
+					"%s(): no SPU response available",
+					__func__);
+				break;
 			}
 		}
 	}
 	return IRQ_HANDLED;
@@ -1036,9 +1036,6 @@ static int pdc_ring_init(struct pdc_state *pdcs, int ringset)
 	dev_dbg(dev, " - base DMA addr of rx ring      %pad", &rx.dmabase);
 	dev_dbg(dev, " - base virtual addr of rx ring  %p", rx.vbase);
 
-	/* lock after ring allocation to avoid scheduling while atomic */
-	spin_lock(&pdcs->pdc_lock);
-
 	memcpy(&pdcs->tx_ring_alloc, &tx, sizeof(tx));
 	memcpy(&pdcs->rx_ring_alloc, &rx, sizeof(rx));
 
@@ -1103,7 +1100,6 @@ static int pdc_ring_init(struct pdc_state *pdcs, int ringset)
 				    (void *)&pdcs->rxd_64[i].ctrl1);
 		}
 	}
-	spin_unlock(&pdcs->pdc_lock);
 	return PDC_SUCCESS;
 
 fail_dealloc:
@@ -1128,6 +1124,80 @@ static void pdc_ring_free(struct pdc_state *pdcs)
 }
 
 /**
+ * pdc_desc_count() - Count the number of DMA descriptors that will be required
+ * for a given scatterlist. Account for the max length of a DMA buffer.
+ * @sg:    Scatterlist to be DMA'd
+ * Return: Number of descriptors required
+ */
+static u32 pdc_desc_count(struct scatterlist *sg)
+{
+	u32 cnt = 0;
+
+	while (sg) {
+		cnt += ((sg->length / PDC_DMA_BUF_MAX) + 1);
+		sg = sg_next(sg);
+	}
+	return cnt;
+}
+
+/**
+ * pdc_rings_full() - Check whether the tx ring has room for tx_cnt descriptors
+ * and the rx ring has room for rx_cnt descriptors.
+ * @pdcs:   PDC state
+ * @tx_cnt: The number of descriptors required in the tx ring
+ * @rx_cnt: The number of descriptors required in the rx ring
+ *
+ * Return: true if one of the rings does not have enough space
+ *         false if sufficient space is available in both rings
+ */
+static bool pdc_rings_full(struct pdc_state *pdcs, int tx_cnt, int rx_cnt)
+{
+	u32 rx_avail;
+	u32 tx_avail;
+	bool full = false;
+
+	/* Check if the tx and rx rings are likely to have enough space */
+	rx_avail = pdcs->nrxpost - NRXDACTIVE(pdcs->rxin, pdcs->rxout,
+					      pdcs->nrxpost);
+	if (unlikely(rx_cnt > rx_avail)) {
+		pdcs->rx_ring_full++;
+		full = true;
+	}
+
+	if (likely(!full)) {
+		tx_avail = pdcs->ntxpost - NTXDACTIVE(pdcs->txin, pdcs->txout,
+						      pdcs->ntxpost);
+		if (unlikely(tx_cnt > tx_avail)) {
+			pdcs->tx_ring_full++;
+			full = true;
+		}
+	}
+	return full;
+}
+
+/**
+ * pdc_last_tx_done() - If both the tx and rx rings have at least
+ * PDC_RING_SPACE_MIN descriptors available, then indicate that the mailbox
+ * framework can submit another message.
+ * @chan:  mailbox channel to check
+ * Return: true if PDC can accept another message on this channel
+ */
+static bool pdc_last_tx_done(struct mbox_chan *chan)
+{
+	struct pdc_state *pdcs = chan->con_priv;
+	bool ret;
+
+	if (unlikely(pdc_rings_full(pdcs, PDC_RING_SPACE_MIN,
+				    PDC_RING_SPACE_MIN))) {
+		pdcs->last_tx_not_done++;
+		ret = false;
+	} else {
+		ret = true;
+	}
+	return ret;
+}
+
+/**
  * pdc_send_data() - mailbox send_data function
  * @chan:	The mailbox channel on which the data is sent. The channel
  *		corresponds to a DMA ringset.
@@ -1158,6 +1228,8 @@ static int pdc_send_data(struct mbox_chan *chan, void *data)
 	int src_nent;
 	int dst_nent;
 	int nent;
+	u32 tx_desc_req;
+	u32 rx_desc_req;
 
 	if (mssg->type != BRCM_MESSAGE_SPU)
 		return -ENOTSUPP;
@@ -1180,7 +1252,19 @@ static int pdc_send_data(struct mbox_chan *chan, void *data)
 		}
 	}
 
-	spin_lock(&pdcs->pdc_lock);
+	/*
+	 * Check if the tx and rx rings have enough space. Do this prior to
+	 * writing any tx or rx descriptors. Need to ensure that we do not write
+	 * a partial set of descriptors, or write just rx descriptors but
+	 * corresponding tx descriptors don't fit. Note that we want this check
+	 * and the entire sequence of descriptor writes to happen without
+	 * another thread getting in. The channel spin lock in the mailbox
+	 * framework ensures this.
+	 */
+	tx_desc_req = pdc_desc_count(mssg->spu.src);
+	rx_desc_req = pdc_desc_count(mssg->spu.dst);
+	if (pdc_rings_full(pdcs, tx_desc_req, rx_desc_req + 1))
+		return -ENOSPC;
 
 	/* Create rx descriptors to SPU catch response */
 	err = pdc_rx_list_init(pdcs, mssg->spu.dst, mssg->ctx);
@@ -1190,8 +1274,6 @@ static int pdc_send_data(struct mbox_chan *chan, void *data)
 	err |= pdc_tx_list_sg_add(pdcs, mssg->spu.src);
 	err |= pdc_tx_list_final(pdcs);	/* initiate transfer */
 
-	spin_unlock(&pdcs->pdc_lock);
-
 	if (err)
 		dev_err(&pdcs->pdev->dev,
 			"%s failed with error %d", __func__, err);
@@ -1359,6 +1441,7 @@ static int pdc_interrupts_init(struct pdc_state *pdcs)
 
 static const struct mbox_chan_ops pdc_mbox_chan_ops = {
 	.send_data = pdc_send_data,
+	.last_tx_done = pdc_last_tx_done,
 	.startup = pdc_startup,
 	.shutdown = pdc_shutdown
 };
@@ -1391,8 +1474,9 @@ static int pdc_mb_init(struct pdc_state *pdcs)
 	if (!mbc->chans)
 		return -ENOMEM;
 
-	mbc->txdone_irq = true;
-	mbc->txdone_poll = false;
+	mbc->txdone_irq = false;
+	mbc->txdone_poll = true;
+	mbc->txpoll_period = 1;
 	for (chan_index = 0; chan_index < mbc->num_chans; chan_index++)
 		mbc->chans[chan_index].con_priv = pdcs;
 
@@ -1462,7 +1546,6 @@ static int pdc_probe(struct platform_device *pdev)
 		goto cleanup;
 	}
 
-	spin_lock_init(&pdcs->pdc_lock);
 	pdcs->pdev = pdev;
 	platform_set_drvdata(pdev, pdcs);
 	pdcs->pdc_idx = pdcg.num_spu;