diff options
author | Ira Snyder <iws@ovro.caltech.edu> | 2012-01-26 06:00:14 -0500 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2012-02-26 19:33:59 -0500 |
commit | 6c15d7afbb2f9e2d3114b513306dae736b56f535 (patch) | |
tree | 17ad7aa4544ad1d83b50eb4929f77b7793b5f0f4 /drivers/misc/carma | |
parent | 75ff85a81680e5779383aa6210a4f89ed76e40ec (diff) |
carma-fpga: fix race between data dumping and DMA callback
When the system is under heavy load, we occasionally saw a problem where
the system would get a legitimate interrupt when they should be
disabled.
This was caused by the data_dma_cb() DMA callback unconditionally
re-enabling FPGA interrupts even when data dumping is disabled. When
data dumping was re-enabled, the irq handler would fire while a DMA was
in progress. The "BUG_ON(priv->inflight != NULL);" during the second
invocation of the DMA callback caused the system to crash.
To fix the issue, the priv->enabled boolean is moved under the
protection of the priv->lock spinlock. The DMA callback checks the
boolean to know whether to re-enable FPGA interrupts before it returns.
Now that it is fixed, the driver keeps FPGA interrupts disabled when it
expects that they are disabled, fixing the bug.
Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'drivers/misc/carma')
-rw-r--r-- | drivers/misc/carma/carma-fpga.c | 101 |
1 files changed, 61 insertions, 40 deletions
diff --git a/drivers/misc/carma/carma-fpga.c b/drivers/misc/carma/carma-fpga.c index 4fd896deda0d..0cfc5bf13fa5 100644 --- a/drivers/misc/carma/carma-fpga.c +++ b/drivers/misc/carma/carma-fpga.c | |||
@@ -560,6 +560,9 @@ static void data_enable_interrupts(struct fpga_device *priv) | |||
560 | 560 | ||
561 | /* flush the writes */ | 561 | /* flush the writes */ |
562 | fpga_read_reg(priv, 0, MMAP_REG_STATUS); | 562 | fpga_read_reg(priv, 0, MMAP_REG_STATUS); |
563 | fpga_read_reg(priv, 1, MMAP_REG_STATUS); | ||
564 | fpga_read_reg(priv, 2, MMAP_REG_STATUS); | ||
565 | fpga_read_reg(priv, 3, MMAP_REG_STATUS); | ||
563 | 566 | ||
564 | /* switch back to the external interrupt source */ | 567 | /* switch back to the external interrupt source */ |
565 | iowrite32be(0x3F, priv->regs + SYS_IRQ_SOURCE_CTL); | 568 | iowrite32be(0x3F, priv->regs + SYS_IRQ_SOURCE_CTL); |
@@ -591,8 +594,12 @@ static void data_dma_cb(void *data) | |||
591 | list_move_tail(&priv->inflight->entry, &priv->used); | 594 | list_move_tail(&priv->inflight->entry, &priv->used); |
592 | priv->inflight = NULL; | 595 | priv->inflight = NULL; |
593 | 596 | ||
594 | /* clear the FPGA status and re-enable interrupts */ | 597 | /* |
595 | data_enable_interrupts(priv); | 598 | * If data dumping is still enabled, then clear the FPGA |
599 | * status registers and re-enable FPGA interrupts | ||
600 | */ | ||
601 | if (priv->enabled) | ||
602 | data_enable_interrupts(priv); | ||
596 | 603 | ||
597 | spin_unlock_irqrestore(&priv->lock, flags); | 604 | spin_unlock_irqrestore(&priv->lock, flags); |
598 | 605 | ||
@@ -708,6 +715,15 @@ static irqreturn_t data_irq(int irq, void *dev_id) | |||
708 | 715 | ||
709 | spin_lock(&priv->lock); | 716 | spin_lock(&priv->lock); |
710 | 717 | ||
718 | /* | ||
719 | * This is an error case that should never happen. | ||
720 | * | ||
721 | * If this driver has a bug and manages to re-enable interrupts while | ||
722 | * a DMA is in progress, then we will hit this statement and should | ||
723 | * start paying attention immediately. | ||
724 | */ | ||
725 | BUG_ON(priv->inflight != NULL); | ||
726 | |||
711 | /* hide the interrupt by switching the IRQ driver to GPIO */ | 727 | /* hide the interrupt by switching the IRQ driver to GPIO */ |
712 | data_disable_interrupts(priv); | 728 | data_disable_interrupts(priv); |
713 | 729 | ||
@@ -762,11 +778,15 @@ out: | |||
762 | */ | 778 | */ |
763 | static int data_device_enable(struct fpga_device *priv) | 779 | static int data_device_enable(struct fpga_device *priv) |
764 | { | 780 | { |
781 | bool enabled; | ||
765 | u32 val; | 782 | u32 val; |
766 | int ret; | 783 | int ret; |
767 | 784 | ||
768 | /* multiple enables are safe: they do nothing */ | 785 | /* multiple enables are safe: they do nothing */ |
769 | if (priv->enabled) | 786 | spin_lock_irq(&priv->lock); |
787 | enabled = priv->enabled; | ||
788 | spin_unlock_irq(&priv->lock); | ||
789 | if (enabled) | ||
770 | return 0; | 790 | return 0; |
771 | 791 | ||
772 | /* check that the FPGAs are programmed */ | 792 | /* check that the FPGAs are programmed */ |
@@ -797,6 +817,9 @@ static int data_device_enable(struct fpga_device *priv) | |||
797 | goto out_error; | 817 | goto out_error; |
798 | } | 818 | } |
799 | 819 | ||
820 | /* prevent the FPGAs from generating interrupts */ | ||
821 | data_disable_interrupts(priv); | ||
822 | |||
800 | /* hookup the irq handler */ | 823 | /* hookup the irq handler */ |
801 | ret = request_irq(priv->irq, data_irq, IRQF_SHARED, drv_name, priv); | 824 | ret = request_irq(priv->irq, data_irq, IRQF_SHARED, drv_name, priv); |
802 | if (ret) { | 825 | if (ret) { |
@@ -804,11 +827,13 @@ static int data_device_enable(struct fpga_device *priv) | |||
804 | goto out_error; | 827 | goto out_error; |
805 | } | 828 | } |
806 | 829 | ||
807 | /* switch to the external FPGA IRQ line */ | 830 | /* allow the DMA callback to re-enable FPGA interrupts */ |
808 | data_enable_interrupts(priv); | 831 | spin_lock_irq(&priv->lock); |
809 | |||
810 | /* success, we're enabled */ | ||
811 | priv->enabled = true; | 832 | priv->enabled = true; |
833 | spin_unlock_irq(&priv->lock); | ||
834 | |||
835 | /* allow the FPGAs to generate interrupts */ | ||
836 | data_enable_interrupts(priv); | ||
812 | return 0; | 837 | return 0; |
813 | 838 | ||
814 | out_error: | 839 | out_error: |
@@ -834,41 +859,40 @@ out_error: | |||
834 | */ | 859 | */ |
835 | static int data_device_disable(struct fpga_device *priv) | 860 | static int data_device_disable(struct fpga_device *priv) |
836 | { | 861 | { |
837 | int ret; | 862 | spin_lock_irq(&priv->lock); |
838 | 863 | ||
839 | /* allow multiple disable */ | 864 | /* allow multiple disable */ |
840 | if (!priv->enabled) | 865 | if (!priv->enabled) { |
866 | spin_unlock_irq(&priv->lock); | ||
841 | return 0; | 867 | return 0; |
868 | } | ||
869 | |||
870 | /* | ||
871 | * Mark the device disabled | ||
872 | * | ||
873 | * This stops DMA callbacks from re-enabling interrupts | ||
874 | */ | ||
875 | priv->enabled = false; | ||
842 | 876 | ||
843 | /* switch to the internal GPIO IRQ line */ | 877 | /* prevent the FPGAs from generating interrupts */ |
844 | data_disable_interrupts(priv); | 878 | data_disable_interrupts(priv); |
845 | 879 | ||
880 | /* wait until all ongoing DMA has finished */ | ||
881 | while (priv->inflight != NULL) { | ||
882 | spin_unlock_irq(&priv->lock); | ||
883 | wait_event(priv->wait, priv->inflight == NULL); | ||
884 | spin_lock_irq(&priv->lock); | ||
885 | } | ||
886 | |||
887 | spin_unlock_irq(&priv->lock); | ||
888 | |||
846 | /* unhook the irq handler */ | 889 | /* unhook the irq handler */ |
847 | free_irq(priv->irq, priv); | 890 | free_irq(priv->irq, priv); |
848 | 891 | ||
849 | /* | ||
850 | * wait for all outstanding DMA to complete | ||
851 | * | ||
852 | * Device interrupts are disabled, therefore another buffer cannot | ||
853 | * be marked inflight. | ||
854 | */ | ||
855 | ret = wait_event_interruptible(priv->wait, priv->inflight == NULL); | ||
856 | if (ret) | ||
857 | return ret; | ||
858 | |||
859 | /* free the correlation table */ | 892 | /* free the correlation table */ |
860 | sg_free_table(&priv->corl_table); | 893 | sg_free_table(&priv->corl_table); |
861 | priv->corl_nents = 0; | 894 | priv->corl_nents = 0; |
862 | 895 | ||
863 | /* | ||
864 | * We are taking the spinlock not to protect priv->enabled, but instead | ||
865 | * to make sure that there are no readers in the process of altering | ||
866 | * the free or used lists while we are setting this flag. | ||
867 | */ | ||
868 | spin_lock_irq(&priv->lock); | ||
869 | priv->enabled = false; | ||
870 | spin_unlock_irq(&priv->lock); | ||
871 | |||
872 | /* free all buffers: the free and used lists are not being changed */ | 896 | /* free all buffers: the free and used lists are not being changed */ |
873 | data_free_buffers(priv); | 897 | data_free_buffers(priv); |
874 | return 0; | 898 | return 0; |
@@ -896,15 +920,6 @@ static unsigned int list_num_entries(struct list_head *list) | |||
896 | static int data_debug_show(struct seq_file *f, void *offset) | 920 | static int data_debug_show(struct seq_file *f, void *offset) |
897 | { | 921 | { |
898 | struct fpga_device *priv = f->private; | 922 | struct fpga_device *priv = f->private; |
899 | int ret; | ||
900 | |||
901 | /* | ||
902 | * Lock the mutex first, so that we get an accurate value for enable | ||
903 | * Lock the spinlock next, to get accurate list counts | ||
904 | */ | ||
905 | ret = mutex_lock_interruptible(&priv->mutex); | ||
906 | if (ret) | ||
907 | return ret; | ||
908 | 923 | ||
909 | spin_lock_irq(&priv->lock); | 924 | spin_lock_irq(&priv->lock); |
910 | 925 | ||
@@ -917,7 +932,6 @@ static int data_debug_show(struct seq_file *f, void *offset) | |||
917 | seq_printf(f, "num_dropped: %d\n", priv->num_dropped); | 932 | seq_printf(f, "num_dropped: %d\n", priv->num_dropped); |
918 | 933 | ||
919 | spin_unlock_irq(&priv->lock); | 934 | spin_unlock_irq(&priv->lock); |
920 | mutex_unlock(&priv->mutex); | ||
921 | return 0; | 935 | return 0; |
922 | } | 936 | } |
923 | 937 | ||
@@ -970,7 +984,13 @@ static ssize_t data_en_show(struct device *dev, struct device_attribute *attr, | |||
970 | char *buf) | 984 | char *buf) |
971 | { | 985 | { |
972 | struct fpga_device *priv = dev_get_drvdata(dev); | 986 | struct fpga_device *priv = dev_get_drvdata(dev); |
973 | return snprintf(buf, PAGE_SIZE, "%u\n", priv->enabled); | 987 | int ret; |
988 | |||
989 | spin_lock_irq(&priv->lock); | ||
990 | ret = snprintf(buf, PAGE_SIZE, "%u\n", priv->enabled); | ||
991 | spin_unlock_irq(&priv->lock); | ||
992 | |||
993 | return ret; | ||
974 | } | 994 | } |
975 | 995 | ||
976 | static ssize_t data_en_set(struct device *dev, struct device_attribute *attr, | 996 | static ssize_t data_en_set(struct device *dev, struct device_attribute *attr, |
@@ -986,6 +1006,7 @@ static ssize_t data_en_set(struct device *dev, struct device_attribute *attr, | |||
986 | return -EINVAL; | 1006 | return -EINVAL; |
987 | } | 1007 | } |
988 | 1008 | ||
1009 | /* protect against concurrent enable/disable */ | ||
989 | ret = mutex_lock_interruptible(&priv->mutex); | 1010 | ret = mutex_lock_interruptible(&priv->mutex); |
990 | if (ret) | 1011 | if (ret) |
991 | return ret; | 1012 | return ret; |