diff options
-rw-r--r-- | Documentation/kernel-parameters.txt | 4 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/iommu.c | 587 |
2 files changed, 591 insertions, 0 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f4a04c0c7edc..b245ce61ecb8 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -626,6 +626,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
626 | disable= [IPV6] | 626 | disable= [IPV6] |
627 | See Documentation/networking/ipv6.txt. | 627 | See Documentation/networking/ipv6.txt. |
628 | 628 | ||
629 | disable_ddw [PPC/PSERIES] | ||
630 | Disable Dynamic DMA Window support. Use this | ||
631 | to work around buggy firmware. | ||
632 | |||
629 | disable_ipv6= [IPV6] | 633 | disable_ipv6= [IPV6] |
630 | See Documentation/networking/ipv6.txt. | 634 | See Documentation/networking/ipv6.txt. |
631 | 635 | ||
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index edea60b7ee90..154c464cdca5 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/pci.h> | 33 | #include <linux/pci.h> |
34 | #include <linux/dma-mapping.h> | 34 | #include <linux/dma-mapping.h> |
35 | #include <linux/crash_dump.h> | 35 | #include <linux/crash_dump.h> |
36 | #include <linux/memory.h> | ||
36 | #include <asm/io.h> | 37 | #include <asm/io.h> |
37 | #include <asm/prom.h> | 38 | #include <asm/prom.h> |
38 | #include <asm/rtas.h> | 39 | #include <asm/rtas.h> |
@@ -45,6 +46,7 @@ | |||
45 | #include <asm/tce.h> | 46 | #include <asm/tce.h> |
46 | #include <asm/ppc-pci.h> | 47 | #include <asm/ppc-pci.h> |
47 | #include <asm/udbg.h> | 48 | #include <asm/udbg.h> |
49 | #include <asm/mmzone.h> | ||
48 | 50 | ||
49 | #include "plpar_wrappers.h" | 51 | #include "plpar_wrappers.h" |
50 | 52 | ||
@@ -270,6 +272,152 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum) | |||
270 | return tce_ret; | 272 | return tce_ret; |
271 | } | 273 | } |
272 | 274 | ||
275 | /* this is compatable with cells for the device tree property */ | ||
276 | struct dynamic_dma_window_prop { | ||
277 | __be32 liobn; /* tce table number */ | ||
278 | __be64 dma_base; /* address hi,lo */ | ||
279 | __be32 tce_shift; /* ilog2(tce_page_size) */ | ||
280 | __be32 window_shift; /* ilog2(tce_window_size) */ | ||
281 | }; | ||
282 | |||
283 | struct direct_window { | ||
284 | struct device_node *device; | ||
285 | const struct dynamic_dma_window_prop *prop; | ||
286 | struct list_head list; | ||
287 | }; | ||
288 | |||
289 | /* Dynamic DMA Window support */ | ||
290 | struct ddw_query_response { | ||
291 | u32 windows_available; | ||
292 | u32 largest_available_block; | ||
293 | u32 page_size; | ||
294 | u32 migration_capable; | ||
295 | }; | ||
296 | |||
297 | struct ddw_create_response { | ||
298 | u32 liobn; | ||
299 | u32 addr_hi; | ||
300 | u32 addr_lo; | ||
301 | }; | ||
302 | |||
/* all direct windows recorded so far, as struct direct_window entries */
static LIST_HEAD(direct_window_list);
/* prevents races between memory on/offline and window creation */
static DEFINE_SPINLOCK(direct_window_list_lock);
/* protects initializing window twice for same device */
static DEFINE_MUTEX(direct_window_init_mutex);
/* device tree property holding a struct dynamic_dma_window_prop */
#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
309 | |||
310 | static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn, | ||
311 | unsigned long num_pfn, const void *arg) | ||
312 | { | ||
313 | const struct dynamic_dma_window_prop *maprange = arg; | ||
314 | int rc; | ||
315 | u64 tce_size, num_tce, dma_offset, next; | ||
316 | u32 tce_shift; | ||
317 | long limit; | ||
318 | |||
319 | tce_shift = be32_to_cpu(maprange->tce_shift); | ||
320 | tce_size = 1ULL << tce_shift; | ||
321 | next = start_pfn << PAGE_SHIFT; | ||
322 | num_tce = num_pfn << PAGE_SHIFT; | ||
323 | |||
324 | /* round back to the beginning of the tce page size */ | ||
325 | num_tce += next & (tce_size - 1); | ||
326 | next &= ~(tce_size - 1); | ||
327 | |||
328 | /* covert to number of tces */ | ||
329 | num_tce |= tce_size - 1; | ||
330 | num_tce >>= tce_shift; | ||
331 | |||
332 | do { | ||
333 | /* | ||
334 | * Set up the page with TCE data, looping through and setting | ||
335 | * the values. | ||
336 | */ | ||
337 | limit = min_t(long, num_tce, 512); | ||
338 | dma_offset = next + be64_to_cpu(maprange->dma_base); | ||
339 | |||
340 | rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn), | ||
341 | dma_offset, | ||
342 | 0, limit); | ||
343 | num_tce -= limit; | ||
344 | } while (num_tce > 0 && !rc); | ||
345 | |||
346 | return rc; | ||
347 | } | ||
348 | |||
349 | static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, | ||
350 | unsigned long num_pfn, const void *arg) | ||
351 | { | ||
352 | const struct dynamic_dma_window_prop *maprange = arg; | ||
353 | u64 *tcep, tce_size, num_tce, dma_offset, next, proto_tce, liobn; | ||
354 | u32 tce_shift; | ||
355 | u64 rc = 0; | ||
356 | long l, limit; | ||
357 | |||
358 | local_irq_disable(); /* to protect tcep and the page behind it */ | ||
359 | tcep = __get_cpu_var(tce_page); | ||
360 | |||
361 | if (!tcep) { | ||
362 | tcep = (u64 *)__get_free_page(GFP_ATOMIC); | ||
363 | if (!tcep) { | ||
364 | local_irq_enable(); | ||
365 | return -ENOMEM; | ||
366 | } | ||
367 | __get_cpu_var(tce_page) = tcep; | ||
368 | } | ||
369 | |||
370 | proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; | ||
371 | |||
372 | liobn = (u64)be32_to_cpu(maprange->liobn); | ||
373 | tce_shift = be32_to_cpu(maprange->tce_shift); | ||
374 | tce_size = 1ULL << tce_shift; | ||
375 | next = start_pfn << PAGE_SHIFT; | ||
376 | num_tce = num_pfn << PAGE_SHIFT; | ||
377 | |||
378 | /* round back to the beginning of the tce page size */ | ||
379 | num_tce += next & (tce_size - 1); | ||
380 | next &= ~(tce_size - 1); | ||
381 | |||
382 | /* covert to number of tces */ | ||
383 | num_tce |= tce_size - 1; | ||
384 | num_tce >>= tce_shift; | ||
385 | |||
386 | /* We can map max one pageful of TCEs at a time */ | ||
387 | do { | ||
388 | /* | ||
389 | * Set up the page with TCE data, looping through and setting | ||
390 | * the values. | ||
391 | */ | ||
392 | limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE); | ||
393 | dma_offset = next + be64_to_cpu(maprange->dma_base); | ||
394 | |||
395 | for (l = 0; l < limit; l++) { | ||
396 | tcep[l] = proto_tce | next; | ||
397 | next += tce_size; | ||
398 | } | ||
399 | |||
400 | rc = plpar_tce_put_indirect(liobn, | ||
401 | dma_offset, | ||
402 | (u64)virt_to_abs(tcep), | ||
403 | limit); | ||
404 | |||
405 | num_tce -= limit; | ||
406 | } while (num_tce > 0 && !rc); | ||
407 | |||
408 | /* error cleanup: caller will clear whole range */ | ||
409 | |||
410 | local_irq_enable(); | ||
411 | return rc; | ||
412 | } | ||
413 | |||
/*
 * Adapter with a non-const "void *arg", the signature passed to
 * walk_system_ram_range() by enable_ddw(); forwards unchanged to
 * tce_setrange_multi_pSeriesLP().
 */
static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
		unsigned long num_pfn, void *arg)
{
	const void *maprange = arg;

	return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, maprange);
}
419 | |||
420 | |||
273 | #ifdef CONFIG_PCI | 421 | #ifdef CONFIG_PCI |
274 | static void iommu_table_setparms(struct pci_controller *phb, | 422 | static void iommu_table_setparms(struct pci_controller *phb, |
275 | struct device_node *dn, | 423 | struct device_node *dn, |
@@ -495,6 +643,329 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) | |||
495 | pci_name(dev)); | 643 | pci_name(dev)); |
496 | } | 644 | } |
497 | 645 | ||
646 | static int __read_mostly disable_ddw; | ||
647 | |||
648 | static int __init disable_ddw_setup(char *str) | ||
649 | { | ||
650 | disable_ddw = 1; | ||
651 | printk(KERN_INFO "ppc iommu: disabling ddw.\n"); | ||
652 | |||
653 | return 0; | ||
654 | } | ||
655 | |||
656 | early_param("disable_ddw", disable_ddw_setup); | ||
657 | |||
658 | static void remove_ddw(struct device_node *np) | ||
659 | { | ||
660 | struct dynamic_dma_window_prop *dwp; | ||
661 | struct property *win64; | ||
662 | const u32 *ddr_avail; | ||
663 | u64 liobn; | ||
664 | int len, ret; | ||
665 | |||
666 | ddr_avail = of_get_property(np, "ibm,ddw-applicable", &len); | ||
667 | win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); | ||
668 | if (!win64 || !ddr_avail || len < 3 * sizeof(u32)) | ||
669 | return; | ||
670 | |||
671 | dwp = win64->value; | ||
672 | liobn = (u64)be32_to_cpu(dwp->liobn); | ||
673 | |||
674 | /* clear the whole window, note the arg is in kernel pages */ | ||
675 | ret = tce_clearrange_multi_pSeriesLP(0, | ||
676 | 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp); | ||
677 | if (ret) | ||
678 | pr_warning("%s failed to clear tces in window.\n", | ||
679 | np->full_name); | ||
680 | else | ||
681 | pr_debug("%s successfully cleared tces in window.\n", | ||
682 | np->full_name); | ||
683 | |||
684 | ret = rtas_call(ddr_avail[2], 1, 1, NULL, liobn); | ||
685 | if (ret) | ||
686 | pr_warning("%s: failed to remove direct window: rtas returned " | ||
687 | "%d to ibm,remove-pe-dma-window(%x) %llx\n", | ||
688 | np->full_name, ret, ddr_avail[2], liobn); | ||
689 | else | ||
690 | pr_debug("%s: successfully removed direct window: rtas returned " | ||
691 | "%d to ibm,remove-pe-dma-window(%x) %llx\n", | ||
692 | np->full_name, ret, ddr_avail[2], liobn); | ||
693 | } | ||
694 | |||
695 | |||
696 | static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn) | ||
697 | { | ||
698 | struct device_node *dn; | ||
699 | struct pci_dn *pcidn; | ||
700 | struct direct_window *window; | ||
701 | const struct dynamic_dma_window_prop *direct64; | ||
702 | u64 dma_addr = 0; | ||
703 | |||
704 | dn = pci_device_to_OF_node(dev); | ||
705 | pcidn = PCI_DN(dn); | ||
706 | spin_lock(&direct_window_list_lock); | ||
707 | /* check if we already created a window and dupe that config if so */ | ||
708 | list_for_each_entry(window, &direct_window_list, list) { | ||
709 | if (window->device == pdn) { | ||
710 | direct64 = window->prop; | ||
711 | dma_addr = direct64->dma_base; | ||
712 | break; | ||
713 | } | ||
714 | } | ||
715 | spin_unlock(&direct_window_list_lock); | ||
716 | |||
717 | return dma_addr; | ||
718 | } | ||
719 | |||
720 | static u64 dupe_ddw_if_kexec(struct pci_dev *dev, struct device_node *pdn) | ||
721 | { | ||
722 | struct device_node *dn; | ||
723 | struct pci_dn *pcidn; | ||
724 | int len; | ||
725 | struct direct_window *window; | ||
726 | const struct dynamic_dma_window_prop *direct64; | ||
727 | u64 dma_addr = 0; | ||
728 | |||
729 | dn = pci_device_to_OF_node(dev); | ||
730 | pcidn = PCI_DN(dn); | ||
731 | direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len); | ||
732 | if (direct64) { | ||
733 | window = kzalloc(sizeof(*window), GFP_KERNEL); | ||
734 | if (!window) { | ||
735 | remove_ddw(pdn); | ||
736 | } else { | ||
737 | window->device = pdn; | ||
738 | window->prop = direct64; | ||
739 | spin_lock(&direct_window_list_lock); | ||
740 | list_add(&window->list, &direct_window_list); | ||
741 | spin_unlock(&direct_window_list_lock); | ||
742 | dma_addr = direct64->dma_base; | ||
743 | } | ||
744 | } | ||
745 | |||
746 | return dma_addr; | ||
747 | } | ||
748 | |||
749 | static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail, | ||
750 | struct ddw_query_response *query) | ||
751 | { | ||
752 | struct device_node *dn; | ||
753 | struct pci_dn *pcidn; | ||
754 | u32 cfg_addr; | ||
755 | u64 buid; | ||
756 | int ret; | ||
757 | |||
758 | /* | ||
759 | * Get the config address and phb buid of the PE window. | ||
760 | * Rely on eeh to retrieve this for us. | ||
761 | * Retrieve them from the pci device, not the node with the | ||
762 | * dma-window property | ||
763 | */ | ||
764 | dn = pci_device_to_OF_node(dev); | ||
765 | pcidn = PCI_DN(dn); | ||
766 | cfg_addr = pcidn->eeh_config_addr; | ||
767 | if (pcidn->eeh_pe_config_addr) | ||
768 | cfg_addr = pcidn->eeh_pe_config_addr; | ||
769 | buid = pcidn->phb->buid; | ||
770 | ret = rtas_call(ddr_avail[0], 3, 5, (u32 *)query, | ||
771 | cfg_addr, BUID_HI(buid), BUID_LO(buid)); | ||
772 | dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x" | ||
773 | " returned %d\n", ddr_avail[0], cfg_addr, BUID_HI(buid), | ||
774 | BUID_LO(buid), ret); | ||
775 | return ret; | ||
776 | } | ||
777 | |||
778 | static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail, | ||
779 | struct ddw_create_response *create, int page_shift, | ||
780 | int window_shift) | ||
781 | { | ||
782 | struct device_node *dn; | ||
783 | struct pci_dn *pcidn; | ||
784 | u32 cfg_addr; | ||
785 | u64 buid; | ||
786 | int ret; | ||
787 | |||
788 | /* | ||
789 | * Get the config address and phb buid of the PE window. | ||
790 | * Rely on eeh to retrieve this for us. | ||
791 | * Retrieve them from the pci device, not the node with the | ||
792 | * dma-window property | ||
793 | */ | ||
794 | dn = pci_device_to_OF_node(dev); | ||
795 | pcidn = PCI_DN(dn); | ||
796 | cfg_addr = pcidn->eeh_config_addr; | ||
797 | if (pcidn->eeh_pe_config_addr) | ||
798 | cfg_addr = pcidn->eeh_pe_config_addr; | ||
799 | buid = pcidn->phb->buid; | ||
800 | |||
801 | do { | ||
802 | /* extra outputs are LIOBN and dma-addr (hi, lo) */ | ||
803 | ret = rtas_call(ddr_avail[1], 5, 4, (u32 *)create, cfg_addr, | ||
804 | BUID_HI(buid), BUID_LO(buid), page_shift, window_shift); | ||
805 | } while (rtas_busy_delay(ret)); | ||
806 | dev_info(&dev->dev, | ||
807 | "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d " | ||
808 | "(liobn = 0x%x starting addr = %x %x)\n", ddr_avail[1], | ||
809 | cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift, | ||
810 | window_shift, ret, create->liobn, create->addr_hi, create->addr_lo); | ||
811 | |||
812 | return ret; | ||
813 | } | ||
814 | |||
815 | /* | ||
816 | * If the PE supports dynamic dma windows, and there is space for a table | ||
817 | * that can map all pages in a linear offset, then setup such a table, | ||
818 | * and record the dma-offset in the struct device. | ||
819 | * | ||
820 | * dev: the pci device we are checking | ||
821 | * pdn: the parent pe node with the ibm,dma_window property | ||
822 | * Future: also check if we can remap the base window for our base page size | ||
823 | * | ||
824 | * returns the dma offset for use by dma_set_mask | ||
825 | */ | ||
826 | static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) | ||
827 | { | ||
828 | int len, ret; | ||
829 | struct ddw_query_response query; | ||
830 | struct ddw_create_response create; | ||
831 | int page_shift; | ||
832 | u64 dma_addr, max_addr; | ||
833 | struct device_node *dn; | ||
834 | const u32 *uninitialized_var(ddr_avail); | ||
835 | struct direct_window *window; | ||
836 | struct property *uninitialized_var(win64); | ||
837 | struct dynamic_dma_window_prop *ddwprop; | ||
838 | |||
839 | mutex_lock(&direct_window_init_mutex); | ||
840 | |||
841 | dma_addr = dupe_ddw_if_already_created(dev, pdn); | ||
842 | if (dma_addr != 0) | ||
843 | goto out_unlock; | ||
844 | |||
845 | dma_addr = dupe_ddw_if_kexec(dev, pdn); | ||
846 | if (dma_addr != 0) | ||
847 | goto out_unlock; | ||
848 | |||
849 | /* | ||
850 | * the ibm,ddw-applicable property holds the tokens for: | ||
851 | * ibm,query-pe-dma-window | ||
852 | * ibm,create-pe-dma-window | ||
853 | * ibm,remove-pe-dma-window | ||
854 | * for the given node in that order. | ||
855 | * the property is actually in the parent, not the PE | ||
856 | */ | ||
857 | ddr_avail = of_get_property(pdn, "ibm,ddw-applicable", &len); | ||
858 | if (!ddr_avail || len < 3 * sizeof(u32)) | ||
859 | goto out_unlock; | ||
860 | |||
861 | /* | ||
862 | * Query if there is a second window of size to map the | ||
863 | * whole partition. Query returns number of windows, largest | ||
864 | * block assigned to PE (partition endpoint), and two bitmasks | ||
865 | * of page sizes: supported and supported for migrate-dma. | ||
866 | */ | ||
867 | dn = pci_device_to_OF_node(dev); | ||
868 | ret = query_ddw(dev, ddr_avail, &query); | ||
869 | if (ret != 0) | ||
870 | goto out_unlock; | ||
871 | |||
872 | if (query.windows_available == 0) { | ||
873 | /* | ||
874 | * no additional windows are available for this device. | ||
875 | * We might be able to reallocate the existing window, | ||
876 | * trading in for a larger page size. | ||
877 | */ | ||
878 | dev_dbg(&dev->dev, "no free dynamic windows"); | ||
879 | goto out_unlock; | ||
880 | } | ||
881 | if (query.page_size & 4) { | ||
882 | page_shift = 24; /* 16MB */ | ||
883 | } else if (query.page_size & 2) { | ||
884 | page_shift = 16; /* 64kB */ | ||
885 | } else if (query.page_size & 1) { | ||
886 | page_shift = 12; /* 4kB */ | ||
887 | } else { | ||
888 | dev_dbg(&dev->dev, "no supported direct page size in mask %x", | ||
889 | query.page_size); | ||
890 | goto out_unlock; | ||
891 | } | ||
892 | /* verify the window * number of ptes will map the partition */ | ||
893 | /* check largest block * page size > max memory hotplug addr */ | ||
894 | max_addr = memory_hotplug_max(); | ||
895 | if (query.largest_available_block < (max_addr >> page_shift)) { | ||
896 | dev_dbg(&dev->dev, "can't map partiton max 0x%llx with %u " | ||
897 | "%llu-sized pages\n", max_addr, query.largest_available_block, | ||
898 | 1ULL << page_shift); | ||
899 | goto out_unlock; | ||
900 | } | ||
901 | len = order_base_2(max_addr); | ||
902 | win64 = kzalloc(sizeof(struct property), GFP_KERNEL); | ||
903 | if (!win64) { | ||
904 | dev_info(&dev->dev, | ||
905 | "couldn't allocate property for 64bit dma window\n"); | ||
906 | goto out_unlock; | ||
907 | } | ||
908 | win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL); | ||
909 | win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL); | ||
910 | if (!win64->name || !win64->value) { | ||
911 | dev_info(&dev->dev, | ||
912 | "couldn't allocate property name and value\n"); | ||
913 | goto out_free_prop; | ||
914 | } | ||
915 | |||
916 | ret = create_ddw(dev, ddr_avail, &create, page_shift, len); | ||
917 | if (ret != 0) | ||
918 | goto out_free_prop; | ||
919 | |||
920 | ddwprop->liobn = cpu_to_be32(create.liobn); | ||
921 | ddwprop->dma_base = cpu_to_be64(of_read_number(&create.addr_hi, 2)); | ||
922 | ddwprop->tce_shift = cpu_to_be32(page_shift); | ||
923 | ddwprop->window_shift = cpu_to_be32(len); | ||
924 | |||
925 | dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %s\n", | ||
926 | create.liobn, dn->full_name); | ||
927 | |||
928 | window = kzalloc(sizeof(*window), GFP_KERNEL); | ||
929 | if (!window) | ||
930 | goto out_clear_window; | ||
931 | |||
932 | ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT, | ||
933 | win64->value, tce_setrange_multi_pSeriesLP_walk); | ||
934 | if (ret) { | ||
935 | dev_info(&dev->dev, "failed to map direct window for %s: %d\n", | ||
936 | dn->full_name, ret); | ||
937 | goto out_clear_window; | ||
938 | } | ||
939 | |||
940 | ret = prom_add_property(pdn, win64); | ||
941 | if (ret) { | ||
942 | dev_err(&dev->dev, "unable to add dma window property for %s: %d", | ||
943 | pdn->full_name, ret); | ||
944 | goto out_clear_window; | ||
945 | } | ||
946 | |||
947 | window->device = pdn; | ||
948 | window->prop = ddwprop; | ||
949 | spin_lock(&direct_window_list_lock); | ||
950 | list_add(&window->list, &direct_window_list); | ||
951 | spin_unlock(&direct_window_list_lock); | ||
952 | |||
953 | dma_addr = of_read_number(&create.addr_hi, 2); | ||
954 | goto out_unlock; | ||
955 | |||
956 | out_clear_window: | ||
957 | remove_ddw(pdn); | ||
958 | |||
959 | out_free_prop: | ||
960 | kfree(win64->name); | ||
961 | kfree(win64->value); | ||
962 | kfree(win64); | ||
963 | |||
964 | out_unlock: | ||
965 | mutex_unlock(&direct_window_init_mutex); | ||
966 | return dma_addr; | ||
967 | } | ||
968 | |||
498 | static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) | 969 | static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) |
499 | { | 970 | { |
500 | struct device_node *pdn, *dn; | 971 | struct device_node *pdn, *dn; |
@@ -541,23 +1012,137 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) | |||
541 | 1012 | ||
542 | set_iommu_table_base(&dev->dev, pci->iommu_table); | 1013 | set_iommu_table_base(&dev->dev, pci->iommu_table); |
543 | } | 1014 | } |
1015 | |||
1016 | static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) | ||
1017 | { | ||
1018 | bool ddw_enabled = false; | ||
1019 | struct device_node *pdn, *dn; | ||
1020 | struct pci_dev *pdev; | ||
1021 | const void *dma_window = NULL; | ||
1022 | u64 dma_offset; | ||
1023 | |||
1024 | if (!dev->dma_mask || !dma_supported(dev, dma_mask)) | ||
1025 | return -EIO; | ||
1026 | |||
1027 | /* only attempt to use a new window if 64-bit DMA is requested */ | ||
1028 | if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) { | ||
1029 | pdev = to_pci_dev(dev); | ||
1030 | |||
1031 | dn = pci_device_to_OF_node(pdev); | ||
1032 | dev_dbg(dev, "node is %s\n", dn->full_name); | ||
1033 | |||
1034 | /* | ||
1035 | * the device tree might contain the dma-window properties | ||
1036 | * per-device and not neccesarily for the bus. So we need to | ||
1037 | * search upwards in the tree until we either hit a dma-window | ||
1038 | * property, OR find a parent with a table already allocated. | ||
1039 | */ | ||
1040 | for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; | ||
1041 | pdn = pdn->parent) { | ||
1042 | dma_window = of_get_property(pdn, "ibm,dma-window", NULL); | ||
1043 | if (dma_window) | ||
1044 | break; | ||
1045 | } | ||
1046 | if (pdn && PCI_DN(pdn)) { | ||
1047 | dma_offset = enable_ddw(pdev, pdn); | ||
1048 | if (dma_offset != 0) { | ||
1049 | dev_info(dev, "Using 64-bit direct DMA at offset %llx\n", dma_offset); | ||
1050 | set_dma_offset(dev, dma_offset); | ||
1051 | set_dma_ops(dev, &dma_direct_ops); | ||
1052 | ddw_enabled = true; | ||
1053 | } | ||
1054 | } | ||
1055 | } | ||
1056 | |||
1057 | /* fall-through to iommu ops */ | ||
1058 | if (!ddw_enabled) { | ||
1059 | dev_info(dev, "Using 32-bit DMA via iommu\n"); | ||
1060 | set_dma_ops(dev, &dma_iommu_ops); | ||
1061 | } | ||
1062 | |||
1063 | *dev->dma_mask = dma_mask; | ||
1064 | return 0; | ||
1065 | } | ||
1066 | |||
544 | #else /* CONFIG_PCI */ | 1067 | #else /* CONFIG_PCI */ |
545 | #define pci_dma_bus_setup_pSeries NULL | 1068 | #define pci_dma_bus_setup_pSeries NULL |
546 | #define pci_dma_dev_setup_pSeries NULL | 1069 | #define pci_dma_dev_setup_pSeries NULL |
547 | #define pci_dma_bus_setup_pSeriesLP NULL | 1070 | #define pci_dma_bus_setup_pSeriesLP NULL |
548 | #define pci_dma_dev_setup_pSeriesLP NULL | 1071 | #define pci_dma_dev_setup_pSeriesLP NULL |
1072 | #define dma_set_mask_pSeriesLP NULL | ||
549 | #endif /* !CONFIG_PCI */ | 1073 | #endif /* !CONFIG_PCI */ |
550 | 1074 | ||
1075 | static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action, | ||
1076 | void *data) | ||
1077 | { | ||
1078 | struct direct_window *window; | ||
1079 | struct memory_notify *arg = data; | ||
1080 | int ret = 0; | ||
1081 | |||
1082 | switch (action) { | ||
1083 | case MEM_GOING_ONLINE: | ||
1084 | spin_lock(&direct_window_list_lock); | ||
1085 | list_for_each_entry(window, &direct_window_list, list) { | ||
1086 | ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn, | ||
1087 | arg->nr_pages, window->prop); | ||
1088 | /* XXX log error */ | ||
1089 | } | ||
1090 | spin_unlock(&direct_window_list_lock); | ||
1091 | break; | ||
1092 | case MEM_CANCEL_ONLINE: | ||
1093 | case MEM_OFFLINE: | ||
1094 | spin_lock(&direct_window_list_lock); | ||
1095 | list_for_each_entry(window, &direct_window_list, list) { | ||
1096 | ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn, | ||
1097 | arg->nr_pages, window->prop); | ||
1098 | /* XXX log error */ | ||
1099 | } | ||
1100 | spin_unlock(&direct_window_list_lock); | ||
1101 | break; | ||
1102 | default: | ||
1103 | break; | ||
1104 | } | ||
1105 | if (ret && action != MEM_CANCEL_ONLINE) | ||
1106 | return NOTIFY_BAD; | ||
1107 | |||
1108 | return NOTIFY_OK; | ||
1109 | } | ||
1110 | |||
1111 | static struct notifier_block iommu_mem_nb = { | ||
1112 | .notifier_call = iommu_mem_notifier, | ||
1113 | }; | ||
1114 | |||
551 | static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) | 1115 | static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) |
552 | { | 1116 | { |
553 | int err = NOTIFY_OK; | 1117 | int err = NOTIFY_OK; |
554 | struct device_node *np = node; | 1118 | struct device_node *np = node; |
555 | struct pci_dn *pci = PCI_DN(np); | 1119 | struct pci_dn *pci = PCI_DN(np); |
1120 | struct direct_window *window; | ||
556 | 1121 | ||
557 | switch (action) { | 1122 | switch (action) { |
558 | case PSERIES_RECONFIG_REMOVE: | 1123 | case PSERIES_RECONFIG_REMOVE: |
559 | if (pci && pci->iommu_table) | 1124 | if (pci && pci->iommu_table) |
560 | iommu_free_table(pci->iommu_table, np->full_name); | 1125 | iommu_free_table(pci->iommu_table, np->full_name); |
1126 | |||
1127 | spin_lock(&direct_window_list_lock); | ||
1128 | list_for_each_entry(window, &direct_window_list, list) { | ||
1129 | if (window->device == np) { | ||
1130 | list_del(&window->list); | ||
1131 | kfree(window); | ||
1132 | break; | ||
1133 | } | ||
1134 | } | ||
1135 | spin_unlock(&direct_window_list_lock); | ||
1136 | |||
1137 | /* | ||
1138 | * Because the notifier runs after isolation of the | ||
1139 | * slot, we are guaranteed any DMA window has already | ||
1140 | * been revoked and the TCEs have been marked invalid, | ||
1141 | * so we don't need a call to remove_ddw(np). However, | ||
1142 | * if an additional notifier action is added before the | ||
1143 | * isolate call, we should update this code for | ||
1144 | * completeness with such a call. | ||
1145 | */ | ||
561 | break; | 1146 | break; |
562 | default: | 1147 | default: |
563 | err = NOTIFY_DONE; | 1148 | err = NOTIFY_DONE; |
@@ -587,6 +1172,7 @@ void iommu_init_early_pSeries(void) | |||
587 | ppc_md.tce_get = tce_get_pSeriesLP; | 1172 | ppc_md.tce_get = tce_get_pSeriesLP; |
588 | ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pSeriesLP; | 1173 | ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pSeriesLP; |
589 | ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pSeriesLP; | 1174 | ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pSeriesLP; |
1175 | ppc_md.dma_set_mask = dma_set_mask_pSeriesLP; | ||
590 | } else { | 1176 | } else { |
591 | ppc_md.tce_build = tce_build_pSeries; | 1177 | ppc_md.tce_build = tce_build_pSeries; |
592 | ppc_md.tce_free = tce_free_pSeries; | 1178 | ppc_md.tce_free = tce_free_pSeries; |
@@ -597,6 +1183,7 @@ void iommu_init_early_pSeries(void) | |||
597 | 1183 | ||
598 | 1184 | ||
599 | pSeries_reconfig_notifier_register(&iommu_reconfig_nb); | 1185 | pSeries_reconfig_notifier_register(&iommu_reconfig_nb); |
1186 | register_memory_notifier(&iommu_mem_nb); | ||
600 | 1187 | ||
601 | set_pci_dma_ops(&dma_iommu_ops); | 1188 | set_pci_dma_ops(&dma_iommu_ops); |
602 | } | 1189 | } |