path: root/arch/powerpc/platforms/pseries
author     Linus Torvalds <torvalds@linux-foundation.org>    2011-03-18 09:31:43 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2011-03-18 09:31:43 -0400
commit     0a95d92c0054e74fb79607ac2df958b7bf295706 (patch)
tree       e2c5f836e799dcfd72904949be47595af91432e7 /arch/powerpc/platforms/pseries
parent     08351fc6a75731226e1112fc7254542bd3a2912e (diff)
parent     831532035b12a5f7b600515a6f4da0b207b82d6e (diff)
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (62 commits)
  powerpc/85xx: Fix signedness bug in cache-sram
  powerpc/fsl: 85xx: document cache sram bindings
  powerpc/fsl: define binding for fsl mpic interrupt controllers
  powerpc/fsl_msi: Handle msi-available-ranges better
  drivers/serial/ucc_uart.c: Add of_node_put to avoid memory leak
  powerpc/85xx: Fix SPE float to integer conversion failure
  powerpc/85xx: Update sata controller compatible for p1022ds board
  ATA: Add FSL sata v2 controller support
  powerpc/mpc8xxx_gpio: simplify searching for 'fsl, qoriq-gpio' compatiable
  powerpc/8xx: remove obsolete mgsuvd board
  powerpc/82xx: rename and update mgcoge board support
  powerpc/83xx: rename and update kmeter1
  powerpc/85xx: Workaroudn e500 CPU erratum A005
  powerpc/fsl_pci: Add support for FSL PCIe controllers v2.x
  powerpc/85xx: Fix writing to spin table 'cpu-release-addr' on ppc64e
  powerpc/pseries: Disable MSI using new interface if possible
  powerpc: Enable GENERIC_HARDIRQS_NO_DEPRECATED.
  powerpc: core irq_data conversion.
  powerpc: sysdev/xilinx_intc irq_data conversion.
  powerpc: sysdev/uic irq_data conversion.
  ...

Fix up conflicts in arch/powerpc/sysdev/fsl_msi.c (due to getting rid of of_platform_driver in arch/powerpc)
Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r--  arch/powerpc/platforms/pseries/cmm.c   |  14
-rw-r--r--  arch/powerpc/platforms/pseries/eeh.c   |   2
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c | 587
-rw-r--r--  arch/powerpc/platforms/pseries/msi.c   |  14
-rw-r--r--  arch/powerpc/platforms/pseries/nvram.c | 255
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c |   5
-rw-r--r--  arch/powerpc/platforms/pseries/xics.c  |  89
7 files changed, 849 insertions, 117 deletions
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index f4803868642c..3cafc306b971 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -508,12 +508,7 @@ static int cmm_memory_isolate_cb(struct notifier_block *self,
508 if (action == MEM_ISOLATE_COUNT) 508 if (action == MEM_ISOLATE_COUNT)
509 ret = cmm_count_pages(arg); 509 ret = cmm_count_pages(arg);
510 510
511 if (ret) 511 return notifier_from_errno(ret);
512 ret = notifier_from_errno(ret);
513 else
514 ret = NOTIFY_OK;
515
516 return ret;
517} 512}
518 513
519static struct notifier_block cmm_mem_isolate_nb = { 514static struct notifier_block cmm_mem_isolate_nb = {
@@ -635,12 +630,7 @@ static int cmm_memory_cb(struct notifier_block *self,
635 break; 630 break;
636 } 631 }
637 632
638 if (ret) 633 return notifier_from_errno(ret);
639 ret = notifier_from_errno(ret);
640 else
641 ret = NOTIFY_OK;
642
643 return ret;
644} 634}
645 635
646static struct notifier_block cmm_mem_nb = { 636static struct notifier_block cmm_mem_nb = {
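
The cmm.c hunks above collapse an if/else because notifier_from_errno() already maps a zero error to NOTIFY_OK, so the explicit NOTIFY_OK branch was redundant. A minimal user-space sketch of the equivalence (constants and the helper mirror include/linux/notifier.h; not part of this patch):

#include <stdio.h>

#define NOTIFY_OK        0x0001
#define NOTIFY_STOP_MASK 0x8000

/* same logic as the kernel's inline notifier_from_errno() */
static int notifier_from_errno(int err)
{
	if (err)
		return NOTIFY_STOP_MASK | (NOTIFY_OK - err);
	return NOTIFY_OK;
}

int main(void)
{
	/* err == 0: the old code returned NOTIFY_OK, the new code returns the same */
	printf("err 0   -> %#x\n", notifier_from_errno(0));
	/* err != 0: both versions returned the encoded failure */
	printf("err -12 -> %#x\n", notifier_from_errno(-12));	/* -ENOMEM */
	return 0;
}
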
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 17a11c82e6f8..3cc4d102b1f1 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -876,7 +876,7 @@ void eeh_restore_bars(struct pci_dn *pdn)
876 * 876 *
877 * Save the values of the device bars. Unlike the restore 877 * Save the values of the device bars. Unlike the restore
878 * routine, this routine is *not* recursive. This is because 878 * routine, this routine is *not* recursive. This is because
879 * PCI devices are added individuallly; but, for the restore, 879 * PCI devices are added individually; but, for the restore,
880 * an entire slot is reset at a time. 880 * an entire slot is reset at a time.
881 */ 881 */
882static void eeh_save_bars(struct pci_dn *pdn) 882static void eeh_save_bars(struct pci_dn *pdn)
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index edea60b7ee90..154c464cdca5 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -33,6 +33,7 @@
33#include <linux/pci.h> 33#include <linux/pci.h>
34#include <linux/dma-mapping.h> 34#include <linux/dma-mapping.h>
35#include <linux/crash_dump.h> 35#include <linux/crash_dump.h>
36#include <linux/memory.h>
36#include <asm/io.h> 37#include <asm/io.h>
37#include <asm/prom.h> 38#include <asm/prom.h>
38#include <asm/rtas.h> 39#include <asm/rtas.h>
@@ -45,6 +46,7 @@
45#include <asm/tce.h> 46#include <asm/tce.h>
46#include <asm/ppc-pci.h> 47#include <asm/ppc-pci.h>
47#include <asm/udbg.h> 48#include <asm/udbg.h>
49#include <asm/mmzone.h>
48 50
49#include "plpar_wrappers.h" 51#include "plpar_wrappers.h"
50 52
@@ -270,6 +272,152 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
270 return tce_ret; 272 return tce_ret;
271} 273}
272 274
275/* this is compatible with cells for the device tree property */
276struct dynamic_dma_window_prop {
277 __be32 liobn; /* tce table number */
278 __be64 dma_base; /* address hi,lo */
279 __be32 tce_shift; /* ilog2(tce_page_size) */
280 __be32 window_shift; /* ilog2(tce_window_size) */
281};
282
283struct direct_window {
284 struct device_node *device;
285 const struct dynamic_dma_window_prop *prop;
286 struct list_head list;
287};
288
289/* Dynamic DMA Window support */
290struct ddw_query_response {
291 u32 windows_available;
292 u32 largest_available_block;
293 u32 page_size;
294 u32 migration_capable;
295};
296
297struct ddw_create_response {
298 u32 liobn;
299 u32 addr_hi;
300 u32 addr_lo;
301};
302
303static LIST_HEAD(direct_window_list);
304/* prevents races between memory on/offline and window creation */
305static DEFINE_SPINLOCK(direct_window_list_lock);
306/* protects initializing window twice for same device */
307static DEFINE_MUTEX(direct_window_init_mutex);
308#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
309
310static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
311 unsigned long num_pfn, const void *arg)
312{
313 const struct dynamic_dma_window_prop *maprange = arg;
314 int rc;
315 u64 tce_size, num_tce, dma_offset, next;
316 u32 tce_shift;
317 long limit;
318
319 tce_shift = be32_to_cpu(maprange->tce_shift);
320 tce_size = 1ULL << tce_shift;
321 next = start_pfn << PAGE_SHIFT;
322 num_tce = num_pfn << PAGE_SHIFT;
323
324 /* round back to the beginning of the tce page size */
325 num_tce += next & (tce_size - 1);
326 next &= ~(tce_size - 1);
327
328 /* convert to number of tces */
329 num_tce |= tce_size - 1;
330 num_tce >>= tce_shift;
331
332 do {
333 /*
334 * Set up the page with TCE data, looping through and setting
335 * the values.
336 */
337 limit = min_t(long, num_tce, 512);
338 dma_offset = next + be64_to_cpu(maprange->dma_base);
339
340 rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn),
341 dma_offset,
342 0, limit);
343 num_tce -= limit;
344 } while (num_tce > 0 && !rc);
345
346 return rc;
347}
348
349static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
350 unsigned long num_pfn, const void *arg)
351{
352 const struct dynamic_dma_window_prop *maprange = arg;
353 u64 *tcep, tce_size, num_tce, dma_offset, next, proto_tce, liobn;
354 u32 tce_shift;
355 u64 rc = 0;
356 long l, limit;
357
358 local_irq_disable(); /* to protect tcep and the page behind it */
359 tcep = __get_cpu_var(tce_page);
360
361 if (!tcep) {
362 tcep = (u64 *)__get_free_page(GFP_ATOMIC);
363 if (!tcep) {
364 local_irq_enable();
365 return -ENOMEM;
366 }
367 __get_cpu_var(tce_page) = tcep;
368 }
369
370 proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
371
372 liobn = (u64)be32_to_cpu(maprange->liobn);
373 tce_shift = be32_to_cpu(maprange->tce_shift);
374 tce_size = 1ULL << tce_shift;
375 next = start_pfn << PAGE_SHIFT;
376 num_tce = num_pfn << PAGE_SHIFT;
377
378 /* round back to the beginning of the tce page size */
379 num_tce += next & (tce_size - 1);
380 next &= ~(tce_size - 1);
381
383 /* convert to number of tces */
383 num_tce |= tce_size - 1;
384 num_tce >>= tce_shift;
385
386 /* We can map max one pageful of TCEs at a time */
387 do {
388 /*
389 * Set up the page with TCE data, looping through and setting
390 * the values.
391 */
392 limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE);
393 dma_offset = next + be64_to_cpu(maprange->dma_base);
394
395 for (l = 0; l < limit; l++) {
396 tcep[l] = proto_tce | next;
397 next += tce_size;
398 }
399
400 rc = plpar_tce_put_indirect(liobn,
401 dma_offset,
402 (u64)virt_to_abs(tcep),
403 limit);
404
405 num_tce -= limit;
406 } while (num_tce > 0 && !rc);
407
408 /* error cleanup: caller will clear whole range */
409
410 local_irq_enable();
411 return rc;
412}
413
414static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
415 unsigned long num_pfn, void *arg)
416{
417 return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg);
418}
419
420
273#ifdef CONFIG_PCI 421#ifdef CONFIG_PCI
274static void iommu_table_setparms(struct pci_controller *phb, 422static void iommu_table_setparms(struct pci_controller *phb,
275 struct device_node *dn, 423 struct device_node *dn,
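
The hunk above adds tce_setrange_multi_pSeriesLP()/tce_clearrange_multi_pSeriesLP(), which program or clear TCEs for a pfn range via plpar_tce_put_indirect()/plpar_tce_stuff(); unaligned ranges are first extended to whole TCE pages, and at most one 4kB page of TCEs (4096/TCE_ENTRY_SIZE = 512 entries) is handed to the hypervisor per call. A hedged user-space sketch of the bookkeeping for a TCE-aligned range (values are illustrative, not from the patch):

#include <stdio.h>

#define PAGE_SHIFT     12	/* assume 4kB system pages */
#define TCE_ENTRY_SIZE  8	/* one 64-bit TCE per entry */

int main(void)
{
	unsigned int tce_shift = 16;			/* 64kB TCE pages */
	unsigned long long start_pfn = 0x10000;		/* 256MB, TCE-aligned */
	unsigned long long num_pfn = 0x40000;		/* 1GB of 4kB pages */

	unsigned long long next = start_pfn << PAGE_SHIFT;	/* byte address */
	unsigned long long num_tce = (num_pfn << PAGE_SHIFT) >> tce_shift;
	unsigned long long per_call = 4096 / TCE_ENTRY_SIZE;	/* 512 */

	printf("map from 0x%llx: %llu TCEs, %llu hcalls of up to %llu entries\n",
	       next, num_tce, (num_tce + per_call - 1) / per_call, per_call);
	return 0;
}
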
@@ -495,6 +643,329 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
495 pci_name(dev)); 643 pci_name(dev));
496} 644}
497 645
646static int __read_mostly disable_ddw;
647
648static int __init disable_ddw_setup(char *str)
649{
650 disable_ddw = 1;
651 printk(KERN_INFO "ppc iommu: disabling ddw.\n");
652
653 return 0;
654}
655
656early_param("disable_ddw", disable_ddw_setup);
657
658static void remove_ddw(struct device_node *np)
659{
660 struct dynamic_dma_window_prop *dwp;
661 struct property *win64;
662 const u32 *ddr_avail;
663 u64 liobn;
664 int len, ret;
665
666 ddr_avail = of_get_property(np, "ibm,ddw-applicable", &len);
667 win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
668 if (!win64 || !ddr_avail || len < 3 * sizeof(u32))
669 return;
670
671 dwp = win64->value;
672 liobn = (u64)be32_to_cpu(dwp->liobn);
673
674 /* clear the whole window, note the arg is in kernel pages */
675 ret = tce_clearrange_multi_pSeriesLP(0,
676 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT), dwp);
677 if (ret)
678 pr_warning("%s failed to clear tces in window.\n",
679 np->full_name);
680 else
681 pr_debug("%s successfully cleared tces in window.\n",
682 np->full_name);
683
684 ret = rtas_call(ddr_avail[2], 1, 1, NULL, liobn);
685 if (ret)
686 pr_warning("%s: failed to remove direct window: rtas returned "
687 "%d to ibm,remove-pe-dma-window(%x) %llx\n",
688 np->full_name, ret, ddr_avail[2], liobn);
689 else
690 pr_debug("%s: successfully removed direct window: rtas returned "
691 "%d to ibm,remove-pe-dma-window(%x) %llx\n",
692 np->full_name, ret, ddr_avail[2], liobn);
693}
694
695
696static int dupe_ddw_if_already_created(struct pci_dev *dev, struct device_node *pdn)
697{
698 struct device_node *dn;
699 struct pci_dn *pcidn;
700 struct direct_window *window;
701 const struct dynamic_dma_window_prop *direct64;
702 u64 dma_addr = 0;
703
704 dn = pci_device_to_OF_node(dev);
705 pcidn = PCI_DN(dn);
706 spin_lock(&direct_window_list_lock);
707 /* check if we already created a window and dupe that config if so */
708 list_for_each_entry(window, &direct_window_list, list) {
709 if (window->device == pdn) {
710 direct64 = window->prop;
711 dma_addr = direct64->dma_base;
712 break;
713 }
714 }
715 spin_unlock(&direct_window_list_lock);
716
717 return dma_addr;
718}
719
720static u64 dupe_ddw_if_kexec(struct pci_dev *dev, struct device_node *pdn)
721{
722 struct device_node *dn;
723 struct pci_dn *pcidn;
724 int len;
725 struct direct_window *window;
726 const struct dynamic_dma_window_prop *direct64;
727 u64 dma_addr = 0;
728
729 dn = pci_device_to_OF_node(dev);
730 pcidn = PCI_DN(dn);
731 direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
732 if (direct64) {
733 window = kzalloc(sizeof(*window), GFP_KERNEL);
734 if (!window) {
735 remove_ddw(pdn);
736 } else {
737 window->device = pdn;
738 window->prop = direct64;
739 spin_lock(&direct_window_list_lock);
740 list_add(&window->list, &direct_window_list);
741 spin_unlock(&direct_window_list_lock);
742 dma_addr = direct64->dma_base;
743 }
744 }
745
746 return dma_addr;
747}
748
749static int query_ddw(struct pci_dev *dev, const u32 *ddr_avail,
750 struct ddw_query_response *query)
751{
752 struct device_node *dn;
753 struct pci_dn *pcidn;
754 u32 cfg_addr;
755 u64 buid;
756 int ret;
757
758 /*
759 * Get the config address and phb buid of the PE window.
760 * Rely on eeh to retrieve this for us.
761 * Retrieve them from the pci device, not the node with the
762 * dma-window property
763 */
764 dn = pci_device_to_OF_node(dev);
765 pcidn = PCI_DN(dn);
766 cfg_addr = pcidn->eeh_config_addr;
767 if (pcidn->eeh_pe_config_addr)
768 cfg_addr = pcidn->eeh_pe_config_addr;
769 buid = pcidn->phb->buid;
770 ret = rtas_call(ddr_avail[0], 3, 5, (u32 *)query,
771 cfg_addr, BUID_HI(buid), BUID_LO(buid));
772 dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
773 " returned %d\n", ddr_avail[0], cfg_addr, BUID_HI(buid),
774 BUID_LO(buid), ret);
775 return ret;
776}
777
778static int create_ddw(struct pci_dev *dev, const u32 *ddr_avail,
779 struct ddw_create_response *create, int page_shift,
780 int window_shift)
781{
782 struct device_node *dn;
783 struct pci_dn *pcidn;
784 u32 cfg_addr;
785 u64 buid;
786 int ret;
787
788 /*
789 * Get the config address and phb buid of the PE window.
790 * Rely on eeh to retrieve this for us.
791 * Retrieve them from the pci device, not the node with the
792 * dma-window property
793 */
794 dn = pci_device_to_OF_node(dev);
795 pcidn = PCI_DN(dn);
796 cfg_addr = pcidn->eeh_config_addr;
797 if (pcidn->eeh_pe_config_addr)
798 cfg_addr = pcidn->eeh_pe_config_addr;
799 buid = pcidn->phb->buid;
800
801 do {
802 /* extra outputs are LIOBN and dma-addr (hi, lo) */
803 ret = rtas_call(ddr_avail[1], 5, 4, (u32 *)create, cfg_addr,
804 BUID_HI(buid), BUID_LO(buid), page_shift, window_shift);
805 } while (rtas_busy_delay(ret));
806 dev_info(&dev->dev,
807 "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
808 "(liobn = 0x%x starting addr = %x %x)\n", ddr_avail[1],
809 cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
810 window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);
811
812 return ret;
813}
814
815/*
816 * If the PE supports dynamic dma windows, and there is space for a table
817 * that can map all pages in a linear offset, then setup such a table,
818 * and record the dma-offset in the struct device.
819 *
820 * dev: the pci device we are checking
821 * pdn: the parent pe node with the ibm,dma_window property
822 * Future: also check if we can remap the base window for our base page size
823 *
824 * returns the dma offset for use by dma_set_mask
825 */
826static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
827{
828 int len, ret;
829 struct ddw_query_response query;
830 struct ddw_create_response create;
831 int page_shift;
832 u64 dma_addr, max_addr;
833 struct device_node *dn;
834 const u32 *uninitialized_var(ddr_avail);
835 struct direct_window *window;
836 struct property *uninitialized_var(win64);
837 struct dynamic_dma_window_prop *ddwprop;
838
839 mutex_lock(&direct_window_init_mutex);
840
841 dma_addr = dupe_ddw_if_already_created(dev, pdn);
842 if (dma_addr != 0)
843 goto out_unlock;
844
845 dma_addr = dupe_ddw_if_kexec(dev, pdn);
846 if (dma_addr != 0)
847 goto out_unlock;
848
849 /*
850 * the ibm,ddw-applicable property holds the tokens for:
851 * ibm,query-pe-dma-window
852 * ibm,create-pe-dma-window
853 * ibm,remove-pe-dma-window
854 * for the given node in that order.
855 * the property is actually in the parent, not the PE
856 */
857 ddr_avail = of_get_property(pdn, "ibm,ddw-applicable", &len);
858 if (!ddr_avail || len < 3 * sizeof(u32))
859 goto out_unlock;
860
861 /*
862 * Query if there is a second window of size to map the
863 * whole partition. Query returns number of windows, largest
864 * block assigned to PE (partition endpoint), and two bitmasks
865 * of page sizes: supported and supported for migrate-dma.
866 */
867 dn = pci_device_to_OF_node(dev);
868 ret = query_ddw(dev, ddr_avail, &query);
869 if (ret != 0)
870 goto out_unlock;
871
872 if (query.windows_available == 0) {
873 /*
874 * no additional windows are available for this device.
875 * We might be able to reallocate the existing window,
876 * trading in for a larger page size.
877 */
878 dev_dbg(&dev->dev, "no free dynamic windows");
879 goto out_unlock;
880 }
881 if (query.page_size & 4) {
882 page_shift = 24; /* 16MB */
883 } else if (query.page_size & 2) {
884 page_shift = 16; /* 64kB */
885 } else if (query.page_size & 1) {
886 page_shift = 12; /* 4kB */
887 } else {
888 dev_dbg(&dev->dev, "no supported direct page size in mask %x",
889 query.page_size);
890 goto out_unlock;
891 }
892 /* verify the window * number of ptes will map the partition */
893 /* check largest block * page size > max memory hotplug addr */
894 max_addr = memory_hotplug_max();
895 if (query.largest_available_block < (max_addr >> page_shift)) {
896 dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u "
897 "%llu-sized pages\n", max_addr, query.largest_available_block,
898 1ULL << page_shift);
899 goto out_unlock;
900 }
901 len = order_base_2(max_addr);
902 win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
903 if (!win64) {
904 dev_info(&dev->dev,
905 "couldn't allocate property for 64bit dma window\n");
906 goto out_unlock;
907 }
908 win64->name = kstrdup(DIRECT64_PROPNAME, GFP_KERNEL);
909 win64->value = ddwprop = kmalloc(sizeof(*ddwprop), GFP_KERNEL);
910 if (!win64->name || !win64->value) {
911 dev_info(&dev->dev,
912 "couldn't allocate property name and value\n");
913 goto out_free_prop;
914 }
915
916 ret = create_ddw(dev, ddr_avail, &create, page_shift, len);
917 if (ret != 0)
918 goto out_free_prop;
919
920 ddwprop->liobn = cpu_to_be32(create.liobn);
921 ddwprop->dma_base = cpu_to_be64(of_read_number(&create.addr_hi, 2));
922 ddwprop->tce_shift = cpu_to_be32(page_shift);
923 ddwprop->window_shift = cpu_to_be32(len);
924
925 dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %s\n",
926 create.liobn, dn->full_name);
927
928 window = kzalloc(sizeof(*window), GFP_KERNEL);
929 if (!window)
930 goto out_clear_window;
931
932 ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
933 win64->value, tce_setrange_multi_pSeriesLP_walk);
934 if (ret) {
935 dev_info(&dev->dev, "failed to map direct window for %s: %d\n",
936 dn->full_name, ret);
937 goto out_clear_window;
938 }
939
940 ret = prom_add_property(pdn, win64);
941 if (ret) {
942 dev_err(&dev->dev, "unable to add dma window property for %s: %d",
943 pdn->full_name, ret);
944 goto out_clear_window;
945 }
946
947 window->device = pdn;
948 window->prop = ddwprop;
949 spin_lock(&direct_window_list_lock);
950 list_add(&window->list, &direct_window_list);
951 spin_unlock(&direct_window_list_lock);
952
953 dma_addr = of_read_number(&create.addr_hi, 2);
954 goto out_unlock;
955
956out_clear_window:
957 remove_ddw(pdn);
958
959out_free_prop:
960 kfree(win64->name);
961 kfree(win64->value);
962 kfree(win64);
963
964out_unlock:
965 mutex_unlock(&direct_window_init_mutex);
966 return dma_addr;
967}
968
498static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) 969static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
499{ 970{
500 struct device_node *pdn, *dn; 971 struct device_node *pdn, *dn;
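
The property created above (DIRECT64_PROPNAME) records the new window as big-endian cells: liobn, dma_base, tce_shift and window_shift, where enable_ddw() takes window_shift from order_base_2(memory_hotplug_max()) and tce_shift from the chosen direct page size. A hedged user-space sketch of what those two shifts mean for a hypothetical 16MB-page window covering 64GB (values illustrative, not from the patch):

#include <stdio.h>

int main(void)
{
	unsigned int tce_shift    = 24;		/* ilog2(16MB direct pages) */
	unsigned int window_shift = 36;		/* ilog2(64GB window) */

	unsigned long long window_bytes = 1ULL << window_shift;
	unsigned long long tce_entries  = window_bytes >> tce_shift;

	printf("window %lluGB, %llu TCEs of %uMB each\n",
	       window_bytes >> 30, tce_entries, (1U << tce_shift) >> 20);
	return 0;
}
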
@@ -541,23 +1012,137 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
541 1012
542 set_iommu_table_base(&dev->dev, pci->iommu_table); 1013 set_iommu_table_base(&dev->dev, pci->iommu_table);
543} 1014}
1015
1016static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
1017{
1018 bool ddw_enabled = false;
1019 struct device_node *pdn, *dn;
1020 struct pci_dev *pdev;
1021 const void *dma_window = NULL;
1022 u64 dma_offset;
1023
1024 if (!dev->dma_mask || !dma_supported(dev, dma_mask))
1025 return -EIO;
1026
1027 /* only attempt to use a new window if 64-bit DMA is requested */
1028 if (!disable_ddw && dma_mask == DMA_BIT_MASK(64)) {
1029 pdev = to_pci_dev(dev);
1030
1031 dn = pci_device_to_OF_node(pdev);
1032 dev_dbg(dev, "node is %s\n", dn->full_name);
1033
1034 /*
1035 * the device tree might contain the dma-window properties
1036 * per-device and not necessarily for the bus. So we need to
1037 * search upwards in the tree until we either hit a dma-window
1038 * property, OR find a parent with a table already allocated.
1039 */
1040 for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table;
1041 pdn = pdn->parent) {
1042 dma_window = of_get_property(pdn, "ibm,dma-window", NULL);
1043 if (dma_window)
1044 break;
1045 }
1046 if (pdn && PCI_DN(pdn)) {
1047 dma_offset = enable_ddw(pdev, pdn);
1048 if (dma_offset != 0) {
1049 dev_info(dev, "Using 64-bit direct DMA at offset %llx\n", dma_offset);
1050 set_dma_offset(dev, dma_offset);
1051 set_dma_ops(dev, &dma_direct_ops);
1052 ddw_enabled = true;
1053 }
1054 }
1055 }
1056
1057 /* fall-through to iommu ops */
1058 if (!ddw_enabled) {
1059 dev_info(dev, "Using 32-bit DMA via iommu\n");
1060 set_dma_ops(dev, &dma_iommu_ops);
1061 }
1062
1063 *dev->dma_mask = dma_mask;
1064 return 0;
1065}
1066
544#else /* CONFIG_PCI */ 1067#else /* CONFIG_PCI */
545#define pci_dma_bus_setup_pSeries NULL 1068#define pci_dma_bus_setup_pSeries NULL
546#define pci_dma_dev_setup_pSeries NULL 1069#define pci_dma_dev_setup_pSeries NULL
547#define pci_dma_bus_setup_pSeriesLP NULL 1070#define pci_dma_bus_setup_pSeriesLP NULL
548#define pci_dma_dev_setup_pSeriesLP NULL 1071#define pci_dma_dev_setup_pSeriesLP NULL
1072#define dma_set_mask_pSeriesLP NULL
549#endif /* !CONFIG_PCI */ 1073#endif /* !CONFIG_PCI */
550 1074
1075static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
1076 void *data)
1077{
1078 struct direct_window *window;
1079 struct memory_notify *arg = data;
1080 int ret = 0;
1081
1082 switch (action) {
1083 case MEM_GOING_ONLINE:
1084 spin_lock(&direct_window_list_lock);
1085 list_for_each_entry(window, &direct_window_list, list) {
1086 ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
1087 arg->nr_pages, window->prop);
1088 /* XXX log error */
1089 }
1090 spin_unlock(&direct_window_list_lock);
1091 break;
1092 case MEM_CANCEL_ONLINE:
1093 case MEM_OFFLINE:
1094 spin_lock(&direct_window_list_lock);
1095 list_for_each_entry(window, &direct_window_list, list) {
1096 ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
1097 arg->nr_pages, window->prop);
1098 /* XXX log error */
1099 }
1100 spin_unlock(&direct_window_list_lock);
1101 break;
1102 default:
1103 break;
1104 }
1105 if (ret && action != MEM_CANCEL_ONLINE)
1106 return NOTIFY_BAD;
1107
1108 return NOTIFY_OK;
1109}
1110
1111static struct notifier_block iommu_mem_nb = {
1112 .notifier_call = iommu_mem_notifier,
1113};
1114
551static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) 1115static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
552{ 1116{
553 int err = NOTIFY_OK; 1117 int err = NOTIFY_OK;
554 struct device_node *np = node; 1118 struct device_node *np = node;
555 struct pci_dn *pci = PCI_DN(np); 1119 struct pci_dn *pci = PCI_DN(np);
1120 struct direct_window *window;
556 1121
557 switch (action) { 1122 switch (action) {
558 case PSERIES_RECONFIG_REMOVE: 1123 case PSERIES_RECONFIG_REMOVE:
559 if (pci && pci->iommu_table) 1124 if (pci && pci->iommu_table)
560 iommu_free_table(pci->iommu_table, np->full_name); 1125 iommu_free_table(pci->iommu_table, np->full_name);
1126
1127 spin_lock(&direct_window_list_lock);
1128 list_for_each_entry(window, &direct_window_list, list) {
1129 if (window->device == np) {
1130 list_del(&window->list);
1131 kfree(window);
1132 break;
1133 }
1134 }
1135 spin_unlock(&direct_window_list_lock);
1136
1137 /*
1138 * Because the notifier runs after isolation of the
1139 * slot, we are guaranteed any DMA window has already
1140 * been revoked and the TCEs have been marked invalid,
1141 * so we don't need a call to remove_ddw(np). However,
1142 * if an additional notifier action is added before the
1143 * isolate call, we should update this code for
1144 * completeness with such a call.
1145 */
561 break; 1146 break;
562 default: 1147 default:
563 err = NOTIFY_DONE; 1148 err = NOTIFY_DONE;
@@ -587,6 +1172,7 @@ void iommu_init_early_pSeries(void)
587 ppc_md.tce_get = tce_get_pSeriesLP; 1172 ppc_md.tce_get = tce_get_pSeriesLP;
588 ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pSeriesLP; 1173 ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
589 ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pSeriesLP; 1174 ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
1175 ppc_md.dma_set_mask = dma_set_mask_pSeriesLP;
590 } else { 1176 } else {
591 ppc_md.tce_build = tce_build_pSeries; 1177 ppc_md.tce_build = tce_build_pSeries;
592 ppc_md.tce_free = tce_free_pSeries; 1178 ppc_md.tce_free = tce_free_pSeries;
@@ -597,6 +1183,7 @@ void iommu_init_early_pSeries(void)
597 1183
598 1184
599 pSeries_reconfig_notifier_register(&iommu_reconfig_nb); 1185 pSeries_reconfig_notifier_register(&iommu_reconfig_nb);
1186 register_memory_notifier(&iommu_mem_nb);
600 1187
601 set_pci_dma_ops(&dma_iommu_ops); 1188 set_pci_dma_ops(&dma_iommu_ops);
602} 1189}
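
With the iommu.c changes above in place, a PCI driver opts in simply by asking for a 64-bit DMA mask: the new ppc_md.dma_set_mask hook routes the request to dma_set_mask_pSeriesLP(), which tries enable_ddw() and otherwise falls back to the 32-bit IOMMU path. A hedged fragment of a hypothetical driver probe doing just that (example_probe is not from the patch):

static int example_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	/* request 64-bit DMA; on pseries this may create a direct window */
	if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) {
		dev_warn(&pdev->dev, "no 64-bit DMA, trying 32-bit mask\n");
		if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
			return -EIO;
	}
	return 0;
}
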
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 1164c3430f2c..18ac801f8e90 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -93,8 +93,18 @@ static void rtas_disable_msi(struct pci_dev *pdev)
93 if (!pdn) 93 if (!pdn)
94 return; 94 return;
95 95
96 if (rtas_change_msi(pdn, RTAS_CHANGE_FN, 0) != 0) 96 /*
97 pr_debug("rtas_msi: Setting MSIs to 0 failed!\n"); 97 * disabling MSI with the explicit interface also disables MSI-X
98 */
99 if (rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, 0) != 0) {
100 /*
101 * may have failed because explicit interface is not
102 * present
103 */
104 if (rtas_change_msi(pdn, RTAS_CHANGE_FN, 0) != 0) {
105 pr_debug("rtas_msi: Setting MSIs to 0 failed!\n");
106 }
107 }
98} 108}
99 109
100static int rtas_query_irq_number(struct pci_dn *pdn, int offset) 110static int rtas_query_irq_number(struct pci_dn *pdn, int offset)
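
The msi.c hunk prefers the interface that disables MSI and MSI-X together, and only falls back to the older per-function request when that call fails (for example on firmware without the explicit interface). A hedged stand-alone sketch of that control flow (the two helpers are hypothetical stand-ins for the rtas_change_msi() calls, not from the patch):

#include <stdio.h>

/* hypothetical stand-ins for rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, 0)
 * and rtas_change_msi(pdn, RTAS_CHANGE_FN, 0) */
static int disable_msi_explicit(void) { return -1; }	/* not supported */
static int disable_msi_legacy(void)   { return 0; }

int main(void)
{
	if (disable_msi_explicit() != 0) {
		/* explicit interface absent: try the older request */
		if (disable_msi_legacy() != 0)
			printf("rtas_msi: Setting MSIs to 0 failed!\n");
	}
	return 0;
}
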
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 7e828ba29bc3..419707b07248 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -16,6 +16,8 @@
16#include <linux/errno.h> 16#include <linux/errno.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/slab.h>
20#include <linux/kmsg_dump.h>
19#include <asm/uaccess.h> 21#include <asm/uaccess.h>
20#include <asm/nvram.h> 22#include <asm/nvram.h>
21#include <asm/rtas.h> 23#include <asm/rtas.h>
@@ -30,17 +32,54 @@ static int nvram_fetch, nvram_store;
30static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ 32static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */
31static DEFINE_SPINLOCK(nvram_lock); 33static DEFINE_SPINLOCK(nvram_lock);
32 34
33static long nvram_error_log_index = -1;
34static long nvram_error_log_size = 0;
35
36struct err_log_info { 35struct err_log_info {
37 int error_type; 36 int error_type;
38 unsigned int seq_num; 37 unsigned int seq_num;
39}; 38};
40#define NVRAM_MAX_REQ 2079
41#define NVRAM_MIN_REQ 1055
42 39
43#define NVRAM_LOG_PART_NAME "ibm,rtas-log" 40struct nvram_os_partition {
41 const char *name;
42 int req_size; /* desired size, in bytes */
43 int min_size; /* minimum acceptable size (0 means req_size) */
44 long size; /* size of data portion (excluding err_log_info) */
45 long index; /* offset of data portion of partition */
46};
47
48static struct nvram_os_partition rtas_log_partition = {
49 .name = "ibm,rtas-log",
50 .req_size = 2079,
51 .min_size = 1055,
52 .index = -1
53};
54
55static struct nvram_os_partition oops_log_partition = {
56 .name = "lnx,oops-log",
57 .req_size = 4000,
58 .min_size = 2000,
59 .index = -1
60};
61
62static const char *pseries_nvram_os_partitions[] = {
63 "ibm,rtas-log",
64 "lnx,oops-log",
65 NULL
66};
67
68static void oops_to_nvram(struct kmsg_dumper *dumper,
69 enum kmsg_dump_reason reason,
70 const char *old_msgs, unsigned long old_len,
71 const char *new_msgs, unsigned long new_len);
72
73static struct kmsg_dumper nvram_kmsg_dumper = {
74 .dump = oops_to_nvram
75};
76
77/* See clobbering_unread_rtas_event() */
78#define NVRAM_RTAS_READ_TIMEOUT 5 /* seconds */
79static unsigned long last_unread_rtas_event; /* timestamp */
80
81/* We preallocate oops_buf during init to avoid kmalloc during oops/panic. */
82static char *oops_buf;
44 83
45static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) 84static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
46{ 85{
@@ -134,7 +173,7 @@ static ssize_t pSeries_nvram_get_size(void)
134} 173}
135 174
136 175
137/* nvram_write_error_log 176/* nvram_write_os_partition, nvram_write_error_log
138 * 177 *
139 * We need to buffer the error logs into nvram to ensure that we have 178 * We need to buffer the error logs into nvram to ensure that we have
140 * the failure information to decode. If we have a severe error there 179 * the failure information to decode. If we have a severe error there
@@ -156,48 +195,58 @@ static ssize_t pSeries_nvram_get_size(void)
156 * The 'data' section would look like (in bytes): 195 * The 'data' section would look like (in bytes):
157 * +--------------+------------+-----------------------------------+ 196 * +--------------+------------+-----------------------------------+
158 * | event_logged | sequence # | error log | 197 * | event_logged | sequence # | error log |
159 * |0 3|4 7|8 nvram_error_log_size-1| 198 * |0 3|4 7|8 error_log_size-1|
160 * +--------------+------------+-----------------------------------+ 199 * +--------------+------------+-----------------------------------+
161 * 200 *
162 * event_logged: 0 if event has not been logged to syslog, 1 if it has 201 * event_logged: 0 if event has not been logged to syslog, 1 if it has
163 * sequence #: The unique sequence # for each event. (until it wraps) 202 * sequence #: The unique sequence # for each event. (until it wraps)
164 * error log: The error log from event_scan 203 * error log: The error log from event_scan
165 */ 204 */
166int nvram_write_error_log(char * buff, int length, 205int nvram_write_os_partition(struct nvram_os_partition *part, char * buff,
167 unsigned int err_type, unsigned int error_log_cnt) 206 int length, unsigned int err_type, unsigned int error_log_cnt)
168{ 207{
169 int rc; 208 int rc;
170 loff_t tmp_index; 209 loff_t tmp_index;
171 struct err_log_info info; 210 struct err_log_info info;
172 211
173 if (nvram_error_log_index == -1) { 212 if (part->index == -1) {
174 return -ESPIPE; 213 return -ESPIPE;
175 } 214 }
176 215
177 if (length > nvram_error_log_size) { 216 if (length > part->size) {
178 length = nvram_error_log_size; 217 length = part->size;
179 } 218 }
180 219
181 info.error_type = err_type; 220 info.error_type = err_type;
182 info.seq_num = error_log_cnt; 221 info.seq_num = error_log_cnt;
183 222
184 tmp_index = nvram_error_log_index; 223 tmp_index = part->index;
185 224
186 rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index); 225 rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
187 if (rc <= 0) { 226 if (rc <= 0) {
188 printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc); 227 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
189 return rc; 228 return rc;
190 } 229 }
191 230
192 rc = ppc_md.nvram_write(buff, length, &tmp_index); 231 rc = ppc_md.nvram_write(buff, length, &tmp_index);
193 if (rc <= 0) { 232 if (rc <= 0) {
194 printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc); 233 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
195 return rc; 234 return rc;
196 } 235 }
197 236
198 return 0; 237 return 0;
199} 238}
200 239
240int nvram_write_error_log(char * buff, int length,
241 unsigned int err_type, unsigned int error_log_cnt)
242{
243 int rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
244 err_type, error_log_cnt);
245 if (!rc)
246 last_unread_rtas_event = get_seconds();
247 return rc;
248}
249
201/* nvram_read_error_log 250/* nvram_read_error_log
202 * 251 *
203 * Reads nvram for error log for at most 'length' 252 * Reads nvram for error log for at most 'length'
@@ -209,13 +258,13 @@ int nvram_read_error_log(char * buff, int length,
209 loff_t tmp_index; 258 loff_t tmp_index;
210 struct err_log_info info; 259 struct err_log_info info;
211 260
212 if (nvram_error_log_index == -1) 261 if (rtas_log_partition.index == -1)
213 return -1; 262 return -1;
214 263
215 if (length > nvram_error_log_size) 264 if (length > rtas_log_partition.size)
216 length = nvram_error_log_size; 265 length = rtas_log_partition.size;
217 266
218 tmp_index = nvram_error_log_index; 267 tmp_index = rtas_log_partition.index;
219 268
220 rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index); 269 rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
221 if (rc <= 0) { 270 if (rc <= 0) {
@@ -244,37 +293,40 @@ int nvram_clear_error_log(void)
244 int clear_word = ERR_FLAG_ALREADY_LOGGED; 293 int clear_word = ERR_FLAG_ALREADY_LOGGED;
245 int rc; 294 int rc;
246 295
247 if (nvram_error_log_index == -1) 296 if (rtas_log_partition.index == -1)
248 return -1; 297 return -1;
249 298
250 tmp_index = nvram_error_log_index; 299 tmp_index = rtas_log_partition.index;
251 300
252 rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index); 301 rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
253 if (rc <= 0) { 302 if (rc <= 0) {
254 printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc); 303 printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
255 return rc; 304 return rc;
256 } 305 }
306 last_unread_rtas_event = 0;
257 307
258 return 0; 308 return 0;
259} 309}
260 310
261/* pseries_nvram_init_log_partition 311/* pseries_nvram_init_os_partition
262 * 312 *
263 * This will setup the partition we need for buffering the 313 * This sets up a partition with an "OS" signature.
264 * error logs and cleanup partitions if needed.
265 * 314 *
266 * The general strategy is the following: 315 * The general strategy is the following:
267 * 1.) If there is log partition large enough then use it. 316 * 1.) If a partition with the indicated name already exists...
268 * 2.) If there is none large enough, search 317 * - If it's large enough, use it.
269 * for a free partition that is large enough. 318 * - Otherwise, recycle it and keep going.
270 * 3.) If there is not a free partition large enough remove 319 * 2.) Search for a free partition that is large enough.
271 * _all_ OS partitions and consolidate the space. 320 * 3.) If there's not a free partition large enough, recycle any obsolete
272 * 4.) Will first try getting a chunk that will satisfy the maximum 321 * OS partitions and try again.
273 * error log size (NVRAM_MAX_REQ). 322 * 4.) Will first try getting a chunk that will satisfy the requested size.
274 * 5.) If the max chunk cannot be allocated then try finding a chunk 323 * 5.) If a chunk of the requested size cannot be allocated, then try finding
275 * that will satisfy the minum needed (NVRAM_MIN_REQ). 324 * a chunk that will satisfy the minum needed.
325 *
326 * Returns 0 on success, else -1.
276 */ 327 */
277static int __init pseries_nvram_init_log_partition(void) 328static int __init pseries_nvram_init_os_partition(struct nvram_os_partition
329 *part)
278{ 330{
279 loff_t p; 331 loff_t p;
280 int size; 332 int size;
@@ -282,47 +334,76 @@ static int __init pseries_nvram_init_log_partition(void)
282 /* Scan nvram for partitions */ 334 /* Scan nvram for partitions */
283 nvram_scan_partitions(); 335 nvram_scan_partitions();
284 336
285 /* Lookg for ours */ 337 /* Look for ours */
286 p = nvram_find_partition(NVRAM_LOG_PART_NAME, NVRAM_SIG_OS, &size); 338 p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size);
287 339
288 /* Found one but too small, remove it */ 340 /* Found one but too small, remove it */
289 if (p && size < NVRAM_MIN_REQ) { 341 if (p && size < part->min_size) {
290 pr_info("nvram: Found too small "NVRAM_LOG_PART_NAME" partition" 342 pr_info("nvram: Found too small %s partition,"
291 ",removing it..."); 343 " removing it...\n", part->name);
292 nvram_remove_partition(NVRAM_LOG_PART_NAME, NVRAM_SIG_OS); 344 nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL);
293 p = 0; 345 p = 0;
294 } 346 }
295 347
296 /* Create one if we didn't find */ 348 /* Create one if we didn't find */
297 if (!p) { 349 if (!p) {
298 p = nvram_create_partition(NVRAM_LOG_PART_NAME, NVRAM_SIG_OS, 350 p = nvram_create_partition(part->name, NVRAM_SIG_OS,
299 NVRAM_MAX_REQ, NVRAM_MIN_REQ); 351 part->req_size, part->min_size);
300 /* No room for it, try to get rid of any OS partition
301 * and try again
302 */
303 if (p == -ENOSPC) { 352 if (p == -ENOSPC) {
304 pr_info("nvram: No room to create "NVRAM_LOG_PART_NAME 353 pr_info("nvram: No room to create %s partition, "
305 " partition, deleting all OS partitions..."); 354 "deleting any obsolete OS partitions...\n",
306 nvram_remove_partition(NULL, NVRAM_SIG_OS); 355 part->name);
307 p = nvram_create_partition(NVRAM_LOG_PART_NAME, 356 nvram_remove_partition(NULL, NVRAM_SIG_OS,
308 NVRAM_SIG_OS, NVRAM_MAX_REQ, 357 pseries_nvram_os_partitions);
309 NVRAM_MIN_REQ); 358 p = nvram_create_partition(part->name, NVRAM_SIG_OS,
359 part->req_size, part->min_size);
310 } 360 }
311 } 361 }
312 362
313 if (p <= 0) { 363 if (p <= 0) {
314 pr_err("nvram: Failed to find or create "NVRAM_LOG_PART_NAME 364 pr_err("nvram: Failed to find or create %s"
315 " partition, err %d\n", (int)p); 365 " partition, err %d\n", part->name, (int)p);
316 return 0; 366 return -1;
317 } 367 }
318 368
319 nvram_error_log_index = p; 369 part->index = p;
320 nvram_error_log_size = nvram_get_partition_size(p) - 370 part->size = nvram_get_partition_size(p) - sizeof(struct err_log_info);
321 sizeof(struct err_log_info);
322 371
323 return 0; 372 return 0;
324} 373}
325machine_arch_initcall(pseries, pseries_nvram_init_log_partition); 374
375static void __init nvram_init_oops_partition(int rtas_partition_exists)
376{
377 int rc;
378
379 rc = pseries_nvram_init_os_partition(&oops_log_partition);
380 if (rc != 0) {
381 if (!rtas_partition_exists)
382 return;
383 pr_notice("nvram: Using %s partition to log both"
384 " RTAS errors and oops/panic reports\n",
385 rtas_log_partition.name);
386 memcpy(&oops_log_partition, &rtas_log_partition,
387 sizeof(rtas_log_partition));
388 }
389 oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL);
390 rc = kmsg_dump_register(&nvram_kmsg_dumper);
391 if (rc != 0) {
392 pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc);
393 kfree(oops_buf);
394 return;
395 }
396}
397
398static int __init pseries_nvram_init_log_partitions(void)
399{
400 int rc;
401
402 rc = pseries_nvram_init_os_partition(&rtas_log_partition);
403 nvram_init_oops_partition(rc == 0);
404 return 0;
405}
406machine_arch_initcall(pseries, pseries_nvram_init_log_partitions);
326 407
327int __init pSeries_nvram_init(void) 408int __init pSeries_nvram_init(void)
328{ 409{
@@ -353,3 +434,59 @@ int __init pSeries_nvram_init(void)
353 434
354 return 0; 435 return 0;
355} 436}
437
438/*
439 * Try to capture the last capture_len bytes of the printk buffer. Return
440 * the amount actually captured.
441 */
442static size_t capture_last_msgs(const char *old_msgs, size_t old_len,
443 const char *new_msgs, size_t new_len,
444 char *captured, size_t capture_len)
445{
446 if (new_len >= capture_len) {
447 memcpy(captured, new_msgs + (new_len - capture_len),
448 capture_len);
449 return capture_len;
450 } else {
451 /* Grab the end of old_msgs. */
452 size_t old_tail_len = min(old_len, capture_len - new_len);
453 memcpy(captured, old_msgs + (old_len - old_tail_len),
454 old_tail_len);
455 memcpy(captured + old_tail_len, new_msgs, new_len);
456 return old_tail_len + new_len;
457 }
458}
459
460/*
461 * Are we using the ibm,rtas-log for oops/panic reports? And if so,
462 * would logging this oops/panic overwrite an RTAS event that rtas_errd
463 * hasn't had a chance to read and process? Return 1 if so, else 0.
464 *
465 * We assume that if rtas_errd hasn't read the RTAS event in
466 * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
467 */
468static int clobbering_unread_rtas_event(void)
469{
470 return (oops_log_partition.index == rtas_log_partition.index
471 && last_unread_rtas_event
472 && get_seconds() - last_unread_rtas_event <=
473 NVRAM_RTAS_READ_TIMEOUT);
474}
475
476/* our kmsg_dump callback */
477static void oops_to_nvram(struct kmsg_dumper *dumper,
478 enum kmsg_dump_reason reason,
479 const char *old_msgs, unsigned long old_len,
480 const char *new_msgs, unsigned long new_len)
481{
482 static unsigned int oops_count = 0;
483 size_t text_len;
484
485 if (clobbering_unread_rtas_event())
486 return;
487
488 text_len = capture_last_msgs(old_msgs, old_len, new_msgs, new_len,
489 oops_buf, oops_log_partition.size);
490 (void) nvram_write_os_partition(&oops_log_partition, oops_buf,
491 (int) text_len, ERR_TYPE_KERNEL_PANIC, ++oops_count);
492}
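
capture_last_msgs() above always keeps the newest text: if the fresh kmsg chunk alone exceeds the oops partition only its tail is kept, otherwise as much of the old buffer as still fits is prepended. A hedged worked example of that arithmetic (buffer lengths are illustrative, except for the 4000-byte oops partition this patch requests):

#include <stdio.h>

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned long capture_len = 4000;	/* oops_log_partition.req_size */
	unsigned long new_len = 1500;		/* hypothetical fresh messages */
	unsigned long old_len = 100000;		/* hypothetical older messages */

	/* new_len < capture_len, so the tail of old_msgs fills the rest */
	unsigned long old_tail_len = min_ul(old_len, capture_len - new_len);

	printf("captured %lu old + %lu new = %lu bytes\n",
	       old_tail_len, new_len, old_tail_len + new_len);
	return 0;
}
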
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index d345bfd56bbe..2a0089a2c829 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -114,10 +114,13 @@ static void __init fwnmi_init(void)
114 114
115static void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc) 115static void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc)
116{ 116{
117 struct irq_chip *chip = get_irq_desc_chip(desc);
117 unsigned int cascade_irq = i8259_irq(); 118 unsigned int cascade_irq = i8259_irq();
119
118 if (cascade_irq != NO_IRQ) 120 if (cascade_irq != NO_IRQ)
119 generic_handle_irq(cascade_irq); 121 generic_handle_irq(cascade_irq);
120 desc->chip->eoi(irq); 122
123 chip->irq_eoi(&desc->irq_data);
121} 124}
122 125
123static void __init pseries_setup_i8259_cascade(void) 126static void __init pseries_setup_i8259_cascade(void)
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 7b96e5a270ce..01fea46c0335 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -202,20 +202,20 @@ static int get_irq_server(unsigned int virq, const struct cpumask *cpumask,
202#define get_irq_server(virq, cpumask, strict_check) (default_server) 202#define get_irq_server(virq, cpumask, strict_check) (default_server)
203#endif 203#endif
204 204
205static void xics_unmask_irq(unsigned int virq) 205static void xics_unmask_irq(struct irq_data *d)
206{ 206{
207 unsigned int irq; 207 unsigned int irq;
208 int call_status; 208 int call_status;
209 int server; 209 int server;
210 210
211 pr_devel("xics: unmask virq %d\n", virq); 211 pr_devel("xics: unmask virq %d\n", d->irq);
212 212
213 irq = (unsigned int)irq_map[virq].hwirq; 213 irq = (unsigned int)irq_map[d->irq].hwirq;
214 pr_devel(" -> map to hwirq 0x%x\n", irq); 214 pr_devel(" -> map to hwirq 0x%x\n", irq);
215 if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) 215 if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
216 return; 216 return;
217 217
218 server = get_irq_server(virq, irq_to_desc(virq)->affinity, 0); 218 server = get_irq_server(d->irq, d->affinity, 0);
219 219
220 call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, 220 call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server,
221 DEFAULT_PRIORITY); 221 DEFAULT_PRIORITY);
@@ -235,61 +235,61 @@ static void xics_unmask_irq(unsigned int virq)
235 } 235 }
236} 236}
237 237
238static unsigned int xics_startup(unsigned int virq) 238static unsigned int xics_startup(struct irq_data *d)
239{ 239{
240 /* 240 /*
241 * The generic MSI code returns with the interrupt disabled on the 241 * The generic MSI code returns with the interrupt disabled on the
242 * card, using the MSI mask bits. Firmware doesn't appear to unmask 242 * card, using the MSI mask bits. Firmware doesn't appear to unmask
243 * at that level, so we do it here by hand. 243 * at that level, so we do it here by hand.
244 */ 244 */
245 if (irq_to_desc(virq)->msi_desc) 245 if (d->msi_desc)
246 unmask_msi_irq(irq_get_irq_data(virq)); 246 unmask_msi_irq(d);
247 247
248 /* unmask it */ 248 /* unmask it */
249 xics_unmask_irq(virq); 249 xics_unmask_irq(d);
250 return 0; 250 return 0;
251} 251}
252 252
253static void xics_mask_real_irq(unsigned int irq) 253static void xics_mask_real_irq(struct irq_data *d)
254{ 254{
255 int call_status; 255 int call_status;
256 256
257 if (irq == XICS_IPI) 257 if (d->irq == XICS_IPI)
258 return; 258 return;
259 259
260 call_status = rtas_call(ibm_int_off, 1, 1, NULL, irq); 260 call_status = rtas_call(ibm_int_off, 1, 1, NULL, d->irq);
261 if (call_status != 0) { 261 if (call_status != 0) {
262 printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n", 262 printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
263 __func__, irq, call_status); 263 __func__, d->irq, call_status);
264 return; 264 return;
265 } 265 }
266 266
267 /* Have to set XIVE to 0xff to be able to remove a slot */ 267 /* Have to set XIVE to 0xff to be able to remove a slot */
268 call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, 268 call_status = rtas_call(ibm_set_xive, 3, 1, NULL, d->irq,
269 default_server, 0xff); 269 default_server, 0xff);
270 if (call_status != 0) { 270 if (call_status != 0) {
271 printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n", 271 printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
272 __func__, irq, call_status); 272 __func__, d->irq, call_status);
273 return; 273 return;
274 } 274 }
275} 275}
276 276
277static void xics_mask_irq(unsigned int virq) 277static void xics_mask_irq(struct irq_data *d)
278{ 278{
279 unsigned int irq; 279 unsigned int irq;
280 280
281 pr_devel("xics: mask virq %d\n", virq); 281 pr_devel("xics: mask virq %d\n", d->irq);
282 282
283 irq = (unsigned int)irq_map[virq].hwirq; 283 irq = (unsigned int)irq_map[d->irq].hwirq;
284 if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) 284 if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
285 return; 285 return;
286 xics_mask_real_irq(irq); 286 xics_mask_real_irq(d);
287} 287}
288 288
289static void xics_mask_unknown_vec(unsigned int vec) 289static void xics_mask_unknown_vec(unsigned int vec)
290{ 290{
291 printk(KERN_ERR "Interrupt %u (real) is invalid, disabling it.\n", vec); 291 printk(KERN_ERR "Interrupt %u (real) is invalid, disabling it.\n", vec);
292 xics_mask_real_irq(vec); 292 xics_mask_real_irq(irq_get_irq_data(vec));
293} 293}
294 294
295static inline unsigned int xics_xirr_vector(unsigned int xirr) 295static inline unsigned int xics_xirr_vector(unsigned int xirr)
@@ -371,30 +371,31 @@ static unsigned char pop_cppr(void)
371 return os_cppr->stack[--os_cppr->index]; 371 return os_cppr->stack[--os_cppr->index];
372} 372}
373 373
374static void xics_eoi_direct(unsigned int virq) 374static void xics_eoi_direct(struct irq_data *d)
375{ 375{
376 unsigned int irq = (unsigned int)irq_map[virq].hwirq; 376 unsigned int irq = (unsigned int)irq_map[d->irq].hwirq;
377 377
378 iosync(); 378 iosync();
379 direct_xirr_info_set((pop_cppr() << 24) | irq); 379 direct_xirr_info_set((pop_cppr() << 24) | irq);
380} 380}
381 381
382static void xics_eoi_lpar(unsigned int virq) 382static void xics_eoi_lpar(struct irq_data *d)
383{ 383{
384 unsigned int irq = (unsigned int)irq_map[virq].hwirq; 384 unsigned int irq = (unsigned int)irq_map[d->irq].hwirq;
385 385
386 iosync(); 386 iosync();
387 lpar_xirr_info_set((pop_cppr() << 24) | irq); 387 lpar_xirr_info_set((pop_cppr() << 24) | irq);
388} 388}
389 389
390static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) 390static int
391xics_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool force)
391{ 392{
392 unsigned int irq; 393 unsigned int irq;
393 int status; 394 int status;
394 int xics_status[2]; 395 int xics_status[2];
395 int irq_server; 396 int irq_server;
396 397
397 irq = (unsigned int)irq_map[virq].hwirq; 398 irq = (unsigned int)irq_map[d->irq].hwirq;
398 if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) 399 if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
399 return -1; 400 return -1;
400 401
@@ -406,13 +407,13 @@ static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
406 return -1; 407 return -1;
407 } 408 }
408 409
409 irq_server = get_irq_server(virq, cpumask, 1); 410 irq_server = get_irq_server(d->irq, cpumask, 1);
410 if (irq_server == -1) { 411 if (irq_server == -1) {
411 char cpulist[128]; 412 char cpulist[128];
412 cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); 413 cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
413 printk(KERN_WARNING 414 printk(KERN_WARNING
414 "%s: No online cpus in the mask %s for irq %d\n", 415 "%s: No online cpus in the mask %s for irq %d\n",
415 __func__, cpulist, virq); 416 __func__, cpulist, d->irq);
416 return -1; 417 return -1;
417 } 418 }
418 419
@@ -430,20 +431,20 @@ static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
430 431
431static struct irq_chip xics_pic_direct = { 432static struct irq_chip xics_pic_direct = {
432 .name = "XICS", 433 .name = "XICS",
433 .startup = xics_startup, 434 .irq_startup = xics_startup,
434 .mask = xics_mask_irq, 435 .irq_mask = xics_mask_irq,
435 .unmask = xics_unmask_irq, 436 .irq_unmask = xics_unmask_irq,
436 .eoi = xics_eoi_direct, 437 .irq_eoi = xics_eoi_direct,
437 .set_affinity = xics_set_affinity 438 .irq_set_affinity = xics_set_affinity
438}; 439};
439 440
440static struct irq_chip xics_pic_lpar = { 441static struct irq_chip xics_pic_lpar = {
441 .name = "XICS", 442 .name = "XICS",
442 .startup = xics_startup, 443 .irq_startup = xics_startup,
443 .mask = xics_mask_irq, 444 .irq_mask = xics_mask_irq,
444 .unmask = xics_unmask_irq, 445 .irq_unmask = xics_unmask_irq,
445 .eoi = xics_eoi_lpar, 446 .irq_eoi = xics_eoi_lpar,
446 .set_affinity = xics_set_affinity 447 .irq_set_affinity = xics_set_affinity
447}; 448};
448 449
449 450
@@ -890,6 +891,7 @@ void xics_migrate_irqs_away(void)
890 891
891 for_each_irq(virq) { 892 for_each_irq(virq) {
892 struct irq_desc *desc; 893 struct irq_desc *desc;
894 struct irq_chip *chip;
893 int xics_status[2]; 895 int xics_status[2];
894 int status; 896 int status;
895 unsigned long flags; 897 unsigned long flags;
@@ -903,12 +905,15 @@ void xics_migrate_irqs_away(void)
903 /* We need to get IPIs still. */ 905 /* We need to get IPIs still. */
904 if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) 906 if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
905 continue; 907 continue;
908
906 desc = irq_to_desc(virq); 909 desc = irq_to_desc(virq);
907 910
908 /* We only need to migrate enabled IRQS */ 911 /* We only need to migrate enabled IRQS */
909 if (desc == NULL || desc->chip == NULL 912 if (desc == NULL || desc->action == NULL)
910 || desc->action == NULL 913 continue;
911 || desc->chip->set_affinity == NULL) 914
915 chip = get_irq_desc_chip(desc);
916 if (chip == NULL || chip->irq_set_affinity == NULL)
912 continue; 917 continue;
913 918
914 raw_spin_lock_irqsave(&desc->lock, flags); 919 raw_spin_lock_irqsave(&desc->lock, flags);
@@ -934,8 +939,8 @@ void xics_migrate_irqs_away(void)
934 virq, cpu); 939 virq, cpu);
935 940
936 /* Reset affinity to all cpus */ 941 /* Reset affinity to all cpus */
937 cpumask_setall(irq_to_desc(virq)->affinity); 942 cpumask_setall(desc->irq_data.affinity);
938 desc->chip->set_affinity(virq, cpu_all_mask); 943 chip->irq_set_affinity(&desc->irq_data, cpu_all_mask, true);
939unlock: 944unlock:
940 raw_spin_unlock_irqrestore(&desc->lock, flags); 945 raw_spin_unlock_irqrestore(&desc->lock, flags);
941 } 946 }
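
The setup.c and xics.c diffs above are part of the GENERIC_HARDIRQS_NO_DEPRECATED conversion: irq_chip callbacks gain an irq_ prefix and take a struct irq_data instead of a bare virq number, with the chip looked up through get_irq_desc_chip(). A hedged skeleton of the resulting shape (the example_* names are hypothetical, not from the patch):

static void example_mask(struct irq_data *d)
{
	/* the Linux irq number now comes from d->irq; xics still maps it
	 * to the hardware interrupt via irq_map[d->irq].hwirq */
	pr_devel("masking virq %d\n", d->irq);
}

static struct irq_chip example_pic = {
	.name		= "EXAMPLE",
	.irq_mask	= example_mask,
	/* .irq_unmask, .irq_eoi, .irq_startup and .irq_set_affinity follow
	 * the same pattern, as in xics_pic_direct/xics_pic_lpar above */
};
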