author    Linus Torvalds <torvalds@linux-foundation.org>  2017-11-16 15:47:46 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2017-11-16 15:47:46 -0500
commit    5b0e2cb020085efe202123162502e0b551e49a0e
tree      534bbb4c9f98c2ed9a520e11107029e5df38c3c2 /arch/powerpc/platforms
parent    758f875848d78148cf9a9cdb3ff1ddf29b234056
parent    3ffa9d9e2a7c10127d8cbf91ea2be15390b450ed
Merge tag 'powerpc-4.15-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
 "A bit of a small release, I suspect in part due to me travelling for
  KS. But my backlog of patches to review is smaller than usual, so I
  think in part folks just didn't send as much this cycle.

  Non-highlights:

   - Five fixes for the >128T address space handling, both to fix bugs
     in our implementation and to bring the semantics exactly into line
     with x86.

  Highlights:

   - Support for a new OPAL call on bare metal machines which gives us
     a true NMI (ie. is not masked by MSR[EE]=0) for debugging etc.

   - Support for Power9 DD2 in the CXL driver.

   - Improvements to machine check handling so that uncorrectable
     errors can be reported into the generic memory_failure() machinery.

   - Some fixes and improvements for VPHN, which is used under PowerVM
     to notify the Linux partition of topology changes.

   - Plumbing to enable TM (transactional memory) without suspend on
     some Power9 processors (PPC_FEATURE2_HTM_NO_SUSPEND).

   - Support for emulating vector loads from cache-inhibited memory, on
     some Power9 revisions.

   - Disable the fast-endian switch "syscall" by default (behind a
     CONFIG), we believe it has never had any users.

   - A major rework of the API drivers use when initiating and waiting
     for long running operations performed by OPAL firmware, and
     changes to the powernv_flash driver to use the new API.

   - Several fixes for the handling of FP/VMX/VSX while processes are
     using transactional memory.

   - Optimisations of TLB range flushes when using the radix MMU on
     Power9.

   - Improvements to the VAS facility used to access coprocessors on
     Power9, and related improvements to the way the NX crypto driver
     handles requests.

   - Implementation of PMEM_API and UACCESS_FLUSHCACHE for 64-bit.

  Thanks to: Alexey Kardashevskiy, Alistair Popple, Allen Pais, Andrew
  Donnellan, Aneesh Kumar K.V, Arnd Bergmann, Balbir Singh, Benjamin
  Herrenschmidt, Breno Leitao, Christophe Leroy, Christophe Lombard,
  Cyril Bur, Frederic Barrat, Gautham R. Shenoy, Geert Uytterhoeven,
  Guilherme G. Piccoli, Gustavo Romero, Haren Myneni, Joel Stanley,
  Kamalesh Babulal, Kautuk Consul, Markus Elfring, Masami Hiramatsu,
  Michael Bringmann, Michael Neuling, Michal Suchanek, Naveen N. Rao,
  Nicholas Piggin, Oliver O'Halloran, Paul Mackerras, Pedro Miraglia
  Franco de Carvalho, Philippe Bergheaud, Sandipan Das, Seth Forshee,
  Shriya, Stephen Rothwell, Stewart Smith, Sukadev Bhattiprolu, Tyrel
  Datwyler, Vaibhav Jain, Vaidyanathan Srinivasan, and William A.
  Kennington III"

* tag 'powerpc-4.15-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (151 commits)
  powerpc/64s: Fix Power9 DD2.0 workarounds by adding DD2.1 feature
  powerpc/64s: Fix masking of SRR1 bits on instruction fault
  powerpc/64s: mm_context.addr_limit is only used on hash
  powerpc/64s/radix: Fix 128TB-512TB virtual address boundary case allocation
  powerpc/64s/hash: Allow MAP_FIXED allocations to cross 128TB boundary
  powerpc/64s/hash: Fix fork() with 512TB process address space
  powerpc/64s/hash: Fix 128TB-512TB virtual address boundary case allocation
  powerpc/64s/hash: Fix 512T hint detection to use >= 128T
  powerpc: Fix DABR match on hash based systems
  powerpc/signal: Properly handle return value from uprobe_deny_signal()
  powerpc/fadump: use kstrtoint to handle sysfs store
  powerpc/lib: Implement UACCESS_FLUSHCACHE API
  powerpc/lib: Implement PMEM API
  powerpc/powernv/npu: Don't explicitly flush nmmu tlb
  powerpc/powernv/npu: Use flush_all_mm() instead of flush_tlb_mm()
  powerpc/powernv/idle: Round up latency and residency values
  powerpc/kprobes: refactor kprobe_lookup_name for safer string operations
  powerpc/kprobes: Blacklist emulate_update_regs() from kprobes
  powerpc/kprobes: Do not disable interrupts for optprobes and kprobes_on_ftrace
  powerpc/kprobes: Disable preemption before invoking probe handler for optprobes
  ...
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype              |  19
-rw-r--r--  arch/powerpc/platforms/powermac/low_i2c.c           |   4
-rw-r--r--  arch/powerpc/platforms/powernv/Makefile             |   3
-rw-r--r--  arch/powerpc/platforms/powernv/eeh-powernv.c        |  42
-rw-r--r--  arch/powerpc/platforms/powernv/npu-dma.c            |  28
-rw-r--r--  arch/powerpc/platforms/powernv/opal-async.c         | 180
-rw-r--r--  arch/powerpc/platforms/powernv/opal-hmi.c           |   2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-irqchip.c       |   8
-rw-r--r--  arch/powerpc/platforms/powernv/opal-memory-errors.c |   2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-sensor.c        |  17
-rw-r--r--  arch/powerpc/platforms/powernv/opal-wrappers.S      |   5
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c               |   2
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c           |  29
-rw-r--r--  arch/powerpc/platforms/powernv/pci.h                |   4
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c              |  26
-rw-r--r--  arch/powerpc/platforms/powernv/smp.c                |  59
-rw-r--r--  arch/powerpc/platforms/powernv/vas-debug.c          | 209
-rw-r--r--  arch/powerpc/platforms/powernv/vas-window.c         | 242
-rw-r--r--  arch/powerpc/platforms/powernv/vas.c                |  31
-rw-r--r--  arch/powerpc/platforms/powernv/vas.h                |  93
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-cpu.c        |   2
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c              |  19
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c               |   8
-rw-r--r--  arch/powerpc/platforms/pseries/lparcfg.c            |   2
-rw-r--r--  arch/powerpc/platforms/pseries/vio.c                |   2
25 files changed, 828 insertions(+), 210 deletions(-)
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index a78f255111f2..ae07470fde3c 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -295,10 +295,6 @@ config PPC_STD_MMU_32
 	def_bool y
 	depends on PPC_STD_MMU && PPC32
 
-config PPC_STD_MMU_64
-	def_bool y
-	depends on PPC_STD_MMU && PPC64
-
 config PPC_RADIX_MMU
 	bool "Radix MMU Support"
 	depends on PPC_BOOK3S_64
@@ -309,6 +305,19 @@ config PPC_RADIX_MMU
 	  is only implemented by IBM Power9 CPUs, if you don't have one of them
 	  you can probably disable this.
 
+config PPC_RADIX_MMU_DEFAULT
+	bool "Default to using the Radix MMU when possible"
+	depends on PPC_RADIX_MMU
+	default y
+	help
+	  When the hardware supports the Radix MMU, default to using it unless
+	  "disable_radix[=yes]" is specified on the kernel command line.
+
+	  If this option is disabled, the Hash MMU will be used by default,
+	  unless "disable_radix=no" is specified on the kernel command line.
+
+	  If you're unsure, say Y.
+
 config ARCH_ENABLE_HUGEPAGE_MIGRATION
 	def_bool y
 	depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION
@@ -324,7 +333,7 @@ config PPC_BOOK3E_MMU
 
 config PPC_MM_SLICES
 	bool
-	default y if PPC_STD_MMU_64
+	default y if PPC_BOOK3S_64
 	default n
 
 config PPC_HAVE_PMU_SUPPORT
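For context, the new Kconfig default hooks into the existing "disable_radix" command-line parameter. A minimal sketch of the corresponding arch/powerpc/mm change (not part of this diff; exact file and spelling may differ):

	/* Sketch: default comes from CONFIG_PPC_RADIX_MMU_DEFAULT */
	static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);

	static int __init parse_disable_radix(char *p)
	{
		bool val;

		if (!p)
			val = true;		/* bare "disable_radix" disables radix */
		else if (kstrtobool(p, &val))	/* "disable_radix=yes|no" */
			return -EINVAL;

		disable_radix = val;
		return 0;
	}
	early_param("disable_radix", parse_disable_radix);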
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index 70183eb3d5c8..39a1d4225e0f 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -513,9 +513,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
 	mutex_init(&host->mutex);
 	init_completion(&host->complete);
 	spin_lock_init(&host->lock);
-	init_timer(&host->timeout_timer);
-	host->timeout_timer.function = kw_i2c_timeout;
-	host->timeout_timer.data = (unsigned long)host;
+	setup_timer(&host->timeout_timer, kw_i2c_timeout, (unsigned long)host);
 
 	psteps = of_get_property(np, "AAPL,address-step", NULL);
 	steps = psteps ? (*psteps) : 0x10;
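For reference, setup_timer() bundles exactly the three steps it replaces; simplified from include/linux/timer.h of this era:

	/* Simplified sketch of the macro this patch switches to */
	#define setup_timer(timer, fn, data)		\
		do {					\
			init_timer(timer);		\
			(timer)->function = (fn);	\
			(timer)->data = (data);		\
		} while (0)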
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 7a31c26500e6..3732118a0482 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -15,4 +15,5 @@ obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o
 obj-$(CONFIG_OPAL_PRD)	+= opal-prd.o
 obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
 obj-$(CONFIG_PPC_MEMTRACE)	+= memtrace.o
-obj-$(CONFIG_PPC_VAS)	+= vas.o vas-window.o
+obj-$(CONFIG_PPC_VAS)	+= vas.o vas-window.o vas-debug.o
+obj-$(CONFIG_PPC_FTW)	+= nx-ftw.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 8864065eba22..4650fb294e7a 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -41,7 +41,6 @@
 #include "powernv.h"
 #include "pci.h"
 
-static bool pnv_eeh_nb_init = false;
 static int eeh_event_irq = -EINVAL;
 
 static int pnv_eeh_init(void)
@@ -197,31 +196,31 @@ PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);
  * been built. If the I/O cache staff has been built, EEH is
  * ready to supply service.
  */
-static int pnv_eeh_post_init(void)
+int pnv_eeh_post_init(void)
 {
 	struct pci_controller *hose;
 	struct pnv_phb *phb;
 	int ret = 0;
 
-	/* Register OPAL event notifier */
-	if (!pnv_eeh_nb_init) {
-		eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
-		if (eeh_event_irq < 0) {
-			pr_err("%s: Can't register OPAL event interrupt (%d)\n",
-			       __func__, eeh_event_irq);
-			return eeh_event_irq;
-		}
+	/* Probe devices & build address cache */
+	eeh_probe_devices();
+	eeh_addr_cache_build();
 
-		ret = request_irq(eeh_event_irq, pnv_eeh_event,
-				  IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
-		if (ret < 0) {
-			irq_dispose_mapping(eeh_event_irq);
-			pr_err("%s: Can't request OPAL event interrupt (%d)\n",
-			       __func__, eeh_event_irq);
-			return ret;
-		}
+	/* Register OPAL event notifier */
+	eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
+	if (eeh_event_irq < 0) {
+		pr_err("%s: Can't register OPAL event interrupt (%d)\n",
+		       __func__, eeh_event_irq);
+		return eeh_event_irq;
+	}
 
-		pnv_eeh_nb_init = true;
+	ret = request_irq(eeh_event_irq, pnv_eeh_event,
+			  IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
+	if (ret < 0) {
+		irq_dispose_mapping(eeh_event_irq);
+		pr_err("%s: Can't request OPAL event interrupt (%d)\n",
+		       __func__, eeh_event_irq);
+		return ret;
 	}
 
 	if (!eeh_enabled())
@@ -367,6 +366,10 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
 	if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
 		return NULL;
 
+	/* Skip if we haven't probed yet */
+	if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE)
+		return NULL;
+
 	/* Initialize eeh device */
 	edev->class_code = pdn->class_code;
 	edev->mode	&= 0xFFFFFF00;
@@ -1731,7 +1734,6 @@ static int pnv_eeh_restore_config(struct pci_dn *pdn)
 static struct eeh_ops pnv_eeh_ops = {
 	.name                   = "powernv",
 	.init                   = pnv_eeh_init,
-	.post_init              = pnv_eeh_post_init,
 	.probe                  = pnv_eeh_probe,
 	.set_option             = pnv_eeh_set_option,
 	.get_pe_addr            = pnv_eeh_get_pe_addr,
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 2cb6cbea4b3b..f6cbc1a71472 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -395,6 +395,7 @@ struct npu_context {
 	struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS];
 	struct mmu_notifier mn;
 	struct kref kref;
+	bool nmmu_flush;
 
 	/* Callback to stop translation requests on a given GPU */
 	struct npu_context *(*release_cb)(struct npu_context *, void *);
@@ -545,11 +546,13 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
 	unsigned long pid = npu_context->mm->context.id;
 
-	/*
-	 * Unfortunately the nest mmu does not support flushing specific
-	 * addresses so we have to flush the whole mm.
-	 */
-	flush_tlb_mm(npu_context->mm);
+	if (npu_context->nmmu_flush)
+		/*
+		 * Unfortunately the nest mmu does not support flushing specific
+		 * addresses so we have to flush the whole mm once before
+		 * shooting down the GPU translation.
+		 */
+		flush_all_mm(npu_context->mm);
 
 	/*
 	 * Loop over all the NPUs this process is active on and launch
@@ -722,6 +725,16 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 		return ERR_PTR(-ENODEV);
 	npu_context->npdev[npu->index][nvlink_index] = npdev;
 
+	if (!nphb->npu.nmmu_flush) {
+		/*
+		 * If we're not explicitly flushing ourselves we need to mark
+		 * the thread for global flushes
+		 */
+		npu_context->nmmu_flush = false;
+		mm_context_add_copro(mm);
+	} else
+		npu_context->nmmu_flush = true;
+
 	return npu_context;
 }
 EXPORT_SYMBOL(pnv_npu2_init_context);
@@ -731,6 +744,9 @@ static void pnv_npu2_release_context(struct kref *kref)
 	struct npu_context *npu_context =
 		container_of(kref, struct npu_context, kref);
 
+	if (!npu_context->nmmu_flush)
+		mm_context_remove_copro(npu_context->mm);
+
 	npu_context->mm->context.npu_context = NULL;
 	mmu_notifier_unregister(&npu_context->mn,
 				npu_context->mm);
@@ -819,6 +835,8 @@ int pnv_npu2_init(struct pnv_phb *phb)
 	static int npu_index;
 	uint64_t rc = 0;
 
+	phb->npu.nmmu_flush =
+		of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
 	for_each_child_of_node(phb->hose->dn, dn) {
 		gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn));
 		if (gpdev) {
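For context, a GPU driver consumes this exported API roughly as below. Only the first parameter and the release_cb type are visible in this diff; the flags/priv parameters are assumptions from the era's header, so treat this as a sketch:

	/* Hypothetical GPU-driver usage of the NPU context API */
	static struct npu_context *my_release_cb(struct npu_context *ctx, void *priv)
	{
		/* Stop issuing ATS translation requests for this context */
		return ctx;
	}

	static int my_gpu_bind(struct pci_dev *gpdev)
	{
		struct npu_context *ctx;

		/*
		 * Registers current->mm with the NPU. Whether the nest MMU is
		 * flushed explicitly or via mm_context_add_copro() is decided
		 * internally from the "ibm,nmmu-flush" device-tree property.
		 */
		ctx = pnv_npu2_init_context(gpdev, 0 /* flags */, my_release_cb, NULL);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);
		return 0;
	}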
diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c
index cf33769a7b72..18a355fa15e8 100644
--- a/arch/powerpc/platforms/powernv/opal-async.c
+++ b/arch/powerpc/platforms/powernv/opal-async.c
@@ -1,7 +1,7 @@
 /*
  * PowerNV OPAL asynchronous completion interfaces
  *
- * Copyright 2013 IBM Corp.
+ * Copyright 2013-2017 IBM Corp.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -23,40 +23,50 @@
 #include <asm/machdep.h>
 #include <asm/opal.h>
 
-#define N_ASYNC_COMPLETIONS	64
+enum opal_async_token_state {
+	ASYNC_TOKEN_UNALLOCATED = 0,
+	ASYNC_TOKEN_ALLOCATED,
+	ASYNC_TOKEN_DISPATCHED,
+	ASYNC_TOKEN_ABANDONED,
+	ASYNC_TOKEN_COMPLETED
+};
+
+struct opal_async_token {
+	enum opal_async_token_state state;
+	struct opal_msg response;
+};
 
-static DECLARE_BITMAP(opal_async_complete_map, N_ASYNC_COMPLETIONS) = {~0UL};
-static DECLARE_BITMAP(opal_async_token_map, N_ASYNC_COMPLETIONS);
 static DECLARE_WAIT_QUEUE_HEAD(opal_async_wait);
 static DEFINE_SPINLOCK(opal_async_comp_lock);
 static struct semaphore opal_async_sem;
-static struct opal_msg *opal_async_responses;
 static unsigned int opal_max_async_tokens;
+static struct opal_async_token *opal_async_tokens;
 
-int __opal_async_get_token(void)
+static int __opal_async_get_token(void)
 {
 	unsigned long flags;
-	int token;
+	int i, token = -EBUSY;
 
 	spin_lock_irqsave(&opal_async_comp_lock, flags);
-	token = find_first_bit(opal_async_complete_map, opal_max_async_tokens);
-	if (token >= opal_max_async_tokens) {
-		token = -EBUSY;
-		goto out;
-	}
 
-	if (__test_and_set_bit(token, opal_async_token_map)) {
-		token = -EBUSY;
-		goto out;
+	for (i = 0; i < opal_max_async_tokens; i++) {
+		if (opal_async_tokens[i].state == ASYNC_TOKEN_UNALLOCATED) {
+			opal_async_tokens[i].state = ASYNC_TOKEN_ALLOCATED;
+			token = i;
+			break;
+		}
 	}
 
-	__clear_bit(token, opal_async_complete_map);
-
-out:
 	spin_unlock_irqrestore(&opal_async_comp_lock, flags);
 	return token;
 }
 
+/*
+ * Note: If the returned token is used in an opal call and opal returns
+ * OPAL_ASYNC_COMPLETION you MUST call one of opal_async_wait_response() or
+ * opal_async_wait_response_interruptible() at least once before calling another
+ * opal_async_* function
+ */
 int opal_async_get_token_interruptible(void)
 {
 	int token;
@@ -73,9 +83,10 @@ int opal_async_get_token_interruptible(void)
 }
 EXPORT_SYMBOL_GPL(opal_async_get_token_interruptible);
 
-int __opal_async_release_token(int token)
+static int __opal_async_release_token(int token)
 {
 	unsigned long flags;
+	int rc;
 
 	if (token < 0 || token >= opal_max_async_tokens) {
 		pr_err("%s: Passed token is out of range, token %d\n",
@@ -84,11 +95,26 @@ int __opal_async_release_token(int token)
 	}
 
 	spin_lock_irqsave(&opal_async_comp_lock, flags);
-	__set_bit(token, opal_async_complete_map);
-	__clear_bit(token, opal_async_token_map);
+	switch (opal_async_tokens[token].state) {
+	case ASYNC_TOKEN_COMPLETED:
+	case ASYNC_TOKEN_ALLOCATED:
+		opal_async_tokens[token].state = ASYNC_TOKEN_UNALLOCATED;
+		rc = 0;
+		break;
+	/*
+	 * DISPATCHED and ABANDONED tokens must wait for OPAL to respond.
+	 * Mark a DISPATCHED token as ABANDONED so that the response handling
+	 * code knows no one cares and that it can free it then.
+	 */
+	case ASYNC_TOKEN_DISPATCHED:
+		opal_async_tokens[token].state = ASYNC_TOKEN_ABANDONED;
+		/* Fall through */
+	default:
+		rc = 1;
+	}
 	spin_unlock_irqrestore(&opal_async_comp_lock, flags);
 
-	return 0;
+	return rc;
 }
 
 int opal_async_release_token(int token)
@@ -96,12 +122,10 @@ int opal_async_release_token(int token)
 	int ret;
 
 	ret = __opal_async_release_token(token);
-	if (ret)
-		return ret;
-
-	up(&opal_async_sem);
+	if (!ret)
+		up(&opal_async_sem);
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(opal_async_release_token);
 
@@ -117,22 +141,83 @@ int opal_async_wait_response(uint64_t token, struct opal_msg *msg)
 		return -EINVAL;
 	}
 
-	/* Wakeup the poller before we wait for events to speed things
+	/*
+	 * There is no need to mark the token as dispatched, wait_event()
+	 * will block until the token completes.
+	 *
+	 * Wakeup the poller before we wait for events to speed things
 	 * up on platforms or simulators where the interrupts aren't
 	 * functional.
 	 */
 	opal_wake_poller();
-	wait_event(opal_async_wait, test_bit(token, opal_async_complete_map));
-	memcpy(msg, &opal_async_responses[token], sizeof(*msg));
+	wait_event(opal_async_wait, opal_async_tokens[token].state
+			== ASYNC_TOKEN_COMPLETED);
+	memcpy(msg, &opal_async_tokens[token].response, sizeof(*msg));
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(opal_async_wait_response);
 
+int opal_async_wait_response_interruptible(uint64_t token, struct opal_msg *msg)
+{
+	unsigned long flags;
+	int ret;
+
+	if (token >= opal_max_async_tokens) {
+		pr_err("%s: Invalid token passed\n", __func__);
+		return -EINVAL;
+	}
+
+	if (!msg) {
+		pr_err("%s: Invalid message pointer passed\n", __func__);
+		return -EINVAL;
+	}
+
+	/*
+	 * The first time this gets called we mark the token as DISPATCHED
+	 * so that if wait_event_interruptible() returns not zero and the
+	 * caller frees the token, we know not to actually free the token
+	 * until the response comes.
+	 *
+	 * Only change if the token is ALLOCATED - it may have been
+	 * completed even before the caller gets around to calling this
+	 * the first time.
+	 *
+	 * There is also a dirty great comment at the token allocation
+	 * function that if the opal call returns OPAL_ASYNC_COMPLETION to
+	 * the caller then the caller *must* call this or the not
+	 * interruptible version before doing anything else with the
+	 * token.
+	 */
+	if (opal_async_tokens[token].state == ASYNC_TOKEN_ALLOCATED) {
+		spin_lock_irqsave(&opal_async_comp_lock, flags);
+		if (opal_async_tokens[token].state == ASYNC_TOKEN_ALLOCATED)
+			opal_async_tokens[token].state = ASYNC_TOKEN_DISPATCHED;
+		spin_unlock_irqrestore(&opal_async_comp_lock, flags);
+	}
+
+	/*
+	 * Wakeup the poller before we wait for events to speed things
+	 * up on platforms or simulators where the interrupts aren't
+	 * functional.
+	 */
+	opal_wake_poller();
+	ret = wait_event_interruptible(opal_async_wait,
+			opal_async_tokens[token].state ==
+			ASYNC_TOKEN_COMPLETED);
+	if (!ret)
+		memcpy(msg, &opal_async_tokens[token].response, sizeof(*msg));
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(opal_async_wait_response_interruptible);
+
+/* Called from interrupt context */
 static int opal_async_comp_event(struct notifier_block *nb,
 		unsigned long msg_type, void *msg)
 {
 	struct opal_msg *comp_msg = msg;
+	enum opal_async_token_state state;
 	unsigned long flags;
 	uint64_t token;
 
@@ -140,11 +225,17 @@ static int opal_async_comp_event(struct notifier_block *nb,
 		return 0;
 
 	token = be64_to_cpu(comp_msg->params[0]);
-	memcpy(&opal_async_responses[token], comp_msg, sizeof(*comp_msg));
 	spin_lock_irqsave(&opal_async_comp_lock, flags);
-	__set_bit(token, opal_async_complete_map);
+	state = opal_async_tokens[token].state;
+	opal_async_tokens[token].state = ASYNC_TOKEN_COMPLETED;
 	spin_unlock_irqrestore(&opal_async_comp_lock, flags);
 
+	if (state == ASYNC_TOKEN_ABANDONED) {
+		/* Free the token, no one else will */
+		opal_async_release_token(token);
+		return 0;
+	}
+	memcpy(&opal_async_tokens[token].response, comp_msg, sizeof(*comp_msg));
 	wake_up(&opal_async_wait);
 
 	return 0;
@@ -178,32 +269,23 @@ int __init opal_async_comp_init(void)
 	}
 
 	opal_max_async_tokens = be32_to_cpup(async);
-	if (opal_max_async_tokens > N_ASYNC_COMPLETIONS)
-		opal_max_async_tokens = N_ASYNC_COMPLETIONS;
+	opal_async_tokens = kcalloc(opal_max_async_tokens,
+			sizeof(*opal_async_tokens), GFP_KERNEL);
+	if (!opal_async_tokens) {
+		err = -ENOMEM;
+		goto out_opal_node;
+	}
 
 	err = opal_message_notifier_register(OPAL_MSG_ASYNC_COMP,
 			&opal_async_comp_nb);
 	if (err) {
 		pr_err("%s: Can't register OPAL event notifier (%d)\n",
 				__func__, err);
+		kfree(opal_async_tokens);
 		goto out_opal_node;
 	}
 
-	opal_async_responses = kzalloc(
-			sizeof(*opal_async_responses) * opal_max_async_tokens,
-			GFP_KERNEL);
-	if (!opal_async_responses) {
-		pr_err("%s: Out of memory, failed to do asynchronous "
-				"completion init\n", __func__);
-		err = -ENOMEM;
-		goto out_opal_node;
-	}
-
-	/* Initialize to 1 less than the maximum tokens available, as we may
-	 * require to pop one during emergency through synchronous call to
-	 * __opal_async_get_token()
-	 */
-	sema_init(&opal_async_sem, opal_max_async_tokens - 1);
+	sema_init(&opal_async_sem, opal_max_async_tokens);
 
 out_opal_node:
 	of_node_put(opal_node);
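The resulting calling convention for drivers looks like this (compare the reworked opal_get_sensor_data() further down; opal_example_async_call() is a placeholder for any OPAL call that takes a token):

	int example(void)
	{
		struct opal_msg msg;
		int token, rc;

		token = opal_async_get_token_interruptible();
		if (token < 0)
			return token;

		rc = opal_example_async_call(token);	/* placeholder OPAL call */
		if (rc == OPAL_ASYNC_COMPLETION) {
			/* Must wait at least once before touching the token again */
			rc = opal_async_wait_response(token, &msg);
			if (!rc)
				rc = opal_error_code(opal_get_async_rc(msg));
		} else {
			rc = opal_error_code(rc);
		}

		/* Safe even if interrupted: a DISPATCHED token is marked
		 * ABANDONED and freed later by the completion handler. */
		opal_async_release_token(token);
		return rc;
	}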
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index d78fed728cdf..c9e1a4ff295c 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -1,5 +1,5 @@
 /*
- * OPAL hypervisor Maintenance interrupt handling support in PowreNV.
+ * OPAL hypervisor Maintenance interrupt handling support in PowerNV.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index ecdcba9d1220..9d1b8c0aaf93 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -174,8 +174,14 @@ void opal_event_shutdown(void)
 
 	/* First free interrupts, which will also mask them */
 	for (i = 0; i < opal_irq_count; i++) {
-		if (opal_irqs[i])
+		if (!opal_irqs[i])
+			continue;
+
+		if (in_interrupt())
+			disable_irq_nosync(opal_irqs[i]);
+		else
 			free_irq(opal_irqs[i], NULL);
+
 		opal_irqs[i] = 0;
 	}
 }
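A note on the in_interrupt() split above: free_irq() waits for any in-flight handler to finish and can sleep, so it must not be called from hard interrupt context; on that path the interrupt is only masked with disable_irq_nosync() instead of being freed.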
diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c
index 4495f428b500..d9916ea62305 100644
--- a/arch/powerpc/platforms/powernv/opal-memory-errors.c
+++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c
@@ -1,5 +1,5 @@
 /*
- * OPAL asynchronus Memory error handling support in PowreNV.
+ * OPAL asynchronus Memory error handling support in PowerNV.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c
index aa267f120033..0a7074bb91dc 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor.c
@@ -19,13 +19,10 @@
  */
 
 #include <linux/delay.h>
-#include <linux/mutex.h>
 #include <linux/of_platform.h>
 #include <asm/opal.h>
 #include <asm/machdep.h>
 
-static DEFINE_MUTEX(opal_sensor_mutex);
-
 /*
  * This will return sensor information to driver based on the requested sensor
  * handle. A handle is an opaque id for the powernv, read by the driver from the
@@ -38,13 +35,9 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
 	__be32 data;
 
 	token = opal_async_get_token_interruptible();
-	if (token < 0) {
-		pr_err("%s: Couldn't get the token, returning\n", __func__);
-		ret = token;
-		goto out;
-	}
+	if (token < 0)
+		return token;
 
-	mutex_lock(&opal_sensor_mutex);
 	ret = opal_sensor_read(sensor_hndl, token, &data);
 	switch (ret) {
 	case OPAL_ASYNC_COMPLETION:
@@ -52,7 +45,7 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
 		if (ret) {
 			pr_err("%s: Failed to wait for the async response, %d\n",
 					__func__, ret);
-			goto out_token;
+			goto out;
 		}
 
 		ret = opal_error_code(opal_get_async_rc(msg));
@@ -73,10 +66,8 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
 		break;
 	}
 
-out_token:
-	mutex_unlock(&opal_sensor_mutex);
-	opal_async_release_token(token);
 out:
+	opal_async_release_token(token);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(opal_get_sensor_data);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 8c1ede2d3f7e..6f4b00a2ac46 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -94,7 +94,7 @@ opal_return:
 	 * bytes (always BE) since MSR:LE will end up fixed up as a side
 	 * effect of the rfid.
 	 */
-	FIXUP_ENDIAN
+	FIXUP_ENDIAN_HV
 	ld	r2,PACATOC(r13);
 	lwz	r4,8(r1);
 	ld	r5,PPC_LR_STKOFF(r1);
@@ -120,7 +120,7 @@ opal_real_call:
 	hrfid
 
 opal_return_realmode:
-	FIXUP_ENDIAN
+	FIXUP_ENDIAN_HV
 	ld	r2,PACATOC(r13);
 	lwz	r11,8(r1);
 	ld	r12,PPC_LR_STKOFF(r1)
@@ -307,6 +307,7 @@ OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
 OPAL_CALL(opal_xive_set_vp_info,		OPAL_XIVE_SET_VP_INFO);
 OPAL_CALL(opal_xive_sync,			OPAL_XIVE_SYNC);
 OPAL_CALL(opal_xive_dump,			OPAL_XIVE_DUMP);
+OPAL_CALL(opal_signal_system_reset,		OPAL_SIGNAL_SYSTEM_RESET);
 OPAL_CALL(opal_npu_init_context,		OPAL_NPU_INIT_CONTEXT);
 OPAL_CALL(opal_npu_destroy_context,		OPAL_NPU_DESTROY_CONTEXT);
 OPAL_CALL(opal_npu_map_lpar,			OPAL_NPU_MAP_LPAR);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 65c79ecf5a4d..041ddbd1fc57 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -998,6 +998,7 @@ int opal_error_code(int rc)
 
 	case OPAL_PARAMETER:		return -EINVAL;
 	case OPAL_ASYNC_COMPLETION:	return -EINPROGRESS;
+	case OPAL_BUSY:
 	case OPAL_BUSY_EVENT:		return -EBUSY;
 	case OPAL_NO_MEM:		return -ENOMEM;
 	case OPAL_PERMISSION:		return -EPERM;
@@ -1037,3 +1038,4 @@ EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
 /* Export this for KVM */
 EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
 EXPORT_SYMBOL_GPL(opal_int_eoi);
+EXPORT_SYMBOL_GPL(opal_error_code);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 57f9e55f4352..749055553064 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1002,9 +1002,12 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
 	}
 
 	/*
-	 * After doing so, there would be a "hole" in the /proc/iomem when
-	 * offset is a positive value. It looks like the device return some
-	 * mmio back to the system, which actually no one could use it.
+	 * Since M64 BAR shares segments among all possible 256 PEs,
+	 * we have to shift the beginning of PF IOV BAR to make it start from
+	 * the segment which belongs to the PE number assigned to the first VF.
+	 * This creates a "hole" in the /proc/iomem which could be used for
+	 * allocating other resources so we reserve this area below and
+	 * release when IOV is released.
 	 */
 	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
 		res = &dev->resource[i + PCI_IOV_RESOURCES];
@@ -1018,7 +1021,22 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
 		dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
 			 i, &res2, res, (offset > 0) ? "En" : "Dis",
 			 num_vfs, offset);
+
+		if (offset < 0) {
+			devm_release_resource(&dev->dev, &pdn->holes[i]);
+			memset(&pdn->holes[i], 0, sizeof(pdn->holes[i]));
+		}
+
 		pci_update_resource(dev, i + PCI_IOV_RESOURCES);
+
+		if (offset > 0) {
+			pdn->holes[i].start = res2.start;
+			pdn->holes[i].end = res2.start + size * offset - 1;
+			pdn->holes[i].flags = IORESOURCE_BUS;
+			pdn->holes[i].name = "pnv_iov_reserved";
+			devm_request_resource(&dev->dev, res->parent,
+					&pdn->holes[i]);
+		}
 	}
 	return 0;
 }
@@ -2779,7 +2797,7 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
 	if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
 		return -EINVAL;
 
-	if ((window_size > memory_hotplug_max()) || !is_power_of_2(window_size))
+	if (!is_power_of_2(window_size))
 		return -EINVAL;
 
 	/* Adjust direct table size from window_size and levels */
@@ -3293,8 +3311,7 @@ static void pnv_pci_ioda_fixup(void)
 	pnv_pci_ioda_create_dbgfs();
 
 #ifdef CONFIG_EEH
-	eeh_init();
-	eeh_addr_cache_build();
+	pnv_eeh_post_init();
 #endif
 }
 
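Worked example of the hole arithmetic above (illustrative numbers, not from the patch): with an M64 segment size of 16 MB and the first VF assigned to PE 4, the PF IOV BAR start moves up by 4 segments and the freed 64 MB below it is reserved:

	/* Illustrative only: size = 16 MB per segment, offset = 4 PEs */
	pdn->holes[i].start = res2.start;
	pdn->holes[i].end   = res2.start + (16UL << 20) * 4 - 1;  /* 64 MB hole */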
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index b47f9406d97e..b772d7473896 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -188,6 +188,9 @@ struct pnv_phb {
 
 		/* Bitmask for MMIO register usage */
 		unsigned long mmio_atsd_usage;
+
+		/* Do we need to explicitly flush the nest mmu? */
+		bool nmmu_flush;
 	} npu;
 
 #ifdef CONFIG_CXL_BASE
@@ -235,6 +238,7 @@ extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
 extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
 extern bool pnv_pci_enable_device_hook(struct pci_dev *dev);
 extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
+extern int pnv_eeh_post_init(void);
 
 extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
 			    const char *fmt, ...);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index bbb73aa0eb8f..1edfbc1e40f4 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -36,6 +36,7 @@
 #include <asm/opal.h>
 #include <asm/kexec.h>
 #include <asm/smp.h>
+#include <asm/tm.h>
 
 #include "powernv.h"
 
@@ -290,6 +291,7 @@ static void __init pnv_setup_machdep_opal(void)
 	ppc_md.restart = pnv_restart;
 	pm_power_off = pnv_power_off;
 	ppc_md.halt = pnv_halt;
+	/* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
 	ppc_md.machine_check_exception = opal_machine_check;
 	ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
 	ppc_md.hmi_exception_early = opal_hmi_exception_early;
@@ -311,6 +313,28 @@ static int __init pnv_probe(void)
 	return 1;
 }
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void __init pnv_tm_init(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_OPAL) ||
+	    !pvr_version_is(PVR_POWER9) ||
+	    early_cpu_has_feature(CPU_FTR_TM))
+		return;
+
+	if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS)
+		return;
+
+	pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n");
+	cur_cpu_spec->cpu_features |= CPU_FTR_TM;
+	/* Make sure "normal" HTM is off (it should be) */
+	cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM;
+	/* Turn on no suspend mode, and HTM no SC */
+	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND | \
+					    PPC_FEATURE2_HTM_NOSC;
+	tm_suspend_disabled = true;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
 /*
  * Returns the cpu frequency for 'cpu' in Hz. This is used by
  * /proc/cpuinfo
@@ -319,7 +343,7 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu)
 {
 	unsigned long ret_freq;
 
-	ret_freq = cpufreq_quick_get(cpu) * 1000ul;
+	ret_freq = cpufreq_get(cpu) * 1000ul;
 
 	/*
 	 * If the backend cpufreq driver does not exist,
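Userspace can detect the new TM mode advertised by pnv_tm_init() via the ELF AT_HWCAP2 auxiliary vector. A sketch (the fallback bit values are assumptions; verify against the uapi asm/cputable.h of your kernel):

	#include <stdio.h>
	#include <sys/auxv.h>

	/* Assumed values; check arch/powerpc/include/uapi/asm/cputable.h */
	#ifndef PPC_FEATURE2_HTM_NO_SUSPEND
	#define PPC_FEATURE2_HTM_NO_SUSPEND	0x00080000
	#endif
	#ifndef PPC_FEATURE2_HTM_NOSC
	#define PPC_FEATURE2_HTM_NOSC		0x01000000
	#endif

	int main(void)
	{
		unsigned long hwcap2 = getauxval(AT_HWCAP2);

		if (hwcap2 & PPC_FEATURE2_HTM_NO_SUSPEND)
			printf("TM available, but transactional suspend is disabled\n");
		if (hwcap2 & PPC_FEATURE2_HTM_NOSC)
			printf("Transactions are aborted on syscall entry\n");
		return 0;
	}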
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index c17f81e433f7..ba030669eca1 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -49,6 +49,13 @@
 
 static void pnv_smp_setup_cpu(int cpu)
 {
+	/*
+	 * P9 workaround for CI vector load (see traps.c),
+	 * enable the corresponding HMI interrupt
+	 */
+	if (pvr_version_is(PVR_POWER9))
+		mtspr(SPRN_HMEER, mfspr(SPRN_HMEER) | PPC_BIT(17));
+
 	if (xive_enabled())
 		xive_smp_setup_cpu();
 	else if (cpu != boot_cpuid)
@@ -290,6 +297,54 @@ static void __init pnv_smp_probe(void)
 	}
 }
 
+static int pnv_system_reset_exception(struct pt_regs *regs)
+{
+	if (smp_handle_nmi_ipi(regs))
+		return 1;
+	return 0;
+}
+
+static int pnv_cause_nmi_ipi(int cpu)
+{
+	int64_t rc;
+
+	if (cpu >= 0) {
+		rc = opal_signal_system_reset(get_hard_smp_processor_id(cpu));
+		if (rc != OPAL_SUCCESS)
+			return 0;
+		return 1;
+
+	} else if (cpu == NMI_IPI_ALL_OTHERS) {
+		bool success = true;
+		int c;
+
+
+		/*
+		 * We do not use broadcasts (yet), because it's not clear
+		 * exactly what semantics Linux wants or the firmware should
+		 * provide.
+		 */
+		for_each_online_cpu(c) {
+			if (c == smp_processor_id())
+				continue;
+
+			rc = opal_signal_system_reset(
+					get_hard_smp_processor_id(c));
+			if (rc != OPAL_SUCCESS)
+				success = false;
+		}
+		if (success)
+			return 1;
+
+		/*
+		 * Caller will fall back to doorbells, which may pick
+		 * up the remainders.
+		 */
+	}
+
+	return 0;
+}
+
 static struct smp_ops_t pnv_smp_ops = {
 	.message_pass	= NULL, /* Use smp_muxed_ipi_message_pass */
 	.cause_ipi	= NULL,	/* Filled at runtime by pnv_smp_probe() */
@@ -308,6 +363,10 @@ static struct smp_ops_t pnv_smp_ops = {
 /* This is called very early during platform setup_arch */
 void __init pnv_smp_init(void)
 {
+	if (opal_check_token(OPAL_SIGNAL_SYSTEM_RESET)) {
+		ppc_md.system_reset_exception = pnv_system_reset_exception;
+		pnv_smp_ops.cause_nmi_ipi = pnv_cause_nmi_ipi;
+	}
 	smp_ops = &pnv_smp_ops;
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c
new file mode 100644
index 000000000000..ca22f1eae050
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-debug.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2016-17 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include "vas.h"
+
+static struct dentry *vas_debugfs;
+
+static char *cop_to_str(int cop)
+{
+	switch (cop) {
+	case VAS_COP_TYPE_FAULT:	return "Fault";
+	case VAS_COP_TYPE_842:		return "NX-842 Normal Priority";
+	case VAS_COP_TYPE_842_HIPRI:	return "NX-842 High Priority";
+	case VAS_COP_TYPE_GZIP:		return "NX-GZIP Normal Priority";
+	case VAS_COP_TYPE_GZIP_HIPRI:	return "NX-GZIP High Priority";
+	case VAS_COP_TYPE_FTW:		return "Fast Thread-wakeup";
+	default:			return "Unknown";
+	}
+}
+
+static int info_dbg_show(struct seq_file *s, void *private)
+{
+	struct vas_window *window = s->private;
+
+	mutex_lock(&vas_mutex);
+
+	/* ensure window is not unmapped */
+	if (!window->hvwc_map)
+		goto unlock;
+
+	seq_printf(s, "Type: %s, %s\n", cop_to_str(window->cop),
+					window->tx_win ? "Send" : "Receive");
+	seq_printf(s, "Pid : %d\n", window->pid);
+
+unlock:
+	mutex_unlock(&vas_mutex);
+	return 0;
+}
+
+static int info_dbg_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, info_dbg_show, inode->i_private);
+}
+
+static const struct file_operations info_fops = {
+	.open		= info_dbg_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static inline void print_reg(struct seq_file *s, struct vas_window *win,
+			char *name, u32 reg)
+{
+	seq_printf(s, "0x%016llx %s\n", read_hvwc_reg(win, name, reg), name);
+}
+
+static int hvwc_dbg_show(struct seq_file *s, void *private)
+{
+	struct vas_window *window = s->private;
+
+	mutex_lock(&vas_mutex);
+
+	/* ensure window is not unmapped */
+	if (!window->hvwc_map)
+		goto unlock;
+
+	print_reg(s, window, VREG(LPID));
+	print_reg(s, window, VREG(PID));
+	print_reg(s, window, VREG(XLATE_MSR));
+	print_reg(s, window, VREG(XLATE_LPCR));
+	print_reg(s, window, VREG(XLATE_CTL));
+	print_reg(s, window, VREG(AMR));
+	print_reg(s, window, VREG(SEIDR));
+	print_reg(s, window, VREG(FAULT_TX_WIN));
+	print_reg(s, window, VREG(OSU_INTR_SRC_RA));
+	print_reg(s, window, VREG(HV_INTR_SRC_RA));
+	print_reg(s, window, VREG(PSWID));
+	print_reg(s, window, VREG(LFIFO_BAR));
+	print_reg(s, window, VREG(LDATA_STAMP_CTL));
+	print_reg(s, window, VREG(LDMA_CACHE_CTL));
+	print_reg(s, window, VREG(LRFIFO_PUSH));
+	print_reg(s, window, VREG(CURR_MSG_COUNT));
+	print_reg(s, window, VREG(LNOTIFY_AFTER_COUNT));
+	print_reg(s, window, VREG(LRX_WCRED));
+	print_reg(s, window, VREG(LRX_WCRED_ADDER));
+	print_reg(s, window, VREG(TX_WCRED));
+	print_reg(s, window, VREG(TX_WCRED_ADDER));
+	print_reg(s, window, VREG(LFIFO_SIZE));
+	print_reg(s, window, VREG(WINCTL));
+	print_reg(s, window, VREG(WIN_STATUS));
+	print_reg(s, window, VREG(WIN_CTX_CACHING_CTL));
+	print_reg(s, window, VREG(TX_RSVD_BUF_COUNT));
+	print_reg(s, window, VREG(LRFIFO_WIN_PTR));
+	print_reg(s, window, VREG(LNOTIFY_CTL));
+	print_reg(s, window, VREG(LNOTIFY_PID));
+	print_reg(s, window, VREG(LNOTIFY_LPID));
+	print_reg(s, window, VREG(LNOTIFY_TID));
+	print_reg(s, window, VREG(LNOTIFY_SCOPE));
+	print_reg(s, window, VREG(NX_UTIL_ADDER));
+unlock:
+	mutex_unlock(&vas_mutex);
+	return 0;
+}
+
+static int hvwc_dbg_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hvwc_dbg_show, inode->i_private);
+}
+
+static const struct file_operations hvwc_fops = {
+	.open		= hvwc_dbg_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+void vas_window_free_dbgdir(struct vas_window *window)
+{
+	if (window->dbgdir) {
+		debugfs_remove_recursive(window->dbgdir);
+		kfree(window->dbgname);
+		window->dbgdir = NULL;
+		window->dbgname = NULL;
+	}
+}
+
+void vas_window_init_dbgdir(struct vas_window *window)
+{
+	struct dentry *f, *d;
+
+	if (!window->vinst->dbgdir)
+		return;
+
+	window->dbgname = kzalloc(16, GFP_KERNEL);
+	if (!window->dbgname)
+		return;
+
+	snprintf(window->dbgname, 16, "w%d", window->winid);
+
+	d = debugfs_create_dir(window->dbgname, window->vinst->dbgdir);
+	if (IS_ERR(d))
+		goto free_name;
+
+	window->dbgdir = d;
+
+	f = debugfs_create_file("info", 0444, d, window, &info_fops);
+	if (IS_ERR(f))
+		goto remove_dir;
+
+	f = debugfs_create_file("hvwc", 0444, d, window, &hvwc_fops);
+	if (IS_ERR(f))
+		goto remove_dir;
+
+	return;
+
+free_name:
+	kfree(window->dbgname);
+	window->dbgname = NULL;
+
+remove_dir:
+	debugfs_remove_recursive(window->dbgdir);
+	window->dbgdir = NULL;
+}
+
+void vas_instance_init_dbgdir(struct vas_instance *vinst)
+{
+	struct dentry *d;
+
+	if (!vas_debugfs)
+		return;
+
+	vinst->dbgname = kzalloc(16, GFP_KERNEL);
+	if (!vinst->dbgname)
+		return;
+
+	snprintf(vinst->dbgname, 16, "v%d", vinst->vas_id);
+
+	d = debugfs_create_dir(vinst->dbgname, vas_debugfs);
+	if (IS_ERR(d))
+		goto free_name;
+
+	vinst->dbgdir = d;
+	return;
+
+free_name:
+	kfree(vinst->dbgname);
+	vinst->dbgname = NULL;
+	vinst->dbgdir = NULL;
+}
+
+void vas_init_dbgdir(void)
+{
+	vas_debugfs = debugfs_create_dir("vas", NULL);
+	if (IS_ERR(vas_debugfs))
+		vas_debugfs = NULL;
+}
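With debugfs mounted, the new code yields a hierarchy like this (instance and window IDs are examples):

	/sys/kernel/debug/vas/
	    v0/          one directory per VAS instance ("v%d", vinst->vas_id)
	        w123/    one directory per open window ("w%d", window->winid)
	            info     window type (cop) and owning pid
	            hvwc     dump of the hypervisor window-context registers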
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 5aae845b8cd9..2b3eb01ab110 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -16,7 +16,8 @@
 #include <linux/log2.h>
 #include <linux/rcupdate.h>
 #include <linux/cred.h>
-
+#include <asm/switch_to.h>
+#include <asm/ppc-opcode.h>
 #include "vas.h"
 #include "copy-paste.h"
 
@@ -40,6 +41,16 @@ static void compute_paste_address(struct vas_window *window, u64 *addr, int *len
 	pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
 }
 
+u64 vas_win_paste_addr(struct vas_window *win)
+{
+	u64 addr;
+
+	compute_paste_address(win, &addr, NULL);
+
+	return addr;
+}
+EXPORT_SYMBOL(vas_win_paste_addr);
+
 static inline void get_hvwc_mmio_bar(struct vas_window *window,
 			u64 *start, int *len)
 {
@@ -145,23 +156,37 @@ static void unmap_paste_region(struct vas_window *window)
 }
 
 /*
- * Unmap the MMIO regions for a window.
+ * Unmap the MMIO regions for a window. Hold the vas_mutex so we don't
+ * unmap when the window's debugfs dir is in use. This serializes close
+ * of a window even on another VAS instance but since its not a critical
+ * path, just minimize the time we hold the mutex for now. We can add
+ * a per-instance mutex later if necessary.
  */
 static void unmap_winctx_mmio_bars(struct vas_window *window)
 {
 	int len;
+	void *uwc_map;
+	void *hvwc_map;
 	u64 busaddr_start;
 
-	if (window->hvwc_map) {
+	mutex_lock(&vas_mutex);
+
+	hvwc_map = window->hvwc_map;
+	window->hvwc_map = NULL;
+
+	uwc_map = window->uwc_map;
+	window->uwc_map = NULL;
+
+	mutex_unlock(&vas_mutex);
+
+	if (hvwc_map) {
 		get_hvwc_mmio_bar(window, &busaddr_start, &len);
-		unmap_region(window->hvwc_map, busaddr_start, len);
-		window->hvwc_map = NULL;
+		unmap_region(hvwc_map, busaddr_start, len);
 	}
 
-	if (window->uwc_map) {
+	if (uwc_map) {
 		get_uwc_mmio_bar(window, &busaddr_start, &len);
-		unmap_region(window->uwc_map, busaddr_start, len);
-		window->uwc_map = NULL;
+		unmap_region(uwc_map, busaddr_start, len);
 	}
 }
 
@@ -528,6 +553,9 @@ static void vas_window_free(struct vas_window *window)
 	struct vas_instance *vinst = window->vinst;
 
 	unmap_winctx_mmio_bars(window);
+
+	vas_window_free_dbgdir(window);
+
 	kfree(window);
 
 	vas_release_window_id(&vinst->ida, winid);
@@ -552,6 +580,8 @@ static struct vas_window *vas_window_alloc(struct vas_instance *vinst)
 	if (map_winctx_mmio_bars(window))
 		goto out_free;
 
+	vas_window_init_dbgdir(window);
+
 	return window;
 
 out_free:
@@ -569,6 +599,32 @@ static void put_rx_win(struct vas_window *rxwin)
 }
 
 /*
+ * Find the user space receive window given the @pswid.
+ *     - We must have a valid vasid and it must belong to this instance.
+ *       (so both send and receive windows are on the same VAS instance)
+ *     - The window must refer to an OPEN, FTW, RECEIVE window.
+ *
+ * NOTE: We access ->windows[] table and assume that vinst->mutex is held.
+ */
+static struct vas_window *get_user_rxwin(struct vas_instance *vinst, u32 pswid)
+{
+	int vasid, winid;
+	struct vas_window *rxwin;
+
+	decode_pswid(pswid, &vasid, &winid);
+
+	if (vinst->vas_id != vasid)
+		return ERR_PTR(-EINVAL);
+
+	rxwin = vinst->windows[winid];
+
+	if (!rxwin || rxwin->tx_win || rxwin->cop != VAS_COP_TYPE_FTW)
+		return ERR_PTR(-EINVAL);
+
+	return rxwin;
+}
+
+/*
  * Get the VAS receive window associated with NX engine identified
  * by @cop and if applicable, @pswid.
  *
@@ -581,10 +637,10 @@ static struct vas_window *get_vinst_rxwin(struct vas_instance *vinst,
 
 	mutex_lock(&vinst->mutex);
 
-	if (cop == VAS_COP_TYPE_842 || cop == VAS_COP_TYPE_842_HIPRI)
-		rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL);
+	if (cop == VAS_COP_TYPE_FTW)
+		rxwin = get_user_rxwin(vinst, pswid);
 	else
-		rxwin = ERR_PTR(-EINVAL);
+		rxwin = vinst->rxwin[cop] ?: ERR_PTR(-EINVAL);
 
 	if (!IS_ERR(rxwin))
 		atomic_inc(&rxwin->num_txwins);
@@ -674,15 +730,18 @@ static void init_winctx_for_rxwin(struct vas_window *rxwin,
 
 	winctx->rx_fifo = rxattr->rx_fifo;
 	winctx->rx_fifo_size = rxattr->rx_fifo_size;
-	winctx->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+	winctx->wcreds_max = rxwin->wcreds_max;
 	winctx->pin_win = rxattr->pin_win;
 
 	winctx->nx_win = rxattr->nx_win;
 	winctx->fault_win = rxattr->fault_win;
+	winctx->user_win = rxattr->user_win;
+	winctx->rej_no_credit = rxattr->rej_no_credit;
 	winctx->rx_word_mode = rxattr->rx_win_ord_mode;
 	winctx->tx_word_mode = rxattr->tx_win_ord_mode;
 	winctx->rx_wcred_mode = rxattr->rx_wcred_mode;
 	winctx->tx_wcred_mode = rxattr->tx_wcred_mode;
+	winctx->notify_early = rxattr->notify_early;
 
 	if (winctx->nx_win) {
 		winctx->data_stamp = true;
723static bool rx_win_args_valid(enum vas_cop_type cop, 782static bool rx_win_args_valid(enum vas_cop_type cop,
724 struct vas_rx_win_attr *attr) 783 struct vas_rx_win_attr *attr)
725{ 784{
726 dump_rx_win_attr(attr); 785 pr_debug("Rxattr: fault %d, notify %d, intr %d, early %d, fifo %d\n",
786 attr->fault_win, attr->notify_disable,
787 attr->intr_disable, attr->notify_early,
788 attr->rx_fifo_size);
727 789
728 if (cop >= VAS_COP_TYPE_MAX) 790 if (cop >= VAS_COP_TYPE_MAX)
729 return false; 791 return false;
@@ -735,6 +797,9 @@ static bool rx_win_args_valid(enum vas_cop_type cop,
 	if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX)
 		return false;
 
+	if (attr->wcreds_max > VAS_RX_WCREDS_MAX)
+		return false;
+
 	if (attr->nx_win) {
 		/* cannot be fault or user window if it is nx */
 		if (attr->fault_win || attr->user_win)
@@ -835,6 +900,7 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
 	rxwin->nx_win = rxattr->nx_win;
 	rxwin->user_win = rxattr->user_win;
 	rxwin->cop = cop;
+	rxwin->wcreds_max = rxattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
 	if (rxattr->user_win)
 		rxwin->pid = task_pid_vnr(current);
 
@@ -884,21 +950,23 @@ static void init_winctx_for_txwin(struct vas_window *txwin,
 	 */
 	memset(winctx, 0, sizeof(struct vas_winctx));
 
-	winctx->wcreds_max = txattr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+	winctx->wcreds_max = txwin->wcreds_max;
 
 	winctx->user_win = txattr->user_win;
 	winctx->nx_win = txwin->rxwin->nx_win;
 	winctx->pin_win = txattr->pin_win;
+	winctx->rej_no_credit = txattr->rej_no_credit;
+	winctx->rsvd_txbuf_enable = txattr->rsvd_txbuf_enable;
 
 	winctx->rx_wcred_mode = txattr->rx_wcred_mode;
 	winctx->tx_wcred_mode = txattr->tx_wcred_mode;
 	winctx->rx_word_mode = txattr->rx_win_ord_mode;
 	winctx->tx_word_mode = txattr->tx_win_ord_mode;
+	winctx->rsvd_txbuf_count = txattr->rsvd_txbuf_count;
 
-	if (winctx->nx_win) {
+	winctx->intr_disable = true;
+	if (winctx->nx_win)
 		winctx->data_stamp = true;
-		winctx->intr_disable = true;
-	}
 
 	winctx->lpid = txattr->lpid;
 	winctx->pidr = txattr->pidr;
@@ -921,6 +989,9 @@ static bool tx_win_args_valid(enum vas_cop_type cop,
 	if (cop > VAS_COP_TYPE_MAX)
 		return false;
 
+	if (attr->wcreds_max > VAS_TX_WCREDS_MAX)
+		return false;
+
 	if (attr->user_win &&
 			(cop != VAS_COP_TYPE_FTW || attr->rsvd_txbuf_count))
 		return false;
@@ -940,6 +1011,14 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
 	if (!tx_win_args_valid(cop, attr))
 		return ERR_PTR(-EINVAL);
 
+	/*
+	 * If caller did not specify a vasid but specified the PSWID of a
+	 * receive window (applicable only to FTW windows), use the vasid
+	 * from that receive window.
+	 */
+	if (vasid == -1 && attr->pswid)
+		decode_pswid(attr->pswid, &vasid, NULL);
+
 	vinst = find_vas_instance(vasid);
 	if (!vinst) {
 		pr_devel("vasid %d not found!\n", vasid);
@@ -958,11 +1037,13 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
 		goto put_rxwin;
 	}
 
+	txwin->cop = cop;
 	txwin->tx_win = 1;
 	txwin->rxwin = rxwin;
 	txwin->nx_win = txwin->rxwin->nx_win;
 	txwin->pid = attr->pid;
 	txwin->user_win = attr->user_win;
+	txwin->wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
 
 	init_winctx_for_txwin(txwin, attr, &winctx);
 
@@ -984,6 +1065,14 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
 		}
 	}
 
+	/*
+	 * Now that we have a send window, ensure context switch issues
+	 * CP_ABORT for this thread.
+	 */
+	rc = -EINVAL;
+	if (set_thread_uses_vas() < 0)
+		goto free_window;
+
 	set_vinst_win(vinst, txwin);
 
 	return txwin;
@@ -1038,50 +1127,110 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
 	else
 		rc = -EINVAL;
 
-	print_fifo_msg_count(txwin);
+	pr_debug("Txwin #%d: Msg count %llu\n", txwin->winid,
+			read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
 
 	return rc;
 }
 EXPORT_SYMBOL_GPL(vas_paste_crb);
 
+/*
+ * If credit checking is enabled for this window, poll for the return
+ * of window credits (i.e. for NX engines to process any outstanding CRBs).
+ * Since NX-842 waits for the CRBs to be processed before closing the
+ * window, we should not have to wait for too long.
+ *
+ * TODO: We retry in 10ms intervals now. We could/should probably peek at
+ *	the VAS_LRFIFO_PUSH_OFFSET register to get an estimate of pending
+ *	CRBs on the FIFO and compute the delay dynamically on each retry.
+ *	But that is not really needed until we support NX-GZIP access from
+ *	user space. (NX-842 driver waits for CSB and Fast thread-wakeup
+ *	doesn't use credit checking).
+ */
+static void poll_window_credits(struct vas_window *window)
+{
+	u64 val;
+	int creds, mode;
+
+	val = read_hvwc_reg(window, VREG(WINCTL));
+	if (window->tx_win)
+		mode = GET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val);
+	else
+		mode = GET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val);
+
+	if (!mode)
+		return;
+retry:
+	if (window->tx_win) {
+		val = read_hvwc_reg(window, VREG(TX_WCRED));
+		creds = GET_FIELD(VAS_TX_WCRED, val);
+	} else {
+		val = read_hvwc_reg(window, VREG(LRX_WCRED));
+		creds = GET_FIELD(VAS_LRX_WCRED, val);
+	}
+
+	if (creds < window->wcreds_max) {
+		val = 0;
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(msecs_to_jiffies(10));
+		goto retry;
+	}
+}
+
+/*
+ * Wait for the window to go to "not-busy" state. It should only take a
+ * short time to queue a CRB, so window should not be busy for too long.
+ * Trying 5ms intervals.
+ */
 static void poll_window_busy_state(struct vas_window *window)
 {
 	int busy;
 	u64 val;
 
 retry:
-	/*
-	 * Poll Window Busy flag
-	 */
 	val = read_hvwc_reg(window, VREG(WIN_STATUS));
 	busy = GET_FIELD(VAS_WIN_BUSY, val);
 	if (busy) {
 		val = 0;
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ);
+		schedule_timeout(msecs_to_jiffies(5));
 		goto retry;
 	}
 }
 
+/*
+ * Have the hardware cast a window out of cache and wait for it to
+ * be completed.
+ *
+ * NOTE: It can take a relatively long time to cast the window context
+ *	out of the cache. It is not strictly necessary to cast out if:
+ *
+ *	- we clear the "Pin Window" bit (so hardware is free to evict)
+ *
+ *	- we re-initialize the window context when it is reassigned.
+ *
+ *	We do the former in vas_win_close() and the latter in vas_win_open().
+ *	So, ignoring the cast-out for now. We can add it as needed. If
+ *	casting out becomes necessary we should consider offloading the
+ *	job to a worker thread, so the window close can proceed quickly.
+ */
 static void poll_window_castout(struct vas_window *window)
 {
-	int cached;
-	u64 val;
-
-	/* Cast window context out of the cache */
-retry:
-	val = read_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL));
-	cached = GET_FIELD(VAS_WIN_CACHE_STATUS, val);
-	if (cached) {
-		val = 0ULL;
-		val = SET_FIELD(VAS_CASTOUT_REQ, val, 1);
-		val = SET_FIELD(VAS_PUSH_TO_MEM, val, 0);
-		write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);
-
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(HZ);
-		goto retry;
-	}
+	/* stub for now */
+}
+
+/*
+ * Unpin and close a window so no new requests are accepted and the
+ * hardware can evict this window from cache if necessary.
+ */
+static void unpin_close_window(struct vas_window *window)
+{
+	u64 val;
+
+	val = read_hvwc_reg(window, VREG(WINCTL));
+	val = SET_FIELD(VAS_WINCTL_PIN, val, 0);
+	val = SET_FIELD(VAS_WINCTL_OPEN, val, 0);
+	write_hvwc_reg(window, VREG(WINCTL), val);
 }
 
 /*
@@ -1098,8 +1247,6 @@ retry:
  */
 int vas_win_close(struct vas_window *window)
 {
-	u64 val;
-
 	if (!window)
 		return 0;
 
@@ -1115,11 +1262,9 @@ int vas_win_close(struct vas_window *window)
 
 	poll_window_busy_state(window);
 
-	/* Unpin window from cache and close it */
-	val = read_hvwc_reg(window, VREG(WINCTL));
-	val = SET_FIELD(VAS_WINCTL_PIN, val, 0);
-	val = SET_FIELD(VAS_WINCTL_OPEN, val, 0);
-	write_hvwc_reg(window, VREG(WINCTL), val);
+	unpin_close_window(window);
+
+	poll_window_credits(window);
 
 	poll_window_castout(window);
 
@@ -1132,3 +1277,12 @@ int vas_win_close(struct vas_window *window)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(vas_win_close);
+
+/*
+ * Return a system-wide unique window id for the window @win.
+ */
+u32 vas_win_id(struct vas_window *win)
+{
+	return encode_pswid(win->vinst->vas_id, win->winid);
+}
+EXPORT_SYMBOL_GPL(vas_win_id);
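
The close path above now serializes four steps: wait for the window to go idle (poll_window_busy_state), unpin and close it (unpin_close_window), wait for all window credits to return (poll_window_credits), and finally the (currently stubbed) cast-out. A minimal user-space sketch of the shared poll-and-retry shape, with a simulated status check standing in for read_hvwc_reg() (names, data and delays here are illustrative, not the kernel API):

	#include <stdbool.h>
	#include <stdio.h>
	#include <time.h>

	/* Simulated "window busy" status; a stand-in for reading the
	 * WIN_STATUS register. Reports busy for the first two polls.
	 */
	static int polls_left = 3;

	static bool window_busy(void)
	{
		return --polls_left > 0;
	}

	/* Re-check the condition every few milliseconds until it clears,
	 * mirroring the retry loops in poll_window_busy_state() and
	 * poll_window_credits() (which sleep via schedule_timeout()).
	 */
	static void poll_until_idle(long interval_ms)
	{
		struct timespec delay = {
			.tv_sec = 0, .tv_nsec = interval_ms * 1000000L
		};

		while (window_busy())
			nanosleep(&delay, NULL);
	}

	int main(void)
	{
		poll_until_idle(5);
		printf("window idle\n");
		return 0;
	}
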
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
index 565a4878fefa..c488621dbec3 100644
--- a/arch/powerpc/platforms/powernv/vas.c
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -18,15 +18,18 @@
 #include <linux/of_platform.h>
 #include <linux/of_address.h>
 #include <linux/of.h>
+#include <asm/prom.h>
 
 #include "vas.h"
 
-static DEFINE_MUTEX(vas_mutex);
+DEFINE_MUTEX(vas_mutex);
 static LIST_HEAD(vas_instances);
 
+static DEFINE_PER_CPU(int, cpu_vas_id);
+
 static int init_vas_instance(struct platform_device *pdev)
 {
-	int rc, vasid;
+	int rc, cpu, vasid;
 	struct resource *res;
 	struct vas_instance *vinst;
 	struct device_node *dn = pdev->dev.of_node;
@@ -74,10 +77,17 @@ static int init_vas_instance(struct platform_device *pdev)
 		"paste_win_id_shift 0x%llx\n", pdev->name, vasid,
 		vinst->paste_base_addr, vinst->paste_win_id_shift);
 
+	for_each_possible_cpu(cpu) {
+		if (cpu_to_chip_id(cpu) == of_get_ibm_chip_id(dn))
+			per_cpu(cpu_vas_id, cpu) = vasid;
+	}
+
 	mutex_lock(&vas_mutex);
 	list_add(&vinst->node, &vas_instances);
 	mutex_unlock(&vas_mutex);
 
+	vas_instance_init_dbgdir(vinst);
+
 	dev_set_drvdata(&pdev->dev, vinst);
 
 	return 0;
@@ -98,6 +108,10 @@ struct vas_instance *find_vas_instance(int vasid)
 	struct vas_instance *vinst;
 
 	mutex_lock(&vas_mutex);
+
+	if (vasid == -1)
+		vasid = per_cpu(cpu_vas_id, smp_processor_id());
+
 	list_for_each(ent, &vas_instances) {
 		vinst = list_entry(ent, struct vas_instance, node);
 		if (vinst->vas_id == vasid) {
@@ -111,6 +125,17 @@ struct vas_instance *find_vas_instance(int vasid)
 	return NULL;
 }
 
+int chip_to_vas_id(int chipid)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		if (cpu_to_chip_id(cpu) == chipid)
+			return per_cpu(cpu_vas_id, cpu);
+	}
+	return -1;
+}
+
 static int vas_probe(struct platform_device *pdev)
 {
 	return init_vas_instance(pdev);
@@ -134,6 +159,8 @@ static int __init vas_init(void)
 	int found = 0;
 	struct device_node *dn;
 
+	vas_init_dbgdir();
+
 	platform_driver_register(&vas_driver);
 
 	for_each_compatible_node(dn, NULL, "ibm,vas") {
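
init_vas_instance() now records the instance's VAS id in a per-CPU variable for every possible CPU on that chip, so both find_vas_instance(-1) and the new chip_to_vas_id() reduce to a simple lookup. A rough user-space analogue of that scan, using made-up topology arrays in place of cpu_to_chip_id() and the per-CPU variable (all names and data here are hypothetical):

	#include <stdio.h>

	#define NR_CPUS 8

	/* Hypothetical topology: the chip each CPU sits on, and the VAS id
	 * recorded for that CPU (analogue of the per-CPU cpu_vas_id).
	 */
	static const int cpu_chip_id[NR_CPUS] = { 0, 0, 0, 0, 8, 8, 8, 8 };
	static const int cpu_vas_id[NR_CPUS]  = { 1, 1, 1, 1, 2, 2, 2, 2 };

	/* Same shape as chip_to_vas_id(): scan CPUs and return the VAS id
	 * of the first CPU on the requested chip, or -1 if none matches.
	 */
	static int chip_to_vas_id(int chipid)
	{
		int cpu;

		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			if (cpu_chip_id[cpu] == chipid)
				return cpu_vas_id[cpu];
		}
		return -1;
	}

	int main(void)
	{
		printf("chip 8 -> vas %d\n", chip_to_vas_id(8));   /* 2 */
		printf("chip 42 -> vas %d\n", chip_to_vas_id(42)); /* -1 */
		return 0;
	}
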
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
index 38dee5d50f31..ae0100fd35bb 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -13,6 +13,8 @@
 #include <linux/idr.h>
 #include <asm/vas.h>
 #include <linux/io.h>
+#include <linux/dcache.h>
+#include <linux/mutex.h>
 
 /*
  * Overview of Virtual Accelerator Switchboard (VAS).
@@ -106,8 +108,8 @@
  *
  * TODO: Needs tuning for per-process credits
  */
-#define VAS_WCREDS_MIN		16
-#define VAS_WCREDS_MAX		((64 << 10) - 1)
+#define VAS_RX_WCREDS_MAX	((64 << 10) - 1)
+#define VAS_TX_WCREDS_MAX	((4 << 10) - 1)
 #define VAS_WCREDS_DEFAULT	(1 << 10)
 
 /*
@@ -259,6 +261,16 @@
 #define VAS_NX_UTIL_ADDER	PPC_BITMASK(32, 63)
 
 /*
+ * VREG(x):
+ * Expand a register's short name (eg: LPID) into two parameters:
+ *	- the register's short name in string form ("LPID"), and
+ *	- the name of the macro (eg: VAS_LPID_OFFSET), defining the
+ *	  register's offset in the window context
+ */
+#define VREG_SFX(n, s)	__stringify(n), VAS_##n##s
+#define VREG(r)		VREG_SFX(r, _OFFSET)
+
+/*
  * Local Notify Scope Control Register. (Receive windows only).
  */
 enum vas_notify_scope {
@@ -307,6 +319,9 @@ struct vas_instance {
 	struct mutex mutex;
 	struct vas_window *rxwin[VAS_COP_TYPE_MAX];
 	struct vas_window *windows[VAS_WINDOWS_PER_CHIP];
+
+	char *dbgname;
+	struct dentry *dbgdir;
 };
 
 /*
@@ -322,6 +337,10 @@ struct vas_window {
 	void *hvwc_map;		/* HV window context */
 	void *uwc_map;		/* OS/User window context */
 	pid_t pid;		/* Linux process id of owner */
+	int wcreds_max;		/* Window credits */
+
+	char *dbgname;
+	struct dentry *dbgdir;
 
 	/* Fields applicable only to send windows */
 	void *paste_kaddr;
@@ -383,45 +402,23 @@ struct vas_winctx {
 	enum vas_notify_after_count notify_after_count;
 };
 
-extern struct vas_instance *find_vas_instance(int vasid);
+extern struct mutex vas_mutex;
 
-/*
- * VREG(x):
- * Expand a register's short name (eg: LPID) into two parameters:
- *	- the register's short name in string form ("LPID"), and
- *	- the name of the macro (eg: VAS_LPID_OFFSET), defining the
- *	  register's offset in the window context
- */
-#define VREG_SFX(n, s)	__stringify(n), VAS_##n##s
-#define VREG(r)		VREG_SFX(r, _OFFSET)
-
-#ifdef vas_debug
-static inline void dump_rx_win_attr(struct vas_rx_win_attr *attr)
-{
-	pr_err("fault %d, notify %d, intr %d early %d\n",
-			attr->fault_win, attr->notify_disable,
-			attr->intr_disable, attr->notify_early);
-
-	pr_err("rx_fifo_size %d, max value %d\n",
-			attr->rx_fifo_size, VAS_RX_FIFO_SIZE_MAX);
-}
+extern struct vas_instance *find_vas_instance(int vasid);
+extern void vas_init_dbgdir(void);
+extern void vas_instance_init_dbgdir(struct vas_instance *vinst);
+extern void vas_window_init_dbgdir(struct vas_window *win);
+extern void vas_window_free_dbgdir(struct vas_window *win);
 
 static inline void vas_log_write(struct vas_window *win, char *name,
 			void *regptr, u64 val)
 {
 	if (val)
-		pr_err("%swin #%d: %s reg %p, val 0x%016llx\n",
+		pr_debug("%swin #%d: %s reg %p, val 0x%016llx\n",
 			win->tx_win ? "Tx" : "Rx", win->winid, name,
 			regptr, val);
 }
 
-#else /* vas_debug */
-
-#define vas_log_write(win, name, reg, val)
-#define dump_rx_win_attr(attr)
-
-#endif /* vas_debug */
-
 static inline void write_uwc_reg(struct vas_window *win, char *name,
 				s32 reg, u64 val)
 {
@@ -450,18 +447,32 @@ static inline u64 read_hvwc_reg(struct vas_window *win,
 	return in_be64(win->hvwc_map+reg);
 }
 
-#ifdef vas_debug
-
-static void print_fifo_msg_count(struct vas_window *txwin)
-{
-	uint64_t read_hvwc_reg(struct vas_window *w, char *n, uint64_t o);
-	pr_devel("Winid %d, Msg count %llu\n", txwin->winid,
-		(uint64_t)read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
-}
-#else /* vas_debug */
-
-#define print_fifo_msg_count(window)
-
-#endif /* vas_debug */
+/*
+ * Encode/decode the Partition Send Window ID (PSWID) for a window in
+ * a way that we can uniquely identify any window in the system. i.e.
+ * we should be able to locate the 'struct vas_window' given the PSWID.
+ *
+ *	Bits	Usage
+ *	0:7	VAS id (8 bits)
+ *	8:15	Unused, 0 (8 bits)
+ *	16:31	Window id (16 bits)
+ */
+static inline u32 encode_pswid(int vasid, int winid)
+{
+	u32 pswid = 0;
+
+	pswid |= vasid << (31 - 7);
+	pswid |= winid;
+
+	return pswid;
+}
+
+static inline void decode_pswid(u32 pswid, int *vasid, int *winid)
+{
+	if (vasid)
+		*vasid = pswid >> (31 - 7) & 0xFF;
 
+	if (winid)
+		*winid = pswid & 0xFFFF;
+}
 #endif /* _VAS_H */
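
The encode_pswid()/decode_pswid() pair above packs an 8-bit VAS id and a 16-bit window id into one 32-bit PSWID; the bit positions in the comment use IBM MSB-0 numbering, hence the `31 - 7` shift for the top byte. A stand-alone round-trip check of the same arithmetic, as plain C outside the kernel (the sample values are arbitrary):

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Same packing as encode_pswid(): VAS id in the top byte (bits 0:7
	 * MSB-first), window id in the low 16 bits (bits 16:31).
	 */
	static uint32_t encode_pswid(int vasid, int winid)
	{
		uint32_t pswid = 0;

		pswid |= (uint32_t)vasid << (31 - 7);
		pswid |= (uint32_t)winid;

		return pswid;
	}

	static void decode_pswid(uint32_t pswid, int *vasid, int *winid)
	{
		if (vasid)
			*vasid = pswid >> (31 - 7) & 0xFF;

		if (winid)
			*winid = pswid & 0xFFFF;
	}

	int main(void)
	{
		int vasid, winid;
		uint32_t pswid = encode_pswid(5, 0x1234);

		decode_pswid(pswid, &vasid, &winid);
		assert(vasid == 5 && winid == 0x1234);
		printf("pswid 0x%08x -> vasid %d, winid 0x%x\n",
		       pswid, vasid, winid);
		return 0;
	}
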
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index fadb95efbb9e..a7d14aa7bb7c 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -363,6 +363,7 @@ static int dlpar_online_cpu(struct device_node *dn)
 			BUG_ON(get_cpu_current_state(cpu)
 					!= CPU_STATE_OFFLINE);
 			cpu_maps_update_done();
+			timed_topology_update(1);
 			rc = device_online(get_cpu_device(cpu));
 			if (rc)
 				goto out;
@@ -533,6 +534,7 @@ static int dlpar_offline_cpu(struct device_node *dn)
 				set_preferred_offline_state(cpu,
 							CPU_STATE_OFFLINE);
 				cpu_maps_update_done();
+				timed_topology_update(1);
 				rc = device_offline(get_cpu_device(cpu));
 				if (rc)
 					goto out;
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 7c181467d0ad..69921f72e2da 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -55,23 +55,23 @@
 
 static struct iommu_table_group *iommu_pseries_alloc_group(int node)
 {
-	struct iommu_table_group *table_group = NULL;
-	struct iommu_table *tbl = NULL;
-	struct iommu_table_group_link *tgl = NULL;
+	struct iommu_table_group *table_group;
+	struct iommu_table *tbl;
+	struct iommu_table_group_link *tgl;
 
 	table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
 			   node);
 	if (!table_group)
-		goto fail_exit;
+		return NULL;
 
 	tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, node);
 	if (!tbl)
-		goto fail_exit;
+		goto free_group;
 
 	tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
 			node);
 	if (!tgl)
-		goto fail_exit;
+		goto free_table;
 
 	INIT_LIST_HEAD_RCU(&tbl->it_group_list);
 	kref_init(&tbl->it_kref);
@@ -82,11 +82,10 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node)
 
 	return table_group;
 
-fail_exit:
-	kfree(tgl);
-	kfree(table_group);
+free_table:
 	kfree(tbl);
-
+free_group:
+	kfree(table_group);
 	return NULL;
 }
 
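
The iommu_pseries_alloc_group() rework replaces a single catch-all fail_exit label, which leaned on kfree(NULL) being a no-op, with ordered unwind labels that free exactly what was allocated before the failing step. A self-contained illustration of that idiom with plain calloc()/free() (the struct names are placeholders, not the kernel types):

	#include <stdlib.h>

	struct tbl { int dummy; };
	struct lnk { int dummy; };
	struct grp {
		struct tbl *tbl;
		struct lnk *lnk;
	};

	/* Ordered-unwind error handling: each failure jumps to a label
	 * that frees only the objects already allocated, in reverse
	 * order of allocation.
	 */
	static struct grp *alloc_grp(void)
	{
		struct grp *grp;

		grp = calloc(1, sizeof(*grp));
		if (!grp)
			return NULL;

		grp->tbl = calloc(1, sizeof(*grp->tbl));
		if (!grp->tbl)
			goto free_grp;

		grp->lnk = calloc(1, sizeof(*grp->lnk));
		if (!grp->lnk)
			goto free_tbl;

		return grp;

	free_tbl:
		free(grp->tbl);
	free_grp:
		free(grp);
		return NULL;
	}

	int main(void)
	{
		struct grp *g = alloc_grp();

		if (g) {
			free(g->lnk);
			free(g->tbl);
			free(g);
		}
		return 0;
	}
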
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 495ba4e7336d..0ee4a469a4ae 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -93,7 +93,7 @@ void vpa_init(int cpu)
 		return;
 	}
 
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
 	/*
 	 * PAPR says this feature is SLB-Buffer but firmware never
 	 * reports that.  All SPLPAR support SLB shadow buffer.
@@ -106,7 +106,7 @@ void vpa_init(int cpu)
 			"cpu %d (hw %d) of area %lx failed with %ld\n",
 			cpu, hwcpu, addr, ret);
 	}
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
 
 	/*
 	 * Register dispatch trace log, if one has been allocated.
@@ -129,7 +129,7 @@ void vpa_init(int cpu)
 	}
 }
 
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
 
 static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 				     unsigned long vpn, unsigned long pa,
@@ -824,7 +824,7 @@ void arch_free_page(struct page *page, int order)
 EXPORT_SYMBOL(arch_free_page);
 
 #endif /* CONFIG_PPC_SMLPAR */
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
 
 #ifdef CONFIG_TRACEPOINTS
 #ifdef HAVE_JUMP_LABEL
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index 779fc2a1c8f7..b2706c483067 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -485,7 +485,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 	seq_printf(m, "shared_processor_mode=%d\n",
 		   lppaca_shared_proc(get_lppaca()));
 
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
 	seq_printf(m, "slb_size=%d\n", mmu_slb_size);
 #endif
 	parse_em_data(m);
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 12277bc9fd9e..d86938260a86 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -1592,6 +1592,8 @@ ATTRIBUTE_GROUPS(vio_dev);
 void vio_unregister_device(struct vio_dev *viodev)
 {
 	device_unregister(&viodev->dev);
+	if (viodev->family == VDEVICE)
+		irq_dispose_mapping(viodev->irq);
 }
 EXPORT_SYMBOL(vio_unregister_device);
 