Diffstat (limited to 'arch/powerpc')
-rw-r--r--  arch/powerpc/boot/gunzip_util.c                      4
-rw-r--r--  arch/powerpc/include/asm/cputable.h                  6
-rw-r--r--  arch/powerpc/include/asm/machdep.h                   6
-rw-r--r--  arch/powerpc/include/asm/opal.h                     11
-rw-r--r--  arch/powerpc/include/asm/pgtable-ppc64.h             2
-rw-r--r--  arch/powerpc/include/asm/pte-hash64-64k.h           30
-rw-r--r--  arch/powerpc/include/asm/reg.h                       3
-rw-r--r--  arch/powerpc/include/asm/spinlock.h                  1
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S               110
-rw-r--r--  arch/powerpc/kernel/head_44x.S                       4
-rw-r--r--  arch/powerpc/kernel/iommu.c                         38
-rw-r--r--  arch/powerpc/kernel/smp.c                           11
-rw-r--r--  arch/powerpc/lib/locks.c                             4
-rw-r--r--  arch/powerpc/mm/hash_native_64.c                    40
-rw-r--r--  arch/powerpc/mm/hugepage-hash64.c                   88
-rw-r--r--  arch/powerpc/mm/numa.c                              13
-rw-r--r--  arch/powerpc/mm/pgtable_64.c                        44
-rw-r--r--  arch/powerpc/mm/tlb_hash64.c                         6
-rw-r--r--  arch/powerpc/mm/tlb_nohash.c                       111
-rw-r--r--  arch/powerpc/perf/hv-24x7.c                          2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-wrappers.S       2
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c               23
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c            2
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-memory.c      2
-rw-r--r--  arch/powerpc/platforms/pseries/hvcserver.c           4
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c              20
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c               20
-rw-r--r--  arch/powerpc/xmon/xmon.c                             3
28 files changed, 387 insertions, 223 deletions
diff --git a/arch/powerpc/boot/gunzip_util.c b/arch/powerpc/boot/gunzip_util.c
index ef2aed0f63ca..9dc52501de83 100644
--- a/arch/powerpc/boot/gunzip_util.c
+++ b/arch/powerpc/boot/gunzip_util.c
@@ -112,10 +112,10 @@ int gunzip_partial(struct gunzip_state *state, void *dst, int dstlen)
 		r = zlib_inflate(&state->s, Z_FULL_FLUSH);
 		if (r != Z_OK && r != Z_STREAM_END)
 			fatal("inflate returned %d msg: %s\n\r", r, state->s.msg);
-		len = state->s.next_out - (unsigned char *)dst;
+		len = state->s.next_out - (Byte *)dst;
 	} else {
 		/* uncompressed image */
-		len = min(state->s.avail_in, (unsigned)dstlen);
+		len = min(state->s.avail_in, (uLong)dstlen);
 		memcpy(dst, state->s.next_in, len);
 		state->s.next_in += len;
 		state->s.avail_in -= len;
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 642e436d4595..daa5af91163c 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -459,7 +459,8 @@ extern const char *powerpc_base_platform;
 #define CPU_FTRS_POSSIBLE	\
 	    (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
 	    CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
-	    CPU_FTRS_POWER8 | CPU_FTRS_CELL | CPU_FTRS_PA6T | CPU_FTR_VSX)
+	    CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
+	    CPU_FTRS_PA6T | CPU_FTR_VSX)
 #endif
 #else
 enum {
@@ -509,7 +510,8 @@ enum {
 #define CPU_FTRS_ALWAYS	\
 	    (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
 	    CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
-	    CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE)
+	    CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \
+	    CPU_FTRS_POWER8_DD1 & CPU_FTRS_POSSIBLE)
 #endif
 #else
 enum {
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 44e90516519b..b125ceab149c 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -57,10 +57,10 @@ struct machdep_calls {
 	void		(*hpte_removebolted)(unsigned long ea,
 					     int psize, int ssize);
 	void		(*flush_hash_range)(unsigned long number, int local);
-	void		(*hugepage_invalidate)(struct mm_struct *mm,
+	void		(*hugepage_invalidate)(unsigned long vsid,
+					       unsigned long addr,
 					       unsigned char *hpte_slot_array,
-					       unsigned long addr, int psize);
-
+					       int psize, int ssize);
 	/* special for kexec, to be called in real mode, linear mapping is
 	 * destroyed as well */
 	void		(*hpte_clear_all)(void);
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index b2f8ce1fd0d7..86055e598269 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -149,6 +149,8 @@ struct opal_sg_list {
 #define OPAL_DUMP_INFO2				94
 #define OPAL_PCI_EEH_FREEZE_SET			97
 #define OPAL_HANDLE_HMI				98
+#define OPAL_REGISTER_DUMP_REGION		101
+#define OPAL_UNREGISTER_DUMP_REGION		102
 
 #ifndef __ASSEMBLY__
 
@@ -920,6 +922,8 @@ int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer,
 		uint64_t length);
 int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
 int64_t opal_handle_hmi(void);
+int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
+int64_t opal_unregister_dump_region(uint32_t id);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
@@ -974,6 +978,13 @@ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
 				     unsigned long vmalloc_size);
 void opal_free_sg_list(struct opal_sg_list *sg);
 
+/*
+ * Dump region ID range usable by the OS
+ */
+#define OPAL_DUMP_REGION_HOST_START		0x80
+#define OPAL_DUMP_REGION_LOG_BUF		0x80
+#define OPAL_DUMP_REGION_HOST_END		0xFF
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __OPAL_H */
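
The two calls declared above are consumed later in this patch by opal_dump_region_init() in opal.c. As an aside, a minimal sketch of how another in-kernel user might pair them, modeled on that call site (the buffer, length, and ID choice here are hypothetical, and the third argument follows the opal.c usage, which passes the region length):

	/* Sketch: expose a hypothetical driver buffer to firmware dumps.
	 * IDs in OPAL_DUMP_REGION_HOST_START..OPAL_DUMP_REGION_HOST_END
	 * belong to the OS; 0x80 itself is taken by the kernel log buffer.
	 */
	static int64_t example_register_dump_buf(void *buf, uint64_t len)
	{
		int64_t rc;

		rc = opal_register_dump_region(OPAL_DUMP_REGION_HOST_START + 1,
					       __pa(buf), len);
		if (rc && rc != OPAL_UNSUPPORTED)
			pr_warn("dump region registration failed, rc = %lld\n", rc);
		return rc;
	}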
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index eb9261024f51..7b3d54fae46f 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -413,7 +413,7 @@ static inline char *get_hpte_slot_array(pmd_t *pmdp)
 }
 
 extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
-				   pmd_t *pmdp);
+				   pmd_t *pmdp, unsigned long old_pmd);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
 extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
index b6d2d42f84b5..4f4ec2ab45c9 100644
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ b/arch/powerpc/include/asm/pte-hash64-64k.h
@@ -46,11 +46,31 @@
  * in order to deal with 64K made of 4K HW pages. Thus we override the
  * generic accessors and iterators here
  */
-#define __real_pte(e,p) 	((real_pte_t) { \
-	(e), (pte_val(e) & _PAGE_COMBO) ? \
-		(pte_val(*((p) + PTRS_PER_PTE))) : 0 })
-#define __rpte_to_hidx(r,index)	((pte_val((r).pte) & _PAGE_COMBO) ? \
-	(((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf))
+#define __real_pte __real_pte
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
+{
+	real_pte_t rpte;
+
+	rpte.pte = pte;
+	rpte.hidx = 0;
+	if (pte_val(pte) & _PAGE_COMBO) {
+		/*
+		 * Make sure we order the hidx load against the _PAGE_COMBO
+		 * check. The store side ordering is done in __hash_page_4K
+		 */
+		smp_rmb();
+		rpte.hidx = pte_val(*((ptep) + PTRS_PER_PTE));
+	}
+	return rpte;
+}
+
+static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
+{
+	if ((pte_val(rpte.pte) & _PAGE_COMBO))
+		return (rpte.hidx >> (index<<2)) & 0xf;
+	return (pte_val(rpte.pte) >> 12) & 0xf;
+}
+
 #define __rpte_to_pte(r)	((r).pte)
 #define __rpte_sub_valid(rpte, index) \
 	(pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index)))
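
The smp_rmb() introduced above pairs with a write barrier on the store side (the in-code comment points at __hash_page_4K; the hugepage path gains the matching smp_wmb() later in this series). A schematic of the pairing, with a hypothetical two-field record standing in for the PTE and the hidx words that live at ptep + PTRS_PER_PTE:

	struct combo_rec {
		unsigned long pte;	/* stands in for the Linux PTE */
		unsigned long hidx;	/* stands in for *(ptep + PTRS_PER_PTE) */
	};

	static void combo_publish(struct combo_rec *r, unsigned long hidx,
				  unsigned long pte_with_combo)
	{
		r->hidx = hidx;
		smp_wmb();		/* hidx visible before _PAGE_COMBO is */
		r->pte = pte_with_combo;
	}

	static unsigned long combo_read(struct combo_rec *r, unsigned long combo)
	{
		unsigned long pte = r->pte;

		if (!(pte & combo))
			return 0;
		smp_rmb();		/* don't load hidx before the check */
		return r->hidx;
	}

If the reader sees the combo bit set, the paired barriers guarantee it also sees the hidx values written before that bit was published.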
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 1c987bf794ef..0c0505956a29 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -213,9 +213,8 @@
 #define SPRN_ACOP	0x1F	/* Available Coprocessor Register */
 #define SPRN_TFIAR	0x81	/* Transaction Failure Inst Addr   */
 #define SPRN_TEXASR	0x82	/* Transaction EXception & Summary */
-#define   TEXASR_FS	__MASK(63-36)	/* Transaction Failure Summary */
 #define SPRN_TEXASRU	0x83	/* ''	   ''	   ''	 Upper 32  */
 #define   TEXASR_FS	__MASK(63-36) /* TEXASR Failure Summary */
 #define SPRN_TFHAR	0x80	/* Transaction Failure Handler Addr */
 #define SPRN_CTRLF	0x088
 #define SPRN_CTRLT	0x098
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 35aa339410bd..4dbe072eecbe 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -61,6 +61,7 @@ static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
+	smp_mb();
 	return !arch_spin_value_unlocked(*lock);
 }
 
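The new smp_mb() makes arch_spin_is_locked() a full memory barrier, so a store performed before the check cannot be reordered past the load of the lock word. A sketch of the shape this protects (the flag and helper are hypothetical; without the barrier, store-buffering reordering can let both sides conclude the other is absent):

	static int flag;

	static bool peer_absent(arch_spinlock_t *l)
	{
		WRITE_ONCE(flag, 1);		/* announce ourselves first */
		/* the full barrier is now implied by the check below */
		return !arch_spin_is_locked(l);	/* then sample the lock word */
	}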
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6144d5a6bfe7..050f79a4a168 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -592,61 +592,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	MASKABLE_EXCEPTION_HV_OOL(0xe62, hmi_exception)
 	KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62)
 
-	.globl hmi_exception_early
-hmi_exception_early:
-	EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60)
-	mr	r10,r1			/* Save r1 */
-	ld	r1,PACAEMERGSP(r13)	/* Use emergency stack */
-	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame */
-	std	r9,_CCR(r1)		/* save CR in stackframe */
-	mfspr	r11,SPRN_HSRR0		/* Save HSRR0 */
-	std	r11,_NIP(r1)		/* save HSRR0 in stackframe */
-	mfspr	r12,SPRN_HSRR1		/* Save SRR1 */
-	std	r12,_MSR(r1)		/* save SRR1 in stackframe */
-	std	r10,0(r1)		/* make stack chain pointer */
-	std	r0,GPR0(r1)		/* save r0 in stackframe */
-	std	r10,GPR1(r1)		/* save r1 in stackframe */
-	EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
-	EXCEPTION_PROLOG_COMMON_3(0xe60)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	hmi_exception_realmode
-	/* Windup the stack. */
-	/* Clear MSR_RI before setting SRR0 and SRR1. */
-	li	r0,MSR_RI
-	mfmsr	r9			/* get MSR value */
-	andc	r9,r9,r0
-	mtmsrd	r9,1			/* Clear MSR_RI */
-	/* Move original HSRR0 and HSRR1 into the respective regs */
-	ld	r9,_MSR(r1)
-	mtspr	SPRN_HSRR1,r9
-	ld	r3,_NIP(r1)
-	mtspr	SPRN_HSRR0,r3
-	ld	r9,_CTR(r1)
-	mtctr	r9
-	ld	r9,_XER(r1)
-	mtxer	r9
-	ld	r9,_LINK(r1)
-	mtlr	r9
-	REST_GPR(0, r1)
-	REST_8GPRS(2, r1)
-	REST_GPR(10, r1)
-	ld	r11,_CCR(r1)
-	mtcr	r11
-	REST_GPR(11, r1)
-	REST_2GPRS(12, r1)
-	/* restore original r1. */
-	ld	r1,GPR1(r1)
-
-	/*
-	 * Go to virtual mode and pull the HMI event information from
-	 * firmware.
-	 */
-	.globl hmi_exception_after_realmode
-hmi_exception_after_realmode:
-	SET_SCRATCH0(r13)
-	EXCEPTION_PROLOG_0(PACA_EXGEN)
-	b	hmi_exception_hv
-
 	MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell)
 	KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82)
 
@@ -1306,6 +1251,61 @@ fwnmi_data_area:
 	. = 0x8000
 #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
 
+	.globl hmi_exception_early
+hmi_exception_early:
+	EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60)
+	mr	r10,r1			/* Save r1 */
+	ld	r1,PACAEMERGSP(r13)	/* Use emergency stack */
+	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame */
+	std	r9,_CCR(r1)		/* save CR in stackframe */
+	mfspr	r11,SPRN_HSRR0		/* Save HSRR0 */
+	std	r11,_NIP(r1)		/* save HSRR0 in stackframe */
+	mfspr	r12,SPRN_HSRR1		/* Save SRR1 */
+	std	r12,_MSR(r1)		/* save SRR1 in stackframe */
+	std	r10,0(r1)		/* make stack chain pointer */
+	std	r0,GPR0(r1)		/* save r0 in stackframe */
+	std	r10,GPR1(r1)		/* save r1 in stackframe */
+	EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
+	EXCEPTION_PROLOG_COMMON_3(0xe60)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	hmi_exception_realmode
+	/* Windup the stack. */
+	/* Clear MSR_RI before setting SRR0 and SRR1. */
+	li	r0,MSR_RI
+	mfmsr	r9			/* get MSR value */
+	andc	r9,r9,r0
+	mtmsrd	r9,1			/* Clear MSR_RI */
+	/* Move original HSRR0 and HSRR1 into the respective regs */
+	ld	r9,_MSR(r1)
+	mtspr	SPRN_HSRR1,r9
+	ld	r3,_NIP(r1)
+	mtspr	SPRN_HSRR0,r3
+	ld	r9,_CTR(r1)
+	mtctr	r9
+	ld	r9,_XER(r1)
+	mtxer	r9
+	ld	r9,_LINK(r1)
+	mtlr	r9
+	REST_GPR(0, r1)
+	REST_8GPRS(2, r1)
+	REST_GPR(10, r1)
+	ld	r11,_CCR(r1)
+	mtcr	r11
+	REST_GPR(11, r1)
+	REST_2GPRS(12, r1)
+	/* restore original r1. */
+	ld	r1,GPR1(r1)
+
+	/*
+	 * Go to virtual mode and pull the HMI event information from
+	 * firmware.
+	 */
+	.globl hmi_exception_after_realmode
+hmi_exception_after_realmode:
+	SET_SCRATCH0(r13)
+	EXCEPTION_PROLOG_0(PACA_EXGEN)
+	b	hmi_exception_hv
+
 #ifdef CONFIG_PPC_POWERNV
 _GLOBAL(opal_mc_secondary_handler)
 	HMT_MEDIUM_PPR_DISCARD
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index c334f53453f7..b5061abbd2e0 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -1210,10 +1210,12 @@ clear_utlb_entry:
 
 	/* We configure icbi to invalidate 128 bytes at a time since the
 	 * current 32-bit kernel code isn't too happy with icache != dcache
-	 * block size
+	 * block size. We also disable the BTAC as this can cause errors
+	 * in some circumstances (see IBM Erratum 47).
 	 */
 	mfspr	r3,SPRN_CCR0
 	oris	r3,r3,0x0020
+	ori	r3,r3,0x0040
 	mtspr	SPRN_CCR0,r3
 	isync
 
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index f84f799babb1..a10642a0d861 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1120,37 +1120,41 @@ EXPORT_SYMBOL_GPL(iommu_release_ownership);
 int iommu_add_device(struct device *dev)
 {
 	struct iommu_table *tbl;
-	int ret = 0;
 
-	if (WARN_ON(dev->iommu_group)) {
-		pr_warn("iommu_tce: device %s is already in iommu group %d, skipping\n",
-			dev_name(dev),
-			iommu_group_id(dev->iommu_group));
+	/*
+	 * The sysfs entries should be populated before
+	 * binding IOMMU group. If sysfs entries isn't
+	 * ready, we simply bail.
+	 */
+	if (!device_is_registered(dev))
+		return -ENOENT;
+
+	if (dev->iommu_group) {
+		pr_debug("%s: Skipping device %s with iommu group %d\n",
+			 __func__, dev_name(dev),
+			 iommu_group_id(dev->iommu_group));
 		return -EBUSY;
 	}
 
 	tbl = get_iommu_table_base(dev);
 	if (!tbl || !tbl->it_group) {
-		pr_debug("iommu_tce: skipping device %s with no tbl\n",
-			 dev_name(dev));
+		pr_debug("%s: Skipping device %s with no tbl\n",
+			 __func__, dev_name(dev));
 		return 0;
 	}
 
-	pr_debug("iommu_tce: adding %s to iommu group %d\n",
-		 dev_name(dev), iommu_group_id(tbl->it_group));
+	pr_debug("%s: Adding %s to iommu group %d\n",
+		 __func__, dev_name(dev),
+		 iommu_group_id(tbl->it_group));
 
 	if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
-		pr_err("iommu_tce: unsupported iommu page size.");
-		pr_err("%s has not been added\n", dev_name(dev));
+		pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n",
+		       __func__, IOMMU_PAGE_SIZE(tbl),
+		       PAGE_SIZE, dev_name(dev));
 		return -EINVAL;
 	}
 
-	ret = iommu_group_add_device(tbl->it_group, dev);
-	if (ret < 0)
-		pr_err("iommu_tce: %s has not been added, ret=%d\n",
-		       dev_name(dev), ret);
-
-	return ret;
+	return iommu_group_add_device(tbl->it_group, dev);
 }
 EXPORT_SYMBOL_GPL(iommu_add_device);
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 1007fb802e6b..a0738af4aba6 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -376,6 +376,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 						GFP_KERNEL, cpu_to_node(cpu));
 		zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
 					GFP_KERNEL, cpu_to_node(cpu));
+		/*
+		 * numa_node_id() works after this.
+		 */
+		set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
+		set_cpu_numa_mem(cpu, local_memory_node(numa_cpu_lookup_table[cpu]));
 	}
 
 	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
@@ -723,12 +728,6 @@ void start_secondary(void *unused)
 	}
 	traverse_core_siblings(cpu, true);
 
-	/*
-	 * numa_node_id() works after this.
-	 */
-	set_numa_node(numa_cpu_lookup_table[cpu]);
-	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
-
 	smp_wmb();
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 0c9c8d7d0734..170a0346f756 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -70,12 +70,16 @@ void __rw_yield(arch_rwlock_t *rw)
 
 void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
+	smp_mb();
+
 	while (lock->slock) {
 		HMT_low();
 		if (SHARED_PROCESSOR)
 			__spin_yield(lock);
 	}
 	HMT_medium();
+
+	smp_mb();
 }
 
 EXPORT_SYMBOL(arch_spin_unlock_wait);
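
As with arch_spin_is_locked() above, the barriers make arch_spin_unlock_wait() ordering-safe on both sides: the entry smp_mb() orders the caller's earlier accesses before the lock word is first sampled, and the exit smp_mb() keeps later accesses from being hoisted before the final unlocked observation. A hedged sketch of the usual caller shape (struct obj and the unpublish step are hypothetical):

	struct obj {
		struct list_head node;
		arch_spinlock_t slock;
	};

	static void obj_retire(struct obj *o)
	{
		list_del(&o->node);		  /* unpublish the object */
		arch_spin_unlock_wait(&o->slock); /* drain any current holder */
		kfree(o);			  /* now safe to free */
	}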
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index cf1d325eae8b..afc0a8295f84 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -412,18 +412,18 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
 	local_irq_restore(flags);
 }
 
-static void native_hugepage_invalidate(struct mm_struct *mm,
+static void native_hugepage_invalidate(unsigned long vsid,
+				       unsigned long addr,
 				       unsigned char *hpte_slot_array,
-				       unsigned long addr, int psize)
+				       int psize, int ssize)
 {
-	int ssize = 0, i;
-	int lock_tlbie;
+	int i;
 	struct hash_pte *hptep;
 	int actual_psize = MMU_PAGE_16M;
 	unsigned int max_hpte_count, valid;
 	unsigned long flags, s_addr = addr;
 	unsigned long hpte_v, want_v, shift;
-	unsigned long hidx, vpn = 0, vsid, hash, slot;
+	unsigned long hidx, vpn = 0, hash, slot;
 
 	shift = mmu_psize_defs[psize].shift;
 	max_hpte_count = 1U << (PMD_SHIFT - shift);
@@ -437,15 +437,6 @@ static void native_hugepage_invalidate(struct mm_struct *mm,
 
 		/* get the vpn */
 		addr = s_addr + (i * (1ul << shift));
-		if (!is_kernel_addr(addr)) {
-			ssize = user_segment_size(addr);
-			vsid = get_vsid(mm->context.id, addr, ssize);
-			WARN_ON(vsid == 0);
-		} else {
-			vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
-			ssize = mmu_kernel_ssize;
-		}
-
 		vpn = hpt_vpn(addr, vsid, ssize);
 		hash = hpt_hash(vpn, shift, ssize);
 		if (hidx & _PTEIDX_SECONDARY)
@@ -465,22 +456,13 @@ static void native_hugepage_invalidate(struct mm_struct *mm,
 		else
 			/* Invalidate the hpte. NOTE: this also unlocks it */
 			hptep->v = 0;
+		/*
+		 * We need to do tlb invalidate for all the address, tlbie
+		 * instruction compares entry_VA in tlb with the VA specified
+		 * here
+		 */
+		tlbie(vpn, psize, actual_psize, ssize, 0);
 	}
-	/*
-	 * Since this is a hugepage, we just need a single tlbie.
-	 * use the last vpn.
-	 */
-	lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
-	if (lock_tlbie)
-		raw_spin_lock(&native_tlbie_lock);
-
-	asm volatile("ptesync":::"memory");
-	__tlbie(vpn, psize, actual_psize, ssize);
-	asm volatile("eieio; tlbsync; ptesync":::"memory");
-
-	if (lock_tlbie)
-		raw_spin_unlock(&native_tlbie_lock);
-
 	local_irq_restore(flags);
 }
 
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 826893fcb3a7..5f5e6328c21c 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -18,6 +18,57 @@
 #include <linux/mm.h>
 #include <asm/machdep.h>
 
+static void invalidate_old_hpte(unsigned long vsid, unsigned long addr,
+				pmd_t *pmdp, unsigned int psize, int ssize)
+{
+	int i, max_hpte_count, valid;
+	unsigned long s_addr;
+	unsigned char *hpte_slot_array;
+	unsigned long hidx, shift, vpn, hash, slot;
+
+	s_addr = addr & HPAGE_PMD_MASK;
+	hpte_slot_array = get_hpte_slot_array(pmdp);
+	/*
+	 * IF we try to do a HUGE PTE update after a withdraw is done.
+	 * we will find the below NULL. This happens when we do
+	 * split_huge_page_pmd
+	 */
+	if (!hpte_slot_array)
+		return;
+
+	if (ppc_md.hugepage_invalidate)
+		return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
+						  psize, ssize);
+	/*
+	 * No bluk hpte removal support, invalidate each entry
+	 */
+	shift = mmu_psize_defs[psize].shift;
+	max_hpte_count = HPAGE_PMD_SIZE >> shift;
+	for (i = 0; i < max_hpte_count; i++) {
+		/*
+		 * 8 bits per each hpte entries
+		 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
+		 */
+		valid = hpte_valid(hpte_slot_array, i);
+		if (!valid)
+			continue;
+		hidx = hpte_hash_index(hpte_slot_array, i);
+
+		/* get the vpn */
+		addr = s_addr + (i * (1ul << shift));
+		vpn = hpt_vpn(addr, vsid, ssize);
+		hash = hpt_hash(vpn, shift, ssize);
+		if (hidx & _PTEIDX_SECONDARY)
+			hash = ~hash;
+
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot += hidx & _PTEIDX_GROUP_IX;
+		ppc_md.hpte_invalidate(slot, vpn, psize,
+				       MMU_PAGE_16M, ssize, 0);
+	}
+}
+
+
 int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 		    pmd_t *pmdp, unsigned long trap, int local, int ssize,
 		    unsigned int psize)
@@ -33,7 +84,9 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 	 * atomically mark the linux large page PMD busy and dirty
 	 */
 	do {
-		old_pmd = pmd_val(*pmdp);
+		pmd_t pmd = ACCESS_ONCE(*pmdp);
+
+		old_pmd = pmd_val(pmd);
 		/* If PMD busy, retry the access */
 		if (unlikely(old_pmd & _PAGE_BUSY))
 			return 0;
@@ -85,6 +138,15 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 	vpn = hpt_vpn(ea, vsid, ssize);
 	hash = hpt_hash(vpn, shift, ssize);
 	hpte_slot_array = get_hpte_slot_array(pmdp);
+	if (psize == MMU_PAGE_4K) {
+		/*
+		 * invalidate the old hpte entry if we have that mapped via 64K
+		 * base page size. This is because demote_segment won't flush
+		 * hash page table entries.
+		 */
+		if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO))
+			invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize);
+	}
 
 	valid = hpte_valid(hpte_slot_array, index);
 	if (valid) {
@@ -107,11 +169,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 		 * safely update this here.
 		 */
 		valid = 0;
-		new_pmd &= ~_PAGE_HPTEFLAGS;
 		hpte_slot_array[index] = 0;
-	} else
-		/* clear the busy bits and set the hash pte bits */
-		new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
+	}
 	}
 
 	if (!valid) {
@@ -119,11 +178,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 
 		/* insert new entry */
 		pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
-repeat:
-		hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
-
-		/* clear the busy bits and set the hash pte bits */
-		new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
+		new_pmd |= _PAGE_HASHPTE;
 
 		/* Add in WIMG bits */
 		rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
@@ -132,6 +187,8 @@ repeat:
 		 * enable the memory coherence always
 		 */
 		rflags |= HPTE_R_M;
+repeat:
+		hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
 
 		/* Insert into the hash table, primary slot */
 		slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
@@ -172,8 +229,17 @@ repeat:
 		mark_hpte_slot_valid(hpte_slot_array, index, slot);
 	}
 	/*
-	 * No need to use ldarx/stdcx here
+	 * Mark the pte with _PAGE_COMBO, if we are trying to hash it with
+	 * base page size 4k.
+	 */
+	if (psize == MMU_PAGE_4K)
+		new_pmd |= _PAGE_COMBO;
+	/*
+	 * The hpte valid is stored in the pgtable whose address is in the
+	 * second half of the PMD. Order this against clearing of the busy bit in
+	 * huge pmd.
 	 */
+	smp_wmb();
 	*pmdp = __pmd(new_pmd & ~_PAGE_BUSY);
 	return 0;
 }
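
The slot array that both invalidate_old_hpte() and the hash path walk packs one byte per sub-page, as the in-line comment spells out: [ 000 | secondary group (1 bit) | hidx (3 bits) | valid ]. Hypothetical decoders mirroring what hpte_valid()/hpte_hash_index() do with that encoding:

	static inline int sub_slot_valid(const unsigned char *arr, int i)
	{
		return arr[i] & 0x1;	/* low bit: an HPTE exists for sub-page i */
	}

	static inline unsigned long sub_slot_hidx(const unsigned char *arr, int i)
	{
		return arr[i] >> 1;	/* secondary bit plus 3-bit group index */
	}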
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index d3e9a78eaed3..d7737a542fd7 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1049,7 +1049,7 @@ static void __init mark_reserved_regions_for_nid(int nid)
 
 void __init do_init_bootmem(void)
 {
-	int nid;
+	int nid, cpu;
 
 	min_low_pfn = 0;
 	max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
@@ -1122,8 +1122,15 @@ void __init do_init_bootmem(void)
 
 	reset_numa_cpu_lookup_table();
 	register_cpu_notifier(&ppc64_numa_nb);
-	cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
-			  (void *)(unsigned long)boot_cpuid);
+	/*
+	 * We need the numa_cpu_lookup_table to be accurate for all CPUs,
+	 * even before we online them, so that we can use cpu_to_{node,mem}
+	 * early in boot, cf. smp_prepare_cpus().
+	 */
+	for_each_possible_cpu(cpu) {
+		cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
+				  (void *)(unsigned long)cpu);
+	}
 }
 
 void __init paging_init(void)
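
Filling the lookup table for every possible CPU is what lets the smp.c hunk above call set_cpu_numa_node()/cpu_to_node() before the secondary CPUs are online. A sketch of the kind of early, node-local allocation this makes correct (the helper name is hypothetical):

	static void *early_percpu_buf(int cpu, size_t size)
	{
		/* cpu may not be online yet; the lookup table is already valid */
		return kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
	}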
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 3b3c4d34c7a0..c8d709ab489d 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -54,6 +54,9 @@
 
 #include "mmu_decl.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/thp.h>
+
 /* Some sanity checking */
 #if TASK_SIZE_USER64 > PGTABLE_RANGE
 #error TASK_SIZE_USER64 exceeds pagetable range
@@ -537,8 +540,9 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
 	old = pmd_val(*pmdp);
 	*pmdp = __pmd((old & ~clr) | set);
 #endif
+	trace_hugepage_update(addr, old, clr, set);
 	if (old & _PAGE_HASHPTE)
-		hpte_do_hugepage_flush(mm, addr, pmdp);
+		hpte_do_hugepage_flush(mm, addr, pmdp, old);
 	return old;
 }
 
@@ -642,10 +646,11 @@ void pmdp_splitting_flush(struct vm_area_struct *vma,
 	 * If we didn't had the splitting flag set, go and flush the
 	 * HPTE entries.
 	 */
+	trace_hugepage_splitting(address, old);
 	if (!(old & _PAGE_SPLITTING)) {
 		/* We need to flush the hpte */
 		if (old & _PAGE_HASHPTE)
-			hpte_do_hugepage_flush(vma->vm_mm, address, pmdp);
+			hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old);
 	}
 	/*
 	 * This ensures that generic code that rely on IRQ disabling
@@ -709,6 +714,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 	assert_spin_locked(&mm->page_table_lock);
 	WARN_ON(!pmd_trans_huge(pmd));
 #endif
+	trace_hugepage_set_pmd(addr, pmd);
 	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
 }
 
@@ -723,7 +729,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
  * neesd to be flushed.
  */
 void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
-			    pmd_t *pmdp)
+			    pmd_t *pmdp, unsigned long old_pmd)
 {
 	int ssize, i;
 	unsigned long s_addr;
@@ -745,12 +751,29 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
 	if (!hpte_slot_array)
 		return;
 
-	/* get the base page size */
+	/* get the base page size,vsid and segment size */
+#ifdef CONFIG_DEBUG_VM
 	psize = get_slice_psize(mm, s_addr);
+	BUG_ON(psize == MMU_PAGE_16M);
+#endif
+	if (old_pmd & _PAGE_COMBO)
+		psize = MMU_PAGE_4K;
+	else
+		psize = MMU_PAGE_64K;
+
+	if (!is_kernel_addr(s_addr)) {
+		ssize = user_segment_size(s_addr);
+		vsid = get_vsid(mm->context.id, s_addr, ssize);
+		WARN_ON(vsid == 0);
+	} else {
+		vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize);
+		ssize = mmu_kernel_ssize;
+	}
 
 	if (ppc_md.hugepage_invalidate)
-		return ppc_md.hugepage_invalidate(mm, hpte_slot_array,
-						  s_addr, psize);
+		return ppc_md.hugepage_invalidate(vsid, s_addr,
+						  hpte_slot_array,
+						  psize, ssize);
 	/*
 	 * No bluk hpte removal support, invalidate each entry
 	 */
@@ -768,15 +791,6 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
 
 		/* get the vpn */
 		addr = s_addr + (i * (1ul << shift));
-		if (!is_kernel_addr(addr)) {
-			ssize = user_segment_size(addr);
-			vsid = get_vsid(mm->context.id, addr, ssize);
-			WARN_ON(vsid == 0);
-		} else {
-			vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
-			ssize = mmu_kernel_ssize;
-		}
-
 		vpn = hpt_vpn(addr, vsid, ssize);
 		hash = hpt_hash(vpn, shift, ssize);
 		if (hidx & _PTEIDX_SECONDARY)
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index c99f6510a0b2..d2a94b85dbc2 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -30,6 +30,8 @@
 #include <asm/tlb.h>
 #include <asm/bug.h>
 
+#include <trace/events/thp.h>
+
 DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
 
 /*
@@ -213,10 +215,12 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
 		if (ptep == NULL)
 			continue;
 		pte = pte_val(*ptep);
+		if (hugepage_shift)
+			trace_hugepage_invalidate(start, pte_val(pte));
 		if (!(pte & _PAGE_HASHPTE))
 			continue;
 		if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte)))
-			hpte_do_hugepage_flush(mm, start, (pmd_t *)pte);
+			hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte);
 		else
 			hpte_need_flush(mm, start, ptep, pte, 0);
 	}
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 92cb18d52ea8..f38ea4df6a85 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -581,42 +581,10 @@ static void setup_mmu_htw(void)
 /*
  * Early initialization of the MMU TLB code
  */
-static void __early_init_mmu(int boot_cpu)
+static void early_init_this_mmu(void)
 {
 	unsigned int mas4;
 
-	/* XXX This will have to be decided at runtime, but right
-	 * now our boot and TLB miss code hard wires it. Ideally
-	 * we should find out a suitable page size and patch the
-	 * TLB miss code (either that or use the PACA to store
-	 * the value we want)
-	 */
-	mmu_linear_psize = MMU_PAGE_1G;
-
-	/* XXX This should be decided at runtime based on supported
-	 * page sizes in the TLB, but for now let's assume 16M is
-	 * always there and a good fit (which it probably is)
-	 *
-	 * Freescale booke only supports 4K pages in TLB0, so use that.
-	 */
-	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
-		mmu_vmemmap_psize = MMU_PAGE_4K;
-	else
-		mmu_vmemmap_psize = MMU_PAGE_16M;
-
-	/* XXX This code only checks for TLB 0 capabilities and doesn't
-	 * check what page size combos are supported by the HW. It
-	 * also doesn't handle the case where a separate array holds
-	 * the IND entries from the array loaded by the PT.
-	 */
-	if (boot_cpu) {
-		/* Look for supported page sizes */
-		setup_page_sizes();
-
-		/* Look for HW tablewalk support */
-		setup_mmu_htw();
-	}
-
 	/* Set MAS4 based on page table setting */
 
 	mas4 = 0x4 << MAS4_WIMGED_SHIFT;
@@ -650,11 +618,6 @@ static void __early_init_mmu(int boot_cpu)
 	}
 	mtspr(SPRN_MAS4, mas4);
 
-	/* Set the global containing the top of the linear mapping
-	 * for use by the TLB miss code
-	 */
-	linear_map_top = memblock_end_of_DRAM();
-
 #ifdef CONFIG_PPC_FSL_BOOK3E
 	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
 		unsigned int num_cams;
@@ -662,10 +625,49 @@ static void __early_init_mmu(int boot_cpu)
 		/* use a quarter of the TLBCAM for bolted linear map */
 		num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
 		linear_map_top = map_mem_in_cams(linear_map_top, num_cams);
+	}
+#endif
 
-	/* limit memory so we dont have linear faults */
-	memblock_enforce_memory_limit(linear_map_top);
+	/* A sync won't hurt us after mucking around with
+	 * the MMU configuration
+	 */
+	mb();
+}
 
+static void __init early_init_mmu_global(void)
+{
+	/* XXX This will have to be decided at runtime, but right
+	 * now our boot and TLB miss code hard wires it. Ideally
+	 * we should find out a suitable page size and patch the
+	 * TLB miss code (either that or use the PACA to store
+	 * the value we want)
+	 */
+	mmu_linear_psize = MMU_PAGE_1G;
+
+	/* XXX This should be decided at runtime based on supported
+	 * page sizes in the TLB, but for now let's assume 16M is
+	 * always there and a good fit (which it probably is)
+	 *
	 * Freescale booke only supports 4K pages in TLB0, so use that.
+	 */
+	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+		mmu_vmemmap_psize = MMU_PAGE_4K;
+	else
+		mmu_vmemmap_psize = MMU_PAGE_16M;
+
+	/* XXX This code only checks for TLB 0 capabilities and doesn't
+	 * check what page size combos are supported by the HW. It
+	 * also doesn't handle the case where a separate array holds
+	 * the IND entries from the array loaded by the PT.
+	 */
+	/* Look for supported page sizes */
+	setup_page_sizes();
+
+	/* Look for HW tablewalk support */
+	setup_mmu_htw();
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
+	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
 		if (book3e_htw_mode == PPC_HTW_NONE) {
 			extlb_level_exc = EX_TLB_SIZE;
 			patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
@@ -675,22 +677,41 @@ static void __early_init_mmu(int boot_cpu)
 	}
 #endif
 
-	/* A sync won't hurt us after mucking around with
-	 * the MMU configuration
+	/* Set the global containing the top of the linear mapping
+	 * for use by the TLB miss code
 	 */
-	mb();
+	linear_map_top = memblock_end_of_DRAM();
+}
+
+static void __init early_mmu_set_memory_limit(void)
+{
+#ifdef CONFIG_PPC_FSL_BOOK3E
+	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+		/*
+		 * Limit memory so we dont have linear faults.
+		 * Unlike memblock_set_current_limit, which limits
+		 * memory available during early boot, this permanently
+		 * reduces the memory available to Linux. We need to
+		 * do this because highmem is not supported on 64-bit.
+		 */
+		memblock_enforce_memory_limit(linear_map_top);
+	}
+#endif
 
 	memblock_set_current_limit(linear_map_top);
 }
 
+/* boot cpu only */
 void __init early_init_mmu(void)
 {
-	__early_init_mmu(1);
+	early_init_mmu_global();
+	early_init_this_mmu();
+	early_mmu_set_memory_limit();
 }
 
 void early_init_mmu_secondary(void)
 {
-	__early_init_mmu(0);
+	early_init_this_mmu();
 }
 
 void setup_initial_memory_limit(phys_addr_t first_memblock_base,
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 66d0f179650f..70d4f748b54b 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -223,7 +223,7 @@ e_free:
 		pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:"
 		       " rc=%ld\n",
 		       catalog_version_num, page_offset, hret);
-	kfree(page);
+	kmem_cache_free(hv_page_cache, page);
 
 	pr_devel("catalog_read: offset=%lld(%lld) count=%zu(%zu) catalog_len=%zu(%zu) => %zd\n",
 		 offset, page_offset, count, page_count, catalog_len,
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index a328be44880f..2e6ce1b8dc8f 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -245,3 +245,5 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
 OPAL_CALL(opal_get_param,			OPAL_GET_PARAM);
 OPAL_CALL(opal_set_param,			OPAL_SET_PARAM);
 OPAL_CALL(opal_handle_hmi,			OPAL_HANDLE_HMI);
+OPAL_CALL(opal_register_dump_region,		OPAL_REGISTER_DUMP_REGION);
+OPAL_CALL(opal_unregister_dump_region,		OPAL_UNREGISTER_DUMP_REGION);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index f0a01a46a57d..b44eec3e8dbd 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -605,6 +605,24 @@ static int opal_sysfs_init(void)
 	return 0;
 }
 
+static void __init opal_dump_region_init(void)
+{
+	void *addr;
+	uint64_t size;
+	int rc;
+
+	/* Register kernel log buffer */
+	addr = log_buf_addr_get();
+	size = log_buf_len_get();
+	rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
+				       __pa(addr), size);
+	/* Don't warn if this is just an older OPAL that doesn't
+	 * know about that call
+	 */
+	if (rc && rc != OPAL_UNSUPPORTED)
+		pr_warn("DUMP: Failed to register kernel log buffer. "
+			"rc = %d\n", rc);
+}
 static int __init opal_init(void)
 {
 	struct device_node *np, *consoles;
@@ -654,6 +672,8 @@ static int __init opal_init(void)
 	/* Create "opal" kobject under /sys/firmware */
 	rc = opal_sysfs_init();
 	if (rc == 0) {
+		/* Setup dump region interface */
+		opal_dump_region_init();
 		/* Setup error log interface */
 		rc = opal_elog_init();
 		/* Setup code update interface */
@@ -694,6 +714,9 @@ void opal_shutdown(void)
 		else
 			mdelay(10);
 	}
+
+	/* Unregister memory dump region */
+	opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
 }
 
 /* Export this so that test modules can use it */
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index b136108ddc99..df241b11d4f7 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -857,7 +857,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
 
 	pe = &phb->ioda.pe_array[pdn->pe_number];
 	WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
-	set_iommu_table_base(&pdev->dev, &pe->tce32_table);
+	set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
 }
 
 static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index ac01e188faef..c904583baf4b 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -146,7 +146,7 @@ static inline int pseries_remove_memblock(unsigned long base,
 }
 static inline int pseries_remove_mem_node(struct device_node *np)
 {
-	return -EOPNOTSUPP;
+	return 0;
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c
index 4557e91626c4..eedb64594dc5 100644
--- a/arch/powerpc/platforms/pseries/hvcserver.c
+++ b/arch/powerpc/platforms/pseries/hvcserver.c
@@ -163,8 +163,8 @@ int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head,
 		return retval;
 	}
 
-	last_p_partition_ID = pi_buff[0];
-	last_p_unit_address = pi_buff[1];
+	last_p_partition_ID = be64_to_cpu(pi_buff[0]);
+	last_p_unit_address = be64_to_cpu(pi_buff[1]);
 
 	/* This indicates that there are no further partners */
 	if (last_p_partition_ID == ~0UL
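
The buffer returned by the hypervisor holds big-endian 64-bit values, so the raw loads were only correct on big-endian kernels; the added be64_to_cpu() makes the ~0UL end-marker comparison just below work on little-endian builds too. Schematically (the helper name is hypothetical):

	static bool pi_end_marker(const __be64 *pi_buff)
	{
		/* convert firmware's big-endian word before comparing it
		 * with a host-endian constant
		 */
		return be64_to_cpu(pi_buff[0]) == ~0UL;
	}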
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 33b552ffbe57..4642d6a4d356 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -721,13 +721,13 @@ static int __init disable_ddw_setup(char *str)
 
 early_param("disable_ddw", disable_ddw_setup);
 
-static void remove_ddw(struct device_node *np)
+static void remove_ddw(struct device_node *np, bool remove_prop)
 {
 	struct dynamic_dma_window_prop *dwp;
 	struct property *win64;
 	const u32 *ddw_avail;
 	u64 liobn;
-	int len, ret;
+	int len, ret = 0;
 
 	ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len);
 	win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
@@ -761,7 +761,8 @@ static void remove_ddw(struct device_node *np)
 			np->full_name, ret, ddw_avail[2], liobn);
 
 delprop:
-	ret = of_remove_property(np, win64);
+	if (remove_prop)
+		ret = of_remove_property(np, win64);
 	if (ret)
 		pr_warning("%s: failed to remove direct window property: %d\n",
 			np->full_name, ret);
@@ -805,7 +806,7 @@ static int find_existing_ddw_windows(void)
 		window = kzalloc(sizeof(*window), GFP_KERNEL);
 		if (!window || len < sizeof(struct dynamic_dma_window_prop)) {
 			kfree(window);
-			remove_ddw(pdn);
+			remove_ddw(pdn, true);
 			continue;
 		}
 
@@ -1045,7 +1046,7 @@ out_free_window:
 	kfree(window);
 
 out_clear_window:
-	remove_ddw(pdn);
+	remove_ddw(pdn, true);
 
 out_free_prop:
 	kfree(win64->name);
@@ -1255,7 +1256,14 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
 
 	switch (action) {
 	case OF_RECONFIG_DETACH_NODE:
-		remove_ddw(np);
+		/*
+		 * Removing the property will invoke the reconfig
+		 * notifier again, which causes dead-lock on the
+		 * read-write semaphore of the notifier chain. So
+		 * we have to remove the property when releasing
+		 * the device node.
+		 */
+		remove_ddw(np, false);
 		if (pci && pci->iommu_table)
 			iommu_free_table(pci->iommu_table, np->full_name);
 
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index fbfcef514aa7..34e64237fff9 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -431,16 +431,17 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
 	spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
 }
 
-static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm,
-					     unsigned char *hpte_slot_array,
-					     unsigned long addr, int psize)
+static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
+					     unsigned long addr,
+					     unsigned char *hpte_slot_array,
+					     int psize, int ssize)
 {
-	int ssize = 0, i, index = 0;
+	int i, index = 0;
 	unsigned long s_addr = addr;
 	unsigned int max_hpte_count, valid;
 	unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
 	unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
-	unsigned long shift, hidx, vpn = 0, vsid, hash, slot;
+	unsigned long shift, hidx, vpn = 0, hash, slot;
 
 	shift = mmu_psize_defs[psize].shift;
 	max_hpte_count = 1U << (PMD_SHIFT - shift);
@@ -453,15 +454,6 @@ static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm,
 
 		/* get the vpn */
 		addr = s_addr + (i * (1ul << shift));
-		if (!is_kernel_addr(addr)) {
-			ssize = user_segment_size(addr);
-			vsid = get_vsid(mm->context.id, addr, ssize);
-			WARN_ON(vsid == 0);
-		} else {
-			vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
-			ssize = mmu_kernel_ssize;
-		}
-
 		vpn = hpt_vpn(addr, vsid, ssize);
 		hash = hpt_hash(vpn, shift, ssize);
 		if (hidx & _PTEIDX_SECONDARY)
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 8d198b5e9e0a..b988b5addf86 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -24,6 +24,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/bug.h>
+#include <linux/nmi.h>
 
 #include <asm/ptrace.h>
 #include <asm/string.h>
@@ -374,6 +375,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 #endif
 
 	local_irq_save(flags);
+	hard_irq_disable();
 
 	bp = in_breakpoint_table(regs->nip, &offset);
 	if (bp != NULL) {
@@ -558,6 +560,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 #endif
 	insert_cpu_bpts();
 
+	touch_nmi_watchdog();
 	local_irq_restore(flags);
 
 	return cmd != 'X' && cmd != EOF;