author     Linus Torvalds <torvalds@linux-foundation.org>   2018-04-28 12:45:34 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2018-04-28 12:45:34 -0400
commit     0d95cfa922c24bcc20b5ccf7496b6ac7c8e29efb (patch)
tree       2e65b9c58e1b76911a934d58a78460656b9376fd
parent     46dc111dfe47bf47f23884cade3c8a355be87c8c (diff)
parent     b2d7ecbe355698010a6b7a15eb179e09eb3d6a34 (diff)
Merge tag 'powerpc-4.17-4' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc fixes from Michael Ellerman:
 "A bunch of fixes, mostly for existing code and going to stable.

  Our memory hot-unplug path wasn't flushing the cache before removing
  memory. That is a problem now that we are doing memory hotplug on
  bare metal.

  Three fixes for the NPU code that supports devices connected via
  NVLink (ie. GPUs). The main one tweaks the TLB flush algorithm to
  avoid soft lockups for large flushes.

  A fix for our memory error handling where we would loop infinitely,
  returning back to the bad access and hard lockup the CPU.

  Fixes for the OPAL RTC driver, which wasn't handling some error cases
  correctly.

  A fix for a hardlockup in the powernv cpufreq driver.

  And finally two fixes to our smp_send_stop(), required due to a
  recent change to use it on shutdown.

  Thanks to: Alistair Popple, Balbir Singh, Laurentiu Tudor, Mahesh
  Salgaonkar, Mark Hairgrove, Nicholas Piggin, Rashmica Gupta,
  Shilpasri G Bhat"

* tag 'powerpc-4.17-4' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/kvm/booke: Fix altivec related build break
  powerpc: Fix deadlock with multiple calls to smp_send_stop
  cpufreq: powernv: Fix hardlockup due to synchronous smp_call in timer interrupt
  powerpc: Fix smp_send_stop NMI IPI handling
  rtc: opal: Fix OPAL RTC driver OPAL_BUSY loops
  powerpc/mce: Fix a bug where mce loops on memory UE.
  powerpc/powernv/npu: Do a PID GPU TLB flush when invalidating a large address range
  powerpc/powernv/npu: Prevent overwriting of pnv_npu2_init_contex() callback parameters
  powerpc/powernv/npu: Add lock to prevent race in concurrent context init/destroy
  powerpc/powernv/memtrace: Let the arch hotunplug code flush cache
  powerpc/mm: Flush cache on memory hot(un)plug
-rw-r--r--  arch/powerpc/include/asm/powernv.h         |  2
-rw-r--r--  arch/powerpc/kernel/mce_power.c            |  7
-rw-r--r--  arch/powerpc/kernel/smp.c                  | 49
-rw-r--r--  arch/powerpc/kvm/booke.c                   |  7
-rw-r--r--  arch/powerpc/mm/mem.c                      |  2
-rw-r--r--  arch/powerpc/platforms/powernv/memtrace.c  | 17
-rw-r--r--  arch/powerpc/platforms/powernv/npu-dma.c   | 88
-rw-r--r--  arch/powerpc/platforms/powernv/opal-rtc.c  |  8
-rw-r--r--  drivers/cpufreq/powernv-cpufreq.c          | 14
-rw-r--r--  drivers/rtc/rtc-opal.c                     | 37
10 files changed, 166 insertions, 65 deletions
diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h
index d1c2d2e658cf..2f3ff7a27881 100644
--- a/arch/powerpc/include/asm/powernv.h
+++ b/arch/powerpc/include/asm/powernv.h
@@ -15,7 +15,7 @@
 extern void powernv_set_nmmu_ptcr(unsigned long ptcr);
 extern struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 			unsigned long flags,
-			struct npu_context *(*cb)(struct npu_context *, void *),
+			void (*cb)(struct npu_context *, void *),
 			void *priv);
 extern void pnv_npu2_destroy_context(struct npu_context *context,
 			struct pci_dev *gpdev);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index fe6fc63251fe..38c5b4764bfe 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -441,7 +441,6 @@ static int mce_handle_ierror(struct pt_regs *regs,
 					if (pfn != ULONG_MAX) {
 						*phys_addr =
 							(pfn << PAGE_SHIFT);
-						handled = 1;
 					}
 				}
 			}
@@ -532,9 +531,7 @@ static int mce_handle_derror(struct pt_regs *regs,
 			 * kernel/exception-64s.h
 			 */
 			if (get_paca()->in_mce < MAX_MCE_DEPTH)
-				if (!mce_find_instr_ea_and_pfn(regs, addr,
-						phys_addr))
-					handled = 1;
+				mce_find_instr_ea_and_pfn(regs, addr, phys_addr);
 		}
 		found = 1;
 	}
@@ -572,7 +569,7 @@ static long mce_handle_error(struct pt_regs *regs,
 		const struct mce_ierror_table itable[])
 {
 	struct mce_error_info mce_err = { 0 };
-	uint64_t addr, phys_addr;
+	uint64_t addr, phys_addr = ULONG_MAX;
 	uint64_t srr1 = regs->msr;
 	long handled;
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e16ec7b3b427..9ca7148b5881 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -566,10 +566,35 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
 #endif
 
 #ifdef CONFIG_NMI_IPI
-static void stop_this_cpu(struct pt_regs *regs)
-#else
+static void nmi_stop_this_cpu(struct pt_regs *regs)
+{
+	/*
+	 * This is a special case because it never returns, so the NMI IPI
+	 * handling would never mark it as done, which makes any later
+	 * smp_send_nmi_ipi() call spin forever. Mark it done now.
+	 *
+	 * IRQs are already hard disabled by the smp_handle_nmi_ipi.
+	 */
+	nmi_ipi_lock();
+	nmi_ipi_busy_count--;
+	nmi_ipi_unlock();
+
+	/* Remove this CPU */
+	set_cpu_online(smp_processor_id(), false);
+
+	spin_begin();
+	while (1)
+		spin_cpu_relax();
+}
+
+void smp_send_stop(void)
+{
+	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, nmi_stop_this_cpu, 1000000);
+}
+
+#else /* CONFIG_NMI_IPI */
+
 static void stop_this_cpu(void *dummy)
-#endif
 {
 	/* Remove this CPU */
 	set_cpu_online(smp_processor_id(), false);
@@ -582,12 +607,22 @@ static void stop_this_cpu(void *dummy)
 
 void smp_send_stop(void)
 {
-#ifdef CONFIG_NMI_IPI
-	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, stop_this_cpu, 1000000);
-#else
+	static bool stopped = false;
+
+	/*
+	 * Prevent waiting on csd lock from a previous smp_send_stop.
+	 * This is racy, but in general callers try to do the right
+	 * thing and only fire off one smp_send_stop (e.g., see
+	 * kernel/panic.c)
+	 */
+	if (stopped)
+		return;
+
+	stopped = true;
+
 	smp_call_function(stop_this_cpu, NULL, 0);
-#endif
 }
+#endif /* CONFIG_NMI_IPI */
 
 struct thread_info *current_set[NR_CPUS];
 
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 6038e2e7aee0..876d4f294fdd 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -305,6 +305,13 @@ void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu)
 	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
 }
 
+#ifdef CONFIG_ALTIVEC
+void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu)
+{
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL);
+}
+#endif
+
 void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
 {
 	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 737f8a4632cc..c3c39b02b2ba 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -133,6 +133,7 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *
 			start, start + size, rc);
 		return -EFAULT;
 	}
+	flush_inval_dcache_range(start, start + size);
 
 	return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
 }
@@ -159,6 +160,7 @@ int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap
 
 	/* Remove htab bolted mappings for this section of memory */
 	start = (unsigned long)__va(start);
+	flush_inval_dcache_range(start, start + size);
 	ret = remove_section_mapping(start, start + size);
 
 	/* Ensure all vmalloc mappings are flushed in case they also
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index de470caf0784..fc222a0c2ac4 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -82,19 +82,6 @@ static const struct file_operations memtrace_fops = {
 	.open = simple_open,
 };
 
-static void flush_memory_region(u64 base, u64 size)
-{
-	unsigned long line_size = ppc64_caches.l1d.size;
-	u64 end = base + size;
-	u64 addr;
-
-	base = round_down(base, line_size);
-	end = round_up(end, line_size);
-
-	for (addr = base; addr < end; addr += line_size)
-		asm volatile("dcbf 0,%0" : "=r" (addr) :: "memory");
-}
-
 static int check_memblock_online(struct memory_block *mem, void *arg)
 {
 	if (mem->state != MEM_ONLINE)
@@ -132,10 +119,6 @@ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
 	walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
 			  change_memblock_state);
 
-	/* RCU grace period? */
-	flush_memory_region((u64)__va(start_pfn << PAGE_SHIFT),
-			    nr_pages << PAGE_SHIFT);
-
 	lock_device_hotplug();
 	remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
 	unlock_device_hotplug();
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 69a4f9e8bd55..525e966dce34 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -34,6 +34,19 @@
 #define npu_to_phb(x) container_of(x, struct pnv_phb, npu)
 
 /*
+ * spinlock to protect initialisation of an npu_context for a particular
+ * mm_struct.
+ */
+static DEFINE_SPINLOCK(npu_context_lock);
+
+/*
+ * When an address shootdown range exceeds this threshold we invalidate the
+ * entire TLB on the GPU for the given PID rather than each specific address in
+ * the range.
+ */
+#define ATSD_THRESHOLD (2*1024*1024)
+
+/*
  * Other types of TCE cache invalidation are not functional in the
  * hardware.
  */
@@ -401,7 +414,7 @@ struct npu_context {
 	bool nmmu_flush;
 
 	/* Callback to stop translation requests on a given GPU */
-	struct npu_context *(*release_cb)(struct npu_context *, void *);
+	void (*release_cb)(struct npu_context *context, void *priv);
 
 	/*
 	 * Private pointer passed to the above callback for usage by
@@ -671,11 +684,19 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
 	struct npu_context *npu_context = mn_to_npu_context(mn);
 	unsigned long address;
 
-	for (address = start; address < end; address += PAGE_SIZE)
-		mmio_invalidate(npu_context, 1, address, false);
+	if (end - start > ATSD_THRESHOLD) {
+		/*
+		 * Just invalidate the entire PID if the address range is too
+		 * large.
+		 */
+		mmio_invalidate(npu_context, 0, 0, true);
+	} else {
+		for (address = start; address < end; address += PAGE_SIZE)
+			mmio_invalidate(npu_context, 1, address, false);
 
-	/* Do the flush only on the final addess == end */
-	mmio_invalidate(npu_context, 1, address, true);
+		/* Do the flush only on the final addess == end */
+		mmio_invalidate(npu_context, 1, address, true);
+	}
 }
 
 static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -696,11 +717,12 @@ static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
  * Returns an error if there no contexts are currently available or a
  * npu_context which should be passed to pnv_npu2_handle_fault().
  *
- * mmap_sem must be held in write mode.
+ * mmap_sem must be held in write mode and must not be called from interrupt
+ * context.
  */
 struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 			unsigned long flags,
-			struct npu_context *(*cb)(struct npu_context *, void *),
+			void (*cb)(struct npu_context *, void *),
 			void *priv)
 {
 	int rc;
@@ -743,7 +765,9 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 	/*
 	 * Setup the NPU context table for a particular GPU. These need to be
 	 * per-GPU as we need the tables to filter ATSDs when there are no
-	 * active contexts on a particular GPU.
+	 * active contexts on a particular GPU. It is safe for these to be
+	 * called concurrently with destroy as the OPAL call takes appropriate
+	 * locks and refcounts on init/destroy.
 	 */
 	rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags,
 				PCI_DEVID(gpdev->bus->number, gpdev->devfn));
@@ -754,8 +778,29 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 	 * We store the npu pci device so we can more easily get at the
 	 * associated npus.
 	 */
+	spin_lock(&npu_context_lock);
 	npu_context = mm->context.npu_context;
+	if (npu_context) {
+		if (npu_context->release_cb != cb ||
+			npu_context->priv != priv) {
+			spin_unlock(&npu_context_lock);
+			opal_npu_destroy_context(nphb->opal_id, mm->context.id,
+						PCI_DEVID(gpdev->bus->number,
+							gpdev->devfn));
+			return ERR_PTR(-EINVAL);
+		}
+
+		WARN_ON(!kref_get_unless_zero(&npu_context->kref));
+	}
+	spin_unlock(&npu_context_lock);
+
 	if (!npu_context) {
+		/*
+		 * We can set up these fields without holding the
+		 * npu_context_lock as the npu_context hasn't been returned to
+		 * the caller meaning it can't be destroyed. Parallel allocation
+		 * is protected against by mmap_sem.
+		 */
 		rc = -ENOMEM;
 		npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
 		if (npu_context) {
@@ -774,8 +819,6 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 		}
 
 		mm->context.npu_context = npu_context;
-	} else {
-		WARN_ON(!kref_get_unless_zero(&npu_context->kref));
 	}
 
 	npu_context->release_cb = cb;
@@ -814,15 +857,16 @@ static void pnv_npu2_release_context(struct kref *kref)
 	mm_context_remove_copro(npu_context->mm);
 
 	npu_context->mm->context.npu_context = NULL;
-	mmu_notifier_unregister(&npu_context->mn,
-				npu_context->mm);
-
-	kfree(npu_context);
 }
 
+/*
+ * Destroy a context on the given GPU. May free the npu_context if it is no
+ * longer active on any GPUs. Must not be called from interrupt context.
+ */
 void pnv_npu2_destroy_context(struct npu_context *npu_context,
 			struct pci_dev *gpdev)
 {
+	int removed;
 	struct pnv_phb *nphb;
 	struct npu *npu;
 	struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
@@ -844,7 +888,21 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
 	WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
 	opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
 			PCI_DEVID(gpdev->bus->number, gpdev->devfn));
-	kref_put(&npu_context->kref, pnv_npu2_release_context);
+	spin_lock(&npu_context_lock);
+	removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
+	spin_unlock(&npu_context_lock);
+
+	/*
+	 * We need to do this outside of pnv_npu2_release_context so that it is
+	 * outside the spinlock as mmu_notifier_destroy uses SRCU.
+	 */
+	if (removed) {
+		mmu_notifier_unregister(&npu_context->mn,
+					npu_context->mm);
+
+		kfree(npu_context);
+	}
+
 }
 EXPORT_SYMBOL(pnv_npu2_destroy_context);
 
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
index f8868864f373..aa2a5139462e 100644
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -48,10 +48,12 @@ unsigned long __init opal_get_boot_time(void)
 
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 		rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
-		if (rc == OPAL_BUSY_EVENT)
+		if (rc == OPAL_BUSY_EVENT) {
+			mdelay(OPAL_BUSY_DELAY_MS);
 			opal_poll_events(NULL);
-		else if (rc == OPAL_BUSY)
-			mdelay(10);
+		} else if (rc == OPAL_BUSY) {
+			mdelay(OPAL_BUSY_DELAY_MS);
+		}
 	}
 	if (rc != OPAL_SUCCESS)
 		return 0;
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 0591874856d3..54edaec1e608 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -679,6 +679,16 @@ void gpstate_timer_handler(struct timer_list *t)
 
 	if (!spin_trylock(&gpstates->gpstate_lock))
 		return;
+	/*
+	 * If the timer has migrated to the different cpu then bring
+	 * it back to one of the policy->cpus
+	 */
+	if (!cpumask_test_cpu(raw_smp_processor_id(), policy->cpus)) {
+		gpstates->timer.expires = jiffies + msecs_to_jiffies(1);
+		add_timer_on(&gpstates->timer, cpumask_first(policy->cpus));
+		spin_unlock(&gpstates->gpstate_lock);
+		return;
+	}
 
 	/*
 	 * If PMCR was last updated was using fast_swtich then
@@ -718,10 +728,8 @@ void gpstate_timer_handler(struct timer_list *t)
 	if (gpstate_idx != gpstates->last_lpstate_idx)
 		queue_gpstate_timer(gpstates);
 
+	set_pstate(&freq_data);
 	spin_unlock(&gpstates->gpstate_lock);
-
-	/* Timer may get migrated to a different cpu on cpu hot unplug */
-	smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
 }
 
 /*
diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c
index 304e891e35fc..60f2250fd96b 100644
--- a/drivers/rtc/rtc-opal.c
+++ b/drivers/rtc/rtc-opal.c
@@ -57,7 +57,7 @@ static void tm_to_opal(struct rtc_time *tm, u32 *y_m_d, u64 *h_m_s_ms)
 
 static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
 {
-	long rc = OPAL_BUSY;
+	s64 rc = OPAL_BUSY;
 	int retries = 10;
 	u32 y_m_d;
 	u64 h_m_s_ms;
@@ -66,13 +66,17 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
 
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 		rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
-		if (rc == OPAL_BUSY_EVENT)
+		if (rc == OPAL_BUSY_EVENT) {
+			msleep(OPAL_BUSY_DELAY_MS);
 			opal_poll_events(NULL);
-		else if (retries-- && (rc == OPAL_HARDWARE
-				       || rc == OPAL_INTERNAL_ERROR))
-			msleep(10);
-		else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
-			break;
+		} else if (rc == OPAL_BUSY) {
+			msleep(OPAL_BUSY_DELAY_MS);
+		} else if (rc == OPAL_HARDWARE || rc == OPAL_INTERNAL_ERROR) {
+			if (retries--) {
+				msleep(10); /* Wait 10ms before retry */
+				rc = OPAL_BUSY; /* go around again */
+			}
+		}
 	}
 
 	if (rc != OPAL_SUCCESS)
@@ -87,21 +91,26 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
 
 static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm)
 {
-	long rc = OPAL_BUSY;
+	s64 rc = OPAL_BUSY;
 	int retries = 10;
 	u32 y_m_d = 0;
 	u64 h_m_s_ms = 0;
 
 	tm_to_opal(tm, &y_m_d, &h_m_s_ms);
+
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 		rc = opal_rtc_write(y_m_d, h_m_s_ms);
-		if (rc == OPAL_BUSY_EVENT)
+		if (rc == OPAL_BUSY_EVENT) {
+			msleep(OPAL_BUSY_DELAY_MS);
 			opal_poll_events(NULL);
-		else if (retries-- && (rc == OPAL_HARDWARE
-				       || rc == OPAL_INTERNAL_ERROR))
-			msleep(10);
-		else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
-			break;
+		} else if (rc == OPAL_BUSY) {
+			msleep(OPAL_BUSY_DELAY_MS);
+		} else if (rc == OPAL_HARDWARE || rc == OPAL_INTERNAL_ERROR) {
+			if (retries--) {
+				msleep(10); /* Wait 10ms before retry */
+				rc = OPAL_BUSY; /* go around again */
+			}
+		}
 	}
 
 	return rc == OPAL_SUCCESS ? 0 : -EIO;