Diffstat (limited to 'arch/s390')
-rw-r--r--  arch/s390/Kconfig                   |  11
-rw-r--r--  arch/s390/hypfs/hypfs.h             |  13
-rw-r--r--  arch/s390/hypfs/hypfs_dbfs.c        |   2
-rw-r--r--  arch/s390/hypfs/hypfs_diag.c        |  50
-rw-r--r--  arch/s390/hypfs/hypfs_vm.c          |  65
-rw-r--r--  arch/s390/hypfs/inode.c             |  36
-rw-r--r--  arch/s390/include/asm/airq.h        |  67
-rw-r--r--  arch/s390/include/asm/bitops.h      |  12
-rw-r--r--  arch/s390/include/asm/cio.h         |   1
-rw-r--r--  arch/s390/include/asm/cputime.h     |   3
-rw-r--r--  arch/s390/include/asm/hardirq.h     |   5
-rw-r--r--  arch/s390/include/asm/hugetlb.h     | 135
-rw-r--r--  arch/s390/include/asm/hw_irq.h      |  17
-rw-r--r--  arch/s390/include/asm/irq.h         |  35
-rw-r--r--  arch/s390/include/asm/kvm_host.h    |   8
-rw-r--r--  arch/s390/include/asm/mmu.h         |   2
-rw-r--r--  arch/s390/include/asm/mmu_context.h |  22
-rw-r--r--  arch/s390/include/asm/page.h        |  19
-rw-r--r--  arch/s390/include/asm/pci.h         |  54
-rw-r--r--  arch/s390/include/asm/pci_insn.h    |  12
-rw-r--r--  arch/s390/include/asm/pci_io.h      |  10
-rw-r--r--  arch/s390/include/asm/pgtable.h     | 648
-rw-r--r--  arch/s390/include/asm/processor.h   |   2
-rw-r--r--  arch/s390/include/asm/serial.h      |   6
-rw-r--r--  arch/s390/include/asm/switch_to.h   |   9
-rw-r--r--  arch/s390/include/asm/tlb.h         |  11
-rw-r--r--  arch/s390/include/asm/tlbflush.h    |   6
-rw-r--r--  arch/s390/include/asm/vtime.h       |   7
-rw-r--r--  arch/s390/kernel/entry.S            |  16
-rw-r--r--  arch/s390/kernel/entry64.S          |  11
-rw-r--r--  arch/s390/kernel/irq.c              | 160
-rw-r--r--  arch/s390/kernel/kprobes.c          |  21
-rw-r--r--  arch/s390/kernel/nmi.c              |   5
-rw-r--r--  arch/s390/kernel/process.c          |   1
-rw-r--r--  arch/s390/kernel/ptrace.c           |   8
-rw-r--r--  arch/s390/kernel/suspend.c          |  11
-rw-r--r--  arch/s390/kernel/swsusp_asm64.S     |   7
-rw-r--r--  arch/s390/kernel/time.c             |   1
-rw-r--r--  arch/s390/kernel/vdso.c             |   6
-rw-r--r--  arch/s390/kernel/vtime.c            |   1
-rw-r--r--  arch/s390/kvm/diag.c                |  17
-rw-r--r--  arch/s390/kvm/gaccess.h             |  12
-rw-r--r--  arch/s390/kvm/kvm-s390.c            |  27
-rw-r--r--  arch/s390/kvm/kvm-s390.h            |  10
-rw-r--r--  arch/s390/kvm/priv.c                |  32
-rw-r--r--  arch/s390/lib/delay.c               |   2
-rw-r--r--  arch/s390/lib/uaccess_pt.c          |  16
-rw-r--r--  arch/s390/mm/dump_pagetables.c      |  18
-rw-r--r--  arch/s390/mm/gup.c                  |   6
-rw-r--r--  arch/s390/mm/hugetlbpage.c          | 124
-rw-r--r--  arch/s390/mm/pageattr.c             |   2
-rw-r--r--  arch/s390/mm/pgtable.c              | 266
-rw-r--r--  arch/s390/mm/vmem.c                 |  15
-rw-r--r--  arch/s390/oprofile/init.c           |  35
-rw-r--r--  arch/s390/pci/Makefile              |   2
-rw-r--r--  arch/s390/pci/pci.c                 | 575
-rw-r--r--  arch/s390/pci/pci_clp.c             | 146
-rw-r--r--  arch/s390/pci/pci_dma.c             |  16
-rw-r--r--  arch/s390/pci/pci_event.c           |   2
-rw-r--r--  arch/s390/pci/pci_insn.c            |  18
-rw-r--r--  arch/s390/pci/pci_msi.c             | 142
-rw-r--r--  arch/s390/pci/pci_sysfs.c           |  27
62 files changed, 1514 insertions, 1512 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 21e5c165df28..c696ad7d3439 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -116,6 +116,7 @@ config S390
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+	select HAVE_GENERIC_HARDIRQS
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZ4
@@ -444,6 +445,16 @@ config PCI_NR_FUNCTIONS
 	  This allows you to specify the maximum number of PCI functions which
 	  this kernel will support.
 
+config PCI_NR_MSI
+	int "Maximum number of MSI interrupts (64-32768)"
+	range 64 32768
+	default "256"
+	help
+	  This defines the number of virtual interrupts the kernel will
+	  provide for MSI interrupts. If you configure your system to have
+	  too few drivers will fail to allocate MSI interrupts for all
+	  PCI devices.
+
 source "drivers/pci/Kconfig"
 source "drivers/pci/pcie/Kconfig"
 source "drivers/pci/hotplug/Kconfig"
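
The new PCI_NR_MSI option surfaces in .config as an ordinary integer symbol, e.g. CONFIG_PCI_NR_MSI=256 with the default shown above. As the irq.h hunk further down makes explicit, this value is added to NR_IRQS_BASE to size the kernel's interrupt number space, so a value that is too small leaves later PCI devices unable to allocate MSI vectors.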
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
index f41e0ef7fdf9..79f2ac55253f 100644
--- a/arch/s390/hypfs/hypfs.h
+++ b/arch/s390/hypfs/hypfs.h
@@ -18,26 +18,23 @@
 #define UPDATE_FILE_MODE 0220
 #define DIR_MODE 0550
 
-extern struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent,
-				  const char *name);
+extern struct dentry *hypfs_mkdir(struct dentry *parent, const char *name);
 
-extern struct dentry *hypfs_create_u64(struct super_block *sb,
-				       struct dentry *dir, const char *name,
+extern struct dentry *hypfs_create_u64(struct dentry *dir, const char *name,
 				       __u64 value);
 
-extern struct dentry *hypfs_create_str(struct super_block *sb,
-				       struct dentry *dir, const char *name,
+extern struct dentry *hypfs_create_str(struct dentry *dir, const char *name,
 				       char *string);
 
 /* LPAR Hypervisor */
 extern int hypfs_diag_init(void);
 extern void hypfs_diag_exit(void);
-extern int hypfs_diag_create_files(struct super_block *sb, struct dentry *root);
+extern int hypfs_diag_create_files(struct dentry *root);
 
 /* VM Hypervisor */
 extern int hypfs_vm_init(void);
 extern void hypfs_vm_exit(void);
-extern int hypfs_vm_create_files(struct super_block *sb, struct dentry *root);
+extern int hypfs_vm_create_files(struct dentry *root);
 
 /* debugfs interface */
 struct hypfs_dbfs_file;
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
index bb5dd496614f..17ab8b7b53cc 100644
--- a/arch/s390/hypfs/hypfs_dbfs.c
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -105,7 +105,7 @@ void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df)
 int hypfs_dbfs_init(void)
 {
 	dbfs_dir = debugfs_create_dir("s390_hypfs", NULL);
-	return PTR_RET(dbfs_dir);
+	return PTR_ERR_OR_ZERO(dbfs_dir);
 }
 
 void hypfs_dbfs_exit(void)
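
PTR_RET() is simply the older name of PTR_ERR_OR_ZERO(), so the hunk above is a mechanical rename (note that hypfs_diag.c below still uses PTR_RET() in untouched context lines). For reference, the helper from include/linux/err.h behaves like:

	static inline int PTR_ERR_OR_ZERO(const void *ptr)
	{
		if (IS_ERR(ptr))
			return PTR_ERR(ptr);	/* negative errno encoded in the pointer */
		else
			return 0;		/* valid pointer: success */
	}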
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 138893e5f736..5eeffeefae06 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -623,8 +623,7 @@ void hypfs_diag_exit(void)
  * *******************************************
  */
 
-static int hypfs_create_cpu_files(struct super_block *sb,
-				  struct dentry *cpus_dir, void *cpu_info)
+static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
 {
 	struct dentry *cpu_dir;
 	char buffer[TMP_SIZE];
@@ -632,30 +631,29 @@ static int hypfs_create_cpu_files(struct super_block *sb,
 
 	snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type,
 							    cpu_info));
-	cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer);
-	rc = hypfs_create_u64(sb, cpu_dir, "mgmtime",
+	cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+	rc = hypfs_create_u64(cpu_dir, "mgmtime",
 			      cpu_info__acc_time(diag204_info_type, cpu_info) -
 			      cpu_info__lp_time(diag204_info_type, cpu_info));
 	if (IS_ERR(rc))
 		return PTR_ERR(rc);
-	rc = hypfs_create_u64(sb, cpu_dir, "cputime",
+	rc = hypfs_create_u64(cpu_dir, "cputime",
 			      cpu_info__lp_time(diag204_info_type, cpu_info));
 	if (IS_ERR(rc))
 		return PTR_ERR(rc);
 	if (diag204_info_type == INFO_EXT) {
-		rc = hypfs_create_u64(sb, cpu_dir, "onlinetime",
+		rc = hypfs_create_u64(cpu_dir, "onlinetime",
 				      cpu_info__online_time(diag204_info_type,
 							    cpu_info));
 		if (IS_ERR(rc))
 			return PTR_ERR(rc);
 	}
 	diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer);
-	rc = hypfs_create_str(sb, cpu_dir, "type", buffer);
+	rc = hypfs_create_str(cpu_dir, "type", buffer);
 	return PTR_RET(rc);
 }
 
-static void *hypfs_create_lpar_files(struct super_block *sb,
-				     struct dentry *systems_dir, void *part_hdr)
+static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
 {
 	struct dentry *cpus_dir;
 	struct dentry *lpar_dir;
@@ -665,16 +663,16 @@ static void *hypfs_create_lpar_files(struct super_block *sb,
 
 	part_hdr__part_name(diag204_info_type, part_hdr, lpar_name);
 	lpar_name[LPAR_NAME_LEN] = 0;
-	lpar_dir = hypfs_mkdir(sb, systems_dir, lpar_name);
+	lpar_dir = hypfs_mkdir(systems_dir, lpar_name);
 	if (IS_ERR(lpar_dir))
 		return lpar_dir;
-	cpus_dir = hypfs_mkdir(sb, lpar_dir, "cpus");
+	cpus_dir = hypfs_mkdir(lpar_dir, "cpus");
 	if (IS_ERR(cpus_dir))
 		return cpus_dir;
 	cpu_info = part_hdr + part_hdr__size(diag204_info_type);
 	for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) {
 		int rc;
-		rc = hypfs_create_cpu_files(sb, cpus_dir, cpu_info);
+		rc = hypfs_create_cpu_files(cpus_dir, cpu_info);
 		if (rc)
 			return ERR_PTR(rc);
 		cpu_info += cpu_info__size(diag204_info_type);
@@ -682,8 +680,7 @@ static void *hypfs_create_lpar_files(struct super_block *sb,
 	return cpu_info;
 }
 
-static int hypfs_create_phys_cpu_files(struct super_block *sb,
-				       struct dentry *cpus_dir, void *cpu_info)
+static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
 {
 	struct dentry *cpu_dir;
 	char buffer[TMP_SIZE];
@@ -691,32 +688,31 @@ static int hypfs_create_phys_cpu_files(struct super_block *sb,
 
 	snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type,
 							    cpu_info));
-	cpu_dir = hypfs_mkdir(sb, cpus_dir, buffer);
+	cpu_dir = hypfs_mkdir(cpus_dir, buffer);
 	if (IS_ERR(cpu_dir))
 		return PTR_ERR(cpu_dir);
-	rc = hypfs_create_u64(sb, cpu_dir, "mgmtime",
+	rc = hypfs_create_u64(cpu_dir, "mgmtime",
 			      phys_cpu__mgm_time(diag204_info_type, cpu_info));
 	if (IS_ERR(rc))
 		return PTR_ERR(rc);
 	diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer);
-	rc = hypfs_create_str(sb, cpu_dir, "type", buffer);
+	rc = hypfs_create_str(cpu_dir, "type", buffer);
 	return PTR_RET(rc);
 }
 
-static void *hypfs_create_phys_files(struct super_block *sb,
-				     struct dentry *parent_dir, void *phys_hdr)
+static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
 {
 	int i;
 	void *cpu_info;
 	struct dentry *cpus_dir;
 
-	cpus_dir = hypfs_mkdir(sb, parent_dir, "cpus");
+	cpus_dir = hypfs_mkdir(parent_dir, "cpus");
 	if (IS_ERR(cpus_dir))
 		return cpus_dir;
 	cpu_info = phys_hdr + phys_hdr__size(diag204_info_type);
 	for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) {
 		int rc;
-		rc = hypfs_create_phys_cpu_files(sb, cpus_dir, cpu_info);
+		rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info);
 		if (rc)
 			return ERR_PTR(rc);
 		cpu_info += phys_cpu__size(diag204_info_type);
@@ -724,7 +720,7 @@ static void *hypfs_create_phys_files(struct super_block *sb,
 	return cpu_info;
 }
 
-int hypfs_diag_create_files(struct super_block *sb, struct dentry *root)
+int hypfs_diag_create_files(struct dentry *root)
 {
 	struct dentry *systems_dir, *hyp_dir;
 	void *time_hdr, *part_hdr;
@@ -735,7 +731,7 @@ int hypfs_diag_create_files(struct super_block *sb, struct dentry *root)
 	if (IS_ERR(buffer))
 		return PTR_ERR(buffer);
 
-	systems_dir = hypfs_mkdir(sb, root, "systems");
+	systems_dir = hypfs_mkdir(root, "systems");
 	if (IS_ERR(systems_dir)) {
 		rc = PTR_ERR(systems_dir);
 		goto err_out;
@@ -743,25 +739,25 @@ int hypfs_diag_create_files(struct super_block *sb, struct dentry *root)
 	time_hdr = (struct x_info_blk_hdr *)buffer;
 	part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type);
 	for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) {
-		part_hdr = hypfs_create_lpar_files(sb, systems_dir, part_hdr);
+		part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr);
 		if (IS_ERR(part_hdr)) {
 			rc = PTR_ERR(part_hdr);
 			goto err_out;
 		}
 	}
 	if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) {
-		ptr = hypfs_create_phys_files(sb, root, part_hdr);
+		ptr = hypfs_create_phys_files(root, part_hdr);
 		if (IS_ERR(ptr)) {
 			rc = PTR_ERR(ptr);
 			goto err_out;
 		}
 	}
-	hyp_dir = hypfs_mkdir(sb, root, "hyp");
+	hyp_dir = hypfs_mkdir(root, "hyp");
 	if (IS_ERR(hyp_dir)) {
 		rc = PTR_ERR(hyp_dir);
 		goto err_out;
 	}
-	ptr = hypfs_create_str(sb, hyp_dir, "type", "LPAR Hypervisor");
+	ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
 	if (IS_ERR(ptr)) {
 		rc = PTR_ERR(ptr);
 		goto err_out;
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index f364dcf77e8e..24908ce149f1 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -107,16 +107,15 @@ static void diag2fc_free(const void *data)
 	vfree(data);
 }
 
-#define ATTRIBUTE(sb, dir, name, member) \
+#define ATTRIBUTE(dir, name, member) \
 do { \
 	void *rc; \
-	rc = hypfs_create_u64(sb, dir, name, member); \
+	rc = hypfs_create_u64(dir, name, member); \
 	if (IS_ERR(rc)) \
 		return PTR_ERR(rc); \
 } while(0)
 
-static int hpyfs_vm_create_guest(struct super_block *sb,
-				 struct dentry *systems_dir,
+static int hpyfs_vm_create_guest(struct dentry *systems_dir,
 				 struct diag2fc_data *data)
 {
 	char guest_name[NAME_LEN + 1] = {};
@@ -130,46 +129,46 @@ static int hpyfs_vm_create_guest(struct super_block *sb,
 	memcpy(guest_name, data->guest_name, NAME_LEN);
 	EBCASC(guest_name, NAME_LEN);
 	strim(guest_name);
-	guest_dir = hypfs_mkdir(sb, systems_dir, guest_name);
+	guest_dir = hypfs_mkdir(systems_dir, guest_name);
 	if (IS_ERR(guest_dir))
 		return PTR_ERR(guest_dir);
-	ATTRIBUTE(sb, guest_dir, "onlinetime_us", data->el_time);
+	ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time);
 
 	/* logical cpu information */
-	cpus_dir = hypfs_mkdir(sb, guest_dir, "cpus");
+	cpus_dir = hypfs_mkdir(guest_dir, "cpus");
 	if (IS_ERR(cpus_dir))
 		return PTR_ERR(cpus_dir);
-	ATTRIBUTE(sb, cpus_dir, "cputime_us", data->used_cpu);
-	ATTRIBUTE(sb, cpus_dir, "capped", capped_value);
-	ATTRIBUTE(sb, cpus_dir, "dedicated", dedicated_flag);
-	ATTRIBUTE(sb, cpus_dir, "count", data->vcpus);
-	ATTRIBUTE(sb, cpus_dir, "weight_min", data->cpu_min);
-	ATTRIBUTE(sb, cpus_dir, "weight_max", data->cpu_max);
-	ATTRIBUTE(sb, cpus_dir, "weight_cur", data->cpu_shares);
+	ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu);
+	ATTRIBUTE(cpus_dir, "capped", capped_value);
+	ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag);
+	ATTRIBUTE(cpus_dir, "count", data->vcpus);
+	ATTRIBUTE(cpus_dir, "weight_min", data->cpu_min);
+	ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max);
+	ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares);
 
 	/* memory information */
-	mem_dir = hypfs_mkdir(sb, guest_dir, "mem");
+	mem_dir = hypfs_mkdir(guest_dir, "mem");
 	if (IS_ERR(mem_dir))
 		return PTR_ERR(mem_dir);
-	ATTRIBUTE(sb, mem_dir, "min_KiB", data->mem_min_kb);
-	ATTRIBUTE(sb, mem_dir, "max_KiB", data->mem_max_kb);
-	ATTRIBUTE(sb, mem_dir, "used_KiB", data->mem_used_kb);
-	ATTRIBUTE(sb, mem_dir, "share_KiB", data->mem_share_kb);
+	ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb);
+	ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb);
+	ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb);
+	ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb);
 
 	/* samples */
-	samples_dir = hypfs_mkdir(sb, guest_dir, "samples");
+	samples_dir = hypfs_mkdir(guest_dir, "samples");
 	if (IS_ERR(samples_dir))
 		return PTR_ERR(samples_dir);
-	ATTRIBUTE(sb, samples_dir, "cpu_using", data->cpu_use_samp);
-	ATTRIBUTE(sb, samples_dir, "cpu_delay", data->cpu_delay_samp);
-	ATTRIBUTE(sb, samples_dir, "mem_delay", data->page_wait_samp);
-	ATTRIBUTE(sb, samples_dir, "idle", data->idle_samp);
-	ATTRIBUTE(sb, samples_dir, "other", data->other_samp);
-	ATTRIBUTE(sb, samples_dir, "total", data->total_samp);
+	ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp);
+	ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp);
+	ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp);
+	ATTRIBUTE(samples_dir, "idle", data->idle_samp);
+	ATTRIBUTE(samples_dir, "other", data->other_samp);
+	ATTRIBUTE(samples_dir, "total", data->total_samp);
 	return 0;
 }
 
-int hypfs_vm_create_files(struct super_block *sb, struct dentry *root)
+int hypfs_vm_create_files(struct dentry *root)
 {
 	struct dentry *dir, *file;
 	struct diag2fc_data *data;
@@ -181,38 +180,38 @@ int hypfs_vm_create_files(struct super_block *sb, struct dentry *root)
 		return PTR_ERR(data);
 
 	/* Hpervisor Info */
-	dir = hypfs_mkdir(sb, root, "hyp");
+	dir = hypfs_mkdir(root, "hyp");
 	if (IS_ERR(dir)) {
 		rc = PTR_ERR(dir);
 		goto failed;
 	}
-	file = hypfs_create_str(sb, dir, "type", "z/VM Hypervisor");
+	file = hypfs_create_str(dir, "type", "z/VM Hypervisor");
 	if (IS_ERR(file)) {
 		rc = PTR_ERR(file);
 		goto failed;
 	}
 
 	/* physical cpus */
-	dir = hypfs_mkdir(sb, root, "cpus");
+	dir = hypfs_mkdir(root, "cpus");
 	if (IS_ERR(dir)) {
 		rc = PTR_ERR(dir);
 		goto failed;
 	}
-	file = hypfs_create_u64(sb, dir, "count", data->lcpus);
+	file = hypfs_create_u64(dir, "count", data->lcpus);
 	if (IS_ERR(file)) {
 		rc = PTR_ERR(file);
 		goto failed;
 	}
 
 	/* guests */
-	dir = hypfs_mkdir(sb, root, "systems");
+	dir = hypfs_mkdir(root, "systems");
 	if (IS_ERR(dir)) {
 		rc = PTR_ERR(dir);
 		goto failed;
 	}
 
 	for (i = 0; i < count; i++) {
-		rc = hpyfs_vm_create_guest(sb, dir, &(data[i]));
+		rc = hpyfs_vm_create_guest(dir, &(data[i]));
 		if (rc)
 			goto failed;
 	}
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 7a539f4f5e30..ddfe09b45134 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -28,8 +28,7 @@
 #define HYPFS_MAGIC 0x687970	/* ASCII 'hyp' */
 #define TMP_SIZE 64		/* size of temporary buffers */
 
-static struct dentry *hypfs_create_update_file(struct super_block *sb,
-					       struct dentry *dir);
+static struct dentry *hypfs_create_update_file(struct dentry *dir);
 
 struct hypfs_sb_info {
 	kuid_t uid;			/* uid used for files and dirs */
@@ -193,9 +192,9 @@ static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	}
 	hypfs_delete_tree(sb->s_root);
 	if (MACHINE_IS_VM)
-		rc = hypfs_vm_create_files(sb, sb->s_root);
+		rc = hypfs_vm_create_files(sb->s_root);
 	else
-		rc = hypfs_diag_create_files(sb, sb->s_root);
+		rc = hypfs_diag_create_files(sb->s_root);
 	if (rc) {
 		pr_err("Updating the hypfs tree failed\n");
 		hypfs_delete_tree(sb->s_root);
@@ -302,12 +301,12 @@ static int hypfs_fill_super(struct super_block *sb, void *data, int silent)
 	if (!root_dentry)
 		return -ENOMEM;
 	if (MACHINE_IS_VM)
-		rc = hypfs_vm_create_files(sb, root_dentry);
+		rc = hypfs_vm_create_files(root_dentry);
 	else
-		rc = hypfs_diag_create_files(sb, root_dentry);
+		rc = hypfs_diag_create_files(root_dentry);
 	if (rc)
 		return rc;
-	sbi->update_file = hypfs_create_update_file(sb, root_dentry);
+	sbi->update_file = hypfs_create_update_file(root_dentry);
 	if (IS_ERR(sbi->update_file))
 		return PTR_ERR(sbi->update_file);
 	hypfs_update_update(sb);
@@ -334,8 +333,7 @@ static void hypfs_kill_super(struct super_block *sb)
 	kill_litter_super(sb);
 }
 
-static struct dentry *hypfs_create_file(struct super_block *sb,
-					struct dentry *parent, const char *name,
+static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
 					char *data, umode_t mode)
 {
 	struct dentry *dentry;
@@ -347,7 +345,7 @@ static struct dentry *hypfs_create_file(struct super_block *sb,
 		dentry = ERR_PTR(-ENOMEM);
 		goto fail;
 	}
-	inode = hypfs_make_inode(sb, mode);
+	inode = hypfs_make_inode(parent->d_sb, mode);
 	if (!inode) {
 		dput(dentry);
 		dentry = ERR_PTR(-ENOMEM);
@@ -373,24 +371,22 @@ fail:
 	return dentry;
 }
 
-struct dentry *hypfs_mkdir(struct super_block *sb, struct dentry *parent,
-			   const char *name)
+struct dentry *hypfs_mkdir(struct dentry *parent, const char *name)
 {
 	struct dentry *dentry;
 
-	dentry = hypfs_create_file(sb, parent, name, NULL, S_IFDIR | DIR_MODE);
+	dentry = hypfs_create_file(parent, name, NULL, S_IFDIR | DIR_MODE);
 	if (IS_ERR(dentry))
 		return dentry;
 	hypfs_add_dentry(dentry);
 	return dentry;
 }
 
-static struct dentry *hypfs_create_update_file(struct super_block *sb,
-					       struct dentry *dir)
+static struct dentry *hypfs_create_update_file(struct dentry *dir)
 {
 	struct dentry *dentry;
 
-	dentry = hypfs_create_file(sb, dir, "update", NULL,
+	dentry = hypfs_create_file(dir, "update", NULL,
 				   S_IFREG | UPDATE_FILE_MODE);
 	/*
 	 * We do not put the update file on the 'delete' list with
@@ -400,7 +396,7 @@ static struct dentry *hypfs_create_update_file(struct super_block *sb,
 	return dentry;
 }
 
-struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir,
+struct dentry *hypfs_create_u64(struct dentry *dir,
 				const char *name, __u64 value)
 {
 	char *buffer;
@@ -412,7 +408,7 @@ struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir,
 	if (!buffer)
 		return ERR_PTR(-ENOMEM);
 	dentry =
-	    hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE);
+	    hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE);
 	if (IS_ERR(dentry)) {
 		kfree(buffer);
 		return ERR_PTR(-ENOMEM);
@@ -421,7 +417,7 @@ struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir,
 	return dentry;
 }
 
-struct dentry *hypfs_create_str(struct super_block *sb, struct dentry *dir,
+struct dentry *hypfs_create_str(struct dentry *dir,
 				const char *name, char *string)
 {
 	char *buffer;
@@ -432,7 +428,7 @@ struct dentry *hypfs_create_str(struct super_block *sb, struct dentry *dir,
 		return ERR_PTR(-ENOMEM);
 	sprintf(buffer, "%s\n", string);
 	dentry =
-	    hypfs_create_file(sb, dir, name, buffer, S_IFREG | REG_FILE_MODE);
+	    hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE);
 	if (IS_ERR(dentry)) {
 		kfree(buffer);
 		return ERR_PTR(-ENOMEM);
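
The entire inode.c conversion rests on one observation: a dentry already carries a pointer to its superblock, so the explicit sb argument carried no information. Reduced to a minimal sketch (the example_ name is illustrative, not from the patch):

	/* derive the superblock from the dentry instead of passing it around */
	static struct inode *example_make_inode(struct dentry *parent, umode_t mode)
	{
		return hypfs_make_inode(parent->d_sb, mode);
	}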
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
index 4066cee0c2d2..4bbb5957ed1b 100644
--- a/arch/s390/include/asm/airq.h
+++ b/arch/s390/include/asm/airq.h
@@ -9,6 +9,8 @@
 #ifndef _ASM_S390_AIRQ_H
 #define _ASM_S390_AIRQ_H
 
+#include <linux/bit_spinlock.h>
+
 struct airq_struct {
 	struct hlist_node list;		/* Handler queueing. */
 	void (*handler)(struct airq_struct *); /* Thin-interrupt handler */
@@ -23,4 +25,69 @@ struct airq_struct {
 int register_adapter_interrupt(struct airq_struct *airq);
 void unregister_adapter_interrupt(struct airq_struct *airq);
 
+/* Adapter interrupt bit vector */
+struct airq_iv {
+	unsigned long *vector;	/* Adapter interrupt bit vector */
+	unsigned long *avail;	/* Allocation bit mask for the bit vector */
+	unsigned long *bitlock;	/* Lock bit mask for the bit vector */
+	unsigned long *ptr;	/* Pointer associated with each bit */
+	unsigned int *data;	/* 32 bit value associated with each bit */
+	unsigned long bits;	/* Number of bits in the vector */
+	unsigned long end;	/* Number of highest allocated bit + 1 */
+	spinlock_t lock;	/* Lock to protect alloc & free */
+};
+
+#define AIRQ_IV_ALLOC	1	/* Use an allocation bit mask */
+#define AIRQ_IV_BITLOCK	2	/* Allocate the lock bit mask */
+#define AIRQ_IV_PTR	4	/* Allocate the ptr array */
+#define AIRQ_IV_DATA	8	/* Allocate the data array */
+
+struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags);
+void airq_iv_release(struct airq_iv *iv);
+unsigned long airq_iv_alloc_bit(struct airq_iv *iv);
+void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit);
+unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start,
+			   unsigned long end);
+
+static inline unsigned long airq_iv_end(struct airq_iv *iv)
+{
+	return iv->end;
+}
+
+static inline void airq_iv_lock(struct airq_iv *iv, unsigned long bit)
+{
+	const unsigned long be_to_le = BITS_PER_LONG - 1;
+	bit_spin_lock(bit ^ be_to_le, iv->bitlock);
+}
+
+static inline void airq_iv_unlock(struct airq_iv *iv, unsigned long bit)
+{
+	const unsigned long be_to_le = BITS_PER_LONG - 1;
+	bit_spin_unlock(bit ^ be_to_le, iv->bitlock);
+}
+
+static inline void airq_iv_set_data(struct airq_iv *iv, unsigned long bit,
+				    unsigned int data)
+{
+	iv->data[bit] = data;
+}
+
+static inline unsigned int airq_iv_get_data(struct airq_iv *iv,
+					    unsigned long bit)
+{
+	return iv->data[bit];
+}
+
+static inline void airq_iv_set_ptr(struct airq_iv *iv, unsigned long bit,
+				   unsigned long ptr)
+{
+	iv->ptr[bit] = ptr;
+}
+
+static inline unsigned long airq_iv_get_ptr(struct airq_iv *iv,
+					    unsigned long bit)
+{
+	return iv->ptr[bit];
+}
+
 #endif /* _ASM_S390_AIRQ_H */
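
The declarations above only show the interface of the new bit vector. A sketch of how a consumer might drive it, assuming (as the s390 PCI code does) that airq_iv_alloc_bit() and airq_iv_scan() return -1UL on exhaustion; the vector size and stored virq value are illustrative only:

	struct airq_iv *iv;
	unsigned long bit;
	unsigned int virq = 42;	/* hypothetical virtual irq number */

	/* one bit per interrupt source, with an allocation mask and a
	   32-bit cookie per bit */
	iv = airq_iv_create(256, AIRQ_IV_ALLOC | AIRQ_IV_DATA);
	bit = airq_iv_alloc_bit(iv);
	if (bit != -1UL)
		airq_iv_set_data(iv, bit, virq);

	/* in the thin-interrupt handler: walk all pending bits */
	for (bit = 0;;) {
		bit = airq_iv_scan(iv, bit, airq_iv_end(iv));
		if (bit == -1UL)
			break;
		generic_handle_irq(airq_iv_get_data(iv, bit));
	}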
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 7d4676758733..10135a38673c 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -216,7 +216,7 @@ static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr)
 	addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
 	asm volatile(
 		"	oc	%O0(1,%R0),%1"
-		: "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" );
+		: "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc");
 }
 
 static inline void
@@ -244,7 +244,7 @@ __clear_bit(unsigned long nr, volatile unsigned long *ptr)
 	addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
 	asm volatile(
 		"	nc	%O0(1,%R0),%1"
-		: "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc" );
+		: "+Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc");
 }
 
 static inline void
@@ -271,7 +271,7 @@ static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr)
 	addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
 	asm volatile(
 		"	xc	%O0(1,%R0),%1"
-		: "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" );
+		: "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc");
 }
 
 static inline void
@@ -301,7 +301,7 @@ test_and_set_bit_simple(unsigned long nr, volatile unsigned long *ptr)
 	ch = *(unsigned char *) addr;
 	asm volatile(
 		"	oc	%O0(1,%R0),%1"
-		: "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7])
+		: "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7])
 		: "cc", "memory");
 	return (ch >> (nr & 7)) & 1;
 }
@@ -320,7 +320,7 @@ test_and_clear_bit_simple(unsigned long nr, volatile unsigned long *ptr)
 	ch = *(unsigned char *) addr;
 	asm volatile(
 		"	nc	%O0(1,%R0),%1"
-		: "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7])
+		: "+Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7])
 		: "cc", "memory");
 	return (ch >> (nr & 7)) & 1;
 }
@@ -339,7 +339,7 @@ test_and_change_bit_simple(unsigned long nr, volatile unsigned long *ptr)
 	ch = *(unsigned char *) addr;
 	asm volatile(
 		"	xc	%O0(1,%R0),%1"
-		: "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7])
+		: "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7])
 		: "cc", "memory");
 	return (ch >> (nr & 7)) & 1;
 }
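
The constraint change repeated in all six hunks is subtle but real: OC, NC and XC are read-modify-write storage instructions, so the addressed byte is an input as well as an output. "=Q" declares a write-only memory operand and lets the compiler assume the old contents are dead; "+Q" declares it read-write. A generic illustration of the corrected idiom (my own example, not taken from the patch):

	static inline void set_low_bit(unsigned char *byte)
	{
		asm volatile(
			"	oi	%0,1"	/* *byte |= 1 */
			: "+Q" (*byte)		/* read and written */
			: : "cc");
	}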
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index ffb898961c8d..d42625053c37 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -296,6 +296,7 @@ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1,
 	return 0;
 }
 
+void channel_subsystem_reinit(void);
 extern void css_schedule_reprobe(void);
 
 extern void reipl_ccw_dev(struct ccw_dev_id *id);
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index d2ff41370c0c..f65bd3634519 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -13,9 +13,6 @@
 #include <asm/div64.h>
 
 
-#define __ARCH_HAS_VTIME_ACCOUNT
-#define __ARCH_HAS_VTIME_TASK_SWITCH
-
 /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
 
 typedef unsigned long long __nocast cputime_t;
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
index 0c82ba86e997..a908d2941c5d 100644
--- a/arch/s390/include/asm/hardirq.h
+++ b/arch/s390/include/asm/hardirq.h
@@ -20,4 +20,9 @@
 
 #define HARDIRQ_BITS	8
 
+static inline void ack_bad_irq(unsigned int irq)
+{
+	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+}
+
 #endif /* __ASM_HARDIRQ_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index bd90359d6d22..11eae5f55b70 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -17,6 +17,9 @@
 
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t pte);
+pte_t huge_ptep_get(pte_t *ptep);
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+			      unsigned long addr, pte_t *ptep);
 
 /*
  * If the arch doesn't supply something else, assume that hugepage
@@ -38,147 +41,75 @@ static inline int prepare_hugepage_range(struct file *file,
 int arch_prepare_hugepage(struct page *page);
 void arch_release_hugepage(struct page *page);
 
-static inline pte_t huge_pte_wrprotect(pte_t pte)
+static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+				  pte_t *ptep)
 {
-	pte_val(pte) |= _PAGE_RO;
-	return pte;
+	pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY;
 }
 
-static inline int huge_pte_none(pte_t pte)
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long address, pte_t *ptep)
 {
-	return (pte_val(pte) & _SEGMENT_ENTRY_INV) &&
-	       !(pte_val(pte) & _SEGMENT_ENTRY_RO);
+	huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
 }
 
-static inline pte_t huge_ptep_get(pte_t *ptep)
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
 {
-	pte_t pte = *ptep;
-	unsigned long mask;
-
-	if (!MACHINE_HAS_HPAGE) {
-		ptep = (pte_t *) (pte_val(pte) & _SEGMENT_ENTRY_ORIGIN);
-		if (ptep) {
-			mask = pte_val(pte) &
-				(_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
-			pte = pte_mkhuge(*ptep);
-			pte_val(pte) |= mask;
-		}
+	int changed = !pte_same(huge_ptep_get(ptep), pte);
+	if (changed) {
+		huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+		set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
 	}
-	return pte;
+	return changed;
 }
 
-static inline void __pmd_csp(pmd_t *pmdp)
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
 {
-	register unsigned long reg2 asm("2") = pmd_val(*pmdp);
-	register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
-					       _SEGMENT_ENTRY_INV;
-	register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
-
-	asm volatile(
-		"	csp %1,%3"
-		: "=m" (*pmdp)
-		: "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
+	pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
+	set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
 }
 
-static inline void huge_ptep_invalidate(struct mm_struct *mm,
-					unsigned long address, pte_t *ptep)
-{
-	pmd_t *pmdp = (pmd_t *) ptep;
-
-	if (MACHINE_HAS_IDTE)
-		__pmd_idte(address, pmdp);
-	else
-		__pmd_csp(pmdp);
-	pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY;
-}
-
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
-					    unsigned long addr, pte_t *ptep)
-{
-	pte_t pte = huge_ptep_get(ptep);
-
-	huge_ptep_invalidate(mm, addr, ptep);
-	return pte;
-}
-
-#define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \
-({									    \
-	int __changed = !pte_same(huge_ptep_get(__ptep), __entry);	    \
-	if (__changed) {						    \
-		huge_ptep_invalidate((__vma)->vm_mm, __addr, __ptep);	    \
-		set_huge_pte_at((__vma)->vm_mm, __addr, __ptep, __entry);   \
-	}								    \
-	__changed;							    \
-})
-
-#define huge_ptep_set_wrprotect(__mm, __addr, __ptep)			\
-({									\
-	pte_t __pte = huge_ptep_get(__ptep);				\
-	if (huge_pte_write(__pte)) {					\
-		huge_ptep_invalidate(__mm, __addr, __ptep);		\
-		set_huge_pte_at(__mm, __addr, __ptep,			\
-				huge_pte_wrprotect(__pte));		\
-	}								\
-})
-
-static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
-					 unsigned long address, pte_t *ptep)
+static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
 {
-	huge_ptep_invalidate(vma->vm_mm, address, ptep);
+	return mk_pte(page, pgprot);
 }
 
-static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
+static inline int huge_pte_none(pte_t pte)
 {
-	pte_t pte;
-	pmd_t pmd;
-
-	pmd = mk_pmd_phys(page_to_phys(page), pgprot);
-	pte_val(pte) = pmd_val(pmd);
-	return pte;
+	return pte_none(pte);
 }
 
 static inline int huge_pte_write(pte_t pte)
 {
-	pmd_t pmd;
-
-	pmd_val(pmd) = pte_val(pte);
-	return pmd_write(pmd);
+	return pte_write(pte);
 }
 
 static inline int huge_pte_dirty(pte_t pte)
 {
-	/* No dirty bit in the segment table entry. */
-	return 0;
+	return pte_dirty(pte);
 }
 
 static inline pte_t huge_pte_mkwrite(pte_t pte)
 {
-	pmd_t pmd;
-
-	pmd_val(pmd) = pte_val(pte);
-	pte_val(pte) = pmd_val(pmd_mkwrite(pmd));
-	return pte;
+	return pte_mkwrite(pte);
 }
 
 static inline pte_t huge_pte_mkdirty(pte_t pte)
 {
-	/* No dirty bit in the segment table entry. */
-	return pte;
+	return pte_mkdirty(pte);
 }
 
-static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
+static inline pte_t huge_pte_wrprotect(pte_t pte)
 {
-	pmd_t pmd;
-
-	pmd_val(pmd) = pte_val(pte);
-	pte_val(pte) = pmd_val(pmd_modify(pmd, newprot));
-	return pte;
+	return pte_wrprotect(pte);
 }
 
-static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
-				  pte_t *ptep)
+static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
 {
-	pmd_clear((pmd_t *) ptep);
+	return pte_modify(pte, newprot);
 }
 
 #endif /* _ASM_S390_HUGETLB_H */
diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h
index 7e3d2586c1ff..ee96a8b697f9 100644
--- a/arch/s390/include/asm/hw_irq.h
+++ b/arch/s390/include/asm/hw_irq.h
@@ -4,19 +4,8 @@
 #include <linux/msi.h>
 #include <linux/pci.h>
 
-static inline struct msi_desc *irq_get_msi_desc(unsigned int irq)
-{
-	return __irq_get_msi_desc(irq);
-}
-
-/* Must be called with msi map lock held */
-static inline int irq_set_msi_desc(unsigned int irq, struct msi_desc *msi)
-{
-	if (!msi)
-		return -EINVAL;
-
-	msi->irq = irq;
-	return 0;
-}
+void __init init_airq_interrupts(void);
+void __init init_cio_interrupts(void);
+void __init init_ext_interrupts(void);
 
 #endif
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index 87c17bfb2968..1eaa3625803c 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -1,17 +1,28 @@
 #ifndef _ASM_IRQ_H
 #define _ASM_IRQ_H
 
+#define EXT_INTERRUPT	1
+#define IO_INTERRUPT	2
+#define THIN_INTERRUPT	3
+
+#define NR_IRQS_BASE	4
+
+#ifdef CONFIG_PCI_NR_MSI
+# define NR_IRQS	(NR_IRQS_BASE + CONFIG_PCI_NR_MSI)
+#else
+# define NR_IRQS	NR_IRQS_BASE
+#endif
+
+/* This number is used when no interrupt has been assigned */
+#define NO_IRQ		0
+
+#ifndef __ASSEMBLY__
+
 #include <linux/hardirq.h>
 #include <linux/percpu.h>
 #include <linux/cache.h>
 #include <linux/types.h>
 
-enum interruption_main_class {
-	EXTERNAL_INTERRUPT,
-	IO_INTERRUPT,
-	NR_IRQS
-};
-
 enum interruption_class {
 	IRQEXT_CLK,
 	IRQEXT_EXC,
@@ -72,14 +83,8 @@ void service_subclass_irq_unregister(void);
 void measurement_alert_subclass_register(void);
 void measurement_alert_subclass_unregister(void);
 
-#ifdef CONFIG_LOCKDEP
-# define disable_irq_nosync_lockdep(irq)	disable_irq_nosync(irq)
-# define disable_irq_nosync_lockdep_irqsave(irq, flags) \
-						disable_irq_nosync(irq)
-# define disable_irq_lockdep(irq)		disable_irq(irq)
-# define enable_irq_lockdep(irq)		enable_irq(irq)
-# define enable_irq_lockdep_irqrestore(irq, flags) \
-						enable_irq(irq)
-#endif
+#define irq_canonicalize(irq)  (irq)
+
+#endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_IRQ_H */
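
Worked through with the Kconfig default: interrupt number 0 is NO_IRQ, numbers 1-3 name the three interrupt classes (EXT_INTERRUPT, IO_INTERRUPT, THIN_INTERRUPT), and the MSI range sits on top, giving

	NR_IRQS = NR_IRQS_BASE + CONFIG_PCI_NR_MSI = 4 + 256 = 260

interrupt descriptors in total.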
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3238d4004e84..e87ecaa2c569 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -274,6 +274,14 @@ struct kvm_arch{
 	int css_support;
 };
 
+#define KVM_HVA_ERR_BAD		(-1UL)
+#define KVM_HVA_ERR_RO_BAD	(-2UL)
+
+static inline bool kvm_is_error_hva(unsigned long addr)
+{
+	return IS_ERR_VALUE(addr);
+}
+
 extern int sie64a(struct kvm_s390_sie_block *, u64 *);
 extern char sie_exit;
 #endif
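
Both markers deliberately sit in the region that IS_ERR_VALUE() treats as an error (roughly the last page of the unsigned long range), so kvm_is_error_hva() needs no extra comparisons; a quick sanity sketch:

	kvm_is_error_hva(KVM_HVA_ERR_BAD);	/* true: -1UL is in the error range */
	kvm_is_error_hva(KVM_HVA_ERR_RO_BAD);	/* true: -2UL likewise */
	kvm_is_error_hva(0x3ffff0000UL);	/* false for an ordinary hva */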
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 6340178748bf..ff132ac64ddd 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -12,8 +12,6 @@ typedef struct {
 	unsigned long asce_bits;
 	unsigned long asce_limit;
 	unsigned long vdso_base;
-	/* Cloned contexts will be created with extended page tables. */
-	unsigned int alloc_pgste:1;
 	/* The mmu context has extended page tables. */
 	unsigned int has_pgste:1;
 } mm_context_t;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 084e7755ed9b..9f973d8de90e 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -21,24 +21,7 @@ static inline int init_new_context(struct task_struct *tsk,
 #ifdef CONFIG_64BIT
 	mm->context.asce_bits |= _ASCE_TYPE_REGION3;
 #endif
-	if (current->mm && current->mm->context.alloc_pgste) {
-		/*
-		 * alloc_pgste indicates, that any NEW context will be created
-		 * with extended page tables. The old context is unchanged. The
-		 * page table allocation and the page table operations will
-		 * look at has_pgste to distinguish normal and extended page
-		 * tables. The only way to create extended page tables is to
-		 * set alloc_pgste and then create a new context (e.g. dup_mm).
-		 * The page table allocation is called after init_new_context
-		 * and if has_pgste is set, it will create extended page
-		 * tables.
-		 */
-		mm->context.has_pgste = 1;
-		mm->context.alloc_pgste = 1;
-	} else {
-		mm->context.has_pgste = 0;
-		mm->context.alloc_pgste = 0;
-	}
+	mm->context.has_pgste = 0;
 	mm->context.asce_limit = STACK_TOP_MAX;
 	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
 	return 0;
@@ -77,8 +60,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	WARN_ON(atomic_read(&prev->context.attach_count) < 0);
 	atomic_inc(&next->context.attach_count);
 	/* Check for TLBs not flushed yet */
-	if (next->context.flush_mm)
-		__tlb_flush_mm(next);
+	__tlb_flush_mm_lazy(next);
 }
 
 #define enter_lazy_tlb(mm,tsk)	do { } while (0)
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 5d64fb7619cc..1e51f2915b2e 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -32,16 +32,6 @@
 
 void storage_key_init_range(unsigned long start, unsigned long end);
 
-static inline unsigned long pfmf(unsigned long function, unsigned long address)
-{
-	asm volatile(
-		"	.insn	rre,0xb9af0000,%[function],%[address]"
-		: [address] "+a" (address)
-		: [function] "d" (function)
-		: "memory");
-	return address;
-}
-
 static inline void clear_page(void *page)
 {
 	register unsigned long reg1 asm ("1") = 0;
@@ -150,15 +140,6 @@ static inline int page_reset_referenced(unsigned long addr)
 #define _PAGE_FP_BIT		0x08	/* HW fetch protection bit	*/
 #define _PAGE_ACC_BITS		0xf0	/* HW access control bits	*/
 
-/*
- * Test and clear referenced bit in storage key.
- */
-#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
-static inline int page_test_and_clear_young(unsigned long pfn)
-{
-	return page_reset_referenced(pfn << PAGE_SHIFT);
-}
-
 struct page;
 void arch_free_page(struct page *page, int order);
 void arch_alloc_page(struct page *page, int order);
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 262b91bb8811..1cc185da9d38 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -6,6 +6,7 @@
 /* must be set before including pci_clp.h */
 #define PCI_BAR_COUNT	6
 
+#include <linux/pci.h>
 #include <asm-generic/pci.h>
 #include <asm-generic/pci-dma-compat.h>
 #include <asm/pci_clp.h>
@@ -49,14 +50,9 @@ struct zpci_fmb {
 	atomic64_t unmapped_pages;
 } __packed __aligned(16);
 
-struct msi_map {
-	unsigned long irq;
-	struct msi_desc *msi;
-	struct hlist_node msi_chain;
-};
-
-#define ZPCI_NR_MSI_VECS	64
-#define ZPCI_MSI_MASK		(ZPCI_NR_MSI_VECS - 1)
+#define ZPCI_MSI_VEC_BITS	11
+#define ZPCI_MSI_VEC_MAX	(1 << ZPCI_MSI_VEC_BITS)
+#define ZPCI_MSI_VEC_MASK	(ZPCI_MSI_VEC_MAX - 1)
 
 enum zpci_state {
 	ZPCI_FN_STATE_RESERVED,
@@ -87,8 +83,7 @@ struct zpci_dev {
 
 	/* IRQ stuff */
 	u64		msi_addr;	/* MSI address */
-	struct zdev_irq_map *irq_map;
-	struct msi_map *msi_map[ZPCI_NR_MSI_VECS];
+	struct airq_iv *aibv;		/* adapter interrupt bit vector */
 	unsigned int	aisb;		/* number of the summary bit */
 
 	/* DMA stuff */
@@ -118,11 +113,6 @@ struct zpci_dev {
 	struct dentry	*debugfs_perf;
 };
 
-struct pci_hp_callback_ops {
-	int (*create_slot) (struct zpci_dev *zdev);
-	void (*remove_slot) (struct zpci_dev *zdev);
-};
-
 static inline bool zdev_enabled(struct zpci_dev *zdev)
 {
 	return (zdev->fh & (1UL << 31)) ? true : false;
@@ -142,32 +132,38 @@ int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
 int zpci_unregister_ioat(struct zpci_dev *, u8);
 
 /* CLP */
-int clp_find_pci_devices(void);
+int clp_scan_pci_devices(void);
+int clp_rescan_pci_devices(void);
+int clp_rescan_pci_devices_simple(void);
 int clp_add_pci_device(u32, u32, int);
 int clp_enable_fh(struct zpci_dev *, u8);
 int clp_disable_fh(struct zpci_dev *);
 
-/* MSI */
-struct msi_desc *__irq_get_msi_desc(unsigned int);
-int zpci_msi_set_mask_bits(struct msi_desc *, u32, u32);
-int zpci_setup_msi_irq(struct zpci_dev *, struct msi_desc *, unsigned int, int);
-void zpci_teardown_msi_irq(struct zpci_dev *, struct msi_desc *);
-int zpci_msihash_init(void);
-void zpci_msihash_exit(void);
-
 #ifdef CONFIG_PCI
 /* Error handling and recovery */
 void zpci_event_error(void *);
 void zpci_event_availability(void *);
+void zpci_rescan(void);
 #else /* CONFIG_PCI */
 static inline void zpci_event_error(void *e) {}
 static inline void zpci_event_availability(void *e) {}
+static inline void zpci_rescan(void) {}
 #endif /* CONFIG_PCI */
 
+#ifdef CONFIG_HOTPLUG_PCI_S390
+int zpci_init_slot(struct zpci_dev *);
+void zpci_exit_slot(struct zpci_dev *);
+#else /* CONFIG_HOTPLUG_PCI_S390 */
+static inline int zpci_init_slot(struct zpci_dev *zdev)
+{
+	return 0;
+}
+static inline void zpci_exit_slot(struct zpci_dev *zdev) {}
+#endif /* CONFIG_HOTPLUG_PCI_S390 */
+
 /* Helpers */
 struct zpci_dev *get_zdev(struct pci_dev *);
 struct zpci_dev *get_zdev_by_fid(u32);
-bool zpci_fid_present(u32);
 
 /* sysfs */
 int zpci_sysfs_add_device(struct device *);
@@ -177,14 +173,6 @@ void zpci_sysfs_remove_device(struct device *);
 int zpci_dma_init(void);
 void zpci_dma_exit(void);
 
-/* Hotplug */
-extern struct mutex zpci_list_lock;
-extern struct list_head zpci_list;
-extern unsigned int s390_pci_probe;
-
-void zpci_register_hp_ops(struct pci_hp_callback_ops *);
-void zpci_deregister_hp_ops(void);
-
 /* FMB */
 int zpci_fmb_enable_device(struct zpci_dev *);
 int zpci_fmb_disable_device(struct zpci_dev *);
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index e6a2bdd4d705..df6eac9f0cb4 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -79,11 +79,11 @@ struct zpci_fib {
 } __packed;
 
 
-int s390pci_mod_fc(u64 req, struct zpci_fib *fib);
-int s390pci_refresh_trans(u64 fn, u64 addr, u64 range);
-int s390pci_load(u64 *data, u64 req, u64 offset);
-int s390pci_store(u64 data, u64 req, u64 offset);
-int s390pci_store_block(const u64 *data, u64 req, u64 offset);
-void set_irq_ctrl(u16 ctl, char *unused, u8 isc);
+int zpci_mod_fc(u64 req, struct zpci_fib *fib);
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range);
+int zpci_load(u64 *data, u64 req, u64 offset);
+int zpci_store(u64 data, u64 req, u64 offset);
+int zpci_store_block(const u64 *data, u64 req, u64 offset);
+void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc);
 
 #endif
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index 83a9caa6ae53..d194d544d694 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -36,7 +36,7 @@ static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \
 	u64 data;							\
 	int rc;								\
 									\
-	rc = s390pci_load(&data, req, ZPCI_OFFSET(addr));		\
+	rc = zpci_load(&data, req, ZPCI_OFFSET(addr));			\
 	if (rc)								\
 		data = -1ULL;						\
 	return (RETTYPE) data;						\
@@ -50,7 +50,7 @@ static inline void zpci_write_##VALTYPE(VALTYPE val, \
 	u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH);	\
 	u64 data = (VALTYPE) val;					\
 									\
-	s390pci_store(data, req, ZPCI_OFFSET(addr));			\
+	zpci_store(data, req, ZPCI_OFFSET(addr));			\
 }
 
 zpci_read(8, u64)
@@ -83,7 +83,7 @@ static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len
 		val = 0;		/* let FW report error */
 		break;
 	}
-	return s390pci_store(val, req, offset);
+	return zpci_store(val, req, offset);
 }
 
 static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len)
@@ -91,7 +91,7 @@ static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len)
91 u64 data; 91 u64 data;
92 int cc; 92 int cc;
93 93
94 cc = s390pci_load(&data, req, offset); 94 cc = zpci_load(&data, req, offset);
95 if (cc) 95 if (cc)
96 goto out; 96 goto out;
97 97
@@ -115,7 +115,7 @@ out:
115 115
116static inline int zpci_write_block(u64 req, const u64 *data, u64 offset) 116static inline int zpci_write_block(u64 req, const u64 *data, u64 offset)
117{ 117{
118 return s390pci_store_block(data, req, offset); 118 return zpci_store_block(data, req, offset);
119} 119}
120 120
121static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max) 121static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 75fb726de91f..9b60a36c348d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -217,63 +217,57 @@ extern unsigned long MODULES_END;
217 217
218/* Hardware bits in the page table entry */ 218/* Hardware bits in the page table entry */
219#define _PAGE_CO 0x100 /* HW Change-bit override */ 219#define _PAGE_CO 0x100 /* HW Change-bit override */
220#define _PAGE_RO 0x200 /* HW read-only bit */ 220#define _PAGE_PROTECT 0x200 /* HW read-only bit */
221#define _PAGE_INVALID 0x400 /* HW invalid bit */ 221#define _PAGE_INVALID 0x400 /* HW invalid bit */
222#define _PAGE_LARGE 0x800 /* Bit to mark a large pte */
222 223
223/* Software bits in the page table entry */ 224/* Software bits in the page table entry */
224#define _PAGE_SWT 0x001 /* SW pte type bit t */ 225#define _PAGE_PRESENT 0x001 /* SW pte present bit */
225#define _PAGE_SWX 0x002 /* SW pte type bit x */ 226#define _PAGE_TYPE 0x002 /* SW pte type bit */
226#define _PAGE_SWC 0x004 /* SW pte changed bit */ 227#define _PAGE_YOUNG 0x004 /* SW pte young bit */
227#define _PAGE_SWR 0x008 /* SW pte referenced bit */ 228#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */
228#define _PAGE_SWW 0x010 /* SW pte write bit */ 229#define _PAGE_READ 0x010 /* SW pte read bit */
229#define _PAGE_SPECIAL 0x020 /* SW associated with special page */ 230#define _PAGE_WRITE 0x020 /* SW pte write bit */
231#define _PAGE_SPECIAL 0x040 /* SW associated with special page */
230#define __HAVE_ARCH_PTE_SPECIAL 232#define __HAVE_ARCH_PTE_SPECIAL
231 233
232/* Set of bits not changed in pte_modify */ 234/* Set of bits not changed in pte_modify */
233#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \ 235#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \
234 _PAGE_SWC | _PAGE_SWR) 236 _PAGE_DIRTY | _PAGE_YOUNG)
235
236/* Six different types of pages. */
237#define _PAGE_TYPE_EMPTY 0x400
238#define _PAGE_TYPE_NONE 0x401
239#define _PAGE_TYPE_SWAP 0x403
240#define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */
241#define _PAGE_TYPE_RO 0x200
242#define _PAGE_TYPE_RW 0x000
243
244/*
245 * Only four types for huge pages, using the invalid bit and protection bit
246 * of a segment table entry.
247 */
248#define _HPAGE_TYPE_EMPTY 0x020 /* _SEGMENT_ENTRY_INV */
249#define _HPAGE_TYPE_NONE 0x220
250#define _HPAGE_TYPE_RO 0x200 /* _SEGMENT_ENTRY_RO */
251#define _HPAGE_TYPE_RW 0x000
252 237
253/* 238/*
254 * PTE type bits are rather complicated. handle_pte_fault uses pte_present, 239 * handle_pte_fault uses pte_present, pte_none and pte_file to find out the
255 * pte_none and pte_file to find out the pte type WITHOUT holding the page 240 * pte type WITHOUT holding the page table lock. The _PAGE_PRESENT bit
256 * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to 241 * is used to distinguish present from not-present ptes. It is changed only
257 * invalidate a given pte. ipte sets the hw invalid bit and clears all tlbs 242 * with the page table lock held.
258 * for the page. The page table entry is set to _PAGE_TYPE_EMPTY afterwards. 243 *
259 * This change is done while holding the lock, but the intermediate step 244 * The following table gives the different possible bit combinations for
260 * of a previously valid pte with the hw invalid bit set can be observed by 245 * the pte hardware and software bits in the last 12 bits of a pte:
261 * handle_pte_fault. That makes it necessary that all valid pte types with
262 * the hw invalid bit set must be distinguishable from the four pte types
263 * empty, none, swap and file.
264 * 246 *
265 * irxt ipte irxt 247 * 842100000000
266 * _PAGE_TYPE_EMPTY 1000 -> 1000 248 * 000084210000
267 * _PAGE_TYPE_NONE 1001 -> 1001 249 * 000000008421
268 * _PAGE_TYPE_SWAP 1011 -> 1011 250 * .IR...wrdytp
269 * _PAGE_TYPE_FILE 11?1 -> 11?1 251 * empty .10...000000
270 * _PAGE_TYPE_RO 0100 -> 1100 252 * swap .10...xxxx10
271 * _PAGE_TYPE_RW 0000 -> 1000 253 * file .11...xxxxx0
254 * prot-none, clean, old .11...000001
255 * prot-none, clean, young .11...000101
256 * prot-none, dirty, old .10...001001
257 * prot-none, dirty, young .10...001101
258 * read-only, clean, old .11...010001
259 * read-only, clean, young .01...010101
260 * read-only, dirty, old .11...011001
261 * read-only, dirty, young .01...011101
262 * read-write, clean, old .11...110001
263 * read-write, clean, young .01...110101
264 * read-write, dirty, old .10...111001
265 * read-write, dirty, young .00...111101
272 * 266 *
273 * pte_none is true for bits combinations 1000, 1010, 1100, 1110 267 * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001
274 * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001 268 * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400
275 * pte_file is true for bits combinations 1101, 1111 269 * pte_file is true for the bit pattern .11...xxxxx0, (pte & 0x601) == 0x600
276 * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid. 270 * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402
277 */ 271 */
278 272
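
Note: the predicate helpers later in this header follow mechanically from the table above. A minimal userspace sketch (constants copied from the definitions above; this is not kernel code) that checks the documented bit patterns:

    #include <assert.h>

    #define _PAGE_PRESENT 0x001
    #define _PAGE_TYPE    0x002
    #define _PAGE_PROTECT 0x200
    #define _PAGE_INVALID 0x400

    int main(void)
    {
        unsigned long none_pte = _PAGE_INVALID;                  /* .10...000000 */
        unsigned long swap_pte = _PAGE_INVALID | _PAGE_TYPE;     /* .10...xxxx10 */
        unsigned long file_pte = _PAGE_INVALID | _PAGE_PROTECT;  /* .11...xxxxx0 */

        assert((none_pte & 0x603) == 0x400);      /* pte_none() pattern */
        assert((swap_pte & 0x603) == 0x402);      /* pte_swap() pattern */
        assert((file_pte & 0x601) == 0x600);      /* pte_file() pattern */
        assert((none_pte & _PAGE_PRESENT) == 0);  /* never pte_present() */
        return 0;
    }
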
279#ifndef CONFIG_64BIT 273#ifndef CONFIG_64BIT
@@ -286,14 +280,25 @@ extern unsigned long MODULES_END;
286#define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */ 280#define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */
287 281
288/* Bits in the segment table entry */ 282/* Bits in the segment table entry */
283#define _SEGMENT_ENTRY_BITS 0x7fffffffUL /* Valid segment table bits */
289#define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */ 284#define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */
290#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ 285#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
291#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ 286#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
292#define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ 287#define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */
293#define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ 288#define _SEGMENT_ENTRY_PTL 0x0f /* page table length */
289#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT
294 290
295#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) 291#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL)
296#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) 292#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
293
294/*
295 * Segment table entry encoding (I = invalid, R = read-only bit):
296 * ..R...I.....
297 * prot-none ..1...1.....
298 * read-only ..1...0.....
299 * read-write ..0...0.....
300 * empty ..0...1.....
301 */
297 302
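
Note: with _SEGMENT_ENTRY_NONE aliased to _SEGMENT_ENTRY_PROTECT, the four states in the encoding comment above decode from just two bits. A hypothetical decoder, only to make the state space explicit (not part of the header):

    static const char *seg31_state(unsigned long ste)
    {
        int prot = (ste & 0x200) != 0;  /* _SEGMENT_ENTRY_PROTECT */
        int inv  = (ste & 0x020) != 0;  /* _SEGMENT_ENTRY_INVALID */

        if (prot && inv)
            return "prot-none";         /* protect bit marks it as mapped */
        if (inv)
            return "empty";
        return prot ? "read-only" : "read-write";
    }
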
298/* Page status table bits for virtualization */ 303/* Page status table bits for virtualization */
299#define PGSTE_ACC_BITS 0xf0000000UL 304#define PGSTE_ACC_BITS 0xf0000000UL
@@ -303,9 +308,7 @@ extern unsigned long MODULES_END;
303#define PGSTE_HC_BIT 0x00200000UL 308#define PGSTE_HC_BIT 0x00200000UL
304#define PGSTE_GR_BIT 0x00040000UL 309#define PGSTE_GR_BIT 0x00040000UL
305#define PGSTE_GC_BIT 0x00020000UL 310#define PGSTE_GC_BIT 0x00020000UL
306#define PGSTE_UR_BIT 0x00008000UL 311#define PGSTE_IN_BIT 0x00008000UL /* IPTE notify bit */
307#define PGSTE_UC_BIT 0x00004000UL /* user dirty (migration) */
308#define PGSTE_IN_BIT 0x00002000UL /* IPTE notify bit */
309 312
310#else /* CONFIG_64BIT */ 313#else /* CONFIG_64BIT */
311 314
@@ -324,8 +327,8 @@ extern unsigned long MODULES_END;
324 327
325/* Bits in the region table entry */ 328/* Bits in the region table entry */
326#define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */ 329#define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */
327#define _REGION_ENTRY_RO 0x200 /* region protection bit */ 330#define _REGION_ENTRY_PROTECT 0x200 /* region protection bit */
328#define _REGION_ENTRY_INV 0x20 /* invalid region table entry */ 331#define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */
329#define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */ 332#define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */
330#define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */ 333#define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */
331#define _REGION_ENTRY_TYPE_R2 0x08 /* region second table type */ 334#define _REGION_ENTRY_TYPE_R2 0x08 /* region second table type */
@@ -333,29 +336,47 @@ extern unsigned long MODULES_END;
333#define _REGION_ENTRY_LENGTH 0x03 /* region third length */ 336#define _REGION_ENTRY_LENGTH 0x03 /* region third length */
334 337
335#define _REGION1_ENTRY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH) 338#define _REGION1_ENTRY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH)
336#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INV) 339#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID)
337#define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH) 340#define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
338#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INV) 341#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID)
339#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) 342#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
340#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INV) 343#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
341 344
342#define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */ 345#define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */
343#define _REGION3_ENTRY_RO 0x200 /* page protection bit */ 346#define _REGION3_ENTRY_RO 0x200 /* page protection bit */
344#define _REGION3_ENTRY_CO 0x100 /* change-recording override */ 347#define _REGION3_ENTRY_CO 0x100 /* change-recording override */
345 348
346/* Bits in the segment table entry */ 349/* Bits in the segment table entry */
350#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
351#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL
347#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ 352#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
348#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ 353#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */
349#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ 354#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
350#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ 355#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
351 356
352#define _SEGMENT_ENTRY (0) 357#define _SEGMENT_ENTRY (0)
353#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) 358#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
354 359
355#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ 360#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */
356#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ 361#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */
362#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */
363#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */
364#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG
365
366/*
367 * Segment table entry encoding (R = read-only, I = invalid, y = young bit):
368 * ..R...I...y.
369 * prot-none, old ..0...1...1.
370 * prot-none, young ..1...1...1.
371 * read-only, old ..1...1...0.
372 * read-only, young ..1...0...1.
373 * read-write, old ..0...1...0.
374 * read-write, young ..0...0...1.
375 * The segment table origin is used to distinguish empty (origin==0) from
376 * read-write, old segment table entries (origin!=0)
377 */
378
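Note: in this encoding an old entry always keeps _SEGMENT_ENTRY_INVALID set, so the next reference faults and the fault path can re-validate it; prot-none entries reuse the hardware protect bit as their young bit, because the software young bit doubles as the prot-none marker. A compact restatement of pmd_prot_none()/pmd_young() from below (sketch; constants copied from this header):

    #define _SEGMENT_ENTRY_YOUNG   0x002
    #define _SEGMENT_ENTRY_INVALID 0x020
    #define _SEGMENT_ENTRY_PROTECT 0x200
    #define _SEGMENT_ENTRY_NONE    _SEGMENT_ENTRY_YOUNG

    static int seg64_young(unsigned long ste)
    {
        int prot_none = (ste & _SEGMENT_ENTRY_INVALID) &&
                        (ste & _SEGMENT_ENTRY_NONE);

        return prot_none ? (ste & _SEGMENT_ENTRY_PROTECT) != 0
                         : (ste & _SEGMENT_ENTRY_YOUNG) != 0;
    }
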
357#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */ 379#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */
358#define _SEGMENT_ENTRY_SPLIT (1UL << _SEGMENT_ENTRY_SPLIT_BIT)
359 380
360/* Set of bits not changed in pmd_modify */ 381/* Set of bits not changed in pmd_modify */
361#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \ 382#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \
@@ -369,9 +390,7 @@ extern unsigned long MODULES_END;
369#define PGSTE_HC_BIT 0x0020000000000000UL 390#define PGSTE_HC_BIT 0x0020000000000000UL
370#define PGSTE_GR_BIT 0x0004000000000000UL 391#define PGSTE_GR_BIT 0x0004000000000000UL
371#define PGSTE_GC_BIT 0x0002000000000000UL 392#define PGSTE_GC_BIT 0x0002000000000000UL
372#define PGSTE_UR_BIT 0x0000800000000000UL 393#define PGSTE_IN_BIT 0x0000800000000000UL /* IPTE notify bit */
373#define PGSTE_UC_BIT 0x0000400000000000UL /* user dirty (migration) */
374#define PGSTE_IN_BIT 0x0000200000000000UL /* IPTE notify bit */
375 394
376#endif /* CONFIG_64BIT */ 395#endif /* CONFIG_64BIT */
377 396
@@ -386,14 +405,18 @@ extern unsigned long MODULES_END;
386/* 405/*
387 * Page protection definitions. 406 * Page protection definitions.
388 */ 407 */
389#define PAGE_NONE __pgprot(_PAGE_TYPE_NONE) 408#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID)
390#define PAGE_RO __pgprot(_PAGE_TYPE_RO) 409#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_READ | \
391#define PAGE_RW __pgprot(_PAGE_TYPE_RO | _PAGE_SWW) 410 _PAGE_INVALID | _PAGE_PROTECT)
392#define PAGE_RWC __pgprot(_PAGE_TYPE_RW | _PAGE_SWW | _PAGE_SWC) 411#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
393 412 _PAGE_INVALID | _PAGE_PROTECT)
394#define PAGE_KERNEL PAGE_RWC 413
395#define PAGE_SHARED PAGE_KERNEL 414#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
396#define PAGE_COPY PAGE_RO 415 _PAGE_YOUNG | _PAGE_DIRTY)
416#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
417 _PAGE_YOUNG | _PAGE_DIRTY)
418#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
419 _PAGE_PROTECT)
397 420
398/* 421/*
399 * On s390 the page table entry has an invalid bit and a read-only bit. 422 * On s390 the page table entry has an invalid bit and a read-only bit.
@@ -402,35 +425,31 @@ extern unsigned long MODULES_END;
402 */ 425 */
403 /*xwr*/ 426 /*xwr*/
404#define __P000 PAGE_NONE 427#define __P000 PAGE_NONE
405#define __P001 PAGE_RO 428#define __P001 PAGE_READ
406#define __P010 PAGE_RO 429#define __P010 PAGE_READ
407#define __P011 PAGE_RO 430#define __P011 PAGE_READ
408#define __P100 PAGE_RO 431#define __P100 PAGE_READ
409#define __P101 PAGE_RO 432#define __P101 PAGE_READ
410#define __P110 PAGE_RO 433#define __P110 PAGE_READ
411#define __P111 PAGE_RO 434#define __P111 PAGE_READ
412 435
413#define __S000 PAGE_NONE 436#define __S000 PAGE_NONE
414#define __S001 PAGE_RO 437#define __S001 PAGE_READ
415#define __S010 PAGE_RW 438#define __S010 PAGE_WRITE
416#define __S011 PAGE_RW 439#define __S011 PAGE_WRITE
417#define __S100 PAGE_RO 440#define __S100 PAGE_READ
418#define __S101 PAGE_RO 441#define __S101 PAGE_READ
419#define __S110 PAGE_RW 442#define __S110 PAGE_WRITE
420#define __S111 PAGE_RW 443#define __S111 PAGE_WRITE
421 444
422/* 445/*
423 * Segment entry (large page) protection definitions. 446 * Segment entry (large page) protection definitions.
424 */ 447 */
425#define SEGMENT_NONE __pgprot(_HPAGE_TYPE_NONE) 448#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \
426#define SEGMENT_RO __pgprot(_HPAGE_TYPE_RO) 449 _SEGMENT_ENTRY_NONE)
427#define SEGMENT_RW __pgprot(_HPAGE_TYPE_RW) 450#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \
428 451 _SEGMENT_ENTRY_PROTECT)
429static inline int mm_exclusive(struct mm_struct *mm) 452#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID)
430{
431 return likely(mm == current->active_mm &&
432 atomic_read(&mm->context.attach_count) <= 1);
433}
434 453
435static inline int mm_has_pgste(struct mm_struct *mm) 454static inline int mm_has_pgste(struct mm_struct *mm)
436{ 455{
@@ -467,7 +486,7 @@ static inline int pgd_none(pgd_t pgd)
467{ 486{
468 if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) 487 if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
469 return 0; 488 return 0;
470 return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL; 489 return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL;
471} 490}
472 491
473static inline int pgd_bad(pgd_t pgd) 492static inline int pgd_bad(pgd_t pgd)
@@ -478,7 +497,7 @@ static inline int pgd_bad(pgd_t pgd)
478 * invalid for either table entry. 497 * invalid for either table entry.
479 */ 498 */
480 unsigned long mask = 499 unsigned long mask =
481 ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & 500 ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
482 ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; 501 ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
483 return (pgd_val(pgd) & mask) != 0; 502 return (pgd_val(pgd) & mask) != 0;
484} 503}
@@ -494,7 +513,7 @@ static inline int pud_none(pud_t pud)
494{ 513{
495 if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) 514 if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
496 return 0; 515 return 0;
497 return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL; 516 return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL;
498} 517}
499 518
500static inline int pud_large(pud_t pud) 519static inline int pud_large(pud_t pud)
@@ -512,7 +531,7 @@ static inline int pud_bad(pud_t pud)
512 * invalid for either table entry. 531 * invalid for either table entry.
513 */ 532 */
514 unsigned long mask = 533 unsigned long mask =
515 ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & 534 ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
516 ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; 535 ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
517 return (pud_val(pud) & mask) != 0; 536 return (pud_val(pud) & mask) != 0;
518} 537}
@@ -521,30 +540,36 @@ static inline int pud_bad(pud_t pud)
521 540
522static inline int pmd_present(pmd_t pmd) 541static inline int pmd_present(pmd_t pmd)
523{ 542{
524 unsigned long mask = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO; 543 return pmd_val(pmd) != _SEGMENT_ENTRY_INVALID;
525 return (pmd_val(pmd) & mask) == _HPAGE_TYPE_NONE ||
526 !(pmd_val(pmd) & _SEGMENT_ENTRY_INV);
527} 544}
528 545
529static inline int pmd_none(pmd_t pmd) 546static inline int pmd_none(pmd_t pmd)
530{ 547{
531 return (pmd_val(pmd) & _SEGMENT_ENTRY_INV) && 548 return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID;
532 !(pmd_val(pmd) & _SEGMENT_ENTRY_RO);
533} 549}
534 550
535static inline int pmd_large(pmd_t pmd) 551static inline int pmd_large(pmd_t pmd)
536{ 552{
537#ifdef CONFIG_64BIT 553#ifdef CONFIG_64BIT
538 return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE); 554 return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
539#else 555#else
540 return 0; 556 return 0;
541#endif 557#endif
542} 558}
543 559
560static inline int pmd_prot_none(pmd_t pmd)
561{
562 return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) &&
563 (pmd_val(pmd) & _SEGMENT_ENTRY_NONE);
564}
565
544static inline int pmd_bad(pmd_t pmd) 566static inline int pmd_bad(pmd_t pmd)
545{ 567{
546 unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INV; 568#ifdef CONFIG_64BIT
547 return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY; 569 if (pmd_large(pmd))
570 return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
571#endif
572 return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
548} 573}
549 574
550#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH 575#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
@@ -563,31 +588,40 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
563#define __HAVE_ARCH_PMD_WRITE 588#define __HAVE_ARCH_PMD_WRITE
564static inline int pmd_write(pmd_t pmd) 589static inline int pmd_write(pmd_t pmd)
565{ 590{
566 return (pmd_val(pmd) & _SEGMENT_ENTRY_RO) == 0; 591 if (pmd_prot_none(pmd))
592 return 0;
593 return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0;
567} 594}
568 595
569static inline int pmd_young(pmd_t pmd) 596static inline int pmd_young(pmd_t pmd)
570{ 597{
571 return 0; 598 int young = 0;
599#ifdef CONFIG_64BIT
600 if (pmd_prot_none(pmd))
601 young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0;
602 else
603 young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
604#endif
605 return young;
572} 606}
573 607
574static inline int pte_none(pte_t pte) 608static inline int pte_present(pte_t pte)
575{ 609{
576 return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT); 610 /* Bit pattern: (pte & 0x001) == 0x001 */
611 return (pte_val(pte) & _PAGE_PRESENT) != 0;
577} 612}
578 613
579static inline int pte_present(pte_t pte) 614static inline int pte_none(pte_t pte)
580{ 615{
581 unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT | _PAGE_SWX; 616 /* Bit pattern: pte == 0x400 */
582 return (pte_val(pte) & mask) == _PAGE_TYPE_NONE || 617 return pte_val(pte) == _PAGE_INVALID;
583 (!(pte_val(pte) & _PAGE_INVALID) &&
584 !(pte_val(pte) & _PAGE_SWT));
585} 618}
586 619
587static inline int pte_file(pte_t pte) 620static inline int pte_file(pte_t pte)
588{ 621{
589 unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT; 622 /* Bit pattern: (pte & 0x601) == 0x600 */
590 return (pte_val(pte) & mask) == _PAGE_TYPE_FILE; 623 return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | _PAGE_PRESENT))
624 == (_PAGE_INVALID | _PAGE_PROTECT);
591} 625}
592 626
593static inline int pte_special(pte_t pte) 627static inline int pte_special(pte_t pte)
@@ -634,6 +668,15 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
634#endif 668#endif
635} 669}
636 670
671static inline pgste_t pgste_get(pte_t *ptep)
672{
673 unsigned long pgste = 0;
674#ifdef CONFIG_PGSTE
675 pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
676#endif
677 return __pgste(pgste);
678}
679
637static inline void pgste_set(pte_t *ptep, pgste_t pgste) 680static inline void pgste_set(pte_t *ptep, pgste_t pgste)
638{ 681{
639#ifdef CONFIG_PGSTE 682#ifdef CONFIG_PGSTE
@@ -644,33 +687,28 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste)
644static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) 687static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
645{ 688{
646#ifdef CONFIG_PGSTE 689#ifdef CONFIG_PGSTE
647 unsigned long address, bits; 690 unsigned long address, bits, skey;
648 unsigned char skey;
649 691
650 if (pte_val(*ptep) & _PAGE_INVALID) 692 if (pte_val(*ptep) & _PAGE_INVALID)
651 return pgste; 693 return pgste;
652 address = pte_val(*ptep) & PAGE_MASK; 694 address = pte_val(*ptep) & PAGE_MASK;
653 skey = page_get_storage_key(address); 695 skey = (unsigned long) page_get_storage_key(address);
654 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); 696 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
655 /* Clear page changed & referenced bit in the storage key */ 697 if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) {
656 if (bits & _PAGE_CHANGED) 698 /* Transfer dirty + referenced bit to host bits in pgste */
699 pgste_val(pgste) |= bits << 52;
657 page_set_storage_key(address, skey ^ bits, 0); 700 page_set_storage_key(address, skey ^ bits, 0);
658 else if (bits) 701 } else if (!(pgste_val(pgste) & PGSTE_HR_BIT) &&
702 (bits & _PAGE_REFERENCED)) {
703 /* Transfer referenced bit to host bit in pgste */
704 pgste_val(pgste) |= PGSTE_HR_BIT;
659 page_reset_referenced(address); 705 page_reset_referenced(address);
706 }
660 /* Transfer page changed & referenced bit to guest bits in pgste */ 707 /* Transfer page changed & referenced bit to guest bits in pgste */
661 pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ 708 pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
662 /* Get host changed & referenced bits from pgste */
663 bits |= (pgste_val(pgste) & (PGSTE_HR_BIT | PGSTE_HC_BIT)) >> 52;
664 /* Transfer page changed & referenced bit to kvm user bits */
665 pgste_val(pgste) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */
666 /* Clear relevant host bits in pgste. */
667 pgste_val(pgste) &= ~(PGSTE_HR_BIT | PGSTE_HC_BIT);
668 pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
669 /* Copy page access key and fetch protection bit to pgste */ 709 /* Copy page access key and fetch protection bit to pgste */
670 pgste_val(pgste) |= 710 pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
671 (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; 711 pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
672 /* Transfer referenced bit to pte */
673 pte_val(*ptep) |= (bits & _PAGE_REFERENCED) << 1;
674#endif 712#endif
675 return pgste; 713 return pgste;
676 714
@@ -679,24 +717,11 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
679static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) 717static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
680{ 718{
681#ifdef CONFIG_PGSTE 719#ifdef CONFIG_PGSTE
682 int young;
683
684 if (pte_val(*ptep) & _PAGE_INVALID) 720 if (pte_val(*ptep) & _PAGE_INVALID)
685 return pgste; 721 return pgste;
686 /* Get referenced bit from storage key */ 722 /* Get referenced bit from storage key */
687 young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); 723 if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK))
688 if (young) 724 pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT;
689 pgste_val(pgste) |= PGSTE_GR_BIT;
690 /* Get host referenced bit from pgste */
691 if (pgste_val(pgste) & PGSTE_HR_BIT) {
692 pgste_val(pgste) &= ~PGSTE_HR_BIT;
693 young = 1;
694 }
695 /* Transfer referenced bit to kvm user bits and pte */
696 if (young) {
697 pgste_val(pgste) |= PGSTE_UR_BIT;
698 pte_val(*ptep) |= _PAGE_SWR;
699 }
700#endif 725#endif
701 return pgste; 726 return pgste;
702} 727}
@@ -723,13 +748,13 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
723 748
724static inline void pgste_set_pte(pte_t *ptep, pte_t entry) 749static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
725{ 750{
726 if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_SWW)) { 751 if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_WRITE)) {
727 /* 752 /*
728 * Without enhanced suppression-on-protection force 753 * Without enhanced suppression-on-protection force
729 * the dirty bit on for all writable ptes. 754 * the dirty bit on for all writable ptes.
730 */ 755 */
731 pte_val(entry) |= _PAGE_SWC; 756 pte_val(entry) |= _PAGE_DIRTY;
732 pte_val(entry) &= ~_PAGE_RO; 757 pte_val(entry) &= ~_PAGE_PROTECT;
733 } 758 }
734 *ptep = entry; 759 *ptep = entry;
735} 760}
@@ -841,21 +866,17 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
841 */ 866 */
842static inline int pte_write(pte_t pte) 867static inline int pte_write(pte_t pte)
843{ 868{
844 return (pte_val(pte) & _PAGE_SWW) != 0; 869 return (pte_val(pte) & _PAGE_WRITE) != 0;
845} 870}
846 871
847static inline int pte_dirty(pte_t pte) 872static inline int pte_dirty(pte_t pte)
848{ 873{
849 return (pte_val(pte) & _PAGE_SWC) != 0; 874 return (pte_val(pte) & _PAGE_DIRTY) != 0;
850} 875}
851 876
852static inline int pte_young(pte_t pte) 877static inline int pte_young(pte_t pte)
853{ 878{
854#ifdef CONFIG_PGSTE 879 return (pte_val(pte) & _PAGE_YOUNG) != 0;
855 if (pte_val(pte) & _PAGE_SWR)
856 return 1;
857#endif
858 return 0;
859} 880}
860 881
861/* 882/*
@@ -880,12 +901,12 @@ static inline void pud_clear(pud_t *pud)
880 901
881static inline void pmd_clear(pmd_t *pmdp) 902static inline void pmd_clear(pmd_t *pmdp)
882{ 903{
883 pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; 904 pmd_val(*pmdp) = _SEGMENT_ENTRY_INVALID;
884} 905}
885 906
886static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 907static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
887{ 908{
888 pte_val(*ptep) = _PAGE_TYPE_EMPTY; 909 pte_val(*ptep) = _PAGE_INVALID;
889} 910}
890 911
891/* 912/*
@@ -896,55 +917,63 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
896{ 917{
897 pte_val(pte) &= _PAGE_CHG_MASK; 918 pte_val(pte) &= _PAGE_CHG_MASK;
898 pte_val(pte) |= pgprot_val(newprot); 919 pte_val(pte) |= pgprot_val(newprot);
899 if ((pte_val(pte) & _PAGE_SWC) && (pte_val(pte) & _PAGE_SWW)) 920 /*
900 pte_val(pte) &= ~_PAGE_RO; 921 * newprot for PAGE_NONE, PAGE_READ and PAGE_WRITE has the
922 * invalid bit set, clear it again for readable, young pages
923 */
924 if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ))
925 pte_val(pte) &= ~_PAGE_INVALID;
926 /*
927 * newprot for PAGE_READ and PAGE_WRITE has the page protection
928 * bit set, clear it again for writable, dirty pages
929 */
930 if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE))
931 pte_val(pte) &= ~_PAGE_PROTECT;
901 return pte; 932 return pte;
902} 933}
903 934
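Note: the PAGE_READ/PAGE_WRITE templates deliberately carry _PAGE_INVALID and _PAGE_PROTECT so that the first access and the first write of a fresh pte fault and drive the software young/dirty tracking; the two fixups above keep already-young, already-dirty ptes usable across a protection change. A worked userspace example (constants copied from above; not kernel code):

    #include <assert.h>

    #define _PAGE_PRESENT 0x001
    #define _PAGE_YOUNG   0x004
    #define _PAGE_DIRTY   0x008
    #define _PAGE_READ    0x010
    #define _PAGE_WRITE   0x020
    #define _PAGE_PROTECT 0x200
    #define _PAGE_INVALID 0x400

    int main(void)
    {
        /* PAGE_WRITE template: present + read + write, but invalid + protected */
        unsigned long newprot = _PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE |
                                _PAGE_INVALID | _PAGE_PROTECT;
        /* a pte that was already referenced and written */
        unsigned long pte = _PAGE_YOUNG | _PAGE_DIRTY | newprot;

        if ((pte & _PAGE_YOUNG) && (pte & _PAGE_READ))
            pte &= ~_PAGE_INVALID;   /* young + readable: stays accessible */
        if ((pte & _PAGE_DIRTY) && (pte & _PAGE_WRITE))
            pte &= ~_PAGE_PROTECT;   /* dirty + writable: stays writable */

        assert(!(pte & _PAGE_INVALID) && !(pte & _PAGE_PROTECT));
        return 0;
    }
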
904static inline pte_t pte_wrprotect(pte_t pte) 935static inline pte_t pte_wrprotect(pte_t pte)
905{ 936{
906 pte_val(pte) &= ~_PAGE_SWW; 937 pte_val(pte) &= ~_PAGE_WRITE;
907 /* Do not clobber _PAGE_TYPE_NONE pages! */ 938 pte_val(pte) |= _PAGE_PROTECT;
908 if (!(pte_val(pte) & _PAGE_INVALID))
909 pte_val(pte) |= _PAGE_RO;
910 return pte; 939 return pte;
911} 940}
912 941
913static inline pte_t pte_mkwrite(pte_t pte) 942static inline pte_t pte_mkwrite(pte_t pte)
914{ 943{
915 pte_val(pte) |= _PAGE_SWW; 944 pte_val(pte) |= _PAGE_WRITE;
916 if (pte_val(pte) & _PAGE_SWC) 945 if (pte_val(pte) & _PAGE_DIRTY)
917 pte_val(pte) &= ~_PAGE_RO; 946 pte_val(pte) &= ~_PAGE_PROTECT;
918 return pte; 947 return pte;
919} 948}
920 949
921static inline pte_t pte_mkclean(pte_t pte) 950static inline pte_t pte_mkclean(pte_t pte)
922{ 951{
923 pte_val(pte) &= ~_PAGE_SWC; 952 pte_val(pte) &= ~_PAGE_DIRTY;
924 /* Do not clobber _PAGE_TYPE_NONE pages! */ 953 pte_val(pte) |= _PAGE_PROTECT;
925 if (!(pte_val(pte) & _PAGE_INVALID))
926 pte_val(pte) |= _PAGE_RO;
927 return pte; 954 return pte;
928} 955}
929 956
930static inline pte_t pte_mkdirty(pte_t pte) 957static inline pte_t pte_mkdirty(pte_t pte)
931{ 958{
932 pte_val(pte) |= _PAGE_SWC; 959 pte_val(pte) |= _PAGE_DIRTY;
933 if (pte_val(pte) & _PAGE_SWW) 960 if (pte_val(pte) & _PAGE_WRITE)
934 pte_val(pte) &= ~_PAGE_RO; 961 pte_val(pte) &= ~_PAGE_PROTECT;
935 return pte; 962 return pte;
936} 963}
937 964
938static inline pte_t pte_mkold(pte_t pte) 965static inline pte_t pte_mkold(pte_t pte)
939{ 966{
940#ifdef CONFIG_PGSTE 967 pte_val(pte) &= ~_PAGE_YOUNG;
941 pte_val(pte) &= ~_PAGE_SWR; 968 pte_val(pte) |= _PAGE_INVALID;
942#endif
943 return pte; 969 return pte;
944} 970}
945 971
946static inline pte_t pte_mkyoung(pte_t pte) 972static inline pte_t pte_mkyoung(pte_t pte)
947{ 973{
974 pte_val(pte) |= _PAGE_YOUNG;
975 if (pte_val(pte) & _PAGE_READ)
976 pte_val(pte) &= ~_PAGE_INVALID;
948 return pte; 977 return pte;
949} 978}
950 979
@@ -957,7 +986,7 @@ static inline pte_t pte_mkspecial(pte_t pte)
957#ifdef CONFIG_HUGETLB_PAGE 986#ifdef CONFIG_HUGETLB_PAGE
958static inline pte_t pte_mkhuge(pte_t pte) 987static inline pte_t pte_mkhuge(pte_t pte)
959{ 988{
960 pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); 989 pte_val(pte) |= _PAGE_LARGE;
961 return pte; 990 return pte;
962} 991}
963#endif 992#endif
@@ -974,8 +1003,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
974 if (mm_has_pgste(mm)) { 1003 if (mm_has_pgste(mm)) {
975 pgste = pgste_get_lock(ptep); 1004 pgste = pgste_get_lock(ptep);
976 pgste = pgste_update_all(ptep, pgste); 1005 pgste = pgste_update_all(ptep, pgste);
977 dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); 1006 dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT);
978 pgste_val(pgste) &= ~PGSTE_UC_BIT; 1007 pgste_val(pgste) &= ~PGSTE_HC_BIT;
979 pgste_set_unlock(ptep, pgste); 1008 pgste_set_unlock(ptep, pgste);
980 return dirty; 1009 return dirty;
981 } 1010 }
@@ -994,59 +1023,75 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
994 if (mm_has_pgste(mm)) { 1023 if (mm_has_pgste(mm)) {
995 pgste = pgste_get_lock(ptep); 1024 pgste = pgste_get_lock(ptep);
996 pgste = pgste_update_young(ptep, pgste); 1025 pgste = pgste_update_young(ptep, pgste);
997 young = !!(pgste_val(pgste) & PGSTE_UR_BIT); 1026 young = !!(pgste_val(pgste) & PGSTE_HR_BIT);
998 pgste_val(pgste) &= ~PGSTE_UR_BIT; 1027 pgste_val(pgste) &= ~PGSTE_HR_BIT;
999 pgste_set_unlock(ptep, pgste); 1028 pgste_set_unlock(ptep, pgste);
1000 } 1029 }
1001 return young; 1030 return young;
1002} 1031}
1003 1032
1033static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
1034{
1035 if (!(pte_val(*ptep) & _PAGE_INVALID)) {
1036#ifndef CONFIG_64BIT
1037 /* pto must point to the start of the segment table */
1038 pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00);
1039#else
1040 /* ipte in zarch mode can do the math */
1041 pte_t *pto = ptep;
1042#endif
1043 asm volatile(
1044 " ipte %2,%3"
1045 : "=m" (*ptep) : "m" (*ptep),
1046 "a" (pto), "a" (address));
1047 }
1048}
1049
1050static inline void ptep_flush_lazy(struct mm_struct *mm,
1051 unsigned long address, pte_t *ptep)
1052{
1053 int active = (mm == current->active_mm) ? 1 : 0;
1054
1055 if (atomic_read(&mm->context.attach_count) > active)
1056 __ptep_ipte(address, ptep);
1057 else
1058 mm->context.flush_mm = 1;
1059}
1060
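Note: ptep_flush_lazy() issues the IPTE only when another CPU may still hold the entry in its TLB (attach_count above the local active count); otherwise it records the pending flush in mm->context.flush_mm, which __tlb_flush_mm_lazy() in tlbflush.h later consumes. A sketch of a consumer, where zap_one_pte() is a hypothetical helper rather than an interface added by this patch:

    static void zap_one_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
    {
        ptep_flush_lazy(mm, addr, ptep);  /* IPTE now, or flush_mm = 1 */
        pte_val(*ptep) = _PAGE_INVALID;
        flush_tlb_mm(mm);                 /* flushes only if flush_mm was set */
    }
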
1004#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG 1061#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
1005static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, 1062static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
1006 unsigned long addr, pte_t *ptep) 1063 unsigned long addr, pte_t *ptep)
1007{ 1064{
1008 pgste_t pgste; 1065 pgste_t pgste;
1009 pte_t pte; 1066 pte_t pte;
1067 int young;
1010 1068
1011 if (mm_has_pgste(vma->vm_mm)) { 1069 if (mm_has_pgste(vma->vm_mm)) {
1012 pgste = pgste_get_lock(ptep); 1070 pgste = pgste_get_lock(ptep);
1013 pgste = pgste_update_young(ptep, pgste); 1071 pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
1014 pte = *ptep;
1015 *ptep = pte_mkold(pte);
1016 pgste_set_unlock(ptep, pgste);
1017 return pte_young(pte);
1018 } 1072 }
1019 return 0; 1073
1074 pte = *ptep;
1075 __ptep_ipte(addr, ptep);
1076 young = pte_young(pte);
1077 pte = pte_mkold(pte);
1078
1079 if (mm_has_pgste(vma->vm_mm)) {
1080 pgste_set_pte(ptep, pte);
1081 pgste_set_unlock(ptep, pgste);
1082 } else
1083 *ptep = pte;
1084
1085 return young;
1020} 1086}
1021 1087
1022#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH 1088#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
1023static inline int ptep_clear_flush_young(struct vm_area_struct *vma, 1089static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
1024 unsigned long address, pte_t *ptep) 1090 unsigned long address, pte_t *ptep)
1025{ 1091{
1026 /* No need to flush TLB
1027 * On s390 reference bits are in storage key and never in TLB
1028 * With virtualization we handle the reference bit, without we
1029 * we can simply return */
1030 return ptep_test_and_clear_young(vma, address, ptep); 1092 return ptep_test_and_clear_young(vma, address, ptep);
1031} 1093}
1032 1094
1033static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
1034{
1035 if (!(pte_val(*ptep) & _PAGE_INVALID)) {
1036#ifndef CONFIG_64BIT
1037 /* pto must point to the start of the segment table */
1038 pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00);
1039#else
1040 /* ipte in zarch mode can do the math */
1041 pte_t *pto = ptep;
1042#endif
1043 asm volatile(
1044 " ipte %2,%3"
1045 : "=m" (*ptep) : "m" (*ptep),
1046 "a" (pto), "a" (address));
1047 }
1048}
1049
1050/* 1095/*
1051 * This is hard to understand. ptep_get_and_clear and ptep_clear_flush 1096 * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
1052 * both clear the TLB for the unmapped pte. The reason is that 1097 * both clear the TLB for the unmapped pte. The reason is that
@@ -1067,16 +1112,14 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
1067 pgste_t pgste; 1112 pgste_t pgste;
1068 pte_t pte; 1113 pte_t pte;
1069 1114
1070 mm->context.flush_mm = 1;
1071 if (mm_has_pgste(mm)) { 1115 if (mm_has_pgste(mm)) {
1072 pgste = pgste_get_lock(ptep); 1116 pgste = pgste_get_lock(ptep);
1073 pgste = pgste_ipte_notify(mm, address, ptep, pgste); 1117 pgste = pgste_ipte_notify(mm, address, ptep, pgste);
1074 } 1118 }
1075 1119
1076 pte = *ptep; 1120 pte = *ptep;
1077 if (!mm_exclusive(mm)) 1121 ptep_flush_lazy(mm, address, ptep);
1078 __ptep_ipte(address, ptep); 1122 pte_val(*ptep) = _PAGE_INVALID;
1079 pte_val(*ptep) = _PAGE_TYPE_EMPTY;
1080 1123
1081 if (mm_has_pgste(mm)) { 1124 if (mm_has_pgste(mm)) {
1082 pgste = pgste_update_all(&pte, pgste); 1125 pgste = pgste_update_all(&pte, pgste);
@@ -1093,15 +1136,14 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
1093 pgste_t pgste; 1136 pgste_t pgste;
1094 pte_t pte; 1137 pte_t pte;
1095 1138
1096 mm->context.flush_mm = 1;
1097 if (mm_has_pgste(mm)) { 1139 if (mm_has_pgste(mm)) {
1098 pgste = pgste_get_lock(ptep); 1140 pgste = pgste_get_lock(ptep);
1099 pgste_ipte_notify(mm, address, ptep, pgste); 1141 pgste_ipte_notify(mm, address, ptep, pgste);
1100 } 1142 }
1101 1143
1102 pte = *ptep; 1144 pte = *ptep;
1103 if (!mm_exclusive(mm)) 1145 ptep_flush_lazy(mm, address, ptep);
1104 __ptep_ipte(address, ptep); 1146 pte_val(*ptep) |= _PAGE_INVALID;
1105 1147
1106 if (mm_has_pgste(mm)) { 1148 if (mm_has_pgste(mm)) {
1107 pgste = pgste_update_all(&pte, pgste); 1149 pgste = pgste_update_all(&pte, pgste);
@@ -1117,7 +1159,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
1117 pgste_t pgste; 1159 pgste_t pgste;
1118 1160
1119 if (mm_has_pgste(mm)) { 1161 if (mm_has_pgste(mm)) {
1120 pgste = *(pgste_t *)(ptep + PTRS_PER_PTE); 1162 pgste = pgste_get(ptep);
1121 pgste_set_key(ptep, pgste, pte); 1163 pgste_set_key(ptep, pgste, pte);
1122 pgste_set_pte(ptep, pte); 1164 pgste_set_pte(ptep, pte);
1123 pgste_set_unlock(ptep, pgste); 1165 pgste_set_unlock(ptep, pgste);
@@ -1139,7 +1181,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
1139 1181
1140 pte = *ptep; 1182 pte = *ptep;
1141 __ptep_ipte(address, ptep); 1183 __ptep_ipte(address, ptep);
1142 pte_val(*ptep) = _PAGE_TYPE_EMPTY; 1184 pte_val(*ptep) = _PAGE_INVALID;
1143 1185
1144 if (mm_has_pgste(vma->vm_mm)) { 1186 if (mm_has_pgste(vma->vm_mm)) {
1145 pgste = pgste_update_all(&pte, pgste); 1187 pgste = pgste_update_all(&pte, pgste);
@@ -1163,18 +1205,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
1163 pgste_t pgste; 1205 pgste_t pgste;
1164 pte_t pte; 1206 pte_t pte;
1165 1207
1166 if (mm_has_pgste(mm)) { 1208 if (!full && mm_has_pgste(mm)) {
1167 pgste = pgste_get_lock(ptep); 1209 pgste = pgste_get_lock(ptep);
1168 if (!full) 1210 pgste = pgste_ipte_notify(mm, address, ptep, pgste);
1169 pgste = pgste_ipte_notify(mm, address, ptep, pgste);
1170 } 1211 }
1171 1212
1172 pte = *ptep; 1213 pte = *ptep;
1173 if (!full) 1214 if (!full)
1174 __ptep_ipte(address, ptep); 1215 ptep_flush_lazy(mm, address, ptep);
1175 pte_val(*ptep) = _PAGE_TYPE_EMPTY; 1216 pte_val(*ptep) = _PAGE_INVALID;
1176 1217
1177 if (mm_has_pgste(mm)) { 1218 if (!full && mm_has_pgste(mm)) {
1178 pgste = pgste_update_all(&pte, pgste); 1219 pgste = pgste_update_all(&pte, pgste);
1179 pgste_set_unlock(ptep, pgste); 1220 pgste_set_unlock(ptep, pgste);
1180 } 1221 }
@@ -1189,14 +1230,12 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
1189 pte_t pte = *ptep; 1230 pte_t pte = *ptep;
1190 1231
1191 if (pte_write(pte)) { 1232 if (pte_write(pte)) {
1192 mm->context.flush_mm = 1;
1193 if (mm_has_pgste(mm)) { 1233 if (mm_has_pgste(mm)) {
1194 pgste = pgste_get_lock(ptep); 1234 pgste = pgste_get_lock(ptep);
1195 pgste = pgste_ipte_notify(mm, address, ptep, pgste); 1235 pgste = pgste_ipte_notify(mm, address, ptep, pgste);
1196 } 1236 }
1197 1237
1198 if (!mm_exclusive(mm)) 1238 ptep_flush_lazy(mm, address, ptep);
1199 __ptep_ipte(address, ptep);
1200 pte = pte_wrprotect(pte); 1239 pte = pte_wrprotect(pte);
1201 1240
1202 if (mm_has_pgste(mm)) { 1241 if (mm_has_pgste(mm)) {
@@ -1240,7 +1279,7 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
1240{ 1279{
1241 pte_t __pte; 1280 pte_t __pte;
1242 pte_val(__pte) = physpage + pgprot_val(pgprot); 1281 pte_val(__pte) = physpage + pgprot_val(pgprot);
1243 return __pte; 1282 return pte_mkyoung(__pte);
1244} 1283}
1245 1284
1246static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) 1285static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
@@ -1248,10 +1287,8 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
1248 unsigned long physpage = page_to_phys(page); 1287 unsigned long physpage = page_to_phys(page);
1249 pte_t __pte = mk_pte_phys(physpage, pgprot); 1288 pte_t __pte = mk_pte_phys(physpage, pgprot);
1250 1289
1251 if ((pte_val(__pte) & _PAGE_SWW) && PageDirty(page)) { 1290 if (pte_write(__pte) && PageDirty(page))
1252 pte_val(__pte) |= _PAGE_SWC; 1291 __pte = pte_mkdirty(__pte);
1253 pte_val(__pte) &= ~_PAGE_RO;
1254 }
1255 return __pte; 1292 return __pte;
1256} 1293}
1257 1294
@@ -1313,7 +1350,7 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
1313 unsigned long sto = (unsigned long) pmdp - 1350 unsigned long sto = (unsigned long) pmdp -
1314 pmd_index(address) * sizeof(pmd_t); 1351 pmd_index(address) * sizeof(pmd_t);
1315 1352
1316 if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) { 1353 if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)) {
1317 asm volatile( 1354 asm volatile(
1318 " .insn rrf,0xb98e0000,%2,%3,0,0" 1355 " .insn rrf,0xb98e0000,%2,%3,0,0"
1319 : "=m" (*pmdp) 1356 : "=m" (*pmdp)
@@ -1324,24 +1361,68 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
1324 } 1361 }
1325} 1362}
1326 1363
1364static inline void __pmd_csp(pmd_t *pmdp)
1365{
1366 register unsigned long reg2 asm("2") = pmd_val(*pmdp);
1367 register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
1368 _SEGMENT_ENTRY_INVALID;
1369 register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
1370
1371 asm volatile(
1372 " csp %1,%3"
1373 : "=m" (*pmdp)
1374 : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
1375}
1376
1327#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) 1377#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
1328static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) 1378static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
1329{ 1379{
1330 /* 1380 /*
1331 * pgprot is PAGE_NONE, PAGE_RO, or PAGE_RW (see __Pxxx / __Sxxx) 1381 * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx)
1332 * Convert to segment table entry format. 1382 * Convert to segment table entry format.
1333 */ 1383 */
1334 if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE)) 1384 if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE))
1335 return pgprot_val(SEGMENT_NONE); 1385 return pgprot_val(SEGMENT_NONE);
1336 if (pgprot_val(pgprot) == pgprot_val(PAGE_RO)) 1386 if (pgprot_val(pgprot) == pgprot_val(PAGE_READ))
1337 return pgprot_val(SEGMENT_RO); 1387 return pgprot_val(SEGMENT_READ);
1338 return pgprot_val(SEGMENT_RW); 1388 return pgprot_val(SEGMENT_WRITE);
1389}
1390
1391static inline pmd_t pmd_mkyoung(pmd_t pmd)
1392{
1393#ifdef CONFIG_64BIT
1394 if (pmd_prot_none(pmd)) {
1395 pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
1396 } else {
1397 pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
1398 pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
1399 }
1400#endif
1401 return pmd;
1402}
1403
1404static inline pmd_t pmd_mkold(pmd_t pmd)
1405{
1406#ifdef CONFIG_64BIT
1407 if (pmd_prot_none(pmd)) {
1408 pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
1409 } else {
1410 pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
1411 pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
1412 }
1413#endif
1414 return pmd;
1339} 1415}
1340 1416
1341static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) 1417static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
1342{ 1418{
1419 int young;
1420
1421 young = pmd_young(pmd);
1343 pmd_val(pmd) &= _SEGMENT_CHG_MASK; 1422 pmd_val(pmd) &= _SEGMENT_CHG_MASK;
1344 pmd_val(pmd) |= massage_pgprot_pmd(newprot); 1423 pmd_val(pmd) |= massage_pgprot_pmd(newprot);
1424 if (young)
1425 pmd = pmd_mkyoung(pmd);
1345 return pmd; 1426 return pmd;
1346} 1427}
1347 1428
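Note: massage_pgprot_pmd() above only returns old (invalid) templates — SEGMENT_READ and SEGMENT_WRITE both include _SEGMENT_ENTRY_INVALID — so pmd_modify() snapshots the young state first and replays it with pmd_mkyoung(). A userspace sketch of the round trip (simplified: origin bits omitted; constants copied from this header):

    #include <assert.h>

    #define _SEGMENT_ENTRY_YOUNG   0x002
    #define _SEGMENT_ENTRY_INVALID 0x020

    int main(void)
    {
        unsigned long pmd = _SEGMENT_ENTRY_YOUNG;  /* read-write, young */
        int young = (pmd & _SEGMENT_ENTRY_YOUNG) != 0;

        pmd = _SEGMENT_ENTRY_INVALID;              /* massaged SEGMENT_WRITE: old */
        if (young) {                               /* pmd_mkyoung() equivalent */
            pmd |= _SEGMENT_ENTRY_YOUNG;
            pmd &= ~_SEGMENT_ENTRY_INVALID;
        }
        assert(pmd == _SEGMENT_ENTRY_YOUNG);       /* young state survived */
        return 0;
    }
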
@@ -1349,18 +1430,29 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
1349{ 1430{
1350 pmd_t __pmd; 1431 pmd_t __pmd;
1351 pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); 1432 pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
1352 return __pmd; 1433 return pmd_mkyoung(__pmd);
1353} 1434}
1354 1435
1355static inline pmd_t pmd_mkwrite(pmd_t pmd) 1436static inline pmd_t pmd_mkwrite(pmd_t pmd)
1356{ 1437{
1357 /* Do not clobber _HPAGE_TYPE_NONE pages! */ 1438 /* Do not clobber PROT_NONE segments! */
1358 if (!(pmd_val(pmd) & _SEGMENT_ENTRY_INV)) 1439 if (!pmd_prot_none(pmd))
1359 pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO; 1440 pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
1360 return pmd; 1441 return pmd;
1361} 1442}
1362#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ 1443#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
1363 1444
1445static inline void pmdp_flush_lazy(struct mm_struct *mm,
1446 unsigned long address, pmd_t *pmdp)
1447{
1448 int active = (mm == current->active_mm) ? 1 : 0;
1449
1450 if ((atomic_read(&mm->context.attach_count) & 0xffff) > active)
1451 __pmd_idte(address, pmdp);
1452 else
1453 mm->context.flush_mm = 1;
1454}
1455
1364#ifdef CONFIG_TRANSPARENT_HUGEPAGE 1456#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1365 1457
1366#define __HAVE_ARCH_PGTABLE_DEPOSIT 1458#define __HAVE_ARCH_PGTABLE_DEPOSIT
@@ -1378,7 +1470,7 @@ static inline int pmd_trans_splitting(pmd_t pmd)
1378static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, 1470static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
1379 pmd_t *pmdp, pmd_t entry) 1471 pmd_t *pmdp, pmd_t entry)
1380{ 1472{
1381 if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1) 1473 if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1)
1382 pmd_val(entry) |= _SEGMENT_ENTRY_CO; 1474 pmd_val(entry) |= _SEGMENT_ENTRY_CO;
1383 *pmdp = entry; 1475 *pmdp = entry;
1384} 1476}
@@ -1391,7 +1483,9 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
1391 1483
1392static inline pmd_t pmd_wrprotect(pmd_t pmd) 1484static inline pmd_t pmd_wrprotect(pmd_t pmd)
1393{ 1485{
1394 pmd_val(pmd) |= _SEGMENT_ENTRY_RO; 1486 /* Do not clobber PROT_NONE segments! */
1487 if (!pmd_prot_none(pmd))
1488 pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
1395 return pmd; 1489 return pmd;
1396} 1490}
1397 1491
@@ -1401,50 +1495,16 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
1401 return pmd; 1495 return pmd;
1402} 1496}
1403 1497
1404static inline pmd_t pmd_mkold(pmd_t pmd)
1405{
1406 /* No referenced bit in the segment table entry. */
1407 return pmd;
1408}
1409
1410static inline pmd_t pmd_mkyoung(pmd_t pmd)
1411{
1412 /* No referenced bit in the segment table entry. */
1413 return pmd;
1414}
1415
1416#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG 1498#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
1417static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, 1499static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
1418 unsigned long address, pmd_t *pmdp) 1500 unsigned long address, pmd_t *pmdp)
1419{ 1501{
1420 unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK; 1502 pmd_t pmd;
1421 long tmp, rc;
1422 int counter;
1423 1503
1424 rc = 0; 1504 pmd = *pmdp;
1425 if (MACHINE_HAS_RRBM) { 1505 __pmd_idte(address, pmdp);
1426 counter = PTRS_PER_PTE >> 6; 1506 *pmdp = pmd_mkold(pmd);
1427 asm volatile( 1507 return pmd_young(pmd);
1428 "0: .insn rre,0xb9ae0000,%0,%3\n" /* rrbm */
1429 " ogr %1,%0\n"
1430 " la %3,0(%4,%3)\n"
1431 " brct %2,0b\n"
1432 : "=&d" (tmp), "+&d" (rc), "+d" (counter),
1433 "+a" (pmd_addr)
1434 : "a" (64 * 4096UL) : "cc");
1435 rc = !!rc;
1436 } else {
1437 counter = PTRS_PER_PTE;
1438 asm volatile(
1439 "0: rrbe 0,%2\n"
1440 " la %2,0(%3,%2)\n"
1441 " brc 12,1f\n"
1442 " lhi %0,1\n"
1443 "1: brct %1,0b\n"
1444 : "+d" (rc), "+d" (counter), "+a" (pmd_addr)
1445 : "a" (4096UL) : "cc");
1446 }
1447 return rc;
1448} 1508}
1449 1509
1450#define __HAVE_ARCH_PMDP_GET_AND_CLEAR 1510#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
@@ -1510,10 +1570,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
1510 * exception will occur instead of a page translation exception. The 1570 * exception will occur instead of a page translation exception. The
1511 * specification exception has the bad habit of not storing necessary 1571 * specification exception has the bad habit of not storing necessary
1512 * information in the lowcore. 1572 * information in the lowcore.
1513 * Bit 21 and bit 22 are the page invalid bit and the page protection 1573 * Bits 21, 22, 30 and 31 are used to indicate the page type.
1514 * bit. We set both to indicate a swapped page. 1574 * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
1515 * Bit 30 and 31 are used to distinguish the different page types. For
1516 * a swapped page these bits need to be zero.
1517 * This leaves the bits 1-19 and bits 24-29 to store type and offset. 1575 * This leaves the bits 1-19 and bits 24-29 to store type and offset.
1518 * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19 1576 * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19
1519 * plus 24 for the offset. 1577 * plus 24 for the offset.
@@ -1527,10 +1585,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
1527 * exception will occur instead of a page translation exception. The 1585 * exception will occur instead of a page translation exception. The
1528 * specification exception has the bad habit of not storing necessary 1586 * specification exception has the bad habit of not storing necessary
1529 * information in the lowcore. 1587 * information in the lowcore.
1530 * Bit 53 and bit 54 are the page invalid bit and the page protection 1588 * Bits 53, 54, 62 and 63 are used to indicate the page type.
1531 * bit. We set both to indicate a swapped page. 1589 * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402
1532 * Bit 62 and 63 are used to distinguish the different page types. For
1533 * a swapped page these bits need to be zero.
1534 * This leaves the bits 0-51 and bits 56-61 to store type and offset. 1590 * This leaves the bits 0-51 and bits 56-61 to store type and offset.
1535 * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 1591 * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51
1536 * plus 56 for the offset. 1592 * plus 56 for the offset.
@@ -1547,7 +1603,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
1547{ 1603{
1548 pte_t pte; 1604 pte_t pte;
1549 offset &= __SWP_OFFSET_MASK; 1605 offset &= __SWP_OFFSET_MASK;
1550 pte_val(pte) = _PAGE_TYPE_SWAP | ((type & 0x1f) << 2) | 1606 pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) |
1551 ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); 1607 ((offset & 1UL) << 7) | ((offset & ~1UL) << 11);
1552 return pte; 1608 return pte;
1553} 1609}
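
Note: a worked example of the packing above for type 3, offset 5 (userspace sketch; the unpacking lines simply mirror the packing, they are not the kernel's __swp_type/__swp_offset macros):

    #include <assert.h>

    #define _PAGE_TYPE    0x002
    #define _PAGE_INVALID 0x400

    int main(void)
    {
        unsigned long type = 3, offset = 5, pte;

        pte = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) |
              ((offset & 1UL) << 7) | ((offset & ~1UL) << 11);

        assert(pte == 0x248e);               /* 0x400|0x2|0xc|0x80|0x2000 */
        assert((pte & 0x603) == 0x402);      /* swap pte bit pattern */
        assert(((pte >> 2) & 0x1f) == type);
        assert((((pte >> 11) & ~1UL) | ((pte >> 7) & 1UL)) == offset);
        return 0;
    }
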
@@ -1570,7 +1626,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
1570 1626
1571#define pgoff_to_pte(__off) \ 1627#define pgoff_to_pte(__off) \
1572 ((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \ 1628 ((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \
1573 | _PAGE_TYPE_FILE }) 1629 | _PAGE_INVALID | _PAGE_PROTECT })
1574 1630
1575#endif /* !__ASSEMBLY__ */ 1631#endif /* !__ASSEMBLY__ */
1576 1632
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index b0e6435b2f02..0eb37505cab1 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -43,6 +43,7 @@ extern void execve_tail(void);
43#ifndef CONFIG_64BIT 43#ifndef CONFIG_64BIT
44 44
45#define TASK_SIZE (1UL << 31) 45#define TASK_SIZE (1UL << 31)
46#define TASK_MAX_SIZE (1UL << 31)
46#define TASK_UNMAPPED_BASE (1UL << 30) 47#define TASK_UNMAPPED_BASE (1UL << 30)
47 48
48#else /* CONFIG_64BIT */ 49#else /* CONFIG_64BIT */
@@ -51,6 +52,7 @@ extern void execve_tail(void);
51#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \ 52#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
52 (1UL << 30) : (1UL << 41)) 53 (1UL << 30) : (1UL << 41))
53#define TASK_SIZE TASK_SIZE_OF(current) 54#define TASK_SIZE TASK_SIZE_OF(current)
55#define TASK_MAX_SIZE (1UL << 53)
54 56
55#endif /* CONFIG_64BIT */ 57#endif /* CONFIG_64BIT */
56 58
diff --git a/arch/s390/include/asm/serial.h b/arch/s390/include/asm/serial.h
new file mode 100644
index 000000000000..5b3e48ef534b
--- /dev/null
+++ b/arch/s390/include/asm/serial.h
@@ -0,0 +1,6 @@
1#ifndef _ASM_S390_SERIAL_H
2#define _ASM_S390_SERIAL_H
3
4#define BASE_BAUD 0
5
6#endif /* _ASM_S390_SERIAL_H */
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index 80b6f11263c4..6dbd559763c9 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -8,6 +8,7 @@
8#define __ASM_SWITCH_TO_H 8#define __ASM_SWITCH_TO_H
9 9
10#include <linux/thread_info.h> 10#include <linux/thread_info.h>
11#include <asm/ptrace.h>
11 12
12extern struct task_struct *__switch_to(void *, void *); 13extern struct task_struct *__switch_to(void *, void *);
13extern void update_cr_regs(struct task_struct *task); 14extern void update_cr_regs(struct task_struct *task);
@@ -68,12 +69,16 @@ static inline void restore_fp_regs(s390_fp_regs *fpregs)
68 69
69static inline void save_access_regs(unsigned int *acrs) 70static inline void save_access_regs(unsigned int *acrs)
70{ 71{
71 asm volatile("stam 0,15,%0" : "=Q" (*acrs)); 72 typedef struct { int _[NUM_ACRS]; } acrstype;
73
74 asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs));
72} 75}
73 76
74static inline void restore_access_regs(unsigned int *acrs) 77static inline void restore_access_regs(unsigned int *acrs)
75{ 78{
76 asm volatile("lam 0,15,%0" : : "Q" (*acrs)); 79 typedef struct { int _[NUM_ACRS]; } acrstype;
80
81 asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs));
77} 82}
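
Note: the acrstype cast gives the "Q"/"=Q" memory operands the size of all NUM_ACRS access registers; with plain *acrs the compiler only sees a 4-byte object and is free to cache or reorder the remaining 15 words around the asm. The idiom reduced to its core (hypothetical standalone example, with 16 hard-coded for illustration):

    static inline void store_all_regs(unsigned int *regs)
    {
        typedef struct { int _[16]; } regblock;

        /* "=Q" on the whole struct marks all 64 bytes as written */
        asm volatile("stam 0,15,%0" : "=Q" (*(regblock *)regs));
    }
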
78 83
79#define switch_to(prev,next,last) do { \ 84#define switch_to(prev,next,last) do { \
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index b75d7d686684..2cb846c4b37f 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -32,6 +32,7 @@ struct mmu_gather {
32 struct mm_struct *mm; 32 struct mm_struct *mm;
33 struct mmu_table_batch *batch; 33 struct mmu_table_batch *batch;
34 unsigned int fullmm; 34 unsigned int fullmm;
35 unsigned long start, end;
35}; 36};
36 37
37struct mmu_table_batch { 38struct mmu_table_batch {
@@ -48,10 +49,13 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
48 49
49static inline void tlb_gather_mmu(struct mmu_gather *tlb, 50static inline void tlb_gather_mmu(struct mmu_gather *tlb,
50 struct mm_struct *mm, 51 struct mm_struct *mm,
51 unsigned int full_mm_flush) 52 unsigned long start,
53 unsigned long end)
52{ 54{
53 tlb->mm = mm; 55 tlb->mm = mm;
54 tlb->fullmm = full_mm_flush; 56 tlb->start = start;
57 tlb->end = end;
58 tlb->fullmm = !(start | (end+1));
55 tlb->batch = NULL; 59 tlb->batch = NULL;
56 if (tlb->fullmm) 60 if (tlb->fullmm)
57 __tlb_flush_mm(mm); 61 __tlb_flush_mm(mm);
@@ -59,13 +63,14 @@ static inline void tlb_gather_mmu(struct mmu_gather *tlb,
59 63
60static inline void tlb_flush_mmu(struct mmu_gather *tlb) 64static inline void tlb_flush_mmu(struct mmu_gather *tlb)
61{ 65{
66 __tlb_flush_mm_lazy(tlb->mm);
62 tlb_table_flush(tlb); 67 tlb_table_flush(tlb);
63} 68}
64 69
65static inline void tlb_finish_mmu(struct mmu_gather *tlb, 70static inline void tlb_finish_mmu(struct mmu_gather *tlb,
66 unsigned long start, unsigned long end) 71 unsigned long start, unsigned long end)
67{ 72{
68 tlb_table_flush(tlb); 73 tlb_flush_mmu(tlb);
69} 74}
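
Note: generic mm code signals a full-mm teardown by calling tlb_gather_mmu() with start = 0 and end = -1, so !(start | (end + 1)) tests both in one expression: end + 1 wraps to 0 only for end == ~0UL. A userspace sketch of the idiom:

    #include <assert.h>

    static int is_fullmm(unsigned long start, unsigned long end)
    {
        return !(start | (end + 1));  /* true only for start == 0, end == ~0UL */
    }

    int main(void)
    {
        assert(is_fullmm(0, ~0UL));           /* whole address space */
        assert(!is_fullmm(0, 0x7fffffffUL));  /* bounded range */
        assert(!is_fullmm(4096, ~0UL));
        return 0;
    }
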
70 75
71/* 76/*
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 6b32af30878c..f9fef0425fee 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -86,7 +86,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm)
86 __tlb_flush_full(mm); 86 __tlb_flush_full(mm);
87} 87}
88 88
89static inline void __tlb_flush_mm_cond(struct mm_struct * mm) 89static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
90{ 90{
91 if (mm->context.flush_mm) { 91 if (mm->context.flush_mm) {
92 __tlb_flush_mm(mm); 92 __tlb_flush_mm(mm);
@@ -118,13 +118,13 @@ static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
118 118
119static inline void flush_tlb_mm(struct mm_struct *mm) 119static inline void flush_tlb_mm(struct mm_struct *mm)
120{ 120{
121 __tlb_flush_mm_cond(mm); 121 __tlb_flush_mm_lazy(mm);
122} 122}
123 123
124static inline void flush_tlb_range(struct vm_area_struct *vma, 124static inline void flush_tlb_range(struct vm_area_struct *vma,
125 unsigned long start, unsigned long end) 125 unsigned long start, unsigned long end)
126{ 126{
127 __tlb_flush_mm_cond(vma->vm_mm); 127 __tlb_flush_mm_lazy(vma->vm_mm);
128} 128}
129 129
130static inline void flush_tlb_kernel_range(unsigned long start, 130static inline void flush_tlb_kernel_range(unsigned long start,
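The rename to __tlb_flush_mm_lazy() better describes the idiom: a flush is postponed by setting a per-mm flag and performed on the next flush_tlb_mm() or flush_tlb_range(). A minimal model of that pattern, assuming a flush_mm flag set wherever work was deferred:

	struct mm_model { int flush_mm; };

	static void tlb_flush_mm_lazy_sketch(struct mm_model *mm)
	{
		if (mm->flush_mm) {	/* a flush is pending */
			/* __tlb_flush_mm() would run here */
			mm->flush_mm = 0;
		}
	}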
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
new file mode 100644
index 000000000000..af9896c53eb3
--- /dev/null
+++ b/arch/s390/include/asm/vtime.h
@@ -0,0 +1,7 @@
1#ifndef _S390_VTIME_H
2#define _S390_VTIME_H
3
4#define __ARCH_HAS_VTIME_ACCOUNT
5#define __ARCH_HAS_VTIME_TASK_SWITCH
6
7#endif /* _S390_VTIME_H */
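The new header advertises that the architecture supplies its own virtual cputime accounting hooks. A hedged sketch of the kind of guard generic code can key off these macros; the exact generic-side shape is not shown in this patch:

	#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
	/* a generic fallback would be compiled only in this branch */
	static inline void vtime_task_switch_fallback(void) { }
	#endif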
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index be7a408be7a1..cc30d1fb000c 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -18,6 +18,7 @@
18#include <asm/unistd.h> 18#include <asm/unistd.h>
19#include <asm/page.h> 19#include <asm/page.h>
20#include <asm/sigp.h> 20#include <asm/sigp.h>
21#include <asm/irq.h>
21 22
22__PT_R0 = __PT_GPRS 23__PT_R0 = __PT_GPRS
23__PT_R1 = __PT_GPRS + 4 24__PT_R1 = __PT_GPRS + 4
@@ -435,6 +436,11 @@ io_skip:
435io_loop: 436io_loop:
436 l %r1,BASED(.Ldo_IRQ) 437 l %r1,BASED(.Ldo_IRQ)
437 lr %r2,%r11 # pass pointer to pt_regs 438 lr %r2,%r11 # pass pointer to pt_regs
439 lhi %r3,IO_INTERRUPT
440 tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ?
441 jz io_call
442 lhi %r3,THIN_INTERRUPT
443io_call:
438 basr %r14,%r1 # call do_IRQ 444 basr %r14,%r1 # call do_IRQ
439 tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR 445 tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR
440 jz io_return 446 jz io_return
@@ -584,9 +590,10 @@ ext_skip:
584 mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR 590 mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
585 mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS 591 mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
586 TRACE_IRQS_OFF 592 TRACE_IRQS_OFF
593 l %r1,BASED(.Ldo_IRQ)
587 lr %r2,%r11 # pass pointer to pt_regs 594 lr %r2,%r11 # pass pointer to pt_regs
588 l %r1,BASED(.Ldo_extint) 595 lhi %r3,EXT_INTERRUPT
589 basr %r14,%r1 # call do_extint 596 basr %r14,%r1 # call do_IRQ
590 j io_return 597 j io_return
591 598
592/* 599/*
@@ -879,13 +886,13 @@ cleanup_idle:
879 stm %r9,%r10,__LC_SYSTEM_TIMER 886 stm %r9,%r10,__LC_SYSTEM_TIMER
880 mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) 887 mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
881 # prepare return psw 888 # prepare return psw
882 n %r8,BASED(cleanup_idle_wait) # clear wait state bit 889 n %r8,BASED(cleanup_idle_wait) # clear irq & wait state bits
883 l %r9,24(%r11) # return from psw_idle 890 l %r9,24(%r11) # return from psw_idle
884 br %r14 891 br %r14
885cleanup_idle_insn: 892cleanup_idle_insn:
886 .long psw_idle_lpsw + 0x80000000 893 .long psw_idle_lpsw + 0x80000000
887cleanup_idle_wait: 894cleanup_idle_wait:
888 .long 0xfffdffff 895 .long 0xfcfdffff
889 896
890/* 897/*
891 * Integer constants 898 * Integer constants
@@ -902,7 +909,6 @@ cleanup_idle_wait:
902.Ldo_machine_check: .long s390_do_machine_check 909.Ldo_machine_check: .long s390_do_machine_check
903.Lhandle_mcck: .long s390_handle_mcck 910.Lhandle_mcck: .long s390_handle_mcck
904.Ldo_IRQ: .long do_IRQ 911.Ldo_IRQ: .long do_IRQ
905.Ldo_extint: .long do_extint
906.Ldo_signal: .long do_signal 912.Ldo_signal: .long do_signal
907.Ldo_notify_resume: .long do_notify_resume 913.Ldo_notify_resume: .long do_notify_resume
908.Ldo_per_trap: .long do_per_trap 914.Ldo_per_trap: .long do_per_trap
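With do_extint retired, the low-level handlers funnel all three interrupt classes through one C entry point and pass the class in %r3; the tm on __PT_INT_CODE+8 tests the adapter-interrupt flag (0x80) to pick THIN_INTERRUPT over IO_INTERRUPT. Seen from the C side, the convention is:

	/* sketch of the calling convention established above */
	void do_IRQ(struct pt_regs *regs, int irq);
	/* irq: EXT_INTERRUPT for externals, THIN_INTERRUPT when the
	 * adapter-interrupt bit (0x80) is set in the stored I/O
	 * interruption code, IO_INTERRUPT otherwise */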
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 1c039d0c24c7..2b2188b97c6a 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -19,6 +19,7 @@
19#include <asm/unistd.h> 19#include <asm/unistd.h>
20#include <asm/page.h> 20#include <asm/page.h>
21#include <asm/sigp.h> 21#include <asm/sigp.h>
22#include <asm/irq.h>
22 23
23__PT_R0 = __PT_GPRS 24__PT_R0 = __PT_GPRS
24__PT_R1 = __PT_GPRS + 8 25__PT_R1 = __PT_GPRS + 8
@@ -468,6 +469,11 @@ io_skip:
468 xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) 469 xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
469io_loop: 470io_loop:
470 lgr %r2,%r11 # pass pointer to pt_regs 471 lgr %r2,%r11 # pass pointer to pt_regs
472 lghi %r3,IO_INTERRUPT
473 tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ?
474 jz io_call
475 lghi %r3,THIN_INTERRUPT
476io_call:
471 brasl %r14,do_IRQ 477 brasl %r14,do_IRQ
472 tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR 478 tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR
473 jz io_return 479 jz io_return
@@ -623,7 +629,8 @@ ext_skip:
623 TRACE_IRQS_OFF 629 TRACE_IRQS_OFF
624 xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) 630 xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
625 lgr %r2,%r11 # pass pointer to pt_regs 631 lgr %r2,%r11 # pass pointer to pt_regs
626 brasl %r14,do_extint 632 lghi %r3,EXT_INTERRUPT
633 brasl %r14,do_IRQ
627 j io_return 634 j io_return
628 635
629/* 636/*
@@ -922,7 +929,7 @@ cleanup_idle:
922 stg %r9,__LC_SYSTEM_TIMER 929 stg %r9,__LC_SYSTEM_TIMER
923 mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) 930 mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
924 # prepare return psw 931 # prepare return psw
925 nihh %r8,0xfffd # clear wait state bit 932 nihh %r8,0xfcfd # clear irq & wait state bits
926 lg %r9,48(%r11) # return from psw_idle 933 lg %r9,48(%r11) # return from psw_idle
927 br %r14 934 br %r14
928cleanup_idle_insn: 935cleanup_idle_insn:
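The cleanup_idle mask change mirrors the 31-bit one above. Expanding the constant shows which PSW-mask bits (numbered from the MSB) it clears:

	unsigned short old_mask = 0xfffd; /* cleared: bit 14 (wait)     */
	unsigned short new_mask = 0xfcfd; /* also bits 6 and 7, the I/O */
					  /* and external irq mask bits */
	/* bits cleared by 0xfcfd: 0x0200 (6), 0x0100 (7), 0x0002 (14) */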
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 54b0995514e8..b34ba0ea96a9 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -22,6 +22,7 @@
22#include <asm/cputime.h> 22#include <asm/cputime.h>
23#include <asm/lowcore.h> 23#include <asm/lowcore.h>
24#include <asm/irq.h> 24#include <asm/irq.h>
25#include <asm/hw_irq.h>
25#include "entry.h" 26#include "entry.h"
26 27
27DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); 28DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
@@ -42,9 +43,10 @@ struct irq_class {
42 * Since the external and I/O interrupt fields are already sums we would end 43 * Since the external and I/O interrupt fields are already sums we would end
43 * up with having a sum which accounts each interrupt twice. 44 * up with having a sum which accounts each interrupt twice.
44 */ 45 */
45static const struct irq_class irqclass_main_desc[NR_IRQS] = { 46static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = {
46 [EXTERNAL_INTERRUPT] = {.name = "EXT"}, 47 [EXT_INTERRUPT] = {.name = "EXT"},
47 [IO_INTERRUPT] = {.name = "I/O"} 48 [IO_INTERRUPT] = {.name = "I/O"},
49 [THIN_INTERRUPT] = {.name = "AIO"},
48}; 50};
49 51
50/* 52/*
@@ -86,6 +88,28 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = {
86 [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, 88 [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"},
87}; 89};
88 90
91void __init init_IRQ(void)
92{
93 irq_reserve_irqs(0, THIN_INTERRUPT);
94 init_cio_interrupts();
95 init_airq_interrupts();
96 init_ext_interrupts();
97}
98
99void do_IRQ(struct pt_regs *regs, int irq)
100{
101 struct pt_regs *old_regs;
102
103 old_regs = set_irq_regs(regs);
104 irq_enter();
105 if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
106 /* Serve timer interrupts first. */
107 clock_comparator_work();
108 generic_handle_irq(irq);
109 irq_exit();
110 set_irq_regs(old_regs);
111}
112
89/* 113/*
90 * show_interrupts is needed by /proc/interrupts. 114 * show_interrupts is needed by /proc/interrupts.
91 */ 115 */
@@ -100,27 +124,36 @@ int show_interrupts(struct seq_file *p, void *v)
100 for_each_online_cpu(cpu) 124 for_each_online_cpu(cpu)
101 seq_printf(p, "CPU%d ", cpu); 125 seq_printf(p, "CPU%d ", cpu);
102 seq_putc(p, '\n'); 126 seq_putc(p, '\n');
127 goto out;
103 } 128 }
104 if (irq < NR_IRQS) { 129 if (irq < NR_IRQS) {
130 if (irq >= NR_IRQS_BASE)
131 goto out;
105 seq_printf(p, "%s: ", irqclass_main_desc[irq].name); 132 seq_printf(p, "%s: ", irqclass_main_desc[irq].name);
106 for_each_online_cpu(cpu) 133 for_each_online_cpu(cpu)
107 seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[irq]); 134 seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
108 seq_putc(p, '\n'); 135 seq_putc(p, '\n');
109 goto skip_arch_irqs; 136 goto out;
110 } 137 }
111 for (irq = 0; irq < NR_ARCH_IRQS; irq++) { 138 for (irq = 0; irq < NR_ARCH_IRQS; irq++) {
112 seq_printf(p, "%s: ", irqclass_sub_desc[irq].name); 139 seq_printf(p, "%s: ", irqclass_sub_desc[irq].name);
113 for_each_online_cpu(cpu) 140 for_each_online_cpu(cpu)
114 seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).irqs[irq]); 141 seq_printf(p, "%10u ",
142 per_cpu(irq_stat, cpu).irqs[irq]);
115 if (irqclass_sub_desc[irq].desc) 143 if (irqclass_sub_desc[irq].desc)
116 seq_printf(p, " %s", irqclass_sub_desc[irq].desc); 144 seq_printf(p, " %s", irqclass_sub_desc[irq].desc);
117 seq_putc(p, '\n'); 145 seq_putc(p, '\n');
118 } 146 }
119skip_arch_irqs: 147out:
120 put_online_cpus(); 148 put_online_cpus();
121 return 0; 149 return 0;
122} 150}
123 151
152int arch_show_interrupts(struct seq_file *p, int prec)
153{
154 return 0;
155}
156
124/* 157/*
125 * Switch to the asynchronous interrupt stack for softirq execution. 158 * Switch to the asynchronous interrupt stack for softirq execution.
126 */ 159 */
@@ -159,14 +192,6 @@ asmlinkage void do_softirq(void)
159 local_irq_restore(flags); 192 local_irq_restore(flags);
160} 193}
161 194
162#ifdef CONFIG_PROC_FS
163void init_irq_proc(void)
164{
165 if (proc_mkdir("irq", NULL))
166 create_prof_cpu_mask();
167}
168#endif
169
170/* 195/*
171 * ext_int_hash[index] is the list head for all external interrupts that hash 196 * ext_int_hash[index] is the list head for all external interrupts that hash
172 * to this index. 197 * to this index.
@@ -183,14 +208,6 @@ struct ext_int_info {
183/* ext_int_hash_lock protects the handler lists for external interrupts */ 208/* ext_int_hash_lock protects the handler lists for external interrupts */
184DEFINE_SPINLOCK(ext_int_hash_lock); 209DEFINE_SPINLOCK(ext_int_hash_lock);
185 210
186static void __init init_external_interrupts(void)
187{
188 int idx;
189
190 for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++)
191 INIT_LIST_HEAD(&ext_int_hash[idx]);
192}
193
194static inline int ext_hash(u16 code) 211static inline int ext_hash(u16 code)
195{ 212{
196 return (code + (code >> 9)) & 0xff; 213 return (code + (code >> 9)) & 0xff;
@@ -234,20 +251,13 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler)
234} 251}
235EXPORT_SYMBOL(unregister_external_interrupt); 252EXPORT_SYMBOL(unregister_external_interrupt);
236 253
237void __irq_entry do_extint(struct pt_regs *regs) 254static irqreturn_t do_ext_interrupt(int irq, void *dummy)
238{ 255{
256 struct pt_regs *regs = get_irq_regs();
239 struct ext_code ext_code; 257 struct ext_code ext_code;
240 struct pt_regs *old_regs;
241 struct ext_int_info *p; 258 struct ext_int_info *p;
242 int index; 259 int index;
243 260
244 old_regs = set_irq_regs(regs);
245 irq_enter();
246 if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) {
247 /* Serve timer interrupts first. */
248 clock_comparator_work();
249 }
250 kstat_incr_irqs_this_cpu(EXTERNAL_INTERRUPT, NULL);
251 ext_code = *(struct ext_code *) &regs->int_code; 261 ext_code = *(struct ext_code *) &regs->int_code;
252 if (ext_code.code != 0x1004) 262 if (ext_code.code != 0x1004)
253 __get_cpu_var(s390_idle).nohz_delay = 1; 263 __get_cpu_var(s390_idle).nohz_delay = 1;
@@ -259,13 +269,25 @@ void __irq_entry do_extint(struct pt_regs *regs)
259 p->handler(ext_code, regs->int_parm, 269 p->handler(ext_code, regs->int_parm,
260 regs->int_parm_long); 270 regs->int_parm_long);
261 rcu_read_unlock(); 271 rcu_read_unlock();
262 irq_exit(); 272
263 set_irq_regs(old_regs); 273 return IRQ_HANDLED;
264} 274}
265 275
266void __init init_IRQ(void) 276static struct irqaction external_interrupt = {
277 .name = "EXT",
278 .handler = do_ext_interrupt,
279};
280
281void __init init_ext_interrupts(void)
267{ 282{
268 init_external_interrupts(); 283 int idx;
284
285 for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++)
286 INIT_LIST_HEAD(&ext_int_hash[idx]);
287
288 irq_set_chip_and_handler(EXT_INTERRUPT,
289 &dummy_irq_chip, handle_percpu_irq);
290 setup_irq(EXT_INTERRUPT, &external_interrupt);
269} 291}
270 292
271static DEFINE_SPINLOCK(sc_irq_lock); 293static DEFINE_SPINLOCK(sc_irq_lock);
@@ -313,69 +335,3 @@ void measurement_alert_subclass_unregister(void)
313 spin_unlock(&ma_subclass_lock); 335 spin_unlock(&ma_subclass_lock);
314} 336}
315EXPORT_SYMBOL(measurement_alert_subclass_unregister); 337EXPORT_SYMBOL(measurement_alert_subclass_unregister);
316
317#ifdef CONFIG_SMP
318void synchronize_irq(unsigned int irq)
319{
320 /*
321 * Not needed, the handler is protected by a lock and IRQs that occur
322 * after the handler is deleted are just NOPs.
323 */
324}
325EXPORT_SYMBOL_GPL(synchronize_irq);
326#endif
327
328#ifndef CONFIG_PCI
329
330/* Only PCI devices have dynamically-defined IRQ handlers */
331
332int request_irq(unsigned int irq, irq_handler_t handler,
333 unsigned long irqflags, const char *devname, void *dev_id)
334{
335 return -EINVAL;
336}
337EXPORT_SYMBOL_GPL(request_irq);
338
339void free_irq(unsigned int irq, void *dev_id)
340{
341 WARN_ON(1);
342}
343EXPORT_SYMBOL_GPL(free_irq);
344
345void enable_irq(unsigned int irq)
346{
347 WARN_ON(1);
348}
349EXPORT_SYMBOL_GPL(enable_irq);
350
351void disable_irq(unsigned int irq)
352{
353 WARN_ON(1);
354}
355EXPORT_SYMBOL_GPL(disable_irq);
356
357#endif /* !CONFIG_PCI */
358
359void disable_irq_nosync(unsigned int irq)
360{
361 disable_irq(irq);
362}
363EXPORT_SYMBOL_GPL(disable_irq_nosync);
364
365unsigned long probe_irq_on(void)
366{
367 return 0;
368}
369EXPORT_SYMBOL_GPL(probe_irq_on);
370
371int probe_irq_off(unsigned long val)
372{
373 return 0;
374}
375EXPORT_SYMBOL_GPL(probe_irq_off);
376
377unsigned int probe_irq_mask(unsigned long val)
378{
379 return val;
380}
381EXPORT_SYMBOL_GPL(probe_irq_mask);
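The private request_irq()/free_irq() stubs and the probe_* helpers go away because the architecture now uses the generic hardirq core, which supplies the real implementations. A hedged driver-side sketch under that assumption; all names here are illustrative:

	static irqreturn_t mydev_handler(int irq, void *dev_id)
	{
		return IRQ_HANDLED;
	}

	static int mydev_setup(unsigned int irq, void *dev)
	{
		int rc = request_irq(irq, mydev_handler, 0, "mydev", dev);

		if (rc)
			return rc;	/* genirq does the bookkeeping */
		/* ... use the device ... */
		free_irq(irq, dev);
		return 0;
	}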
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 3388b2b2a07d..adbbe7f1cb0d 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -105,14 +105,31 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn)
105 fixup |= FIXUP_RETURN_REGISTER; 105 fixup |= FIXUP_RETURN_REGISTER;
106 break; 106 break;
107 case 0xeb: 107 case 0xeb:
108 if ((insn[2] & 0xff) == 0x44 || /* bxhg */ 108 switch (insn[2] & 0xff) {
109 (insn[2] & 0xff) == 0x45) /* bxleg */ 109 case 0x44: /* bxhg */
110 case 0x45: /* bxleg */
110 fixup = FIXUP_BRANCH_NOT_TAKEN; 111 fixup = FIXUP_BRANCH_NOT_TAKEN;
112 break;
113 }
111 break; 114 break;
112 case 0xe3: /* bctg */ 115 case 0xe3: /* bctg */
113 if ((insn[2] & 0xff) == 0x46) 116 if ((insn[2] & 0xff) == 0x46)
114 fixup = FIXUP_BRANCH_NOT_TAKEN; 117 fixup = FIXUP_BRANCH_NOT_TAKEN;
115 break; 118 break;
119 case 0xec:
120 switch (insn[2] & 0xff) {
121 case 0xe5: /* clgrb */
122 case 0xe6: /* cgrb */
123 case 0xf6: /* crb */
124 case 0xf7: /* clrb */
125 case 0xfc: /* cgib */
126 case 0xfd: /* cglib */
127 case 0xfe: /* cib */
128 case 0xff: /* clib */
129 fixup = FIXUP_BRANCH_NOT_TAKEN;
130 break;
131 }
132 break;
116 } 133 }
117 return fixup; 134 return fixup;
118} 135}
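kprobe_opcode_t is 16 bits wide on s390, so for the six-byte compare-and-branch instructions added here the major opcode is the high byte of insn[0] and the minor opcode the low byte of insn[2]. For example (operand fields invented):

	unsigned short insn[3] = { 0xec12, 0x3000, 0x00f6 };
	int major = insn[0] >> 8;	/* 0xec                          */
	int minor = insn[2] & 0xff;	/* 0xf6 -> crb, so the probe     */
					/* gets FIXUP_BRANCH_NOT_TAKEN   */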
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 504175ebf8b0..c4c033819879 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -214,10 +214,7 @@ static int notrace s390_revalidate_registers(struct mci *mci)
214 : "0", "cc"); 214 : "0", "cc");
215#endif 215#endif
216 /* Revalidate clock comparator register */ 216 /* Revalidate clock comparator register */
217 if (S390_lowcore.clock_comparator == -1) 217 set_clock_comparator(S390_lowcore.clock_comparator);
218 set_clock_comparator(S390_lowcore.mcck_clock);
219 else
220 set_clock_comparator(S390_lowcore.clock_comparator);
221 /* Check if old PSW is valid */ 218 /* Check if old PSW is valid */
222 if (!mci->wp) 219 if (!mci->wp)
223 /* 220 /*
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 2bc3eddae34a..c5dbb335716d 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -71,6 +71,7 @@ void arch_cpu_idle(void)
71 } 71 }
72 /* Halt the cpu and keep track of cpu time accounting. */ 72 /* Halt the cpu and keep track of cpu time accounting. */
73 vtime_stop_cpu(); 73 vtime_stop_cpu();
74 local_irq_enable();
74} 75}
75 76
76void arch_cpu_idle_exit(void) 77void arch_cpu_idle_exit(void)
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index e9fadb04e3c6..9556905bd3ce 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -60,11 +60,11 @@ void update_cr_regs(struct task_struct *task)
60 60
61 __ctl_store(cr, 0, 2); 61 __ctl_store(cr, 0, 2);
62 cr_new[1] = cr[1]; 62 cr_new[1] = cr[1];
63 /* Set or clear transaction execution TXC/PIFO bits 8 and 9. */ 63 /* Set or clear transaction execution TXC bit 8. */
64 if (task->thread.per_flags & PER_FLAG_NO_TE) 64 if (task->thread.per_flags & PER_FLAG_NO_TE)
65 cr_new[0] = cr[0] & ~(3UL << 54); 65 cr_new[0] = cr[0] & ~(1UL << 55);
66 else 66 else
67 cr_new[0] = cr[0] | (3UL << 54); 67 cr_new[0] = cr[0] | (1UL << 55);
68 /* Set or clear transaction execution TDC bits 62 and 63. */ 68 /* Set or clear transaction execution TDC bits 62 and 63. */
69 cr_new[2] = cr[2] & ~3UL; 69 cr_new[2] = cr[2] & ~3UL;
70 if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { 70 if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
@@ -1299,7 +1299,7 @@ int regs_query_register_offset(const char *name)
1299 1299
1300 if (!name || *name != 'r') 1300 if (!name || *name != 'r')
1301 return -EINVAL; 1301 return -EINVAL;
1302 if (strict_strtoul(name + 1, 10, &offset)) 1302 if (kstrtoul(name + 1, 10, &offset))
1303 return -EINVAL; 1303 return -EINVAL;
1304 if (offset >= NUM_GPRS) 1304 if (offset >= NUM_GPRS)
1305 return -EINVAL; 1305 return -EINVAL;
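The control-register fix follows IBM bit numbering, where bit 0 is the most significant bit of the 64-bit register, so CR0 bit 8 is 1UL << 55. Spelled out:

	unsigned long txc = 1UL << (63 - 8);	/* == 1UL << 55 */
	/* the old mask 3UL << 54 covered bits 8 and 9; only bit 8 is
	 * the transactional-execution control, hence the narrowing */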
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index c479d2f9605b..737bff38e3ee 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -10,6 +10,9 @@
10#include <linux/suspend.h> 10#include <linux/suspend.h>
11#include <linux/mm.h> 11#include <linux/mm.h>
12#include <asm/ctl_reg.h> 12#include <asm/ctl_reg.h>
13#include <asm/ipl.h>
14#include <asm/cio.h>
15#include <asm/pci.h>
13 16
14/* 17/*
15 * References to section boundaries 18 * References to section boundaries
@@ -211,3 +214,11 @@ void restore_processor_state(void)
211 __ctl_set_bit(0,28); 214 __ctl_set_bit(0,28);
212 local_mcck_enable(); 215 local_mcck_enable();
213} 216}
217
218/* Called at the end of swsusp_arch_resume */
219void s390_early_resume(void)
220{
221 lgr_info_log();
222 channel_subsystem_reinit();
223 zpci_rescan();
224}
diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S
index c487be4cfc81..6b09fdffbd2f 100644
--- a/arch/s390/kernel/swsusp_asm64.S
+++ b/arch/s390/kernel/swsusp_asm64.S
@@ -281,11 +281,8 @@ restore_registers:
281 lghi %r2,0 281 lghi %r2,0
282 brasl %r14,arch_set_page_states 282 brasl %r14,arch_set_page_states
283 283
284 /* Log potential guest relocation */ 284 /* Call arch specific early resume code */
285 brasl %r14,lgr_info_log 285 brasl %r14,s390_early_resume
286
287 /* Reinitialize the channel subsystem */
288 brasl %r14,channel_subsystem_reinit
289 286
290 /* Return 0 */ 287 /* Return 0 */
291 lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) 288 lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 876546b9cfa1..064c3082ab33 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -92,7 +92,6 @@ void clock_comparator_work(void)
92 struct clock_event_device *cd; 92 struct clock_event_device *cd;
93 93
94 S390_lowcore.clock_comparator = -1ULL; 94 S390_lowcore.clock_comparator = -1ULL;
95 set_clock_comparator(S390_lowcore.clock_comparator);
96 cd = &__get_cpu_var(comparators); 95 cd = &__get_cpu_var(comparators);
97 cd->event_handler(cd); 96 cd->event_handler(cd);
98} 97}
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index d7776281cb60..05d75c413137 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -63,7 +63,7 @@ static int __init vdso_setup(char *s)
63 else if (strncmp(s, "off", 4) == 0) 63 else if (strncmp(s, "off", 4) == 0)
64 vdso_enabled = 0; 64 vdso_enabled = 0;
65 else { 65 else {
66 rc = strict_strtoul(s, 0, &val); 66 rc = kstrtoul(s, 0, &val);
67 vdso_enabled = rc ? 0 : !!val; 67 vdso_enabled = rc ? 0 : !!val;
68 } 68 }
69 return !rc; 69 return !rc;
@@ -113,11 +113,11 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore)
113 113
114 clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, 114 clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
115 PAGE_SIZE << SEGMENT_ORDER); 115 PAGE_SIZE << SEGMENT_ORDER);
116 clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY, 116 clear_table((unsigned long *) page_table, _PAGE_INVALID,
117 256*sizeof(unsigned long)); 117 256*sizeof(unsigned long));
118 118
119 *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; 119 *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
120 *(unsigned long *) page_table = _PAGE_RO + page_frame; 120 *(unsigned long *) page_table = _PAGE_PROTECT + page_frame;
121 121
122 psal = (u32 *) (page_table + 256*sizeof(unsigned long)); 122 psal = (u32 *) (page_table + 256*sizeof(unsigned long));
123 aste = psal + 32; 123 aste = psal + 32;
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 9b9c1b78ec67..abcfab55f99b 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -19,6 +19,7 @@
19#include <asm/irq_regs.h> 19#include <asm/irq_regs.h>
20#include <asm/cputime.h> 20#include <asm/cputime.h>
21#include <asm/vtimer.h> 21#include <asm/vtimer.h>
22#include <asm/vtime.h>
22#include <asm/irq.h> 23#include <asm/irq.h>
23#include "entry.h" 24#include "entry.h"
24 25
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 3074475c8ae0..3a74d8af0d69 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -119,12 +119,21 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
119 * The layout is as follows: 119 * The layout is as follows:
120 * - gpr 2 contains the subchannel id (passed as addr) 120 * - gpr 2 contains the subchannel id (passed as addr)
121 * - gpr 3 contains the virtqueue index (passed as datamatch) 121 * - gpr 3 contains the virtqueue index (passed as datamatch)
122 * - gpr 4 contains the index on the bus (optionally)
122 */ 123 */
123 ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, 124 ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
124 vcpu->run->s.regs.gprs[2], 125 vcpu->run->s.regs.gprs[2],
125 8, &vcpu->run->s.regs.gprs[3]); 126 8, &vcpu->run->s.regs.gprs[3],
127 vcpu->run->s.regs.gprs[4]);
126 srcu_read_unlock(&vcpu->kvm->srcu, idx); 128 srcu_read_unlock(&vcpu->kvm->srcu, idx);
127 /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */ 129
130 /*
131 * Return cookie in gpr 2, but don't overwrite the register if the
132 * diagnose will be handled by userspace.
133 */
134 if (ret != -EOPNOTSUPP)
135 vcpu->run->s.regs.gprs[2] = ret;
136 /* kvm_io_bus_write_cookie returns -EOPNOTSUPP if it found no match. */
128 return ret < 0 ? ret : 0; 137 return ret < 0 ? ret : 0;
129} 138}
130 139
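The reworked hypercall keeps the register layout described in the comment and adds gpr 4 as an optional cookie, echoed back in gpr 2 unless userspace handles the exit. A hedged guest-side sketch of the convention; the helper name and the subcode constant are illustrative, not quoted from the guest driver:

	static long virtio_ccw_notify_sketch(unsigned long schid,
					     unsigned long queue, long cookie)
	{
		register unsigned long nr asm("1") = 3;	/* notify subcode */
		register unsigned long r2 asm("2") = schid;
		register unsigned long r3 asm("3") = queue;
		register long r4 asm("4") = cookie;
		register long rc asm("2");

		asm volatile("diag 2,4,0x500"
			     : "=d" (rc)
			     : "d" (nr), "d" (r2), "d" (r3), "d" (r4)
			     : "memory", "cc");
		return rc;	/* new cookie, or a negative error */
	}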
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 302e0e52b009..99d789e8a018 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -42,9 +42,11 @@ static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
42({ \ 42({ \
43 __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ 43 __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
44 int __mask = sizeof(__typeof__(*(gptr))) - 1; \ 44 int __mask = sizeof(__typeof__(*(gptr))) - 1; \
45 int __ret = PTR_RET((void __force *)__uptr); \ 45 int __ret; \
46 \ 46 \
47 if (!__ret) { \ 47 if (IS_ERR((void __force *)__uptr)) { \
48 __ret = PTR_ERR((void __force *)__uptr); \
49 } else { \
48 BUG_ON((unsigned long)__uptr & __mask); \ 50 BUG_ON((unsigned long)__uptr & __mask); \
49 __ret = get_user(x, __uptr); \ 51 __ret = get_user(x, __uptr); \
50 } \ 52 } \
@@ -55,9 +57,11 @@ static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
55({ \ 57({ \
56 __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\ 58 __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
57 int __mask = sizeof(__typeof__(*(gptr))) - 1; \ 59 int __mask = sizeof(__typeof__(*(gptr))) - 1; \
58 int __ret = PTR_RET((void __force *)__uptr); \ 60 int __ret; \
59 \ 61 \
60 if (!__ret) { \ 62 if (IS_ERR((void __force *)__uptr)) { \
63 __ret = PTR_ERR((void __force *)__uptr); \
64 } else { \
61 BUG_ON((unsigned long)__uptr & __mask); \ 65 BUG_ON((unsigned long)__uptr & __mask); \
62 __ret = put_user(x, __uptr); \ 66 __ret = put_user(x, __uptr); \
63 } \ 67 } \
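PTR_RET() yields 0 for a valid pointer, so the old "if (!__ret)" was correct but read backwards; the explicit IS_ERR()/PTR_ERR() pair states the error path first. The pattern outside the macro, with a hypothetical translation helper:

	int x, ret;
	void __user *uptr = translate_guest_ptr();	/* may be ERR_PTR() */

	if (IS_ERR((void __force *) uptr))
		ret = PTR_ERR((void __force *) uptr);	/* encoded -errno */
	else
		ret = get_user(x, (int __user *) uptr);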
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 34c1c9a90be2..776dafe918db 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -28,6 +28,7 @@
28#include <asm/pgtable.h> 28#include <asm/pgtable.h>
29#include <asm/nmi.h> 29#include <asm/nmi.h>
30#include <asm/switch_to.h> 30#include <asm/switch_to.h>
31#include <asm/facility.h>
31#include <asm/sclp.h> 32#include <asm/sclp.h>
32#include "kvm-s390.h" 33#include "kvm-s390.h"
33#include "gaccess.h" 34#include "gaccess.h"
@@ -84,9 +85,15 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
84 { NULL } 85 { NULL }
85}; 86};
86 87
87static unsigned long long *facilities; 88unsigned long *vfacilities;
88static struct gmap_notifier gmap_notifier; 89static struct gmap_notifier gmap_notifier;
89 90
91/* test availability of vfacility */
92static inline int test_vfacility(unsigned long nr)
93{
94 return __test_facility(nr, (void *) vfacilities);
95}
96
90/* Section: not file related */ 97/* Section: not file related */
91int kvm_arch_hardware_enable(void *garbage) 98int kvm_arch_hardware_enable(void *garbage)
92{ 99{
@@ -387,7 +394,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
387 vcpu->arch.sie_block->ecb = 6; 394 vcpu->arch.sie_block->ecb = 6;
388 vcpu->arch.sie_block->ecb2 = 8; 395 vcpu->arch.sie_block->ecb2 = 8;
389 vcpu->arch.sie_block->eca = 0xC1002001U; 396 vcpu->arch.sie_block->eca = 0xC1002001U;
390 vcpu->arch.sie_block->fac = (int) (long) facilities; 397 vcpu->arch.sie_block->fac = (int) (long) vfacilities;
391 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); 398 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
392 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, 399 tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
393 (unsigned long) vcpu); 400 (unsigned long) vcpu);
@@ -1063,6 +1070,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
1063 return 0; 1070 return 0;
1064} 1071}
1065 1072
1073void kvm_arch_memslots_updated(struct kvm *kvm)
1074{
1075}
1076
1066/* Section: memory related */ 1077/* Section: memory related */
1067int kvm_arch_prepare_memory_region(struct kvm *kvm, 1078int kvm_arch_prepare_memory_region(struct kvm *kvm,
1068 struct kvm_memory_slot *memslot, 1079 struct kvm_memory_slot *memslot,
@@ -1129,20 +1140,20 @@ static int __init kvm_s390_init(void)
1129 * to hold the maximum amount of facilities. On the other hand, we 1140 * to hold the maximum amount of facilities. On the other hand, we
1130 * only set facilities that are known to work in KVM. 1141 * only set facilities that are known to work in KVM.
1131 */ 1142 */
1132 facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA); 1143 vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
1133 if (!facilities) { 1144 if (!vfacilities) {
1134 kvm_exit(); 1145 kvm_exit();
1135 return -ENOMEM; 1146 return -ENOMEM;
1136 } 1147 }
1137 memcpy(facilities, S390_lowcore.stfle_fac_list, 16); 1148 memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
1138 facilities[0] &= 0xff82fff3f47c0000ULL; 1149 vfacilities[0] &= 0xff82fff3f47c0000UL;
1139 facilities[1] &= 0x001c000000000000ULL; 1150 vfacilities[1] &= 0x001c000000000000UL;
1140 return 0; 1151 return 0;
1141} 1152}
1142 1153
1143static void __exit kvm_s390_exit(void) 1154static void __exit kvm_s390_exit(void)
1144{ 1155{
1145 free_page((unsigned long) facilities); 1156 free_page((unsigned long) vfacilities);
1146 kvm_exit(); 1157 kvm_exit();
1147} 1158}
1148 1159
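The facilities page is renamed to vfacilities and exported to the rest of KVM, with test_vfacility() as the single predicate over the masked list. A hedged usage sketch; the facility number is invented for illustration:

	static int guest_feature_available(void)
	{
		return test_vfacility(76);	/* number illustrative only */
	}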
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 028ca9fd2158..dc99f1ca4267 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -24,6 +24,9 @@
24 24
25typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); 25typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
26 26
27/* declare vfacilities extern */
28extern unsigned long *vfacilities;
29
27/* negativ values are error codes, positive values for internal conditions */ 30/* negativ values are error codes, positive values for internal conditions */
28#define SIE_INTERCEPT_RERUNVCPU (1<<0) 31#define SIE_INTERCEPT_RERUNVCPU (1<<0)
29#define SIE_INTERCEPT_UCONTROL (1<<1) 32#define SIE_INTERCEPT_UCONTROL (1<<1)
@@ -112,6 +115,13 @@ static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu)
112 return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; 115 return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
113} 116}
114 117
118/* Set the condition code in the guest program status word */
119static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
120{
121 vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44);
122 vcpu->arch.sie_block->gpsw.mask |= cc << 44;
123}
124
115int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); 125int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
116enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); 126enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
117void kvm_s390_tasklet(unsigned long parm); 127void kvm_s390_tasklet(unsigned long parm);
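The condition code lives in PSW bits 18-19; counted from the least significant end of the 64-bit mask word those are bits 45 and 44, which is where the shift by 44 in the new helper comes from. A worked example:

	unsigned long mask = 0;

	mask &= ~(3UL << 44);	/* clear bits 44-45 = PSW bits 19-18 */
	mask |= 2UL << 44;	/* cc = 2 */
	/* check: 1UL << (63 - 18) == 1UL << 45,
	 *        1UL << (63 - 19) == 1UL << 44 */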
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 4cdc54e63ebc..59200ee275e5 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -164,8 +164,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
164 kfree(inti); 164 kfree(inti);
165no_interrupt: 165no_interrupt:
166 /* Set condition code and we're done. */ 166 /* Set condition code and we're done. */
167 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); 167 kvm_s390_set_psw_cc(vcpu, cc);
168 vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
169 return 0; 168 return 0;
170} 169}
171 170
@@ -220,15 +219,13 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
220 * Set condition code 3 to stop the guest from issueing channel 219 * Set condition code 3 to stop the guest from issueing channel
221 * I/O instructions. 220 * I/O instructions.
222 */ 221 */
223 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); 222 kvm_s390_set_psw_cc(vcpu, 3);
224 vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
225 return 0; 223 return 0;
226 } 224 }
227} 225}
228 226
229static int handle_stfl(struct kvm_vcpu *vcpu) 227static int handle_stfl(struct kvm_vcpu *vcpu)
230{ 228{
231 unsigned int facility_list;
232 int rc; 229 int rc;
233 230
234 vcpu->stat.instruction_stfl++; 231 vcpu->stat.instruction_stfl++;
@@ -236,15 +233,13 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
236 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) 233 if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
237 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 234 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
238 235
239 /* only pass the facility bits, which we can handle */
240 facility_list = S390_lowcore.stfl_fac_list & 0xff82fff3;
241
242 rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), 236 rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
243 &facility_list, sizeof(facility_list)); 237 vfacilities, 4);
244 if (rc) 238 if (rc)
245 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 239 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
246 VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list); 240 VCPU_EVENT(vcpu, 5, "store facility list value %x",
247 trace_kvm_s390_handle_stfl(vcpu, facility_list); 241 *(unsigned int *) vfacilities);
242 trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities);
248 return 0; 243 return 0;
249} 244}
250 245
@@ -387,7 +382,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
387 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); 382 return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
388 383
389 if (fc > 3) { 384 if (fc > 3) {
390 vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; /* cc 3 */ 385 kvm_s390_set_psw_cc(vcpu, 3);
391 return 0; 386 return 0;
392 } 387 }
393 388
@@ -397,7 +392,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
397 392
398 if (fc == 0) { 393 if (fc == 0) {
399 vcpu->run->s.regs.gprs[0] = 3 << 28; 394 vcpu->run->s.regs.gprs[0] = 3 << 28;
400 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* cc 0 */ 395 kvm_s390_set_psw_cc(vcpu, 0);
401 return 0; 396 return 0;
402 } 397 }
403 398
@@ -431,12 +426,11 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
431 } 426 }
432 trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); 427 trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
433 free_page(mem); 428 free_page(mem);
434 vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); 429 kvm_s390_set_psw_cc(vcpu, 0);
435 vcpu->run->s.regs.gprs[0] = 0; 430 vcpu->run->s.regs.gprs[0] = 0;
436 return 0; 431 return 0;
437out_no_data: 432out_no_data:
438 /* condition code 3 */ 433 kvm_s390_set_psw_cc(vcpu, 3);
439 vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
440out_exception: 434out_exception:
441 free_page(mem); 435 free_page(mem);
442 return rc; 436 return rc;
@@ -494,12 +488,12 @@ static int handle_epsw(struct kvm_vcpu *vcpu)
494 kvm_s390_get_regs_rre(vcpu, &reg1, &reg2); 488 kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
495 489
496 /* This basically extracts the mask half of the psw. */ 490 /* This basically extracts the mask half of the psw. */
497 vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000; 491 vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL;
498 vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32; 492 vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
499 if (reg2) { 493 if (reg2) {
500 vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000; 494 vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL;
501 vcpu->run->s.regs.gprs[reg2] |= 495 vcpu->run->s.regs.gprs[reg2] |=
502 vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffff; 496 vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL;
503 } 497 }
504 return 0; 498 return 0;
505} 499}
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index c61b9fad43cc..57c87d7d7ede 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -44,7 +44,6 @@ static void __udelay_disabled(unsigned long long usecs)
44 do { 44 do {
45 set_clock_comparator(end); 45 set_clock_comparator(end);
46 vtime_stop_cpu(); 46 vtime_stop_cpu();
47 local_irq_disable();
48 } while (get_tod_clock() < end); 47 } while (get_tod_clock() < end);
49 lockdep_on(); 48 lockdep_on();
50 __ctl_load(cr0, 0, 0); 49 __ctl_load(cr0, 0, 0);
@@ -64,7 +63,6 @@ static void __udelay_enabled(unsigned long long usecs)
64 set_clock_comparator(end); 63 set_clock_comparator(end);
65 } 64 }
66 vtime_stop_cpu(); 65 vtime_stop_cpu();
67 local_irq_disable();
68 if (clock_saved) 66 if (clock_saved)
69 local_tick_enable(clock_saved); 67 local_tick_enable(clock_saved);
70 } while (get_tod_clock() < end); 68 } while (get_tod_clock() < end);
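Together with the process.c hunk above, this changes the contract of vtime_stop_cpu(): it now returns with interrupts still disabled, so the idle loop enables them explicitly and __udelay no longer has to disable them again after every wakeup. In outline:

	static void idle_loop_sketch(void)
	{
		for (;;) {
			vtime_stop_cpu();	/* halt; returns irqs-off */
			local_irq_enable();	/* caller re-enables */
		}
	}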
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 50ea137a2d3c..1694d738b175 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -86,28 +86,28 @@ static unsigned long follow_table(struct mm_struct *mm,
86 switch (mm->context.asce_bits & _ASCE_TYPE_MASK) { 86 switch (mm->context.asce_bits & _ASCE_TYPE_MASK) {
87 case _ASCE_TYPE_REGION1: 87 case _ASCE_TYPE_REGION1:
88 table = table + ((address >> 53) & 0x7ff); 88 table = table + ((address >> 53) & 0x7ff);
89 if (unlikely(*table & _REGION_ENTRY_INV)) 89 if (unlikely(*table & _REGION_ENTRY_INVALID))
90 return -0x39UL; 90 return -0x39UL;
91 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 91 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
92 /* fallthrough */ 92 /* fallthrough */
93 case _ASCE_TYPE_REGION2: 93 case _ASCE_TYPE_REGION2:
94 table = table + ((address >> 42) & 0x7ff); 94 table = table + ((address >> 42) & 0x7ff);
95 if (unlikely(*table & _REGION_ENTRY_INV)) 95 if (unlikely(*table & _REGION_ENTRY_INVALID))
96 return -0x3aUL; 96 return -0x3aUL;
97 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 97 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
98 /* fallthrough */ 98 /* fallthrough */
99 case _ASCE_TYPE_REGION3: 99 case _ASCE_TYPE_REGION3:
100 table = table + ((address >> 31) & 0x7ff); 100 table = table + ((address >> 31) & 0x7ff);
101 if (unlikely(*table & _REGION_ENTRY_INV)) 101 if (unlikely(*table & _REGION_ENTRY_INVALID))
102 return -0x3bUL; 102 return -0x3bUL;
103 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 103 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
104 /* fallthrough */ 104 /* fallthrough */
105 case _ASCE_TYPE_SEGMENT: 105 case _ASCE_TYPE_SEGMENT:
106 table = table + ((address >> 20) & 0x7ff); 106 table = table + ((address >> 20) & 0x7ff);
107 if (unlikely(*table & _SEGMENT_ENTRY_INV)) 107 if (unlikely(*table & _SEGMENT_ENTRY_INVALID))
108 return -0x10UL; 108 return -0x10UL;
109 if (unlikely(*table & _SEGMENT_ENTRY_LARGE)) { 109 if (unlikely(*table & _SEGMENT_ENTRY_LARGE)) {
110 if (write && (*table & _SEGMENT_ENTRY_RO)) 110 if (write && (*table & _SEGMENT_ENTRY_PROTECT))
111 return -0x04UL; 111 return -0x04UL;
112 return (*table & _SEGMENT_ENTRY_ORIGIN_LARGE) + 112 return (*table & _SEGMENT_ENTRY_ORIGIN_LARGE) +
113 (address & ~_SEGMENT_ENTRY_ORIGIN_LARGE); 113 (address & ~_SEGMENT_ENTRY_ORIGIN_LARGE);
@@ -117,7 +117,7 @@ static unsigned long follow_table(struct mm_struct *mm,
117 table = table + ((address >> 12) & 0xff); 117 table = table + ((address >> 12) & 0xff);
118 if (unlikely(*table & _PAGE_INVALID)) 118 if (unlikely(*table & _PAGE_INVALID))
119 return -0x11UL; 119 return -0x11UL;
120 if (write && (*table & _PAGE_RO)) 120 if (write && (*table & _PAGE_PROTECT))
121 return -0x04UL; 121 return -0x04UL;
122 return (*table & PAGE_MASK) + (address & ~PAGE_MASK); 122 return (*table & PAGE_MASK) + (address & ~PAGE_MASK);
123} 123}
@@ -130,13 +130,13 @@ static unsigned long follow_table(struct mm_struct *mm,
130 unsigned long *table = (unsigned long *)__pa(mm->pgd); 130 unsigned long *table = (unsigned long *)__pa(mm->pgd);
131 131
132 table = table + ((address >> 20) & 0x7ff); 132 table = table + ((address >> 20) & 0x7ff);
133 if (unlikely(*table & _SEGMENT_ENTRY_INV)) 133 if (unlikely(*table & _SEGMENT_ENTRY_INVALID))
134 return -0x10UL; 134 return -0x10UL;
135 table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); 135 table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
136 table = table + ((address >> 12) & 0xff); 136 table = table + ((address >> 12) & 0xff);
137 if (unlikely(*table & _PAGE_INVALID)) 137 if (unlikely(*table & _PAGE_INVALID))
138 return -0x11UL; 138 return -0x11UL;
139 if (write && (*table & _PAGE_RO)) 139 if (write && (*table & _PAGE_PROTECT))
140 return -0x04UL; 140 return -0x04UL;
141 return (*table & PAGE_MASK) + (address & ~PAGE_MASK); 141 return (*table & PAGE_MASK) + (address & ~PAGE_MASK);
142} 142}
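Beyond the *_INV -> *_INVALID and *_RO -> *_PROTECT renames, the walk itself is unchanged: each level is indexed by a fixed field of the virtual address, 11 bits wide for the region and segment levels and 8 bits for the page level. For a given address:

	unsigned long addr = 0x0000123456789abcUL;	/* example only  */
	unsigned long rfx = (addr >> 53) & 0x7ff;	/* region-first  */
	unsigned long rsx = (addr >> 42) & 0x7ff;	/* region-second */
	unsigned long rtx = (addr >> 31) & 0x7ff;	/* region-third  */
	unsigned long sx  = (addr >> 20) & 0x7ff;	/* segment       */
	unsigned long px  = (addr >> 12) & 0xff;	/* page          */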
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 3ad65b04ac15..46d517c3c763 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -53,7 +53,7 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level)
53 seq_printf(m, "I\n"); 53 seq_printf(m, "I\n");
54 return; 54 return;
55 } 55 }
56 seq_printf(m, "%s", pr & _PAGE_RO ? "RO " : "RW "); 56 seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW ");
57 seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : " "); 57 seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : " ");
58 seq_putc(m, '\n'); 58 seq_putc(m, '\n');
59} 59}
@@ -105,12 +105,12 @@ static void note_page(struct seq_file *m, struct pg_state *st,
105} 105}
106 106
107/* 107/*
108 * The actual page table walker functions. In order to keep the implementation 108 * The actual page table walker functions. In order to keep the
109 * of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO 109 * implementation of print_prot() short, we only check and pass
110 * flags to note_page() if a region, segment or page table entry is invalid or 110 * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region,
111 * read-only. 111 * segment or page table entry is invalid or read-only.
112 * After all it's just a hint that the current level being walked contains an 112 * After all it's just a hint that the current level being walked
113 * invalid or read-only entry. 113 * contains an invalid or read-only entry.
114 */ 114 */
115static void walk_pte_level(struct seq_file *m, struct pg_state *st, 115static void walk_pte_level(struct seq_file *m, struct pg_state *st,
116 pmd_t *pmd, unsigned long addr) 116 pmd_t *pmd, unsigned long addr)
@@ -122,14 +122,14 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st,
122 for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) { 122 for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
123 st->current_address = addr; 123 st->current_address = addr;
124 pte = pte_offset_kernel(pmd, addr); 124 pte = pte_offset_kernel(pmd, addr);
125 prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID); 125 prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID);
126 note_page(m, st, prot, 4); 126 note_page(m, st, prot, 4);
127 addr += PAGE_SIZE; 127 addr += PAGE_SIZE;
128 } 128 }
129} 129}
130 130
131#ifdef CONFIG_64BIT 131#ifdef CONFIG_64BIT
132#define _PMD_PROT_MASK (_SEGMENT_ENTRY_RO | _SEGMENT_ENTRY_CO) 132#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO)
133#else 133#else
134#define _PMD_PROT_MASK 0 134#define _PMD_PROT_MASK 0
135#endif 135#endif
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 1f5315d1215c..5d758db27bdc 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -24,7 +24,7 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
24 pte_t *ptep, pte; 24 pte_t *ptep, pte;
25 struct page *page; 25 struct page *page;
26 26
27 mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL; 27 mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
28 28
29 ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); 29 ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
30 do { 30 do {
@@ -55,8 +55,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
55 struct page *head, *page, *tail; 55 struct page *head, *page, *tail;
56 int refs; 56 int refs;
57 57
58 result = write ? 0 : _SEGMENT_ENTRY_RO; 58 result = write ? 0 : _SEGMENT_ENTRY_PROTECT;
59 mask = result | _SEGMENT_ENTRY_INV; 59 mask = result | _SEGMENT_ENTRY_INVALID;
60 if ((pmd_val(pmd) & mask) != result) 60 if ((pmd_val(pmd) & mask) != result)
61 return 0; 61 return 0;
62 VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); 62 VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 121089d57802..248445f92604 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -8,21 +8,127 @@
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/hugetlb.h> 9#include <linux/hugetlb.h>
10 10
11static inline pmd_t __pte_to_pmd(pte_t pte)
12{
13 int none, young, prot;
14 pmd_t pmd;
15
16 /*
17 * Convert encoding pte bits pmd bits
18 * .IR...wrdytp ..R...I...y.
19 * empty .10...000000 -> ..0...1...0.
20 * prot-none, clean, old .11...000001 -> ..0...1...1.
21 * prot-none, clean, young .11...000101 -> ..1...1...1.
22 * prot-none, dirty, old .10...001001 -> ..0...1...1.
23 * prot-none, dirty, young .10...001101 -> ..1...1...1.
24 * read-only, clean, old .11...010001 -> ..1...1...0.
25 * read-only, clean, young .01...010101 -> ..1...0...1.
26 * read-only, dirty, old .11...011001 -> ..1...1...0.
27 * read-only, dirty, young .01...011101 -> ..1...0...1.
28 * read-write, clean, old .11...110001 -> ..0...1...0.
29 * read-write, clean, young .01...110101 -> ..0...0...1.
30 * read-write, dirty, old .10...111001 -> ..0...1...0.
31 * read-write, dirty, young .00...111101 -> ..0...0...1.
32 * Huge ptes are dirty by definition, a clean pte is made dirty
33 * by the conversion.
34 */
35 if (pte_present(pte)) {
36 pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
37 if (pte_val(pte) & _PAGE_INVALID)
38 pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
39 none = (pte_val(pte) & _PAGE_PRESENT) &&
40 !(pte_val(pte) & _PAGE_READ) &&
41 !(pte_val(pte) & _PAGE_WRITE);
42 prot = (pte_val(pte) & _PAGE_PROTECT) &&
43 !(pte_val(pte) & _PAGE_WRITE);
44 young = pte_val(pte) & _PAGE_YOUNG;
45 if (none || young)
46 pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
47 if (prot || (none && young))
48 pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
49 } else
50 pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
51 return pmd;
52}
53
54static inline pte_t __pmd_to_pte(pmd_t pmd)
55{
56 pte_t pte;
57
58 /*
59 * Convert encoding pmd bits pte bits
60 * ..R...I...y. .IR...wrdytp
61 * empty ..0...1...0. -> .10...000000
62 * prot-none, old ..0...1...1. -> .10...001001
63 * prot-none, young ..1...1...1. -> .10...001101
64 * read-only, old ..1...1...0. -> .11...011001
65 * read-only, young ..1...0...1. -> .01...011101
66 * read-write, old ..0...1...0. -> .10...111001
67 * read-write, young ..0...0...1. -> .00...111101
68 * Huge ptes are dirty by definition
69 */
70 if (pmd_present(pmd)) {
71 pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY |
72 (pmd_val(pmd) & PAGE_MASK);
73 if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID)
74 pte_val(pte) |= _PAGE_INVALID;
75 if (pmd_prot_none(pmd)) {
76 if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
77 pte_val(pte) |= _PAGE_YOUNG;
78 } else {
79 pte_val(pte) |= _PAGE_READ;
80 if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
81 pte_val(pte) |= _PAGE_PROTECT;
82 else
83 pte_val(pte) |= _PAGE_WRITE;
84 if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)
85 pte_val(pte) |= _PAGE_YOUNG;
86 }
87 } else
88 pte_val(pte) = _PAGE_INVALID;
89 return pte;
90}
11 91
12void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 92void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
13 pte_t *pteptr, pte_t pteval) 93 pte_t *ptep, pte_t pte)
14{ 94{
15 pmd_t *pmdp = (pmd_t *) pteptr; 95 pmd_t pmd;
16 unsigned long mask;
17 96
97 pmd = __pte_to_pmd(pte);
18 if (!MACHINE_HAS_HPAGE) { 98 if (!MACHINE_HAS_HPAGE) {
19 pteptr = (pte_t *) pte_page(pteval)[1].index; 99 pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
20 mask = pte_val(pteval) & 100 pmd_val(pmd) |= pte_page(pte)[1].index;
21 (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); 101 } else
22 pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; 102 pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO;
103 *(pmd_t *) ptep = pmd;
104}
105
106pte_t huge_ptep_get(pte_t *ptep)
107{
108 unsigned long origin;
109 pmd_t pmd;
110
111 pmd = *(pmd_t *) ptep;
112 if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) {
113 origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN;
114 pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
115 pmd_val(pmd) |= *(unsigned long *) origin;
23 } 116 }
117 return __pmd_to_pte(pmd);
118}
24 119
25 pmd_val(*pmdp) = pte_val(pteval); 120pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
121 unsigned long addr, pte_t *ptep)
122{
123 pmd_t *pmdp = (pmd_t *) ptep;
124 pte_t pte = huge_ptep_get(ptep);
125
126 if (MACHINE_HAS_IDTE)
127 __pmd_idte(addr, pmdp);
128 else
129 __pmd_csp(pmdp);
130 pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
131 return pte;
26} 132}
27 133
28int arch_prepare_hugepage(struct page *page) 134int arch_prepare_hugepage(struct page *page)
@@ -58,7 +164,7 @@ void arch_release_hugepage(struct page *page)
58 ptep = (pte_t *) page[1].index; 164 ptep = (pte_t *) page[1].index;
59 if (!ptep) 165 if (!ptep)
60 return; 166 return;
61 clear_table((unsigned long *) ptep, _PAGE_TYPE_EMPTY, 167 clear_table((unsigned long *) ptep, _PAGE_INVALID,
62 PTRS_PER_PTE * sizeof(pte_t)); 168 PTRS_PER_PTE * sizeof(pte_t));
63 page_table_free(&init_mm, (unsigned long *) ptep); 169 page_table_free(&init_mm, (unsigned long *) ptep);
64 page[1].index = 0; 170 page[1].index = 0;
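The conversion helpers let hugetlb keep the pte-based interface while actually storing segment-table entries: set_huge_pte_at() translates pte software bits to segment bits (or, without MACHINE_HAS_HPAGE, stashes the backing page-table origin in page[1].index), and huge_ptep_get() reverses it. Per the encoding comments, the round trip preserves the invalid/protect/young state while dirty always reads back set. A toy model of that invariant, with invented flag values:

	/* toy model only; flag values invented for illustration */
	#define F_INVALID 0x1
	#define F_PROTECT 0x2
	#define F_YOUNG   0x4
	#define F_DIRTY   0x8

	static unsigned int huge_round_trip(unsigned int pte_flags)
	{
		/* huge ptes are dirty by definition, so dirty is forced */
		return (pte_flags & (F_INVALID | F_PROTECT | F_YOUNG)) | F_DIRTY;
	}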
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 80adfbf75065..990397420e6b 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -118,7 +118,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
118 pte = pte_offset_kernel(pmd, address); 118 pte = pte_offset_kernel(pmd, address);
119 if (!enable) { 119 if (!enable) {
120 __ptep_ipte(address, pte); 120 __ptep_ipte(address, pte);
121 pte_val(*pte) = _PAGE_TYPE_EMPTY; 121 pte_val(*pte) = _PAGE_INVALID;
122 continue; 122 continue;
123 } 123 }
124 pte_val(*pte) = __pa(address); 124 pte_val(*pte) = __pa(address);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index a8154a1a2c94..bf7c0dc64a76 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -161,7 +161,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
161 struct gmap_rmap *rmap; 161 struct gmap_rmap *rmap;
162 struct page *page; 162 struct page *page;
163 163
164 if (*table & _SEGMENT_ENTRY_INV) 164 if (*table & _SEGMENT_ENTRY_INVALID)
165 return 0; 165 return 0;
166 page = pfn_to_page(*table >> PAGE_SHIFT); 166 page = pfn_to_page(*table >> PAGE_SHIFT);
167 mp = (struct gmap_pgtable *) page->index; 167 mp = (struct gmap_pgtable *) page->index;
@@ -172,7 +172,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
172 kfree(rmap); 172 kfree(rmap);
173 break; 173 break;
174 } 174 }
175 *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; 175 *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
176 return 1; 176 return 1;
177} 177}
178 178
@@ -258,7 +258,7 @@ static int gmap_alloc_table(struct gmap *gmap,
258 return -ENOMEM; 258 return -ENOMEM;
259 new = (unsigned long *) page_to_phys(page); 259 new = (unsigned long *) page_to_phys(page);
260 crst_table_init(new, init); 260 crst_table_init(new, init);
261 if (*table & _REGION_ENTRY_INV) { 261 if (*table & _REGION_ENTRY_INVALID) {
262 list_add(&page->lru, &gmap->crst_list); 262 list_add(&page->lru, &gmap->crst_list);
263 *table = (unsigned long) new | _REGION_ENTRY_LENGTH | 263 *table = (unsigned long) new | _REGION_ENTRY_LENGTH |
264 (*table & _REGION_ENTRY_TYPE_MASK); 264 (*table & _REGION_ENTRY_TYPE_MASK);
@@ -292,22 +292,22 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
292 for (off = 0; off < len; off += PMD_SIZE) { 292 for (off = 0; off < len; off += PMD_SIZE) {
293 /* Walk the guest addr space page table */ 293 /* Walk the guest addr space page table */
294 table = gmap->table + (((to + off) >> 53) & 0x7ff); 294 table = gmap->table + (((to + off) >> 53) & 0x7ff);
295 if (*table & _REGION_ENTRY_INV) 295 if (*table & _REGION_ENTRY_INVALID)
296 goto out; 296 goto out;
297 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 297 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
298 table = table + (((to + off) >> 42) & 0x7ff); 298 table = table + (((to + off) >> 42) & 0x7ff);
299 if (*table & _REGION_ENTRY_INV) 299 if (*table & _REGION_ENTRY_INVALID)
300 goto out; 300 goto out;
301 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 301 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
302 table = table + (((to + off) >> 31) & 0x7ff); 302 table = table + (((to + off) >> 31) & 0x7ff);
303 if (*table & _REGION_ENTRY_INV) 303 if (*table & _REGION_ENTRY_INVALID)
304 goto out; 304 goto out;
305 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 305 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
306 table = table + (((to + off) >> 20) & 0x7ff); 306 table = table + (((to + off) >> 20) & 0x7ff);
307 307
308 /* Clear segment table entry in guest address space. */ 308 /* Clear segment table entry in guest address space. */
309 flush |= gmap_unlink_segment(gmap, table); 309 flush |= gmap_unlink_segment(gmap, table);
310 *table = _SEGMENT_ENTRY_INV; 310 *table = _SEGMENT_ENTRY_INVALID;
311 } 311 }
312out: 312out:
313 spin_unlock(&gmap->mm->page_table_lock); 313 spin_unlock(&gmap->mm->page_table_lock);
@@ -335,7 +335,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
335 335
336 if ((from | to | len) & (PMD_SIZE - 1)) 336 if ((from | to | len) & (PMD_SIZE - 1))
337 return -EINVAL; 337 return -EINVAL;
338 if (len == 0 || from + len > PGDIR_SIZE || 338 if (len == 0 || from + len > TASK_MAX_SIZE ||
339 from + len < from || to + len < to) 339 from + len < from || to + len < to)
340 return -EINVAL; 340 return -EINVAL;
341 341
@@ -345,17 +345,17 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
345 for (off = 0; off < len; off += PMD_SIZE) { 345 for (off = 0; off < len; off += PMD_SIZE) {
346 /* Walk the gmap address space page table */ 346 /* Walk the gmap address space page table */
347 table = gmap->table + (((to + off) >> 53) & 0x7ff); 347 table = gmap->table + (((to + off) >> 53) & 0x7ff);
348 if ((*table & _REGION_ENTRY_INV) && 348 if ((*table & _REGION_ENTRY_INVALID) &&
349 gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) 349 gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
350 goto out_unmap; 350 goto out_unmap;
351 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 351 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
352 table = table + (((to + off) >> 42) & 0x7ff); 352 table = table + (((to + off) >> 42) & 0x7ff);
353 if ((*table & _REGION_ENTRY_INV) && 353 if ((*table & _REGION_ENTRY_INVALID) &&
354 gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY)) 354 gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
355 goto out_unmap; 355 goto out_unmap;
356 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 356 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
357 table = table + (((to + off) >> 31) & 0x7ff); 357 table = table + (((to + off) >> 31) & 0x7ff);
358 if ((*table & _REGION_ENTRY_INV) && 358 if ((*table & _REGION_ENTRY_INVALID) &&
359 gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY)) 359 gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
360 goto out_unmap; 360 goto out_unmap;
361 table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); 361 table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
@@ -363,7 +363,8 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
363 363
364 /* Store 'from' address in an invalid segment table entry. */ 364 /* Store 'from' address in an invalid segment table entry. */
365 flush |= gmap_unlink_segment(gmap, table); 365 flush |= gmap_unlink_segment(gmap, table);
366 *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); 366 *table = (from + off) | (_SEGMENT_ENTRY_INVALID |
367 _SEGMENT_ENTRY_PROTECT);
367 } 368 }
368 spin_unlock(&gmap->mm->page_table_lock); 369 spin_unlock(&gmap->mm->page_table_lock);
369 up_read(&gmap->mm->mmap_sem); 370 up_read(&gmap->mm->mmap_sem);
@@ -384,15 +385,15 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
384 unsigned long *table; 385 unsigned long *table;
385 386
386 table = gmap->table + ((address >> 53) & 0x7ff); 387 table = gmap->table + ((address >> 53) & 0x7ff);
387 if (unlikely(*table & _REGION_ENTRY_INV)) 388 if (unlikely(*table & _REGION_ENTRY_INVALID))
388 return ERR_PTR(-EFAULT); 389 return ERR_PTR(-EFAULT);
389 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 390 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
390 table = table + ((address >> 42) & 0x7ff); 391 table = table + ((address >> 42) & 0x7ff);
391 if (unlikely(*table & _REGION_ENTRY_INV)) 392 if (unlikely(*table & _REGION_ENTRY_INVALID))
392 return ERR_PTR(-EFAULT); 393 return ERR_PTR(-EFAULT);
393 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 394 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
394 table = table + ((address >> 31) & 0x7ff); 395 table = table + ((address >> 31) & 0x7ff);
395 if (unlikely(*table & _REGION_ENTRY_INV)) 396 if (unlikely(*table & _REGION_ENTRY_INVALID))
396 return ERR_PTR(-EFAULT); 397 return ERR_PTR(-EFAULT);
397 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 398 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
398 table = table + ((address >> 20) & 0x7ff); 399 table = table + ((address >> 20) & 0x7ff);
@@ -422,11 +423,11 @@ unsigned long __gmap_translate(unsigned long address, struct gmap *gmap)
422 return PTR_ERR(segment_ptr); 423 return PTR_ERR(segment_ptr);
423 /* Convert the gmap address to an mm address. */ 424 /* Convert the gmap address to an mm address. */
424 segment = *segment_ptr; 425 segment = *segment_ptr;
425 if (!(segment & _SEGMENT_ENTRY_INV)) { 426 if (!(segment & _SEGMENT_ENTRY_INVALID)) {
426 page = pfn_to_page(segment >> PAGE_SHIFT); 427 page = pfn_to_page(segment >> PAGE_SHIFT);
427 mp = (struct gmap_pgtable *) page->index; 428 mp = (struct gmap_pgtable *) page->index;
428 return mp->vmaddr | (address & ~PMD_MASK); 429 return mp->vmaddr | (address & ~PMD_MASK);
429 } else if (segment & _SEGMENT_ENTRY_RO) { 430 } else if (segment & _SEGMENT_ENTRY_PROTECT) {
430 vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; 431 vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
431 return vmaddr | (address & ~PMD_MASK); 432 return vmaddr | (address & ~PMD_MASK);
432 } 433 }
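[This hunk is more than a rename of _SEGMENT_ENTRY_INV/_SEGMENT_ENTRY_RO: the gmap code stores the host vmaddr of a mapped-but-not-yet-connected segment in an entry that is invalid with the protect bit set, and __gmap_translate() decodes that. A standalone model of the decode; the bit values are quoted from the 3.11-era asm/pgtable.h and the "present" branch is simplified, so treat this as illustrative only:

#include <stdio.h>

#define SEG_INVALID 0x20UL          /* _SEGMENT_ENTRY_INVALID */
#define SEG_PROTECT 0x200UL         /* _SEGMENT_ENTRY_PROTECT */
#define SEG_ORIGIN  (~0x7ffUL)      /* _SEGMENT_ENTRY_ORIGIN */
#define PMD_MASK    (~((1UL << 20) - 1))

int main(void)
{
	/* invalid+protected entry carrying a made-up host address */
	unsigned long entry = 0x12345600000UL | SEG_INVALID | SEG_PROTECT;
	unsigned long gaddr = 0x98765432UL; /* guest address to translate */

	if (!(entry & SEG_INVALID))
		printf("present: page table at %#lx\n", entry & SEG_ORIGIN);
	else if (entry & SEG_PROTECT)
		printf("mapped, host vmaddr %#lx\n",
		       (entry & SEG_ORIGIN) | (gaddr & ~PMD_MASK));
	else
		printf("nothing mapped\n");
	return 0;
}
]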
@@ -517,8 +518,8 @@ static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
517 page = pfn_to_page(__pa(table) >> PAGE_SHIFT); 518 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
518 mp = (struct gmap_pgtable *) page->index; 519 mp = (struct gmap_pgtable *) page->index;
519 list_for_each_entry_safe(rmap, next, &mp->mapper, list) { 520 list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
520 *rmap->entry = 521 *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
521 _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; 522 _SEGMENT_ENTRY_PROTECT);
522 list_del(&rmap->list); 523 list_del(&rmap->list);
523 kfree(rmap); 524 kfree(rmap);
524 flush = 1; 525 flush = 1;
@@ -545,13 +546,13 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
545 /* Convert the gmap address to an mm address. */ 546 /* Convert the gmap address to an mm address. */
546 while (1) { 547 while (1) {
547 segment = *segment_ptr; 548 segment = *segment_ptr;
548 if (!(segment & _SEGMENT_ENTRY_INV)) { 549 if (!(segment & _SEGMENT_ENTRY_INVALID)) {
549 /* Page table is present */ 550 /* Page table is present */
550 page = pfn_to_page(segment >> PAGE_SHIFT); 551 page = pfn_to_page(segment >> PAGE_SHIFT);
551 mp = (struct gmap_pgtable *) page->index; 552 mp = (struct gmap_pgtable *) page->index;
552 return mp->vmaddr | (address & ~PMD_MASK); 553 return mp->vmaddr | (address & ~PMD_MASK);
553 } 554 }
554 if (!(segment & _SEGMENT_ENTRY_RO)) 555 if (!(segment & _SEGMENT_ENTRY_PROTECT))
555 /* Nothing mapped in the gmap address space. */ 556 /* Nothing mapped in the gmap address space. */
556 break; 557 break;
557 rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap); 558 rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
@@ -586,25 +587,25 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
586 while (address < to) { 587 while (address < to) {
587 /* Walk the gmap address space page table */ 588 /* Walk the gmap address space page table */
588 table = gmap->table + ((address >> 53) & 0x7ff); 589 table = gmap->table + ((address >> 53) & 0x7ff);
589 if (unlikely(*table & _REGION_ENTRY_INV)) { 590 if (unlikely(*table & _REGION_ENTRY_INVALID)) {
590 address = (address + PMD_SIZE) & PMD_MASK; 591 address = (address + PMD_SIZE) & PMD_MASK;
591 continue; 592 continue;
592 } 593 }
593 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 594 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
594 table = table + ((address >> 42) & 0x7ff); 595 table = table + ((address >> 42) & 0x7ff);
595 if (unlikely(*table & _REGION_ENTRY_INV)) { 596 if (unlikely(*table & _REGION_ENTRY_INVALID)) {
596 address = (address + PMD_SIZE) & PMD_MASK; 597 address = (address + PMD_SIZE) & PMD_MASK;
597 continue; 598 continue;
598 } 599 }
599 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 600 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
600 table = table + ((address >> 31) & 0x7ff); 601 table = table + ((address >> 31) & 0x7ff);
601 if (unlikely(*table & _REGION_ENTRY_INV)) { 602 if (unlikely(*table & _REGION_ENTRY_INVALID)) {
602 address = (address + PMD_SIZE) & PMD_MASK; 603 address = (address + PMD_SIZE) & PMD_MASK;
603 continue; 604 continue;
604 } 605 }
605 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 606 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
606 table = table + ((address >> 20) & 0x7ff); 607 table = table + ((address >> 20) & 0x7ff);
607 if (unlikely(*table & _SEGMENT_ENTRY_INV)) { 608 if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
608 address = (address + PMD_SIZE) & PMD_MASK; 609 address = (address + PMD_SIZE) & PMD_MASK;
609 continue; 610 continue;
610 } 611 }
@@ -687,7 +688,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
687 continue; 688 continue;
688 /* Set notification bit in the pgste of the pte */ 689 /* Set notification bit in the pgste of the pte */
689 entry = *ptep; 690 entry = *ptep;
690 if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) { 691 if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
691 pgste = pgste_get_lock(ptep); 692 pgste = pgste_get_lock(ptep);
692 pgste_val(pgste) |= PGSTE_IN_BIT; 693 pgste_val(pgste) |= PGSTE_IN_BIT;
693 pgste_set_unlock(ptep, pgste); 694 pgste_set_unlock(ptep, pgste);
@@ -731,6 +732,11 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte)
731 spin_unlock(&gmap_notifier_lock); 732 spin_unlock(&gmap_notifier_lock);
732} 733}
733 734
735static inline int page_table_with_pgste(struct page *page)
736{
737 return atomic_read(&page->_mapcount) == 0;
738}
739
734static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, 740static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
735 unsigned long vmaddr) 741 unsigned long vmaddr)
736{ 742{
@@ -750,10 +756,11 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
750 mp->vmaddr = vmaddr & PMD_MASK; 756 mp->vmaddr = vmaddr & PMD_MASK;
751 INIT_LIST_HEAD(&mp->mapper); 757 INIT_LIST_HEAD(&mp->mapper);
752 page->index = (unsigned long) mp; 758 page->index = (unsigned long) mp;
753 atomic_set(&page->_mapcount, 3); 759 atomic_set(&page->_mapcount, 0);
754 table = (unsigned long *) page_to_phys(page); 760 table = (unsigned long *) page_to_phys(page);
755 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); 761 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
756 clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); 762 clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
763 PAGE_SIZE/2);
757 return table; 764 return table;
758} 765}
759 766
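[A pgste-enabled page table occupies a full 4K page: the first 2K hold the 256 pte entries, the second 2K the matching pgste entries, which is why both clear_table() calls above cover PAGE_SIZE/2. Note also the _mapcount convention: pgste pages are now tagged with _mapcount == 0 so that the page_table_with_pgste() helper added above can tell them apart from fragmented 1K/2K tables, and the pgstes start with the host referenced/changed bits set instead of zero. A quick offset check of the layout:

#include <stdio.h>

#define PAGE_SIZE    4096
#define PTRS_PER_PTE 256    /* 8-byte entries, so 2K per half */

int main(void)
{
	printf("pte half  : bytes %4d..%4d\n", 0, PTRS_PER_PTE * 8 - 1);
	printf("pgste half: bytes %4d..%4d\n", PTRS_PER_PTE * 8,
	       PAGE_SIZE - 1);
	return 0;
}
]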
@@ -791,26 +798,21 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
791 pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; 798 pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
792 pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; 799 pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
793 if (!(pte_val(*ptep) & _PAGE_INVALID)) { 800 if (!(pte_val(*ptep) & _PAGE_INVALID)) {
794 unsigned long address, bits; 801 unsigned long address, bits, skey;
795 unsigned char skey;
796 802
797 address = pte_val(*ptep) & PAGE_MASK; 803 address = pte_val(*ptep) & PAGE_MASK;
798 skey = page_get_storage_key(address); 804 skey = (unsigned long) page_get_storage_key(address);
799 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); 805 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
806 skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
800 /* Set storage key ACC and FP */ 807 /* Set storage key ACC and FP */
801 page_set_storage_key(address, 808 page_set_storage_key(address, skey, !nq);
802 (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)),
803 !nq);
804
805 /* Merge host changed & referenced into pgste */ 809 /* Merge host changed & referenced into pgste */
806 pgste_val(new) |= bits << 52; 810 pgste_val(new) |= bits << 52;
807 /* Transfer skey changed & referenced bit to kvm user bits */
808 pgste_val(new) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */
809 } 811 }
810 /* changing the guest storage key is considered a change of the page */ 812 /* changing the guest storage key is considered a change of the page */
811 if ((pgste_val(new) ^ pgste_val(old)) & 813 if ((pgste_val(new) ^ pgste_val(old)) &
812 (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) 814 (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
813 pgste_val(new) |= PGSTE_UC_BIT; 815 pgste_val(new) |= PGSTE_HC_BIT;
814 816
815 pgste_set_unlock(ptep, new); 817 pgste_set_unlock(ptep, new);
816 pte_unmap_unlock(*ptep, ptl); 818 pte_unmap_unlock(*ptep, ptl);
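[The shifts in set_guest_storage_key() encode the storage-key byte layout (ACC in 0xf0, fetch protection 0x08, referenced 0x04, changed 0x02): shifting the R/C bits by 48 lands them on the guest PGSTE_GR_BIT/PGSTE_GC_BIT, and shifting ACC/FP by 56 lands them on PGSTE_ACC_BITS/PGSTE_FP_BIT. The constants below are quoted from the 3.11-era asm/pgtable.h purely so the arithmetic can be checked; treat them as illustrative:

#include <stdio.h>

#define PGSTE_ACC_BITS 0xf000000000000000UL
#define PGSTE_FP_BIT   0x0800000000000000UL
#define PGSTE_GR_BIT   0x0004000000000000UL
#define PGSTE_GC_BIT   0x0002000000000000UL

int main(void)
{
	unsigned long key = 0xf6; /* ACC=0xf, R and C set, FP clear */

	printf("guest R/C: %016lx\n", (key & 0x06UL) << 48); /* GR|GC  */
	printf("ACC+FP   : %016lx\n", (key & 0xf8UL) << 56); /* ACC    */
	return 0;
}
]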
@@ -821,6 +823,11 @@ EXPORT_SYMBOL(set_guest_storage_key);
821 823
822#else /* CONFIG_PGSTE */ 824#else /* CONFIG_PGSTE */
823 825
826static inline int page_table_with_pgste(struct page *page)
827{
828 return 0;
829}
830
824static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, 831static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
825 unsigned long vmaddr) 832 unsigned long vmaddr)
826{ 833{
@@ -878,7 +885,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
878 pgtable_page_ctor(page); 885 pgtable_page_ctor(page);
879 atomic_set(&page->_mapcount, 1); 886 atomic_set(&page->_mapcount, 1);
880 table = (unsigned long *) page_to_phys(page); 887 table = (unsigned long *) page_to_phys(page);
881 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); 888 clear_table(table, _PAGE_INVALID, PAGE_SIZE);
882 spin_lock_bh(&mm->context.list_lock); 889 spin_lock_bh(&mm->context.list_lock);
883 list_add(&page->lru, &mm->context.pgtable_list); 890 list_add(&page->lru, &mm->context.pgtable_list);
884 } else { 891 } else {
@@ -897,12 +904,12 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
897 struct page *page; 904 struct page *page;
898 unsigned int bit, mask; 905 unsigned int bit, mask;
899 906
900 if (mm_has_pgste(mm)) { 907 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
908 if (page_table_with_pgste(page)) {
901 gmap_disconnect_pgtable(mm, table); 909 gmap_disconnect_pgtable(mm, table);
902 return page_table_free_pgste(table); 910 return page_table_free_pgste(table);
903 } 911 }
904 /* Free 1K/2K page table fragment of a 4K page */ 912 /* Free 1K/2K page table fragment of a 4K page */
905 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
906 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); 913 bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
907 spin_lock_bh(&mm->context.list_lock); 914 spin_lock_bh(&mm->context.list_lock);
908 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) 915 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
@@ -940,14 +947,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
940 unsigned int bit, mask; 947 unsigned int bit, mask;
941 948
942 mm = tlb->mm; 949 mm = tlb->mm;
943 if (mm_has_pgste(mm)) { 950 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
951 if (page_table_with_pgste(page)) {
944 gmap_disconnect_pgtable(mm, table); 952 gmap_disconnect_pgtable(mm, table);
945 table = (unsigned long *) (__pa(table) | FRAG_MASK); 953 table = (unsigned long *) (__pa(table) | FRAG_MASK);
946 tlb_remove_table(tlb, table); 954 tlb_remove_table(tlb, table);
947 return; 955 return;
948 } 956 }
949 bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t))); 957 bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
950 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
951 spin_lock_bh(&mm->context.list_lock); 958 spin_lock_bh(&mm->context.list_lock);
952 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK) 959 if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
953 list_del(&page->lru); 960 list_del(&page->lru);
@@ -1007,7 +1014,6 @@ void tlb_table_flush(struct mmu_gather *tlb)
1007 struct mmu_table_batch **batch = &tlb->batch; 1014 struct mmu_table_batch **batch = &tlb->batch;
1008 1015
1009 if (*batch) { 1016 if (*batch) {
1010 __tlb_flush_mm(tlb->mm);
1011 call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); 1017 call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
1012 *batch = NULL; 1018 *batch = NULL;
1013 } 1019 }
@@ -1017,11 +1023,12 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
1017{ 1023{
1018 struct mmu_table_batch **batch = &tlb->batch; 1024 struct mmu_table_batch **batch = &tlb->batch;
1019 1025
1026 tlb->mm->context.flush_mm = 1;
1020 if (*batch == NULL) { 1027 if (*batch == NULL) {
1021 *batch = (struct mmu_table_batch *) 1028 *batch = (struct mmu_table_batch *)
1022 __get_free_page(GFP_NOWAIT | __GFP_NOWARN); 1029 __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
1023 if (*batch == NULL) { 1030 if (*batch == NULL) {
1024 __tlb_flush_mm(tlb->mm); 1031 __tlb_flush_mm_lazy(tlb->mm);
1025 tlb_remove_table_one(table); 1032 tlb_remove_table_one(table);
1026 return; 1033 return;
1027 } 1034 }
@@ -1029,40 +1036,124 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
1029 } 1036 }
1030 (*batch)->tables[(*batch)->nr++] = table; 1037 (*batch)->tables[(*batch)->nr++] = table;
1031 if ((*batch)->nr == MAX_TABLE_BATCH) 1038 if ((*batch)->nr == MAX_TABLE_BATCH)
1032 tlb_table_flush(tlb); 1039 tlb_flush_mmu(tlb);
1033} 1040}
1034 1041
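[The __tlb_flush_mm() dropped from tlb_table_flush() is compensated by setting context.flush_mm in tlb_remove_table() and flushing lazily, so the flush happens at most once and only when still pending. A sketch of what __tlb_flush_mm_lazy() presumably looks like, going by the asm/tlbflush.h change in the diffstat rather than verified source:

/* Sketch only; the real helper lives in asm/tlbflush.h. */
static inline void __tlb_flush_mm_lazy(struct mm_struct *mm)
{
	if (mm->context.flush_mm) {
		__tlb_flush_mm(mm);
		mm->context.flush_mm = 0;
	}
}
]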
1035#ifdef CONFIG_TRANSPARENT_HUGEPAGE 1042#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1036void thp_split_vma(struct vm_area_struct *vma) 1043static inline void thp_split_vma(struct vm_area_struct *vma)
1037{ 1044{
1038 unsigned long addr; 1045 unsigned long addr;
1039 struct page *page;
1040 1046
1041 for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { 1047 for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
1042 page = follow_page(vma, addr, FOLL_SPLIT); 1048 follow_page(vma, addr, FOLL_SPLIT);
1043 }
1044} 1049}
1045 1050
1046void thp_split_mm(struct mm_struct *mm) 1051static inline void thp_split_mm(struct mm_struct *mm)
1047{ 1052{
1048 struct vm_area_struct *vma = mm->mmap; 1053 struct vm_area_struct *vma;
1049 1054
1050 while (vma != NULL) { 1055 for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
1051 thp_split_vma(vma); 1056 thp_split_vma(vma);
1052 vma->vm_flags &= ~VM_HUGEPAGE; 1057 vma->vm_flags &= ~VM_HUGEPAGE;
1053 vma->vm_flags |= VM_NOHUGEPAGE; 1058 vma->vm_flags |= VM_NOHUGEPAGE;
1054 vma = vma->vm_next;
1055 } 1059 }
1060 mm->def_flags |= VM_NOHUGEPAGE;
1061}
1062#else
1063static inline void thp_split_mm(struct mm_struct *mm)
1064{
1056} 1065}
1057#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 1066#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1058 1067
1068static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
1069 struct mm_struct *mm, pud_t *pud,
1070 unsigned long addr, unsigned long end)
1071{
1072 unsigned long next, *table, *new;
1073 struct page *page;
1074 pmd_t *pmd;
1075
1076 pmd = pmd_offset(pud, addr);
1077 do {
1078 next = pmd_addr_end(addr, end);
1079again:
1080 if (pmd_none_or_clear_bad(pmd))
1081 continue;
1082 table = (unsigned long *) pmd_deref(*pmd);
1083 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
1084 if (page_table_with_pgste(page))
1085 continue;
1086 /* Allocate new page table with pgstes */
1087 new = page_table_alloc_pgste(mm, addr);
1088 if (!new) {
1089 mm->context.has_pgste = 0;
1090 continue;
1091 }
1092 spin_lock(&mm->page_table_lock);
1093 if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
1094 /* Nuke pmd entry pointing to the "short" page table */
1095 pmdp_flush_lazy(mm, addr, pmd);
1096 pmd_clear(pmd);
1097 /* Copy ptes from old table to new table */
1098 memcpy(new, table, PAGE_SIZE/2);
1099 clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
1100 /* Establish new table */
1101 pmd_populate(mm, pmd, (pte_t *) new);
1102 /* Free old table with rcu, there might be a walker! */
1103 page_table_free_rcu(tlb, table);
1104 new = NULL;
1105 }
1106 spin_unlock(&mm->page_table_lock);
1107 if (new) {
1108 page_table_free_pgste(new);
1109 goto again;
1110 }
1111 } while (pmd++, addr = next, addr != end);
1112
1113 return addr;
1114}
1115
1116static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
1117 struct mm_struct *mm, pgd_t *pgd,
1118 unsigned long addr, unsigned long end)
1119{
1120 unsigned long next;
1121 pud_t *pud;
1122
1123 pud = pud_offset(pgd, addr);
1124 do {
1125 next = pud_addr_end(addr, end);
1126 if (pud_none_or_clear_bad(pud))
1127 continue;
1128 next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
1129 } while (pud++, addr = next, addr != end);
1130
1131 return addr;
1132}
1133
1134static void page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
1135 unsigned long addr, unsigned long end)
1136{
1137 unsigned long next;
1138 pgd_t *pgd;
1139
1140 pgd = pgd_offset(mm, addr);
1141 do {
1142 next = pgd_addr_end(addr, end);
1143 if (pgd_none_or_clear_bad(pgd))
1144 continue;
1145 next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
1146 } while (pgd++, addr = next, addr != end);
1147}
1148
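[page_table_realloc() above uses the standard three-level range walk: each level steps to the smaller of the next table boundary and the overall end. With s390's 1 MB segments (PMD_SHIFT 20) the boundary helper behaves as below; this is a userspace re-implementation for illustration:

#include <stdio.h>

#define PMD_SHIFT 20                    /* 1 MB segments on s390 */
#define PMD_SIZE  (1UL << PMD_SHIFT)
#define PMD_MASK  (~(PMD_SIZE - 1))

/* Next segment boundary, clamped to the end of the range. */
static unsigned long pmd_addr_end(unsigned long addr, unsigned long end)
{
	unsigned long next = (addr + PMD_SIZE) & PMD_MASK;

	return next - 1 < end - 1 ? next : end;
}

int main(void)
{
	printf("%#lx\n", pmd_addr_end(0x123456UL, 0x500000UL)); /* 0x200000 */
	printf("%#lx\n", pmd_addr_end(0x4fffffUL, 0x500000UL)); /* 0x500000 */
	return 0;
}
]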
1059/* 1149/*
1060 * switch on pgstes for its userspace process (for kvm) 1150 * switch on pgstes for its userspace process (for kvm)
1061 */ 1151 */
1062int s390_enable_sie(void) 1152int s390_enable_sie(void)
1063{ 1153{
1064 struct task_struct *tsk = current; 1154 struct task_struct *tsk = current;
1065 struct mm_struct *mm, *old_mm; 1155 struct mm_struct *mm = tsk->mm;
1156 struct mmu_gather tlb;
1066 1157
1067 /* Do we have switched amode? If no, we cannot do sie */ 1158 /* Do we have switched amode? If no, we cannot do sie */
1068 if (s390_user_mode == HOME_SPACE_MODE) 1159 if (s390_user_mode == HOME_SPACE_MODE)
@@ -1072,57 +1163,16 @@ int s390_enable_sie(void)
1072 if (mm_has_pgste(tsk->mm)) 1163 if (mm_has_pgste(tsk->mm))
1073 return 0; 1164 return 0;
1074 1165
1075 /* lets check if we are allowed to replace the mm */ 1166 down_write(&mm->mmap_sem);
1076 task_lock(tsk);
1077 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
1078#ifdef CONFIG_AIO
1079 !hlist_empty(&tsk->mm->ioctx_list) ||
1080#endif
1081 tsk->mm != tsk->active_mm) {
1082 task_unlock(tsk);
1083 return -EINVAL;
1084 }
1085 task_unlock(tsk);
1086
1087 /* we copy the mm and let dup_mm create the page tables with_pgstes */
1088 tsk->mm->context.alloc_pgste = 1;
1089 /* make sure that both mms have a correct rss state */
1090 sync_mm_rss(tsk->mm);
1091 mm = dup_mm(tsk);
1092 tsk->mm->context.alloc_pgste = 0;
1093 if (!mm)
1094 return -ENOMEM;
1095
1096#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1097 /* split thp mappings and disable thp for future mappings */ 1167 /* split thp mappings and disable thp for future mappings */
1098 thp_split_mm(mm); 1168 thp_split_mm(mm);
1099 mm->def_flags |= VM_NOHUGEPAGE; 1169 /* Reallocate the page tables with pgstes */
1100#endif 1170 mm->context.has_pgste = 1;
1101 1171 tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
1102 /* Now lets check again if something happened */ 1172 page_table_realloc(&tlb, mm, 0, TASK_SIZE);
1103 task_lock(tsk); 1173 tlb_finish_mmu(&tlb, 0, TASK_SIZE);
1104 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || 1174 up_write(&mm->mmap_sem);
1105#ifdef CONFIG_AIO 1175 return mm->context.has_pgste ? 0 : -ENOMEM;
1106 !hlist_empty(&tsk->mm->ioctx_list) ||
1107#endif
1108 tsk->mm != tsk->active_mm) {
1109 mmput(mm);
1110 task_unlock(tsk);
1111 return -EINVAL;
1112 }
1113
1114 /* ok, we are alone. No ptrace, no threads, etc. */
1115 old_mm = tsk->mm;
1116 tsk->mm = tsk->active_mm = mm;
1117 preempt_disable();
1118 update_mm(mm, tsk);
1119 atomic_inc(&mm->context.attach_count);
1120 atomic_dec(&old_mm->context.attach_count);
1121 cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
1122 preempt_enable();
1123 task_unlock(tsk);
1124 mmput(old_mm);
1125 return 0;
1126} 1176}
1127EXPORT_SYMBOL_GPL(s390_enable_sie); 1177EXPORT_SYMBOL_GPL(s390_enable_sie);
1128 1178
@@ -1198,9 +1248,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
1198 list_del(lh); 1248 list_del(lh);
1199 } 1249 }
1200 ptep = (pte_t *) pgtable; 1250 ptep = (pte_t *) pgtable;
1201 pte_val(*ptep) = _PAGE_TYPE_EMPTY; 1251 pte_val(*ptep) = _PAGE_INVALID;
1202 ptep++; 1252 ptep++;
1203 pte_val(*ptep) = _PAGE_TYPE_EMPTY; 1253 pte_val(*ptep) = _PAGE_INVALID;
1204 return pgtable; 1254 return pgtable;
1205} 1255}
1206#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 1256#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 8b268fcc4612..bcfb70b60be6 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -69,7 +69,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address)
69 pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t)); 69 pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
70 if (!pte) 70 if (!pte)
71 return NULL; 71 return NULL;
72 clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY, 72 clear_table((unsigned long *) pte, _PAGE_INVALID,
73 PTRS_PER_PTE * sizeof(pte_t)); 73 PTRS_PER_PTE * sizeof(pte_t));
74 return pte; 74 return pte;
75} 75}
@@ -101,7 +101,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
101 !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) { 101 !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
102 pud_val(*pu_dir) = __pa(address) | 102 pud_val(*pu_dir) = __pa(address) |
103 _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE | 103 _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
104 (ro ? _REGION_ENTRY_RO : 0); 104 (ro ? _REGION_ENTRY_PROTECT : 0);
105 address += PUD_SIZE; 105 address += PUD_SIZE;
106 continue; 106 continue;
107 } 107 }
@@ -118,7 +118,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
118 !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) { 118 !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
119 pmd_val(*pm_dir) = __pa(address) | 119 pmd_val(*pm_dir) = __pa(address) |
120 _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | 120 _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
121 (ro ? _SEGMENT_ENTRY_RO : 0); 121 _SEGMENT_ENTRY_YOUNG |
122 (ro ? _SEGMENT_ENTRY_PROTECT : 0);
122 address += PMD_SIZE; 123 address += PMD_SIZE;
123 continue; 124 continue;
124 } 125 }
@@ -131,7 +132,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
131 } 132 }
132 133
133 pt_dir = pte_offset_kernel(pm_dir, address); 134 pt_dir = pte_offset_kernel(pm_dir, address);
134 pte_val(*pt_dir) = __pa(address) | (ro ? _PAGE_RO : 0); 135 pte_val(*pt_dir) = __pa(address) |
136 pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
135 address += PAGE_SIZE; 137 address += PAGE_SIZE;
136 } 138 }
137 ret = 0; 139 ret = 0;
@@ -154,7 +156,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
154 pte_t *pt_dir; 156 pte_t *pt_dir;
155 pte_t pte; 157 pte_t pte;
156 158
157 pte_val(pte) = _PAGE_TYPE_EMPTY; 159 pte_val(pte) = _PAGE_INVALID;
158 while (address < end) { 160 while (address < end) {
159 pg_dir = pgd_offset_k(address); 161 pg_dir = pgd_offset_k(address);
160 if (pgd_none(*pg_dir)) { 162 if (pgd_none(*pg_dir)) {
@@ -255,7 +257,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
255 new_page =__pa(vmem_alloc_pages(0)); 257 new_page =__pa(vmem_alloc_pages(0));
256 if (!new_page) 258 if (!new_page)
257 goto out; 259 goto out;
258 pte_val(*pt_dir) = __pa(new_page); 260 pte_val(*pt_dir) =
261 __pa(new_page) | pgprot_val(PAGE_KERNEL);
259 } 262 }
260 address += PAGE_SIZE; 263 address += PAGE_SIZE;
261 } 264 }
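[vmem_add_mem() picks the largest mapping that fits: a 2 GB region-3 entry when the hardware supports it and the address is 2 GB aligned with a full block left in the range, else a 1 MB segment entry under the analogous test, else 4K ptes. With the EDAT feature checks omitted, the alignment-and-fit condition reduces to:

#include <stdio.h>

#define PUD_SIZE (1UL << 31)  /* 2 GB region-3 entries */
#define PMD_SIZE (1UL << 20)  /* 1 MB segment entries  */

/* addr must be size-aligned and a whole block must fit before end. */
static int fits(unsigned long addr, unsigned long end, unsigned long size)
{
	return !(addr & (size - 1)) && addr + size <= end;
}

int main(void)
{
	unsigned long addr = 3UL << 30, end = 8UL << 30; /* 3G..8G */

	printf("2G entry: %d\n", fits(addr, end, PUD_SIZE)); /* 0: unaligned */
	printf("1M entry: %d\n", fits(addr, end, PMD_SIZE)); /* 1 */
	return 0;
}
]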
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 930783d2c99b..04e1b6a85362 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -346,16 +346,15 @@ static const struct file_operations timer_enabled_fops = {
346}; 346};
347 347
348 348
349static int oprofile_create_hwsampling_files(struct super_block *sb, 349static int oprofile_create_hwsampling_files(struct dentry *root)
350 struct dentry *root)
351{ 350{
352 struct dentry *dir; 351 struct dentry *dir;
353 352
354 dir = oprofilefs_mkdir(sb, root, "timer"); 353 dir = oprofilefs_mkdir(root, "timer");
355 if (!dir) 354 if (!dir)
356 return -EINVAL; 355 return -EINVAL;
357 356
358 oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops); 357 oprofilefs_create_file(dir, "enabled", &timer_enabled_fops);
359 358
360 if (!hwsampler_available) 359 if (!hwsampler_available)
361 return 0; 360 return 0;
@@ -376,17 +375,17 @@ static int oprofile_create_hwsampling_files(struct super_block *sb,
376 * and can only be set to 0. 375 * and can only be set to 0.
377 */ 376 */
378 377
379 dir = oprofilefs_mkdir(sb, root, "0"); 378 dir = oprofilefs_mkdir(root, "0");
380 if (!dir) 379 if (!dir)
381 return -EINVAL; 380 return -EINVAL;
382 381
383 oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops); 382 oprofilefs_create_file(dir, "enabled", &hwsampler_fops);
384 oprofilefs_create_file(sb, dir, "event", &zero_fops); 383 oprofilefs_create_file(dir, "event", &zero_fops);
385 oprofilefs_create_file(sb, dir, "count", &hw_interval_fops); 384 oprofilefs_create_file(dir, "count", &hw_interval_fops);
386 oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops); 385 oprofilefs_create_file(dir, "unit_mask", &zero_fops);
387 oprofilefs_create_file(sb, dir, "kernel", &kernel_fops); 386 oprofilefs_create_file(dir, "kernel", &kernel_fops);
388 oprofilefs_create_file(sb, dir, "user", &user_fops); 387 oprofilefs_create_file(dir, "user", &user_fops);
389 oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", 388 oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
390 &oprofile_sdbt_blocks); 389 &oprofile_sdbt_blocks);
391 390
392 } else { 391 } else {
@@ -396,19 +395,19 @@ static int oprofile_create_hwsampling_files(struct super_block *sb,
396 * space tools. The /dev/oprofile/hwsampling fs is 395 * space tools. The /dev/oprofile/hwsampling fs is
397 * provided in that case. 396 * provided in that case.
398 */ 397 */
399 dir = oprofilefs_mkdir(sb, root, "hwsampling"); 398 dir = oprofilefs_mkdir(root, "hwsampling");
400 if (!dir) 399 if (!dir)
401 return -EINVAL; 400 return -EINVAL;
402 401
403 oprofilefs_create_file(sb, dir, "hwsampler", 402 oprofilefs_create_file(dir, "hwsampler",
404 &hwsampler_fops); 403 &hwsampler_fops);
405 oprofilefs_create_file(sb, dir, "hw_interval", 404 oprofilefs_create_file(dir, "hw_interval",
406 &hw_interval_fops); 405 &hw_interval_fops);
407 oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval", 406 oprofilefs_create_ro_ulong(dir, "hw_min_interval",
408 &oprofile_min_interval); 407 &oprofile_min_interval);
409 oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval", 408 oprofilefs_create_ro_ulong(dir, "hw_max_interval",
410 &oprofile_max_interval); 409 &oprofile_max_interval);
411 oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks", 410 oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
412 &oprofile_sdbt_blocks); 411 &oprofile_sdbt_blocks);
413 } 412 }
414 return 0; 413 return 0;
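[All of these oprofile hunks are mechanical fallout from the oprofilefs interface dropping its super_block argument; each helper keeps its remaining parameters, so every call site simply loses sb:

/* before */ oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops);
/* after  */ oprofilefs_create_file(dir, "enabled", &timer_enabled_fops);
]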
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 086a2e37935d..a9e1dc4ae442 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -2,5 +2,5 @@
2# Makefile for the s390 PCI subsystem. 2# Makefile for the s390 PCI subsystem.
3# 3#
4 4
5obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_msi.o pci_sysfs.o \ 5obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \
6 pci_event.o pci_debug.o pci_insn.o 6 pci_event.o pci_debug.o pci_insn.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index e2956ad39a4f..f17a8343e360 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -42,45 +42,26 @@
42#define SIC_IRQ_MODE_SINGLE 1 42#define SIC_IRQ_MODE_SINGLE 1
43 43
44#define ZPCI_NR_DMA_SPACES 1 44#define ZPCI_NR_DMA_SPACES 1
45#define ZPCI_MSI_VEC_BITS 6
46#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS 45#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS
47 46
48/* list of all detected zpci devices */ 47/* list of all detected zpci devices */
49LIST_HEAD(zpci_list); 48static LIST_HEAD(zpci_list);
50EXPORT_SYMBOL_GPL(zpci_list); 49static DEFINE_SPINLOCK(zpci_list_lock);
51DEFINE_MUTEX(zpci_list_lock);
52EXPORT_SYMBOL_GPL(zpci_list_lock);
53 50
54static struct pci_hp_callback_ops *hotplug_ops; 51static void zpci_enable_irq(struct irq_data *data);
52static void zpci_disable_irq(struct irq_data *data);
55 53
56static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); 54static struct irq_chip zpci_irq_chip = {
57static DEFINE_SPINLOCK(zpci_domain_lock); 55 .name = "zPCI",
58 56 .irq_unmask = zpci_enable_irq,
59struct callback { 57 .irq_mask = zpci_disable_irq,
60 irq_handler_t handler;
61 void *data;
62}; 58};
63 59
64struct zdev_irq_map { 60static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
65 unsigned long aibv; /* AI bit vector */ 61static DEFINE_SPINLOCK(zpci_domain_lock);
66 int msi_vecs; /* consecutive MSI-vectors used */
67 int __unused;
68 struct callback cb[ZPCI_NR_MSI_VECS]; /* callback handler array */
69 spinlock_t lock; /* protect callbacks against de-reg */
70};
71
72struct intr_bucket {
73 /* amap of adapters, one bit per dev, corresponds to one irq nr */
74 unsigned long *alloc;
75 /* AI summary bit, global page for all devices */
76 unsigned long *aisb;
77 /* pointer to aibv and callback data in zdev */
78 struct zdev_irq_map *imap[ZPCI_NR_DEVICES];
79 /* protects the whole bucket struct */
80 spinlock_t lock;
81};
82 62
83static struct intr_bucket *bucket; 63static struct airq_iv *zpci_aisb_iv;
64static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES];
84 65
85/* Adapter interrupt definitions */ 66/* Adapter interrupt definitions */
86static void zpci_irq_handler(struct airq_struct *airq); 67static void zpci_irq_handler(struct airq_struct *airq);
@@ -96,27 +77,8 @@ static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
96struct zpci_iomap_entry *zpci_iomap_start; 77struct zpci_iomap_entry *zpci_iomap_start;
97EXPORT_SYMBOL_GPL(zpci_iomap_start); 78EXPORT_SYMBOL_GPL(zpci_iomap_start);
98 79
99/* highest irq summary bit */
100static int __read_mostly aisb_max;
101
102static struct kmem_cache *zdev_irq_cache;
103static struct kmem_cache *zdev_fmb_cache; 80static struct kmem_cache *zdev_fmb_cache;
104 81
105static inline int irq_to_msi_nr(unsigned int irq)
106{
107 return irq & ZPCI_MSI_MASK;
108}
109
110static inline int irq_to_dev_nr(unsigned int irq)
111{
112 return irq >> ZPCI_MSI_VEC_BITS;
113}
114
115static inline struct zdev_irq_map *get_imap(unsigned int irq)
116{
117 return bucket->imap[irq_to_dev_nr(irq)];
118}
119
120struct zpci_dev *get_zdev(struct pci_dev *pdev) 82struct zpci_dev *get_zdev(struct pci_dev *pdev)
121{ 83{
122 return (struct zpci_dev *) pdev->sysdata; 84 return (struct zpci_dev *) pdev->sysdata;
@@ -126,22 +88,17 @@ struct zpci_dev *get_zdev_by_fid(u32 fid)
126{ 88{
127 struct zpci_dev *tmp, *zdev = NULL; 89 struct zpci_dev *tmp, *zdev = NULL;
128 90
129 mutex_lock(&zpci_list_lock); 91 spin_lock(&zpci_list_lock);
130 list_for_each_entry(tmp, &zpci_list, entry) { 92 list_for_each_entry(tmp, &zpci_list, entry) {
131 if (tmp->fid == fid) { 93 if (tmp->fid == fid) {
132 zdev = tmp; 94 zdev = tmp;
133 break; 95 break;
134 } 96 }
135 } 97 }
136 mutex_unlock(&zpci_list_lock); 98 spin_unlock(&zpci_list_lock);
137 return zdev; 99 return zdev;
138} 100}
139 101
140bool zpci_fid_present(u32 fid)
141{
142 return (get_zdev_by_fid(fid) != NULL) ? true : false;
143}
144
145static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus) 102static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus)
146{ 103{
147 return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL; 104 return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL;
@@ -160,8 +117,7 @@ int pci_proc_domain(struct pci_bus *bus)
160EXPORT_SYMBOL_GPL(pci_proc_domain); 117EXPORT_SYMBOL_GPL(pci_proc_domain);
161 118
162/* Modify PCI: Register adapter interruptions */ 119/* Modify PCI: Register adapter interruptions */
163static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb, 120static int zpci_set_airq(struct zpci_dev *zdev)
164 u64 aibv)
165{ 121{
166 u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); 122 u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
167 struct zpci_fib *fib; 123 struct zpci_fib *fib;
@@ -172,14 +128,14 @@ static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb,
172 return -ENOMEM; 128 return -ENOMEM;
173 129
174 fib->isc = PCI_ISC; 130 fib->isc = PCI_ISC;
175 fib->noi = zdev->irq_map->msi_vecs;
176 fib->sum = 1; /* enable summary notifications */ 131 fib->sum = 1; /* enable summary notifications */
177 fib->aibv = aibv; 132 fib->noi = airq_iv_end(zdev->aibv);
178 fib->aibvo = 0; /* every function has its own page */ 133 fib->aibv = (unsigned long) zdev->aibv->vector;
179 fib->aisb = (u64) bucket->aisb + aisb / 8; 134 fib->aibvo = 0; /* each zdev has its own interrupt vector */
180 fib->aisbo = aisb & ZPCI_MSI_MASK; 135 fib->aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8;
136 fib->aisbo = zdev->aisb & 63;
181 137
182 rc = s390pci_mod_fc(req, fib); 138 rc = zpci_mod_fc(req, fib);
183 pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi); 139 pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi);
184 140
185 free_page((unsigned long) fib); 141 free_page((unsigned long) fib);
@@ -209,7 +165,7 @@ static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args
209 fib->iota = args->iota; 165 fib->iota = args->iota;
210 fib->fmb_addr = args->fmb_addr; 166 fib->fmb_addr = args->fmb_addr;
211 167
212 rc = s390pci_mod_fc(req, fib); 168 rc = zpci_mod_fc(req, fib);
213 free_page((unsigned long) fib); 169 free_page((unsigned long) fib);
214 return rc; 170 return rc;
215} 171}
@@ -234,7 +190,7 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
234} 190}
235 191
236/* Modify PCI: Unregister adapter interruptions */ 192/* Modify PCI: Unregister adapter interruptions */
237static int zpci_unregister_airq(struct zpci_dev *zdev) 193static int zpci_clear_airq(struct zpci_dev *zdev)
238{ 194{
239 struct mod_pci_args args = { 0, 0, 0, 0 }; 195 struct mod_pci_args args = { 0, 0, 0, 0 };
240 196
@@ -283,7 +239,7 @@ static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
283 u64 data; 239 u64 data;
284 int rc; 240 int rc;
285 241
286 rc = s390pci_load(&data, req, offset); 242 rc = zpci_load(&data, req, offset);
287 if (!rc) { 243 if (!rc) {
288 data = data << ((8 - len) * 8); 244 data = data << ((8 - len) * 8);
289 data = le64_to_cpu(data); 245 data = le64_to_cpu(data);
@@ -301,25 +257,46 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
301 257
302 data = cpu_to_le64(data); 258 data = cpu_to_le64(data);
303 data = data >> ((8 - len) * 8); 259 data = data >> ((8 - len) * 8);
304 rc = s390pci_store(data, req, offset); 260 rc = zpci_store(data, req, offset);
305 return rc; 261 return rc;
306} 262}
307 263
308void enable_irq(unsigned int irq) 264static int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag)
265{
266 int offset, pos;
267 u32 mask_bits;
268
269 if (msi->msi_attrib.is_msix) {
270 offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
271 PCI_MSIX_ENTRY_VECTOR_CTRL;
272 msi->masked = readl(msi->mask_base + offset);
273 writel(flag, msi->mask_base + offset);
274 } else if (msi->msi_attrib.maskbit) {
275 pos = (long) msi->mask_base;
276 pci_read_config_dword(msi->dev, pos, &mask_bits);
277 mask_bits &= ~(mask);
278 mask_bits |= flag & mask;
279 pci_write_config_dword(msi->dev, pos, mask_bits);
280 } else
281 return 0;
282
283 msi->msi_attrib.maskbit = !!flag;
284 return 1;
285}
286
287static void zpci_enable_irq(struct irq_data *data)
309{ 288{
310 struct msi_desc *msi = irq_get_msi_desc(irq); 289 struct msi_desc *msi = irq_get_msi_desc(data->irq);
311 290
312 zpci_msi_set_mask_bits(msi, 1, 0); 291 zpci_msi_set_mask_bits(msi, 1, 0);
313} 292}
314EXPORT_SYMBOL_GPL(enable_irq);
315 293
316void disable_irq(unsigned int irq) 294static void zpci_disable_irq(struct irq_data *data)
317{ 295{
318 struct msi_desc *msi = irq_get_msi_desc(irq); 296 struct msi_desc *msi = irq_get_msi_desc(data->irq);
319 297
320 zpci_msi_set_mask_bits(msi, 1, 1); 298 zpci_msi_set_mask_bits(msi, 1, 1);
321} 299}
322EXPORT_SYMBOL_GPL(disable_irq);
323 300
324void pcibios_fixup_bus(struct pci_bus *bus) 301void pcibios_fixup_bus(struct pci_bus *bus)
325{ 302{
@@ -404,152 +381,147 @@ static struct pci_ops pci_root_ops = {
404 .write = pci_write, 381 .write = pci_write,
405}; 382};
406 383
407/* store the last handled bit to implement fair scheduling of devices */
408static DEFINE_PER_CPU(unsigned long, next_sbit);
409
410static void zpci_irq_handler(struct airq_struct *airq) 384static void zpci_irq_handler(struct airq_struct *airq)
411{ 385{
412 unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit); 386 unsigned long si, ai;
413 int rescan = 0, max = aisb_max; 387 struct airq_iv *aibv;
414 struct zdev_irq_map *imap; 388 int irqs_on = 0;
415 389
416 inc_irq_stat(IRQIO_PCI); 390 inc_irq_stat(IRQIO_PCI);
417 sbit = start; 391 for (si = 0;;) {
418 392 /* Scan adapter summary indicator bit vector */
419scan: 393 si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv));
420 /* find summary_bit */ 394 if (si == -1UL) {
421 for_each_set_bit_left_cont(sbit, bucket->aisb, max) { 395 if (irqs_on++)
422 clear_bit(63 - (sbit & 63), bucket->aisb + (sbit >> 6)); 396 /* End of second scan with interrupts on. */
423 last = sbit; 397 break;
398 /* First scan complete, reenable interrupts. */
399 zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
400 si = 0;
401 continue;
402 }
424 403
425 /* find vector bit */ 404 /* Scan the adapter interrupt vector for this device. */
426 imap = bucket->imap[sbit]; 405 aibv = zpci_aibv[si];
427 for_each_set_bit_left(mbit, &imap->aibv, imap->msi_vecs) { 406 for (ai = 0;;) {
407 ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv));
408 if (ai == -1UL)
409 break;
428 inc_irq_stat(IRQIO_MSI); 410 inc_irq_stat(IRQIO_MSI);
429 clear_bit(63 - mbit, &imap->aibv); 411 airq_iv_lock(aibv, ai);
430 412 generic_handle_irq(airq_iv_get_data(aibv, ai));
431 spin_lock(&imap->lock); 413 airq_iv_unlock(aibv, ai);
432 if (imap->cb[mbit].handler)
433 imap->cb[mbit].handler(mbit,
434 imap->cb[mbit].data);
435 spin_unlock(&imap->lock);
436 } 414 }
437 } 415 }
438
439 if (rescan)
440 goto out;
441
442 /* scan the skipped bits */
443 if (start > 0) {
444 sbit = 0;
445 max = start;
446 start = 0;
447 goto scan;
448 }
449
450 /* enable interrupts again */
451 set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
452
453 /* check again to not lose initiative */
454 rmb();
455 max = aisb_max;
456 sbit = find_first_bit_left(bucket->aisb, max);
457 if (sbit != max) {
458 rescan++;
459 goto scan;
460 }
461out:
462 /* store next device bit to scan */
463 __get_cpu_var(next_sbit) = (++last >= aisb_max) ? 0 : last;
464} 416}
465 417
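[The rewritten handler replaces the per-cpu fair-scheduling scan with airq_iv_scan() and a two-pass loop: scan until the summary vector is empty, re-enable the ISC, then scan once more so a bit raised in that window is not lost. A toy userspace model of just that control flow; scan() stands in for airq_iv_scan(), which likewise clears the bit it returns:

#include <stdio.h>

static unsigned long scan(unsigned long *bv, unsigned long start,
			  unsigned long end)
{
	for (unsigned long i = start; i < end; i++)
		if (*bv & (1UL << i)) {
			*bv &= ~(1UL << i);
			return i;
		}
	return -1UL;
}

int main(void)
{
	unsigned long bv = 0x29; /* bits 0, 3 and 5 pending */
	int irqs_on = 0;

	for (unsigned long si = 0;;) {
		si = scan(&bv, si, 64);
		if (si == -1UL) {
			if (irqs_on++)
				break;  /* second empty pass: done */
			/* real code re-enables the ISC here */
			si = 0;
			continue;
		}
		printf("handle summary bit %lu\n", si);
	}
	return 0;
}
]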
466/* msi_vecs - number of requested interrupts, 0 place function to error state */ 418int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
467static int zpci_setup_msi(struct pci_dev *pdev, int msi_vecs)
468{ 419{
469 struct zpci_dev *zdev = get_zdev(pdev); 420 struct zpci_dev *zdev = get_zdev(pdev);
470 unsigned int aisb, msi_nr; 421 unsigned int hwirq, irq, msi_vecs;
422 unsigned long aisb;
471 struct msi_desc *msi; 423 struct msi_desc *msi;
424 struct msi_msg msg;
472 int rc; 425 int rc;
473 426
474 /* store the number of used MSI vectors */ 427 pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec);
475 zdev->irq_map->msi_vecs = min(msi_vecs, ZPCI_NR_MSI_VECS); 428 if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
476 429 return -EINVAL;
477 spin_lock(&bucket->lock); 430 msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX);
478 aisb = find_first_zero_bit(bucket->alloc, PAGE_SIZE); 431 msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI);
479 /* alloc map exhausted? */
480 if (aisb == PAGE_SIZE) {
481 spin_unlock(&bucket->lock);
482 return -EIO;
483 }
484 set_bit(aisb, bucket->alloc);
485 spin_unlock(&bucket->lock);
486 432
433 /* Allocate adapter summary indicator bit */
434 rc = -EIO;
435 aisb = airq_iv_alloc_bit(zpci_aisb_iv);
436 if (aisb == -1UL)
437 goto out;
487 zdev->aisb = aisb; 438 zdev->aisb = aisb;
488 if (aisb + 1 > aisb_max)
489 aisb_max = aisb + 1;
490 439
491 /* wire up IRQ shortcut pointer */ 440 /* Create adapter interrupt vector */
492 bucket->imap[zdev->aisb] = zdev->irq_map; 441 rc = -ENOMEM;
493 pr_debug("%s: imap[%u] linked to %p\n", __func__, zdev->aisb, zdev->irq_map); 442 zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
443 if (!zdev->aibv)
444 goto out_si;
494 445
495 /* TODO: irq number 0 wont be found if we return less than requested MSIs. 446 /* Wire up shortcut pointer */
496 * ignore it for now and fix in common code. 447 zpci_aibv[aisb] = zdev->aibv;
497 */
498 msi_nr = aisb << ZPCI_MSI_VEC_BITS;
499 448
449 /* Request MSI interrupts */
450 hwirq = 0;
500 list_for_each_entry(msi, &pdev->msi_list, list) { 451 list_for_each_entry(msi, &pdev->msi_list, list) {
501 rc = zpci_setup_msi_irq(zdev, msi, msi_nr, 452 rc = -EIO;
502 aisb << ZPCI_MSI_VEC_BITS); 453 irq = irq_alloc_desc(0); /* Alloc irq on node 0 */
454 if (irq == NO_IRQ)
455 goto out_msi;
456 rc = irq_set_msi_desc(irq, msi);
503 if (rc) 457 if (rc)
504 return rc; 458 goto out_msi;
505 msi_nr++; 459 irq_set_chip_and_handler(irq, &zpci_irq_chip,
460 handle_simple_irq);
461 msg.data = hwirq;
462 msg.address_lo = zdev->msi_addr & 0xffffffff;
463 msg.address_hi = zdev->msi_addr >> 32;
464 write_msi_msg(irq, &msg);
465 airq_iv_set_data(zdev->aibv, hwirq, irq);
466 hwirq++;
506 } 467 }
507 468
508 rc = zpci_register_airq(zdev, aisb, (u64) &zdev->irq_map->aibv); 469 /* Enable adapter interrupts */
509 if (rc) { 470 rc = zpci_set_airq(zdev);
510 clear_bit(aisb, bucket->alloc); 471 if (rc)
511 dev_err(&pdev->dev, "register MSI failed with: %d\n", rc); 472 goto out_msi;
512 return rc; 473
474 return (msi_vecs == nvec) ? 0 : msi_vecs;
475
476out_msi:
477 list_for_each_entry(msi, &pdev->msi_list, list) {
478 if (hwirq-- == 0)
479 break;
480 irq_set_msi_desc(msi->irq, NULL);
481 irq_free_desc(msi->irq);
482 msi->msg.address_lo = 0;
483 msi->msg.address_hi = 0;
484 msi->msg.data = 0;
485 msi->irq = 0;
513 } 486 }
514 return (zdev->irq_map->msi_vecs == msi_vecs) ? 487 zpci_aibv[aisb] = NULL;
515 0 : zdev->irq_map->msi_vecs; 488 airq_iv_release(zdev->aibv);
489out_si:
490 airq_iv_free_bit(zpci_aisb_iv, aisb);
491out:
492 dev_err(&pdev->dev, "register MSI failed with: %d\n", rc);
493 return rc;
516} 494}
517 495
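[Every vector programmed above gets the same 64-bit zdev->msi_addr split across address_hi/address_lo, while the running vector number hwirq doubles as msg.data and as the bit index into the per-device aibv. For illustration; the address below is made up:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t msi_addr = 0x900000001234000ULL; /* hypothetical value */
	uint32_t hwirq = 5; /* msg.data and aibv bit index */

	printf("address_hi=%08x address_lo=%08x data=%u\n",
	       (unsigned int)(msi_addr >> 32),
	       (unsigned int)(msi_addr & 0xffffffff), hwirq);
	return 0;
}
]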
518static void zpci_teardown_msi(struct pci_dev *pdev) 496void arch_teardown_msi_irqs(struct pci_dev *pdev)
519{ 497{
520 struct zpci_dev *zdev = get_zdev(pdev); 498 struct zpci_dev *zdev = get_zdev(pdev);
521 struct msi_desc *msi; 499 struct msi_desc *msi;
522 int aisb, rc; 500 int rc;
523 501
524 rc = zpci_unregister_airq(zdev); 502 pr_info("%s: on pdev: %p\n", __func__, pdev);
503
504 /* Disable adapter interrupts */
505 rc = zpci_clear_airq(zdev);
525 if (rc) { 506 if (rc) {
526 dev_err(&pdev->dev, "deregister MSI failed with: %d\n", rc); 507 dev_err(&pdev->dev, "deregister MSI failed with: %d\n", rc);
527 return; 508 return;
528 } 509 }
529 510
530 msi = list_first_entry(&pdev->msi_list, struct msi_desc, list); 511 /* Release MSI interrupts */
531 aisb = irq_to_dev_nr(msi->irq); 512 list_for_each_entry(msi, &pdev->msi_list, list) {
532 513 zpci_msi_set_mask_bits(msi, 1, 1);
533 list_for_each_entry(msi, &pdev->msi_list, list) 514 irq_set_msi_desc(msi->irq, NULL);
534 zpci_teardown_msi_irq(zdev, msi); 515 irq_free_desc(msi->irq);
535 516 msi->msg.address_lo = 0;
536 clear_bit(aisb, bucket->alloc); 517 msi->msg.address_hi = 0;
537 if (aisb + 1 == aisb_max) 518 msi->msg.data = 0;
538 aisb_max--; 519 msi->irq = 0;
539} 520 }
540
541int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
542{
543 pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec);
544 if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI)
545 return -EINVAL;
546 return zpci_setup_msi(pdev, nvec);
547}
548 521
549void arch_teardown_msi_irqs(struct pci_dev *pdev) 522 zpci_aibv[zdev->aisb] = NULL;
550{ 523 airq_iv_release(zdev->aibv);
551 pr_info("%s: on pdev: %p\n", __func__, pdev); 524 airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
552 zpci_teardown_msi(pdev);
553} 525}
554 526
555static void zpci_map_resources(struct zpci_dev *zdev) 527static void zpci_map_resources(struct zpci_dev *zdev)
@@ -564,8 +536,6 @@ static void zpci_map_resources(struct zpci_dev *zdev)
564 continue; 536 continue;
565 pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0); 537 pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0);
566 pdev->resource[i].end = pdev->resource[i].start + len - 1; 538 pdev->resource[i].end = pdev->resource[i].start + len - 1;
567 pr_debug("BAR%i: -> start: %Lx end: %Lx\n",
568 i, pdev->resource[i].start, pdev->resource[i].end);
569 } 539 }
570} 540}
571 541
@@ -589,162 +559,47 @@ struct zpci_dev *zpci_alloc_device(void)
589 559
590 /* Alloc memory for our private pci device data */ 560 /* Alloc memory for our private pci device data */
591 zdev = kzalloc(sizeof(*zdev), GFP_KERNEL); 561 zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
592 if (!zdev) 562 return zdev ? : ERR_PTR(-ENOMEM);
593 return ERR_PTR(-ENOMEM);
594
595 /* Alloc aibv & callback space */
596 zdev->irq_map = kmem_cache_zalloc(zdev_irq_cache, GFP_KERNEL);
597 if (!zdev->irq_map)
598 goto error;
599 WARN_ON((u64) zdev->irq_map & 0xff);
600 return zdev;
601
602error:
603 kfree(zdev);
604 return ERR_PTR(-ENOMEM);
605} 563}
606 564
607void zpci_free_device(struct zpci_dev *zdev) 565void zpci_free_device(struct zpci_dev *zdev)
608{ 566{
609 kmem_cache_free(zdev_irq_cache, zdev->irq_map);
610 kfree(zdev); 567 kfree(zdev);
611} 568}
612 569
613/*
614 * Too late for any s390 specific setup, since interrupts must be set up
615 * already which requires DMA setup too and the pci scan will access the
616 * config space, which only works if the function handle is enabled.
617 */
618int pcibios_enable_device(struct pci_dev *pdev, int mask)
619{
620 struct resource *res;
621 u16 cmd;
622 int i;
623
624 pci_read_config_word(pdev, PCI_COMMAND, &cmd);
625
626 for (i = 0; i < PCI_BAR_COUNT; i++) {
627 res = &pdev->resource[i];
628
629 if (res->flags & IORESOURCE_IO)
630 return -EINVAL;
631
632 if (res->flags & IORESOURCE_MEM)
633 cmd |= PCI_COMMAND_MEMORY;
634 }
635 pci_write_config_word(pdev, PCI_COMMAND, cmd);
636 return 0;
637}
638
639int pcibios_add_platform_entries(struct pci_dev *pdev) 570int pcibios_add_platform_entries(struct pci_dev *pdev)
640{ 571{
641 return zpci_sysfs_add_device(&pdev->dev); 572 return zpci_sysfs_add_device(&pdev->dev);
642} 573}
643 574
644int zpci_request_irq(unsigned int irq, irq_handler_t handler, void *data)
645{
646 int msi_nr = irq_to_msi_nr(irq);
647 struct zdev_irq_map *imap;
648 struct msi_desc *msi;
649
650 msi = irq_get_msi_desc(irq);
651 if (!msi)
652 return -EIO;
653
654 imap = get_imap(irq);
655 spin_lock_init(&imap->lock);
656
657 pr_debug("%s: register handler for IRQ:MSI %d:%d\n", __func__, irq >> 6, msi_nr);
658 imap->cb[msi_nr].handler = handler;
659 imap->cb[msi_nr].data = data;
660
661 /*
662 * The generic MSI code returns with the interrupt disabled on the
663 * card, using the MSI mask bits. Firmware doesn't appear to unmask
664 * at that level, so we do it here by hand.
665 */
666 zpci_msi_set_mask_bits(msi, 1, 0);
667 return 0;
668}
669
670void zpci_free_irq(unsigned int irq)
671{
672 struct zdev_irq_map *imap = get_imap(irq);
673 int msi_nr = irq_to_msi_nr(irq);
674 unsigned long flags;
675
676 pr_debug("%s: for irq: %d\n", __func__, irq);
677
678 spin_lock_irqsave(&imap->lock, flags);
679 imap->cb[msi_nr].handler = NULL;
680 imap->cb[msi_nr].data = NULL;
681 spin_unlock_irqrestore(&imap->lock, flags);
682}
683
684int request_irq(unsigned int irq, irq_handler_t handler,
685 unsigned long irqflags, const char *devname, void *dev_id)
686{
687 pr_debug("%s: irq: %d handler: %p flags: %lx dev: %s\n",
688 __func__, irq, handler, irqflags, devname);
689
690 return zpci_request_irq(irq, handler, dev_id);
691}
692EXPORT_SYMBOL_GPL(request_irq);
693
694void free_irq(unsigned int irq, void *dev_id)
695{
696 zpci_free_irq(irq);
697}
698EXPORT_SYMBOL_GPL(free_irq);
699
700static int __init zpci_irq_init(void) 575static int __init zpci_irq_init(void)
701{ 576{
702 int cpu, rc; 577 int rc;
703
704 bucket = kzalloc(sizeof(*bucket), GFP_KERNEL);
705 if (!bucket)
706 return -ENOMEM;
707
708 bucket->aisb = (unsigned long *) get_zeroed_page(GFP_KERNEL);
709 if (!bucket->aisb) {
710 rc = -ENOMEM;
711 goto out_aisb;
712 }
713
714 bucket->alloc = (unsigned long *) get_zeroed_page(GFP_KERNEL);
715 if (!bucket->alloc) {
716 rc = -ENOMEM;
717 goto out_alloc;
718 }
719 578
720 rc = register_adapter_interrupt(&zpci_airq); 579 rc = register_adapter_interrupt(&zpci_airq);
721 if (rc) 580 if (rc)
722 goto out_ai; 581 goto out;
723 /* Set summary to 1 to be called every time for the ISC. */ 582 /* Set summary to 1 to be called every time for the ISC. */
724 *zpci_airq.lsi_ptr = 1; 583 *zpci_airq.lsi_ptr = 1;
725 584
726 for_each_online_cpu(cpu) 585 rc = -ENOMEM;
727 per_cpu(next_sbit, cpu) = 0; 586 zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
587 if (!zpci_aisb_iv)
588 goto out_airq;
728 589
729 spin_lock_init(&bucket->lock); 590 zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
730 set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
731 return 0; 591 return 0;
732 592
733out_ai: 593out_airq:
734 free_page((unsigned long) bucket->alloc); 594 unregister_adapter_interrupt(&zpci_airq);
735out_alloc: 595out:
736 free_page((unsigned long) bucket->aisb);
737out_aisb:
738 kfree(bucket);
739 return rc; 596 return rc;
740} 597}
741 598
742static void zpci_irq_exit(void) 599static void zpci_irq_exit(void)
743{ 600{
744 free_page((unsigned long) bucket->alloc); 601 airq_iv_release(zpci_aisb_iv);
745 free_page((unsigned long) bucket->aisb);
746 unregister_adapter_interrupt(&zpci_airq); 602 unregister_adapter_interrupt(&zpci_airq);
747 kfree(bucket);
748} 603}
749 604
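[For readers new to the airq_iv API this series introduces, the roles of the calls used in this file, inferred from the call sites above (see asm/airq.h in the diffstat):

/*
 *   iv = airq_iv_create(bits, AIRQ_IV_ALLOC);   interrupt vector with an
 *                                               allocation bitmap
 *   bit = airq_iv_alloc_bit(iv);                returns -1UL when full
 *   airq_iv_scan(iv, start, airq_iv_end(iv));   find and clear next set bit
 *   airq_iv_set_data()/airq_iv_get_data();      per-bit data (AIRQ_IV_DATA)
 *   airq_iv_lock()/airq_iv_unlock();            per-bit lock (AIRQ_IV_BITLOCK)
 *   airq_iv_release(iv);                        undo airq_iv_create()
 */
]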
750static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size, 605static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size,
@@ -801,16 +656,49 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
801int pcibios_add_device(struct pci_dev *pdev) 656int pcibios_add_device(struct pci_dev *pdev)
802{ 657{
803 struct zpci_dev *zdev = get_zdev(pdev); 658 struct zpci_dev *zdev = get_zdev(pdev);
659 struct resource *res;
660 int i;
661
662 zdev->pdev = pdev;
663 zpci_map_resources(zdev);
664
665 for (i = 0; i < PCI_BAR_COUNT; i++) {
666 res = &pdev->resource[i];
667 if (res->parent || !res->flags)
668 continue;
669 pci_claim_resource(pdev, i);
670 }
671
672 return 0;
673}
674
675int pcibios_enable_device(struct pci_dev *pdev, int mask)
676{
677 struct zpci_dev *zdev = get_zdev(pdev);
678 struct resource *res;
679 u16 cmd;
680 int i;
804 681
805 zdev->pdev = pdev; 682 zdev->pdev = pdev;
806 zpci_debug_init_device(zdev); 683 zpci_debug_init_device(zdev);
807 zpci_fmb_enable_device(zdev); 684 zpci_fmb_enable_device(zdev);
808 zpci_map_resources(zdev); 685 zpci_map_resources(zdev);
809 686
687 pci_read_config_word(pdev, PCI_COMMAND, &cmd);
688 for (i = 0; i < PCI_BAR_COUNT; i++) {
689 res = &pdev->resource[i];
690
691 if (res->flags & IORESOURCE_IO)
692 return -EINVAL;
693
694 if (res->flags & IORESOURCE_MEM)
695 cmd |= PCI_COMMAND_MEMORY;
696 }
697 pci_write_config_word(pdev, PCI_COMMAND, cmd);
810 return 0; 698 return 0;
811} 699}
812 700
813void pcibios_release_device(struct pci_dev *pdev) 701void pcibios_disable_device(struct pci_dev *pdev)
814{ 702{
815 struct zpci_dev *zdev = get_zdev(pdev); 703 struct zpci_dev *zdev = get_zdev(pdev);
816 704
@@ -898,6 +786,8 @@ int zpci_enable_device(struct zpci_dev *zdev)
898 rc = zpci_dma_init_device(zdev); 786 rc = zpci_dma_init_device(zdev);
899 if (rc) 787 if (rc)
900 goto out_dma; 788 goto out_dma;
789
790 zdev->state = ZPCI_FN_STATE_ONLINE;
901 return 0; 791 return 0;
902 792
903out_dma: 793out_dma:
@@ -926,18 +816,16 @@ int zpci_create_device(struct zpci_dev *zdev)
926 rc = zpci_enable_device(zdev); 816 rc = zpci_enable_device(zdev);
927 if (rc) 817 if (rc)
928 goto out_free; 818 goto out_free;
929
930 zdev->state = ZPCI_FN_STATE_ONLINE;
931 } 819 }
932 rc = zpci_scan_bus(zdev); 820 rc = zpci_scan_bus(zdev);
933 if (rc) 821 if (rc)
934 goto out_disable; 822 goto out_disable;
935 823
936 mutex_lock(&zpci_list_lock); 824 spin_lock(&zpci_list_lock);
937 list_add_tail(&zdev->entry, &zpci_list); 825 list_add_tail(&zdev->entry, &zpci_list);
938 if (hotplug_ops) 826 spin_unlock(&zpci_list_lock);
939 hotplug_ops->create_slot(zdev); 827
940 mutex_unlock(&zpci_list_lock); 828 zpci_init_slot(zdev);
941 829
942 return 0; 830 return 0;
943 831
@@ -967,15 +855,10 @@ static inline int barsize(u8 size)
967 855
968static int zpci_mem_init(void) 856static int zpci_mem_init(void)
969{ 857{
970 zdev_irq_cache = kmem_cache_create("PCI_IRQ_cache", sizeof(struct zdev_irq_map),
971 L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, NULL);
972 if (!zdev_irq_cache)
973 goto error_zdev;
974
975 zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb), 858 zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
976 16, 0, NULL); 859 16, 0, NULL);
977 if (!zdev_fmb_cache) 860 if (!zdev_fmb_cache)
978 goto error_fmb; 861 goto error_zdev;
979 862
980 /* TODO: use realloc */ 863 /* TODO: use realloc */
981 zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start), 864 zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start),
@@ -986,8 +869,6 @@ static int zpci_mem_init(void)
986 869
987error_iomap: 870error_iomap:
988 kmem_cache_destroy(zdev_fmb_cache); 871 kmem_cache_destroy(zdev_fmb_cache);
989error_fmb:
990 kmem_cache_destroy(zdev_irq_cache);
991error_zdev: 872error_zdev:
992 return -ENOMEM; 873 return -ENOMEM;
993} 874}
@@ -995,28 +876,10 @@ error_zdev:
995static void zpci_mem_exit(void) 876static void zpci_mem_exit(void)
996{ 877{
997 kfree(zpci_iomap_start); 878 kfree(zpci_iomap_start);
998 kmem_cache_destroy(zdev_irq_cache);
999 kmem_cache_destroy(zdev_fmb_cache); 879 kmem_cache_destroy(zdev_fmb_cache);
1000} 880}
1001 881
1002void zpci_register_hp_ops(struct pci_hp_callback_ops *ops) 882static unsigned int s390_pci_probe;
1003{
1004 mutex_lock(&zpci_list_lock);
1005 hotplug_ops = ops;
1006 mutex_unlock(&zpci_list_lock);
1007}
1008EXPORT_SYMBOL_GPL(zpci_register_hp_ops);
1009
1010void zpci_deregister_hp_ops(void)
1011{
1012 mutex_lock(&zpci_list_lock);
1013 hotplug_ops = NULL;
1014 mutex_unlock(&zpci_list_lock);
1015}
1016EXPORT_SYMBOL_GPL(zpci_deregister_hp_ops);
1017
1018unsigned int s390_pci_probe;
1019EXPORT_SYMBOL_GPL(s390_pci_probe);
1020 883
1021char * __init pcibios_setup(char *str) 884char * __init pcibios_setup(char *str)
1022{ 885{
@@ -1044,16 +907,12 @@ static int __init pci_base_init(void)
1044 907
1045 rc = zpci_debug_init(); 908 rc = zpci_debug_init();
1046 if (rc) 909 if (rc)
1047 return rc; 910 goto out;
1048 911
1049 rc = zpci_mem_init(); 912 rc = zpci_mem_init();
1050 if (rc) 913 if (rc)
1051 goto out_mem; 914 goto out_mem;
1052 915
1053 rc = zpci_msihash_init();
1054 if (rc)
1055 goto out_hash;
1056
1057 rc = zpci_irq_init(); 916 rc = zpci_irq_init();
1058 if (rc) 917 if (rc)
1059 goto out_irq; 918 goto out_irq;
@@ -1062,7 +921,7 @@ static int __init pci_base_init(void)
1062 if (rc) 921 if (rc)
1063 goto out_dma; 922 goto out_dma;
1064 923
1065 rc = clp_find_pci_devices(); 924 rc = clp_scan_pci_devices();
1066 if (rc) 925 if (rc)
1067 goto out_find; 926 goto out_find;
1068 927
@@ -1073,11 +932,15 @@ out_find:
1073out_dma: 932out_dma:
1074 zpci_irq_exit(); 933 zpci_irq_exit();
1075out_irq: 934out_irq:
1076 zpci_msihash_exit();
1077out_hash:
1078 zpci_mem_exit(); 935 zpci_mem_exit();
1079out_mem: 936out_mem:
1080 zpci_debug_exit(); 937 zpci_debug_exit();
938out:
1081 return rc; 939 return rc;
1082} 940}
1083subsys_initcall(pci_base_init); 941subsys_initcall_sync(pci_base_init);
942
943void zpci_rescan(void)
944{
945 clp_rescan_pci_devices_simple();
946}
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 2e9539625d93..475563c3d1e4 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -36,9 +36,9 @@ static inline u8 clp_instr(void *data)
36 return cc; 36 return cc;
37} 37}
38 38
39static void *clp_alloc_block(void) 39static void *clp_alloc_block(gfp_t gfp_mask)
40{ 40{
41 return (void *) __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE)); 41 return (void *) __get_free_pages(gfp_mask, get_order(CLP_BLK_SIZE));
42} 42}
43 43
44static void clp_free_block(void *ptr) 44static void clp_free_block(void *ptr)
@@ -70,7 +70,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
70 struct clp_req_rsp_query_pci_grp *rrb; 70 struct clp_req_rsp_query_pci_grp *rrb;
71 int rc; 71 int rc;
72 72
73 rrb = clp_alloc_block(); 73 rrb = clp_alloc_block(GFP_KERNEL);
74 if (!rrb) 74 if (!rrb)
75 return -ENOMEM; 75 return -ENOMEM;
76 76
@@ -113,7 +113,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
113 struct clp_req_rsp_query_pci *rrb; 113 struct clp_req_rsp_query_pci *rrb;
114 int rc; 114 int rc;
115 115
116 rrb = clp_alloc_block(); 116 rrb = clp_alloc_block(GFP_KERNEL);
117 if (!rrb) 117 if (!rrb)
118 return -ENOMEM; 118 return -ENOMEM;
119 119
@@ -179,9 +179,9 @@ error:
179static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) 179static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
180{ 180{
181 struct clp_req_rsp_set_pci *rrb; 181 struct clp_req_rsp_set_pci *rrb;
182 int rc, retries = 1000; 182 int rc, retries = 100;
183 183
184 rrb = clp_alloc_block(); 184 rrb = clp_alloc_block(GFP_KERNEL);
185 if (!rrb) 185 if (!rrb)
186 return -ENOMEM; 186 return -ENOMEM;
187 187
@@ -199,7 +199,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
199 retries--; 199 retries--;
200 if (retries < 0) 200 if (retries < 0)
201 break; 201 break;
202 msleep(1); 202 msleep(20);
203 } 203 }
204 } while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY); 204 } while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY);
205 205
@@ -245,49 +245,12 @@ int clp_disable_fh(struct zpci_dev *zdev)
245 return rc; 245 return rc;
246} 246}
247 247
248static void clp_check_pcifn_entry(struct clp_fh_list_entry *entry) 248static int clp_list_pci(struct clp_req_rsp_list_pci *rrb,
249 void (*cb)(struct clp_fh_list_entry *entry))
249{ 250{
250 int present, rc;
251
252 if (!entry->vendor_id)
253 return;
254
255 /* TODO: be a little bit more scalable */
256 present = zpci_fid_present(entry->fid);
257
258 if (present)
259 pr_debug("%s: device %x already present\n", __func__, entry->fid);
260
261 /* skip already used functions */
262 if (present && entry->config_state)
263 return;
264
265 /* aev 306: function moved to stand-by state */
266 if (present && !entry->config_state) {
267 /*
268 * The handle is already disabled, that means no iota/irq freeing via
269 * the firmware interfaces anymore. Need to free resources manually
270 * (DMA memory, debug, sysfs)...
271 */
272 zpci_stop_device(get_zdev_by_fid(entry->fid));
273 return;
274 }
275
276 rc = clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
277 if (rc)
278 pr_err("Failed to add fid: 0x%x\n", entry->fid);
279}
280
281int clp_find_pci_devices(void)
282{
283 struct clp_req_rsp_list_pci *rrb;
284 u64 resume_token = 0; 251 u64 resume_token = 0;
285 int entries, i, rc; 252 int entries, i, rc;
286 253
287 rrb = clp_alloc_block();
288 if (!rrb)
289 return -ENOMEM;
290
291 do { 254 do {
292 memset(rrb, 0, sizeof(*rrb)); 255 memset(rrb, 0, sizeof(*rrb));
293 rrb->request.hdr.len = sizeof(rrb->request); 256 rrb->request.hdr.len = sizeof(rrb->request);
@@ -316,12 +279,101 @@ int clp_find_pci_devices(void)
316 resume_token = rrb->response.resume_token; 279 resume_token = rrb->response.resume_token;
317 280
318 for (i = 0; i < entries; i++) 281 for (i = 0; i < entries; i++)
319 clp_check_pcifn_entry(&rrb->response.fh_list[i]); 282 cb(&rrb->response.fh_list[i]);
320 } while (resume_token); 283 } while (resume_token);
321 284
322 pr_debug("Maximum number of supported PCI functions: %u\n", 285 pr_debug("Maximum number of supported PCI functions: %u\n",
323 rrb->response.max_fn); 286 rrb->response.max_fn);
324out: 287out:
288 return rc;
289}
290
291static void __clp_add(struct clp_fh_list_entry *entry)
292{
293 if (!entry->vendor_id)
294 return;
295
296 clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
297}
298
299static void __clp_rescan(struct clp_fh_list_entry *entry)
300{
301 struct zpci_dev *zdev;
302
303 if (!entry->vendor_id)
304 return;
305
306 zdev = get_zdev_by_fid(entry->fid);
307 if (!zdev) {
308 clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
309 return;
310 }
311
312 if (!entry->config_state) {
313 /*
314 * The handle is already disabled, that means no iota/irq freeing via
315 * the firmware interfaces anymore. Need to free resources manually
316 * (DMA memory, debug, sysfs)...
317 */
318 zpci_stop_device(zdev);
319 }
320}
321
322static void __clp_update(struct clp_fh_list_entry *entry)
323{
324 struct zpci_dev *zdev;
325
326 if (!entry->vendor_id)
327 return;
328
329 zdev = get_zdev_by_fid(entry->fid);
330 if (!zdev)
331 return;
332
333 zdev->fh = entry->fh;
334}
335
336int clp_scan_pci_devices(void)
337{
338 struct clp_req_rsp_list_pci *rrb;
339 int rc;
340
341 rrb = clp_alloc_block(GFP_KERNEL);
342 if (!rrb)
343 return -ENOMEM;
344
345 rc = clp_list_pci(rrb, __clp_add);
346
347 clp_free_block(rrb);
348 return rc;
349}
350
351int clp_rescan_pci_devices(void)
352{
353 struct clp_req_rsp_list_pci *rrb;
354 int rc;
355
356 rrb = clp_alloc_block(GFP_KERNEL);
357 if (!rrb)
358 return -ENOMEM;
359
360 rc = clp_list_pci(rrb, __clp_rescan);
361
362 clp_free_block(rrb);
363 return rc;
364}
365
366int clp_rescan_pci_devices_simple(void)
367{
368 struct clp_req_rsp_list_pci *rrb;
369 int rc;
370
371 rrb = clp_alloc_block(GFP_NOWAIT);
372 if (!rrb)
373 return -ENOMEM;
374
375 rc = clp_list_pci(rrb, __clp_update);
376
325 clp_free_block(rrb); 377 clp_free_block(rrb);
326 return rc; 378 return rc;
327} 379}
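The pci_clp.c rework above factors the resume-token paging loop into clp_list_pci() and parameterizes it with a per-entry callback, so clp_scan_pci_devices(), clp_rescan_pci_devices() and clp_rescan_pci_devices_simple() differ only in the callback they pass and the allocation flags they use (GFP_NOWAIT for the simple variant, presumably so it can be called from contexts that must not sleep). A standalone C sketch of the same control flow; fake_query() and the entry layout are invented for illustration, only the loop structure mirrors the kernel code:

/*
 * One paging loop driven by a resume token, parameterized by a
 * per-entry callback, as in clp_list_pci() above.
 */
#include <stdio.h>
#include <stdint.h>

struct entry { uint32_t fid; };

/* Pretend firmware call: fills up to 2 entries, updates *resume. */
static int fake_query(uint64_t *resume, struct entry *buf, int *n)
{
	static const struct entry all[] = { {1}, {2}, {3}, {4}, {5} };
	int i, off = (int)*resume;

	for (i = 0; i < 2 && off + i < 5; i++)
		buf[i] = all[off + i];
	*n = i;
	*resume = (off + i < 5) ? (uint64_t)(off + i) : 0;
	return 0;
}

static int list_devices(void (*cb)(struct entry *))
{
	struct entry buf[2];
	uint64_t resume = 0;
	int n, i, rc;

	do {
		rc = fake_query(&resume, buf, &n);
		if (rc)
			return rc;
		for (i = 0; i < n; i++)
			cb(&buf[i]);
	} while (resume);
	return 0;
}

static void print_entry(struct entry *e)
{
	printf("fid %u\n", e->fid);
}

int main(void)
{
	return list_devices(print_entry);
}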
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index a2343c1f6e04..7e5573acb063 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -10,6 +10,7 @@
10#include <linux/export.h> 10#include <linux/export.h>
11#include <linux/iommu-helper.h> 11#include <linux/iommu-helper.h>
12#include <linux/dma-mapping.h> 12#include <linux/dma-mapping.h>
13#include <linux/vmalloc.h>
13#include <linux/pci.h> 14#include <linux/pci.h>
14#include <asm/pci_dma.h> 15#include <asm/pci_dma.h>
15 16
@@ -170,8 +171,8 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
170 */ 171 */
171 goto no_refresh; 172 goto no_refresh;
172 173
173 rc = s390pci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, 174 rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
174 nr_pages * PAGE_SIZE); 175 nr_pages * PAGE_SIZE);
175 176
176no_refresh: 177no_refresh:
177 spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); 178 spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
@@ -407,7 +408,6 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
407 408
408int zpci_dma_init_device(struct zpci_dev *zdev) 409int zpci_dma_init_device(struct zpci_dev *zdev)
409{ 410{
410 unsigned int bitmap_order;
411 int rc; 411 int rc;
412 412
413 spin_lock_init(&zdev->iommu_bitmap_lock); 413 spin_lock_init(&zdev->iommu_bitmap_lock);
@@ -421,12 +421,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
421 421
422 zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET; 422 zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET;
423 zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; 423 zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
424 bitmap_order = get_order(zdev->iommu_pages / 8); 424 zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
425 pr_info("iommu_size: 0x%lx iommu_pages: 0x%lx bitmap_order: %i\n",
426 zdev->iommu_size, zdev->iommu_pages, bitmap_order);
427
428 zdev->iommu_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
429 bitmap_order);
430 if (!zdev->iommu_bitmap) { 425 if (!zdev->iommu_bitmap) {
431 rc = -ENOMEM; 426 rc = -ENOMEM;
432 goto out_reg; 427 goto out_reg;
@@ -451,8 +446,7 @@ void zpci_dma_exit_device(struct zpci_dev *zdev)
451{ 446{
452 zpci_unregister_ioat(zdev, 0); 447 zpci_unregister_ioat(zdev, 0);
453 dma_cleanup_tables(zdev); 448 dma_cleanup_tables(zdev);
454 free_pages((unsigned long) zdev->iommu_bitmap, 449 vfree(zdev->iommu_bitmap);
455 get_order(zdev->iommu_pages / 8));
456 zdev->iommu_bitmap = NULL; 450 zdev->iommu_bitmap = NULL;
457 zdev->next_bit = 0; 451 zdev->next_bit = 0;
458} 452}
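The pci_dma.c hunk above swaps a page-allocator request for the IOMMU bitmap with vzalloc(). The bitmap holds one bit per page of an address space sized from high_memory, so on large machines the request grows well past the order at which physically contiguous memory is reliably available; vzalloc() needs only virtually contiguous, zeroed pages. A back-of-the-envelope sketch with illustrative sizes:

/*
 * The bitmap needs one bit per 4 KiB page of DMA address space.
 * For an illustrative 64 GiB space that is 2 MiB, i.e. an order-9
 * request (512 physically contiguous pages) under
 * __get_free_pages(), exactly the kind of allocation that fails
 * once memory is fragmented.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long iommu_size = 64ULL << 30;	/* 64 GiB, illustrative */
	unsigned long long pages = iommu_size >> 12;	/* 4 KiB pages */
	unsigned long long bitmap_bytes = pages / 8;	/* one bit per page */

	printf("pages: %llu  bitmap: %llu KiB\n", pages, bitmap_bytes >> 10);
	return 0;
}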
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index ec62e3a0dc09..0aecaf954845 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -69,7 +69,7 @@ static void zpci_event_log_avail(struct zpci_ccdf_avail *ccdf)
69 clp_add_pci_device(ccdf->fid, ccdf->fh, 0); 69 clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
70 break; 70 break;
71 case 0x0306: 71 case 0x0306:
72 clp_find_pci_devices(); 72 clp_rescan_pci_devices();
73 break; 73 break;
74 default: 74 default:
75 break; 75 break;
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index 22eeb9d7ffeb..85267c058af8 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -27,7 +27,7 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
27 return cc; 27 return cc;
28} 28}
29 29
30int s390pci_mod_fc(u64 req, struct zpci_fib *fib) 30int zpci_mod_fc(u64 req, struct zpci_fib *fib)
31{ 31{
32 u8 cc, status; 32 u8 cc, status;
33 33
@@ -61,7 +61,7 @@ static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
61 return cc; 61 return cc;
62} 62}
63 63
64int s390pci_refresh_trans(u64 fn, u64 addr, u64 range) 64int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
65{ 65{
66 u8 cc, status; 66 u8 cc, status;
67 67
@@ -78,7 +78,7 @@ int s390pci_refresh_trans(u64 fn, u64 addr, u64 range)
78} 78}
79 79
80/* Set Interruption Controls */ 80/* Set Interruption Controls */
81void set_irq_ctrl(u16 ctl, char *unused, u8 isc) 81void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
82{ 82{
83 asm volatile ( 83 asm volatile (
84 " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n" 84 " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n"
@@ -109,7 +109,7 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
109 return cc; 109 return cc;
110} 110}
111 111
112int s390pci_load(u64 *data, u64 req, u64 offset) 112int zpci_load(u64 *data, u64 req, u64 offset)
113{ 113{
114 u8 status; 114 u8 status;
115 int cc; 115 int cc;
@@ -125,7 +125,7 @@ int s390pci_load(u64 *data, u64 req, u64 offset)
125 __func__, cc, status, req, offset); 125 __func__, cc, status, req, offset);
126 return (cc > 0) ? -EIO : cc; 126 return (cc > 0) ? -EIO : cc;
127} 127}
128EXPORT_SYMBOL_GPL(s390pci_load); 128EXPORT_SYMBOL_GPL(zpci_load);
129 129
130/* PCI Store */ 130/* PCI Store */
131static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) 131static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
@@ -147,7 +147,7 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
147 return cc; 147 return cc;
148} 148}
149 149
150int s390pci_store(u64 data, u64 req, u64 offset) 150int zpci_store(u64 data, u64 req, u64 offset)
151{ 151{
152 u8 status; 152 u8 status;
153 int cc; 153 int cc;
@@ -163,7 +163,7 @@ int s390pci_store(u64 data, u64 req, u64 offset)
163 __func__, cc, status, req, offset); 163 __func__, cc, status, req, offset);
164 return (cc > 0) ? -EIO : cc; 164 return (cc > 0) ? -EIO : cc;
165} 165}
166EXPORT_SYMBOL_GPL(s390pci_store); 166EXPORT_SYMBOL_GPL(zpci_store);
167 167
168/* PCI Store Block */ 168/* PCI Store Block */
169static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) 169static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
@@ -183,7 +183,7 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
183 return cc; 183 return cc;
184} 184}
185 185
186int s390pci_store_block(const u64 *data, u64 req, u64 offset) 186int zpci_store_block(const u64 *data, u64 req, u64 offset)
187{ 187{
188 u8 status; 188 u8 status;
189 int cc; 189 int cc;
@@ -199,4 +199,4 @@ int s390pci_store_block(const u64 *data, u64 req, u64 offset)
199 __func__, cc, status, req, offset); 199 __func__, cc, status, req, offset);
200 return (cc > 0) ? -EIO : cc; 200 return (cc > 0) ? -EIO : cc;
201} 201}
202EXPORT_SYMBOL_GPL(s390pci_store_block); 202EXPORT_SYMBOL_GPL(zpci_store_block);
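Beyond the s390pci_ to zpci_ renames, the pci_insn.c wrappers above share one convention: the inline assembly yields the instruction's condition code, and the wrapper reports a positive cc to callers as -EIO while passing everything else through. A toy model of that mapping; the cc values fed in below are invented for illustration:

/*
 * Condition-code convention from zpci_load()/zpci_store()/
 * zpci_store_block() above: positive cc becomes -EIO.
 */
#include <stdio.h>
#include <errno.h>

static int cc_to_rc(int cc)
{
	return (cc > 0) ? -EIO : cc;
}

int main(void)
{
	printf("cc=0 -> %d\n", cc_to_rc(0));	/* success */
	printf("cc=2 -> %d\n", cc_to_rc(2));	/* -EIO */
	return 0;
}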
diff --git a/arch/s390/pci/pci_msi.c b/arch/s390/pci/pci_msi.c
deleted file mode 100644
index b097aed05a9b..000000000000
--- a/arch/s390/pci/pci_msi.c
+++ /dev/null
@@ -1,142 +0,0 @@
1/*
2 * Copyright IBM Corp. 2012
3 *
4 * Author(s):
5 * Jan Glauber <jang@linux.vnet.ibm.com>
6 */
7
8#define COMPONENT "zPCI"
9#define pr_fmt(fmt) COMPONENT ": " fmt
10
11#include <linux/kernel.h>
12#include <linux/err.h>
13#include <linux/rculist.h>
14#include <linux/hash.h>
15#include <linux/pci.h>
16#include <linux/msi.h>
17#include <asm/hw_irq.h>
18
19/* mapping of irq numbers to msi_desc */
20static struct hlist_head *msi_hash;
21static const unsigned int msi_hash_bits = 8;
22#define MSI_HASH_BUCKETS (1U << msi_hash_bits)
23#define msi_hashfn(nr) hash_long(nr, msi_hash_bits)
24
25static DEFINE_SPINLOCK(msi_map_lock);
26
27struct msi_desc *__irq_get_msi_desc(unsigned int irq)
28{
29 struct msi_map *map;
30
31 hlist_for_each_entry_rcu(map,
32 &msi_hash[msi_hashfn(irq)], msi_chain)
33 if (map->irq == irq)
34 return map->msi;
35 return NULL;
36}
37
38int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag)
39{
40 if (msi->msi_attrib.is_msix) {
41 int offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
42 PCI_MSIX_ENTRY_VECTOR_CTRL;
43 msi->masked = readl(msi->mask_base + offset);
44 writel(flag, msi->mask_base + offset);
45 } else {
46 if (msi->msi_attrib.maskbit) {
47 int pos;
48 u32 mask_bits;
49
50 pos = (long) msi->mask_base;
51 pci_read_config_dword(msi->dev, pos, &mask_bits);
52 mask_bits &= ~(mask);
53 mask_bits |= flag & mask;
54 pci_write_config_dword(msi->dev, pos, mask_bits);
55 } else {
56 return 0;
57 }
58 }
59
60 msi->msi_attrib.maskbit = !!flag;
61 return 1;
62}
63
64int zpci_setup_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi,
65 unsigned int nr, int offset)
66{
67 struct msi_map *map;
68 struct msi_msg msg;
69 int rc;
70
71 map = kmalloc(sizeof(*map), GFP_KERNEL);
72 if (map == NULL)
73 return -ENOMEM;
74
75 map->irq = nr;
76 map->msi = msi;
77 zdev->msi_map[nr & ZPCI_MSI_MASK] = map;
78 INIT_HLIST_NODE(&map->msi_chain);
79
80 pr_debug("%s hashing irq: %u to bucket nr: %llu\n",
81 __func__, nr, msi_hashfn(nr));
82 hlist_add_head_rcu(&map->msi_chain, &msi_hash[msi_hashfn(nr)]);
83
84 spin_lock(&msi_map_lock);
85 rc = irq_set_msi_desc(nr, msi);
86 if (rc) {
87 spin_unlock(&msi_map_lock);
88 hlist_del_rcu(&map->msi_chain);
89 kfree(map);
90 zdev->msi_map[nr & ZPCI_MSI_MASK] = NULL;
91 return rc;
92 }
93 spin_unlock(&msi_map_lock);
94
95 msg.data = nr - offset;
96 msg.address_lo = zdev->msi_addr & 0xffffffff;
97 msg.address_hi = zdev->msi_addr >> 32;
98 write_msi_msg(nr, &msg);
99 return 0;
100}
101
102void zpci_teardown_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi)
103{
104 int irq = msi->irq & ZPCI_MSI_MASK;
105 struct msi_map *map;
106
107 msi->msg.address_lo = 0;
108 msi->msg.address_hi = 0;
109 msi->msg.data = 0;
110 msi->irq = 0;
111 zpci_msi_set_mask_bits(msi, 1, 1);
112
113 spin_lock(&msi_map_lock);
114 map = zdev->msi_map[irq];
115 hlist_del_rcu(&map->msi_chain);
116 kfree(map);
117 zdev->msi_map[irq] = NULL;
118 spin_unlock(&msi_map_lock);
119}
120
121/*
122 * The msi hash table has 256 entries which is good for 4..20
123 * devices (a typical device allocates 10 + CPUs MSI's). Maybe make
124 * the hash table size adjustable later.
125 */
126int __init zpci_msihash_init(void)
127{
128 unsigned int i;
129
130 msi_hash = kmalloc(MSI_HASH_BUCKETS * sizeof(*msi_hash), GFP_KERNEL);
131 if (!msi_hash)
132 return -ENOMEM;
133
134 for (i = 0; i < MSI_HASH_BUCKETS; i++)
135 INIT_HLIST_HEAD(&msi_hash[i]);
136 return 0;
137}
138
139void __init zpci_msihash_exit(void)
140{
141 kfree(msi_hash);
142}
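The deleted pci_msi.c kept an arch-private mapping from irq number to msi_desc: 256 hash buckets (hash_long over 8 bits) of RCU-protected hlists, populated in zpci_setup_msi_irq() and torn down in zpci_teardown_msi_irq(). Its removal suggests the generic irq layer now supplies that lookup once the interrupt rework lands. A userspace toy of the removed structure, with plain singly linked lists in place of RCU hlists and a multiplicative hash in place of hash_long():

/*
 * Fixed 256-bucket hash from irq number to a descriptor pointer,
 * modeling the table the deleted file maintained.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define HASH_BITS 8
#define BUCKETS (1U << HASH_BITS)

struct map {
	unsigned int irq;
	void *desc;		/* stand-in for struct msi_desc * */
	struct map *next;
};

static struct map *buckets[BUCKETS];

static unsigned int hashfn(unsigned int irq)
{
	return ((uint32_t)irq * 2654435761U) >> (32 - HASH_BITS);
}

static int add(unsigned int irq, void *desc)
{
	struct map *m = malloc(sizeof(*m));

	if (!m)
		return -1;
	m->irq = irq;
	m->desc = desc;
	m->next = buckets[hashfn(irq)];
	buckets[hashfn(irq)] = m;
	return 0;
}

static void *lookup(unsigned int irq)
{
	struct map *m;

	for (m = buckets[hashfn(irq)]; m; m = m->next)
		if (m->irq == irq)
			return m->desc;
	return NULL;
}

int main(void)
{
	int dummy;

	add(42, &dummy);
	printf("irq 42 %s\n", lookup(42) ? "found" : "missing");
	return 0;
}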
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index e99a2557f186..cf8a12ff733b 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -48,11 +48,38 @@ static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr,
48} 48}
49static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL); 49static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL);
50 50
51static void recover_callback(struct device *dev)
52{
53 struct pci_dev *pdev = to_pci_dev(dev);
54 struct zpci_dev *zdev = get_zdev(pdev);
55 int ret;
56
57 pci_stop_and_remove_bus_device(pdev);
58 ret = zpci_disable_device(zdev);
59 if (ret)
60 return;
61
62 ret = zpci_enable_device(zdev);
63 if (ret)
64 return;
65
66 pci_rescan_bus(zdev->bus);
67}
68
69static ssize_t store_recover(struct device *dev, struct device_attribute *attr,
70 const char *buf, size_t count)
71{
72 int rc = device_schedule_callback(dev, recover_callback);
73 return rc ? rc : count;
74}
75static DEVICE_ATTR(recover, S_IWUSR, NULL, store_recover);
76
51static struct device_attribute *zpci_dev_attrs[] = { 77static struct device_attribute *zpci_dev_attrs[] = {
52 &dev_attr_function_id, 78 &dev_attr_function_id,
53 &dev_attr_function_handle, 79 &dev_attr_function_handle,
54 &dev_attr_pchid, 80 &dev_attr_pchid,
55 &dev_attr_pfgid, 81 &dev_attr_pfgid,
82 &dev_attr_recover,
56 NULL, 83 NULL,
57}; 84};
58 85
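The new recover attribute stops and removes the PCI function, disables and re-enables it at the firmware level, and rescans the bus. The work is deferred through device_schedule_callback() so the handler can remove the very device whose sysfs file is being written without deadlocking on that file. Assuming the attribute appears under the usual PCI device directory, a minimal userspace trigger might look like this; the device address is a placeholder:

/*
 * Hypothetical trigger for the new attribute.  The store ignores
 * the payload, so writing any byte schedules the recovery sequence.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/bus/pci/devices/0000:00:00.0/recover", "w");

	if (!f) {
		perror("recover");
		return 1;
	}
	fputc('1', f);
	return fclose(f) ? 1 : 0;
}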