aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm/fault.c
diff options
context:
space:
mode:
authorDavid Vrabel <david.vrabel@citrix.com>2014-04-10 13:46:45 -0400
committerStefano Stabellini <stefano.stabellini@eu.citrix.com>2014-09-23 09:36:20 -0400
commit31668511424110ad470315c6a63dec9a10f1a7ba (patch)
tree24d2e812d218a4fdcefc71f4099705d06a7fb494 /arch/x86/mm/fault.c
parent342cd340f6e73a974053dd09ed1bf8a9c1ed4458 (diff)
x86: skip check for spurious faults for non-present faults
If a fault on a kernel address is due to a non-present page, then it cannot be the result of a stale TLB entry from a protection change (RO to RW or NX to X). Thus the pagetable walk in spurious_fault() can be skipped. See the initial if in spurious_fault() and the tests in spurious_fault_check() for the set of possible error codes checked for spurious faults. These are: IRUWP Before x00xx && ( 1xxxx || xxx1x ) After ( 10001 || 00011 ) && ( 1xxxx || xxx1x ) Thus the new condition is a subset of the previous one, excluding only non-present faults (I == 1 and W == 1 are mutually exclusive). This avoids spurious_fault() oopsing in some cases if the pagetables it attempts to walk are not accessible. This obscures the location of the original fault. This also fixes a crash with Xen PV guests when they access entries in the M2P corresponding to device MMIO regions. The M2P is mapped (read-only) by Xen into the kernel address space of the guest and this mapping may contain holes for non-RAM regions. Read faults will result in calls to spurious_fault(), but because the page tables for the M2P mappings are not accessible by the guest the pagetable walk would fault. This was not normally a problem as MMIO mappings would not normally result in an M2P lookup because of the use of the _PAGE_IOMAP bit in the PTE. However, removing the _PAGE_IOMAP bit requires M2P lookups for MMIO mappings as well. Signed-off-by: David Vrabel <david.vrabel@citrix.com> Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Tested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Acked-by: Dave Hansen <dave.hansen@intel.com>
Diffstat (limited to 'arch/x86/mm/fault.c')
-rw-r--r--arch/x86/mm/fault.c22
1 file changed, 20 insertions, 2 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a24194681513..83bb03bfa259 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -933,8 +933,17 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
933 * cross-processor TLB flush, even if no stale TLB entries exist 933 * cross-processor TLB flush, even if no stale TLB entries exist
934 * on other processors. 934 * on other processors.
935 * 935 *
936 * Spurious faults may only occur if the TLB contains an entry with
937 * fewer permission than the page table entry. Non-present (P = 0)
938 * and reserved bit (R = 1) faults are never spurious.
939 *
936 * There are no security implications to leaving a stale TLB when 940 * There are no security implications to leaving a stale TLB when
937 * increasing the permissions on a page. 941 * increasing the permissions on a page.
942 *
943 * Returns non-zero if a spurious fault was handled, zero otherwise.
944 *
945 * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3
946 * (Optional Invalidation).
938 */ 947 */
939static noinline int 948static noinline int
940spurious_fault(unsigned long error_code, unsigned long address) 949spurious_fault(unsigned long error_code, unsigned long address)
@@ -945,8 +954,17 @@ spurious_fault(unsigned long error_code, unsigned long address)
945 pte_t *pte; 954 pte_t *pte;
946 int ret; 955 int ret;
947 956
948 /* Reserved-bit violation or user access to kernel space? */ 957 /*
949 if (error_code & (PF_USER | PF_RSVD)) 958 * Only writes to RO or instruction fetches from NX may cause
959 * spurious faults.
960 *
961 * These could be from user or supervisor accesses but the TLB
962 * is only lazily flushed after a kernel mapping protection
963 * change, so user accesses are not expected to cause spurious
964 * faults.
965 */
966 if (error_code != (PF_WRITE | PF_PROT)
967 && error_code != (PF_INSTR | PF_PROT))
950 return 0; 968 return 0;
951 969
952 pgd = init_mm.pgd + pgd_index(address); 970 pgd = init_mm.pgd + pgd_index(address);