Diffstat (limited to 'arch/tile')
-rw-r--r--  arch/tile/Kconfig                      |   43
-rw-r--r--  arch/tile/Makefile                     |    7
-rw-r--r--  arch/tile/include/arch/spr_def_32.h    |   56
-rw-r--r--  arch/tile/include/arch/spr_def_64.h    |   43
-rw-r--r--  arch/tile/include/asm/Kbuild           |    2
-rw-r--r--  arch/tile/include/asm/atomic_32.h      |   10
-rw-r--r--  arch/tile/include/asm/bitops.h         |   12
-rw-r--r--  arch/tile/include/asm/byteorder.h      |   20
-rw-r--r--  arch/tile/include/asm/cachectl.h       |   42
-rw-r--r--  arch/tile/include/asm/compat.h         |    3
-rw-r--r--  arch/tile/include/asm/elf.h            |    5
-rw-r--r--  arch/tile/include/asm/futex.h          |  143
-rw-r--r--  arch/tile/include/asm/hardwall.h       |   18
-rw-r--r--  arch/tile/include/asm/hugetlb.h        |   21
-rw-r--r--  arch/tile/include/asm/irqflags.h       |   34
-rw-r--r--  arch/tile/include/asm/kexec.h          |   12
-rw-r--r--  arch/tile/include/asm/kvm_para.h       |    1
-rw-r--r--  arch/tile/include/asm/mmu.h            |    2
-rw-r--r--  arch/tile/include/asm/mmu_context.h    |    8
-rw-r--r--  arch/tile/include/asm/module.h         |   40
-rw-r--r--  arch/tile/include/asm/page.h           |   18
-rw-r--r--  arch/tile/include/asm/pgalloc.h        |   92
-rw-r--r--  arch/tile/include/asm/pgtable.h        |  111
-rw-r--r--  arch/tile/include/asm/pgtable_32.h     |   40
-rw-r--r--  arch/tile/include/asm/pgtable_64.h     |   57
-rw-r--r--  arch/tile/include/asm/processor.h      |   17
-rw-r--r--  arch/tile/include/asm/setup.h          |   10
-rw-r--r--  arch/tile/include/asm/syscalls.h       |    3
-rw-r--r--  arch/tile/include/asm/tlbflush.h       |   17
-rw-r--r--  arch/tile/include/asm/uaccess.h        |  222
-rw-r--r--  arch/tile/include/asm/unistd.h         |    4
-rw-r--r--  arch/tile/include/hv/drv_xgbe_intf.h   |    2
-rw-r--r--  arch/tile/include/hv/hypervisor.h      |  325
-rw-r--r--  arch/tile/kernel/Makefile              |    3
-rw-r--r--  arch/tile/kernel/entry.S               |    3
-rw-r--r--  arch/tile/kernel/hardwall.c            |  754
-rw-r--r--  arch/tile/kernel/head_32.S             |    8
-rw-r--r--  arch/tile/kernel/head_64.S             |   22
-rw-r--r--  arch/tile/kernel/hvglue.lds            |    3
-rw-r--r--  arch/tile/kernel/intvec_64.S           |   80
-rw-r--r--  arch/tile/kernel/machine_kexec.c       |   42
-rw-r--r--  arch/tile/kernel/module.c              |   12
-rw-r--r--  arch/tile/kernel/proc.c                |    1
-rw-r--r--  arch/tile/kernel/process.c             |   16
-rw-r--r--  arch/tile/kernel/relocate_kernel_32.S (renamed from arch/tile/kernel/relocate_kernel.S) | 0
-rw-r--r--  arch/tile/kernel/relocate_kernel_64.S  |  260
-rw-r--r--  arch/tile/kernel/setup.c               |  169
-rw-r--r--  arch/tile/kernel/single_step.c         |   16
-rw-r--r--  arch/tile/kernel/smp.c                 |    2
-rw-r--r--  arch/tile/kernel/sys.c                 |   10
-rw-r--r--  arch/tile/kernel/sysfs.c               |    8
-rw-r--r--  arch/tile/kernel/tlb.c                 |   11
-rw-r--r--  arch/tile/kernel/traps.c               |   30
-rw-r--r--  arch/tile/lib/atomic_32.c              |   47
-rw-r--r--  arch/tile/lib/exports.c                |    8
-rw-r--r--  arch/tile/lib/memchr_64.c              |    8
-rw-r--r--  arch/tile/lib/memcpy_64.c              |   23
-rw-r--r--  arch/tile/lib/memcpy_tile64.c          |    8
-rw-r--r--  arch/tile/lib/strchr_64.c              |   15
-rw-r--r--  arch/tile/lib/string-endian.h          |   33
-rw-r--r--  arch/tile/lib/strlen_64.c              |   11
-rw-r--r--  arch/tile/lib/usercopy_32.S            |   76
-rw-r--r--  arch/tile/lib/usercopy_64.S            |   49
-rw-r--r--  arch/tile/mm/fault.c                   |   34
-rw-r--r--  arch/tile/mm/homecache.c               |    1
-rw-r--r--  arch/tile/mm/hugetlbpage.c             |  285
-rw-r--r--  arch/tile/mm/init.c                    |   19
-rw-r--r--  arch/tile/mm/migrate.h                 |    6
-rw-r--r--  arch/tile/mm/migrate_32.S              |   36
-rw-r--r--  arch/tile/mm/migrate_64.S              |   34
-rw-r--r--  arch/tile/mm/pgtable.c                 |   40
71 files changed, 2459 insertions(+), 1164 deletions(-)
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 6ad6219fc47e..fe128816c448 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -48,6 +48,14 @@ config NEED_PER_CPU_PAGE_FIRST_CHUNK
 config SYS_SUPPORTS_HUGETLBFS
 	def_bool y
 
+# Support for additional huge page sizes besides HPAGE_SIZE.
+# The software support is currently only present in the TILE-Gx
+# hypervisor. TILEPro in any case does not support page sizes
+# larger than the default HPAGE_SIZE.
+config HUGETLB_SUPER_PAGES
+	depends on HUGETLB_PAGE && TILEGX
+	def_bool y
+
 # FIXME: tilegx can implement a more efficient rwsem.
 config RWSEM_GENERIC_SPINLOCK
 	def_bool y
@@ -107,16 +115,14 @@ config HVC_TILE
 	select HVC_DRIVER
 	def_bool y
 
-# Please note: TILE-Gx support is not yet finalized; this is
-# the preliminary support. TILE-Gx drivers are only provided
-# with the alpha or beta test versions for Tilera customers.
 config TILEGX
-	depends on EXPERIMENTAL
 	bool "Building with TILE-Gx (64-bit) compiler and toolchain"
 
+config TILEPRO
+	def_bool !TILEGX
+
 config 64BIT
-	depends on TILEGX
-	def_bool y
+	def_bool TILEGX
 
 config ARCH_DEFCONFIG
 	string
@@ -137,6 +143,31 @@ config NR_CPUS
 	  smaller kernel memory footprint results from using a smaller
 	  value on chips with fewer tiles.
 
+if TILEGX
+
+choice
+	prompt "Kernel page size"
+	default PAGE_SIZE_64KB
+	help
+	  This lets you select the page size of the kernel. For best
+	  performance on memory-intensive applications, a page size of 64KB
+	  is recommended. For workloads involving many small files, many
+	  connections, etc., it may be better to select 16KB, which uses
+	  memory more efficiently at some cost in TLB performance.
+
+	  Note that this option is TILE-Gx specific; currently
+	  TILEPro page size is set by rebuilding the hypervisor.
+
+config PAGE_SIZE_16KB
+	bool "16KB"
+
+config PAGE_SIZE_64KB
+	bool "64KB"
+
+endchoice
+
+endif
+
 source "kernel/Kconfig.hz"
 
 config KEXEC
diff --git a/arch/tile/Makefile b/arch/tile/Makefile
index 9520bc5a4b7f..e20b0a0b64a1 100644
--- a/arch/tile/Makefile
+++ b/arch/tile/Makefile
@@ -34,7 +34,12 @@ LIBGCC_PATH := \
   $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name)
 
 # Provide the path to use for "make defconfig".
-KBUILD_DEFCONFIG := $(ARCH)_defconfig
+# We default to the newer TILE-Gx architecture if only "tile" is given.
+ifeq ($(ARCH),tile)
+  KBUILD_DEFCONFIG := tilegx_defconfig
+else
+  KBUILD_DEFCONFIG := $(ARCH)_defconfig
+endif
 
 # Used as a file extension when useful, e.g. head_$(BITS).o
 # Not needed for (e.g.) "$(CC) -m32" since the compiler automatically
diff --git a/arch/tile/include/arch/spr_def_32.h b/arch/tile/include/arch/spr_def_32.h
index bbc1f4c924ee..78bbce2fb19a 100644
--- a/arch/tile/include/arch/spr_def_32.h
+++ b/arch/tile/include/arch/spr_def_32.h
@@ -65,6 +65,31 @@
 #define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1
 #define SPR_EX_CONTEXT_2_1__ICS_MASK 0x4
 #define SPR_FAIL 0x4e09
+#define SPR_IDN_AVAIL_EN 0x3e05
+#define SPR_IDN_CA_DATA 0x0b00
+#define SPR_IDN_DATA_AVAIL 0x0b03
+#define SPR_IDN_DEADLOCK_TIMEOUT 0x3406
+#define SPR_IDN_DEMUX_CA_COUNT 0x0a05
+#define SPR_IDN_DEMUX_COUNT_0 0x0a06
+#define SPR_IDN_DEMUX_COUNT_1 0x0a07
+#define SPR_IDN_DEMUX_CTL 0x0a08
+#define SPR_IDN_DEMUX_QUEUE_SEL 0x0a0a
+#define SPR_IDN_DEMUX_STATUS 0x0a0b
+#define SPR_IDN_DEMUX_WRITE_FIFO 0x0a0c
+#define SPR_IDN_DIRECTION_PROTECT 0x2e05
+#define SPR_IDN_PENDING 0x0a0e
+#define SPR_IDN_REFILL_EN 0x0e05
+#define SPR_IDN_SP_FIFO_DATA 0x0a0f
+#define SPR_IDN_SP_FIFO_SEL 0x0a10
+#define SPR_IDN_SP_FREEZE 0x0a11
+#define SPR_IDN_SP_FREEZE__SP_FRZ_MASK 0x1
+#define SPR_IDN_SP_FREEZE__DEMUX_FRZ_MASK 0x2
+#define SPR_IDN_SP_FREEZE__NON_DEST_EXT_MASK 0x4
+#define SPR_IDN_SP_STATE 0x0a12
+#define SPR_IDN_TAG_0 0x0a13
+#define SPR_IDN_TAG_1 0x0a14
+#define SPR_IDN_TAG_VALID 0x0a15
+#define SPR_IDN_TILE_COORD 0x0a16
 #define SPR_INTCTRL_0_STATUS 0x4a07
 #define SPR_INTCTRL_1_STATUS 0x4807
 #define SPR_INTCTRL_2_STATUS 0x4607
@@ -87,12 +112,36 @@
 #define SPR_INTERRUPT_MASK_SET_1_1 0x480e
 #define SPR_INTERRUPT_MASK_SET_2_0 0x460c
 #define SPR_INTERRUPT_MASK_SET_2_1 0x460d
+#define SPR_MPL_AUX_PERF_COUNT_SET_0 0x6000
+#define SPR_MPL_AUX_PERF_COUNT_SET_1 0x6001
+#define SPR_MPL_AUX_PERF_COUNT_SET_2 0x6002
 #define SPR_MPL_DMA_CPL_SET_0 0x5800
 #define SPR_MPL_DMA_CPL_SET_1 0x5801
 #define SPR_MPL_DMA_CPL_SET_2 0x5802
 #define SPR_MPL_DMA_NOTIFY_SET_0 0x3800
 #define SPR_MPL_DMA_NOTIFY_SET_1 0x3801
 #define SPR_MPL_DMA_NOTIFY_SET_2 0x3802
+#define SPR_MPL_IDN_ACCESS_SET_0 0x0a00
+#define SPR_MPL_IDN_ACCESS_SET_1 0x0a01
+#define SPR_MPL_IDN_ACCESS_SET_2 0x0a02
+#define SPR_MPL_IDN_AVAIL_SET_0 0x3e00
+#define SPR_MPL_IDN_AVAIL_SET_1 0x3e01
+#define SPR_MPL_IDN_AVAIL_SET_2 0x3e02
+#define SPR_MPL_IDN_CA_SET_0 0x3a00
+#define SPR_MPL_IDN_CA_SET_1 0x3a01
+#define SPR_MPL_IDN_CA_SET_2 0x3a02
+#define SPR_MPL_IDN_COMPLETE_SET_0 0x1200
+#define SPR_MPL_IDN_COMPLETE_SET_1 0x1201
+#define SPR_MPL_IDN_COMPLETE_SET_2 0x1202
+#define SPR_MPL_IDN_FIREWALL_SET_0 0x2e00
+#define SPR_MPL_IDN_FIREWALL_SET_1 0x2e01
+#define SPR_MPL_IDN_FIREWALL_SET_2 0x2e02
+#define SPR_MPL_IDN_REFILL_SET_0 0x0e00
+#define SPR_MPL_IDN_REFILL_SET_1 0x0e01
+#define SPR_MPL_IDN_REFILL_SET_2 0x0e02
+#define SPR_MPL_IDN_TIMER_SET_0 0x3400
+#define SPR_MPL_IDN_TIMER_SET_1 0x3401
+#define SPR_MPL_IDN_TIMER_SET_2 0x3402
 #define SPR_MPL_INTCTRL_0_SET_0 0x4a00
 #define SPR_MPL_INTCTRL_0_SET_1 0x4a01
 #define SPR_MPL_INTCTRL_0_SET_2 0x4a02
@@ -102,6 +151,9 @@
 #define SPR_MPL_INTCTRL_2_SET_0 0x4600
 #define SPR_MPL_INTCTRL_2_SET_1 0x4601
 #define SPR_MPL_INTCTRL_2_SET_2 0x4602
+#define SPR_MPL_PERF_COUNT_SET_0 0x4200
+#define SPR_MPL_PERF_COUNT_SET_1 0x4201
+#define SPR_MPL_PERF_COUNT_SET_2 0x4202
 #define SPR_MPL_SN_ACCESS_SET_0 0x0800
 #define SPR_MPL_SN_ACCESS_SET_1 0x0801
 #define SPR_MPL_SN_ACCESS_SET_2 0x0802
@@ -181,6 +233,7 @@
 #define SPR_UDN_DEMUX_STATUS 0x0c0d
 #define SPR_UDN_DEMUX_WRITE_FIFO 0x0c0e
 #define SPR_UDN_DIRECTION_PROTECT 0x3005
+#define SPR_UDN_PENDING 0x0c10
 #define SPR_UDN_REFILL_EN 0x1005
 #define SPR_UDN_SP_FIFO_DATA 0x0c11
 #define SPR_UDN_SP_FIFO_SEL 0x0c12
@@ -195,6 +248,9 @@
 #define SPR_UDN_TAG_3 0x0c18
 #define SPR_UDN_TAG_VALID 0x0c19
 #define SPR_UDN_TILE_COORD 0x0c1a
+#define SPR_WATCH_CTL 0x4209
+#define SPR_WATCH_MASK 0x420a
+#define SPR_WATCH_VAL 0x420b
 
 #endif /* !defined(__ARCH_SPR_DEF_H__) */
 
diff --git a/arch/tile/include/arch/spr_def_64.h b/arch/tile/include/arch/spr_def_64.h
index cd3e5f95d5fd..0da86faa3370 100644
--- a/arch/tile/include/arch/spr_def_64.h
+++ b/arch/tile/include/arch/spr_def_64.h
@@ -52,6 +52,13 @@
 #define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1
 #define SPR_EX_CONTEXT_2_1__ICS_MASK 0x4
 #define SPR_FAIL 0x2707
+#define SPR_IDN_AVAIL_EN 0x1a05
+#define SPR_IDN_DATA_AVAIL 0x0a80
+#define SPR_IDN_DEADLOCK_TIMEOUT 0x1806
+#define SPR_IDN_DEMUX_COUNT_0 0x0a05
+#define SPR_IDN_DEMUX_COUNT_1 0x0a06
+#define SPR_IDN_DIRECTION_PROTECT 0x1405
+#define SPR_IDN_PENDING 0x0a08
 #define SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK 0x1
 #define SPR_INTCTRL_0_STATUS 0x2505
 #define SPR_INTCTRL_1_STATUS 0x2405
@@ -88,9 +95,27 @@
 #define SPR_IPI_MASK_SET_0 0x1f0a
 #define SPR_IPI_MASK_SET_1 0x1e0a
 #define SPR_IPI_MASK_SET_2 0x1d0a
+#define SPR_MPL_AUX_PERF_COUNT_SET_0 0x2100
+#define SPR_MPL_AUX_PERF_COUNT_SET_1 0x2101
+#define SPR_MPL_AUX_PERF_COUNT_SET_2 0x2102
 #define SPR_MPL_AUX_TILE_TIMER_SET_0 0x1700
 #define SPR_MPL_AUX_TILE_TIMER_SET_1 0x1701
 #define SPR_MPL_AUX_TILE_TIMER_SET_2 0x1702
+#define SPR_MPL_IDN_ACCESS_SET_0 0x0a00
+#define SPR_MPL_IDN_ACCESS_SET_1 0x0a01
+#define SPR_MPL_IDN_ACCESS_SET_2 0x0a02
+#define SPR_MPL_IDN_AVAIL_SET_0 0x1a00
+#define SPR_MPL_IDN_AVAIL_SET_1 0x1a01
+#define SPR_MPL_IDN_AVAIL_SET_2 0x1a02
+#define SPR_MPL_IDN_COMPLETE_SET_0 0x0500
+#define SPR_MPL_IDN_COMPLETE_SET_1 0x0501
+#define SPR_MPL_IDN_COMPLETE_SET_2 0x0502
+#define SPR_MPL_IDN_FIREWALL_SET_0 0x1400
+#define SPR_MPL_IDN_FIREWALL_SET_1 0x1401
+#define SPR_MPL_IDN_FIREWALL_SET_2 0x1402
+#define SPR_MPL_IDN_TIMER_SET_0 0x1800
+#define SPR_MPL_IDN_TIMER_SET_1 0x1801
+#define SPR_MPL_IDN_TIMER_SET_2 0x1802
 #define SPR_MPL_INTCTRL_0_SET_0 0x2500
 #define SPR_MPL_INTCTRL_0_SET_1 0x2501
 #define SPR_MPL_INTCTRL_0_SET_2 0x2502
@@ -100,6 +125,21 @@
 #define SPR_MPL_INTCTRL_2_SET_0 0x2300
 #define SPR_MPL_INTCTRL_2_SET_1 0x2301
 #define SPR_MPL_INTCTRL_2_SET_2 0x2302
+#define SPR_MPL_IPI_0 0x1f04
+#define SPR_MPL_IPI_0_SET_0 0x1f00
+#define SPR_MPL_IPI_0_SET_1 0x1f01
+#define SPR_MPL_IPI_0_SET_2 0x1f02
+#define SPR_MPL_IPI_1 0x1e04
+#define SPR_MPL_IPI_1_SET_0 0x1e00
+#define SPR_MPL_IPI_1_SET_1 0x1e01
+#define SPR_MPL_IPI_1_SET_2 0x1e02
+#define SPR_MPL_IPI_2 0x1d04
+#define SPR_MPL_IPI_2_SET_0 0x1d00
+#define SPR_MPL_IPI_2_SET_1 0x1d01
+#define SPR_MPL_IPI_2_SET_2 0x1d02
+#define SPR_MPL_PERF_COUNT_SET_0 0x2000
+#define SPR_MPL_PERF_COUNT_SET_1 0x2001
+#define SPR_MPL_PERF_COUNT_SET_2 0x2002
 #define SPR_MPL_UDN_ACCESS_SET_0 0x0b00
 #define SPR_MPL_UDN_ACCESS_SET_1 0x0b01
 #define SPR_MPL_UDN_ACCESS_SET_2 0x0b02
@@ -167,6 +207,9 @@
 #define SPR_UDN_DEMUX_COUNT_2 0x0b07
 #define SPR_UDN_DEMUX_COUNT_3 0x0b08
 #define SPR_UDN_DIRECTION_PROTECT 0x1505
+#define SPR_UDN_PENDING 0x0b0a
+#define SPR_WATCH_MASK 0x200a
+#define SPR_WATCH_VAL 0x200b
 
 #endif /* !defined(__ARCH_SPR_DEF_H__) */
 
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 0bb42642343a..143473e3a0bb 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -2,6 +2,7 @@ include include/asm-generic/Kbuild.asm
 
 header-y += ../arch/
 
+header-y += cachectl.h
 header-y += ucontext.h
 header-y += hardwall.h
 
@@ -21,7 +22,6 @@ generic-y += ipcbuf.h
 generic-y += irq_regs.h
 generic-y += kdebug.h
 generic-y += local.h
-generic-y += module.h
 generic-y += msgbuf.h
 generic-y += mutex.h
 generic-y += param.h
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index 54d1da826f93..e7fb5cfb9597 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -303,7 +303,14 @@ void __init_atomic_per_cpu(void);
 void __atomic_fault_unlock(int *lock_ptr);
 #endif
 
+/* Return a pointer to the lock for the given address. */
+int *__atomic_hashed_lock(volatile void *v);
+
 /* Private helper routines in lib/atomic_asm_32.S */
+struct __get_user {
+	unsigned long val;
+	int err;
+};
 extern struct __get_user __atomic_cmpxchg(volatile int *p,
 					  int *lock, int o, int n);
 extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n);
@@ -319,6 +326,9 @@ extern u64 __atomic64_xchg_add(volatile u64 *p, int *lock, u64 n);
 extern u64 __atomic64_xchg_add_unless(volatile u64 *p,
 				      int *lock, u64 o, u64 n);
 
+/* Return failure from the atomic wrappers. */
+struct __get_user __atomic_bad_address(int __user *addr);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_TILE_ATOMIC_32_H */
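
[The { val, err } pair lets a single helper return both the fetched value and an -EFAULT indication. A minimal kernel-side sketch of how a caller consumes it, modeled on the __futex_call() wrapper added to <asm/futex.h> later in this patch; the function below is illustrative, not part of the patch:]

static int example_or_user_word(int __user *uaddr, int bits, int *result)
{
	/* Hash the user address to one of the arch's atomic locks. */
	int *lock = __atomic_hashed_lock((int __force *)uaddr);
	struct __get_user gu = __atomic_or((int __force *)uaddr, lock, bits);

	if (gu.err)
		return gu.err;	/* e.g. -EFAULT on a bad user address */
	*result = gu.val;	/* value observed by the helper */
	return 0;
}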
diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h
index 16f1fa51fea1..bd186c4eaa50 100644
--- a/arch/tile/include/asm/bitops.h
+++ b/arch/tile/include/asm/bitops.h
@@ -77,6 +77,11 @@ static inline int ffs(int x)
 	return __builtin_ffs(x);
 }
 
+static inline int fls64(__u64 w)
+{
+	return (sizeof(__u64) * 8) - __builtin_clzll(w);
+}
+
 /**
  * fls - find last set bit in word
  * @x: the word to search
@@ -90,12 +95,7 @@ static inline int ffs(int x)
  */
 static inline int fls(int x)
 {
-	return (sizeof(int) * 8) - __builtin_clz(x);
-}
-
-static inline int fls64(__u64 w)
-{
-	return (sizeof(__u64) * 8) - __builtin_clzll(w);
+	return fls64((unsigned int) x);
 }
 
 static inline unsigned int __arch_hweight32(unsigned int w)
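
[A userspace spot-check of the arithmetic behind the rewritten fls()/fls64(). Note the kernel version has no zero guard: it relies on the tile clz instruction returning the operand width for a zero input so that fls(0) == 0 (an assumption about the tile builtins; __builtin_clzll(0) is undefined in ISO C). The standalone sketch below adds an explicit guard to stay portable:]

#include <assert.h>

static int sketch_fls64(unsigned long long w)
{
	return w ? 64 - __builtin_clzll(w) : 0;	/* guard: clzll(0) is UB in ISO C */
}

int main(void)
{
	assert(sketch_fls64(1) == 1);			/* lowest bit is "bit 1" */
	assert(sketch_fls64(0x80000000ULL) == 32);	/* top bit of a 32-bit word */
	assert(sketch_fls64(1ULL << 40) == 41);
	assert(sketch_fls64(0) == 0);			/* kernel API requires fls(0) == 0 */
	return 0;
}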
diff --git a/arch/tile/include/asm/byteorder.h b/arch/tile/include/asm/byteorder.h
index 9558416d578b..fb72ecf49218 100644
--- a/arch/tile/include/asm/byteorder.h
+++ b/arch/tile/include/asm/byteorder.h
@@ -1 +1,21 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#if defined (__BIG_ENDIAN__)
+#include <linux/byteorder/big_endian.h>
+#elif defined (__LITTLE_ENDIAN__)
 #include <linux/byteorder/little_endian.h>
+#else
+#error "__BIG_ENDIAN__ or __LITTLE_ENDIAN__ must be defined."
+#endif
diff --git a/arch/tile/include/asm/cachectl.h b/arch/tile/include/asm/cachectl.h
new file mode 100644
index 000000000000..af4c9f9154d1
--- /dev/null
+++ b/arch/tile/include/asm/cachectl.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_CACHECTL_H
+#define _ASM_TILE_CACHECTL_H
+
+/*
+ * Options for cacheflush system call.
+ *
+ * The ICACHE flush is performed on all cores currently running the
+ * current process's address space. The intent is for user
+ * applications to be able to modify code, invoke the system call,
+ * then allow arbitrary other threads in the same address space to see
+ * the newly-modified code. Passing a length of CHIP_L1I_CACHE_SIZE()
+ * or more invalidates the entire icache on all cores in the address
+ * spaces. (Note: currently this option invalidates the entire icache
+ * regardless of the requested address and length, but we may choose
+ * to honor the arguments at some point.)
+ *
+ * Flush and invalidation of memory can normally be performed with the
+ * __insn_flush(), __insn_inv(), and __insn_finv() instructions from
+ * userspace. The DCACHE option to the system call allows userspace
+ * to flush the entire L1+L2 data cache from the core. In this case,
+ * the address and length arguments are not used. The DCACHE flush is
+ * restricted to the current core, not all cores in the address space.
+ */
+#define ICACHE	(1<<0)		/* invalidate L1 instruction cache */
+#define DCACHE	(1<<1)		/* flush and invalidate data cache */
+#define BCACHE	(ICACHE|DCACHE)	/* flush both caches */
+
+#endif /* _ASM_TILE_CACHECTL_H */
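
[For orientation, a hedged userspace sketch of the ICACHE flow the comment above describes: write code, then make it visible to every thread in the address space. The cacheflush() wrapper name and its availability in libc are assumptions; only the flag semantics come from this header:]

#include <string.h>

/* Assumed prototype for the new syscall's libc wrapper. */
extern int cacheflush(void *addr, int nbytes, int op);

#define ICACHE (1<<0)	/* as defined in <asm/cachectl.h> above */

void install_code(void *text, const void *insns, size_t len)
{
	memcpy(text, insns, len);		/* modify the code... */
	cacheflush(text, (int)len, ICACHE);	/* ...then invalidate the icache on
						   all cores running this address
						   space before executing it */
}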
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index 4b4b28969a65..69adc08d36a5 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -242,9 +242,6 @@ long compat_sys_fallocate(int fd, int mode,
 long compat_sys_sched_rr_get_interval(compat_pid_t pid,
 				      struct compat_timespec __user *interval);
 
-/* Tilera Linux syscalls that don't have "compat" versions. */
-#define compat_sys_flush_cache sys_flush_cache
-
 /* These are the intvec_64.S trampolines. */
 long _compat_sys_execve(const char __user *path,
 			const compat_uptr_t __user *argv,
diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
index 623a6bb741c1..d16d006d660e 100644
--- a/arch/tile/include/asm/elf.h
+++ b/arch/tile/include/asm/elf.h
@@ -44,7 +44,11 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
 #else
 #define ELF_CLASS ELFCLASS32
 #endif
+#ifdef __BIG_ENDIAN__
+#define ELF_DATA ELFDATA2MSB
+#else
 #define ELF_DATA ELFDATA2LSB
+#endif
 
 /*
  * There seems to be a bug in how compat_binfmt_elf.c works: it
@@ -59,6 +63,7 @@ enum { ELF_ARCH = CHIP_ELF_TYPE() };
  */
 #define elf_check_arch(x)  \
 	((x)->e_ident[EI_CLASS] == ELF_CLASS && \
+	 (x)->e_ident[EI_DATA] == ELF_DATA && \
 	 (x)->e_machine == CHIP_ELF_TYPE())
 
 /* The module loader only handles a few relocation types. */
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
index d03ec124a598..5909ac3d7218 100644
--- a/arch/tile/include/asm/futex.h
+++ b/arch/tile/include/asm/futex.h
@@ -28,29 +28,81 @@
 #include <linux/futex.h>
 #include <linux/uaccess.h>
 #include <linux/errno.h>
+#include <asm/atomic.h>
 
-extern struct __get_user futex_set(u32 __user *v, int i);
-extern struct __get_user futex_add(u32 __user *v, int n);
-extern struct __get_user futex_or(u32 __user *v, int n);
-extern struct __get_user futex_andn(u32 __user *v, int n);
-extern struct __get_user futex_cmpxchg(u32 __user *v, int o, int n);
+/*
+ * Support macros for futex operations. Do not use these macros directly.
+ * They assume "ret", "val", "oparg", and "uaddr" in the lexical context.
+ * __futex_cmpxchg() additionally assumes "oldval".
+ */
+
+#ifdef __tilegx__
+
+#define __futex_asm(OP) \
+	asm("1: {" #OP " %1, %3, %4; movei %0, 0 }\n" \
+	    ".pushsection .fixup,\"ax\"\n" \
+	    "0: { movei %0, %5; j 9f }\n" \
+	    ".section __ex_table,\"a\"\n" \
+	    ".quad 1b, 0b\n" \
+	    ".popsection\n" \
+	    "9:" \
+	    : "=r" (ret), "=r" (val), "+m" (*(uaddr)) \
+	    : "r" (uaddr), "r" (oparg), "i" (-EFAULT))
+
+#define __futex_set() __futex_asm(exch4)
+#define __futex_add() __futex_asm(fetchadd4)
+#define __futex_or() __futex_asm(fetchor4)
+#define __futex_andn() ({ oparg = ~oparg; __futex_asm(fetchand4); })
+#define __futex_cmpxchg() \
+	({ __insn_mtspr(SPR_CMPEXCH_VALUE, oldval); __futex_asm(cmpexch4); })
+
+#define __futex_xor() \
+	({ \
+		u32 oldval, n = oparg; \
+		if ((ret = __get_user(oldval, uaddr)) == 0) { \
+			do { \
+				oparg = oldval ^ n; \
+				__futex_cmpxchg(); \
+			} while (ret == 0 && oldval != val); \
+		} \
+	})
+
+/* No need to prefetch, since the atomic ops go to the home cache anyway. */
+#define __futex_prolog()
 
-#ifndef __tilegx__
-extern struct __get_user futex_xor(u32 __user *v, int n);
 #else
-static inline struct __get_user futex_xor(u32 __user *uaddr, int n)
-{
-	struct __get_user asm_ret = __get_user_4(uaddr);
-	if (!asm_ret.err) {
-		int oldval, newval;
-		do {
-			oldval = asm_ret.val;
-			newval = oldval ^ n;
-			asm_ret = futex_cmpxchg(uaddr, oldval, newval);
-		} while (asm_ret.err == 0 && oldval != asm_ret.val);
+
+#define __futex_call(FN) \
+	{ \
+		struct __get_user gu = FN((u32 __force *)uaddr, lock, oparg); \
+		val = gu.val; \
+		ret = gu.err; \
 	}
-	return asm_ret;
-}
+
+#define __futex_set() __futex_call(__atomic_xchg)
+#define __futex_add() __futex_call(__atomic_xchg_add)
+#define __futex_or() __futex_call(__atomic_or)
+#define __futex_andn() __futex_call(__atomic_andn)
+#define __futex_xor() __futex_call(__atomic_xor)
+
+#define __futex_cmpxchg() \
+	{ \
+		struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \
+							lock, oldval, oparg); \
+		val = gu.val; \
+		ret = gu.err; \
+	}
+
+/*
+ * Find the lock pointer for the atomic calls to use, and issue a
+ * prefetch to the user address to bring it into cache. Similar to
+ * __atomic_setup(), but we can't do a read into the L1 since it might
+ * fault; instead we do a prefetch into the L2.
+ */
+#define __futex_prolog() \
+	int *lock; \
+	__insn_prefetch(uaddr); \
+	lock = __atomic_hashed_lock((int __force *)uaddr)
 #endif
 
 static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
@@ -59,8 +111,12 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int ret;
-	struct __get_user asm_ret;
+	int uninitialized_var(val), ret;
+
+	__futex_prolog();
+
+	/* The 32-bit futex code makes this assumption, so validate it here. */
+	BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int));
 
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
@@ -71,46 +127,45 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 	pagefault_disable();
 	switch (op) {
 	case FUTEX_OP_SET:
-		asm_ret = futex_set(uaddr, oparg);
+		__futex_set();
 		break;
 	case FUTEX_OP_ADD:
-		asm_ret = futex_add(uaddr, oparg);
+		__futex_add();
 		break;
 	case FUTEX_OP_OR:
-		asm_ret = futex_or(uaddr, oparg);
+		__futex_or();
 		break;
 	case FUTEX_OP_ANDN:
-		asm_ret = futex_andn(uaddr, oparg);
+		__futex_andn();
 		break;
 	case FUTEX_OP_XOR:
-		asm_ret = futex_xor(uaddr, oparg);
+		__futex_xor();
 		break;
 	default:
-		asm_ret.err = -ENOSYS;
+		ret = -ENOSYS;
+		break;
 	}
 	pagefault_enable();
 
-	ret = asm_ret.err;
-
 	if (!ret) {
 		switch (cmp) {
 		case FUTEX_OP_CMP_EQ:
-			ret = (asm_ret.val == cmparg);
+			ret = (val == cmparg);
 			break;
 		case FUTEX_OP_CMP_NE:
-			ret = (asm_ret.val != cmparg);
+			ret = (val != cmparg);
 			break;
 		case FUTEX_OP_CMP_LT:
-			ret = (asm_ret.val < cmparg);
+			ret = (val < cmparg);
 			break;
 		case FUTEX_OP_CMP_GE:
-			ret = (asm_ret.val >= cmparg);
+			ret = (val >= cmparg);
 			break;
 		case FUTEX_OP_CMP_LE:
-			ret = (asm_ret.val <= cmparg);
+			ret = (val <= cmparg);
 			break;
 		case FUTEX_OP_CMP_GT:
-			ret = (asm_ret.val > cmparg);
+			ret = (val > cmparg);
 			break;
 		default:
 			ret = -ENOSYS;
120} 175}
121 176
122static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, 177static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
123 u32 oldval, u32 newval) 178 u32 oldval, u32 oparg)
124{ 179{
125 struct __get_user asm_ret; 180 int ret, val;
181
182 __futex_prolog();
126 183
127 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) 184 if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
128 return -EFAULT; 185 return -EFAULT;
129 186
130 asm_ret = futex_cmpxchg(uaddr, oldval, newval); 187 __futex_cmpxchg();
131 *uval = asm_ret.val;
132 return asm_ret.err;
133}
134 188
135#ifndef __tilegx__ 189 *uval = val;
136/* Return failure from the atomic wrappers. */ 190 return ret;
137struct __get_user __atomic_bad_address(int __user *addr); 191}
138#endif
139 192
140#endif /* !__ASSEMBLY__ */ 193#endif /* !__ASSEMBLY__ */
141 194
diff --git a/arch/tile/include/asm/hardwall.h b/arch/tile/include/asm/hardwall.h
index 2ac422848c7d..47514a58d685 100644
--- a/arch/tile/include/asm/hardwall.h
+++ b/arch/tile/include/asm/hardwall.h
@@ -11,12 +11,14 @@
  * NON INFRINGEMENT. See the GNU General Public License for
  * more details.
  *
- * Provide methods for the HARDWALL_FILE for accessing the UDN.
+ * Provide methods for access control of per-cpu resources like
+ * UDN, IDN, or IPI.
  */
 
 #ifndef _ASM_TILE_HARDWALL_H
 #define _ASM_TILE_HARDWALL_H
 
+#include <arch/chip.h>
 #include <linux/ioctl.h>
 
 #define HARDWALL_IOCTL_BASE 0xa2
@@ -24,8 +26,9 @@
 /*
  * The HARDWALL_CREATE() ioctl is a macro with a "size" argument.
  * The resulting ioctl value is passed to the kernel in conjunction
- * with a pointer to a little-endian bitmask of cpus, which must be
- * physically in a rectangular configuration on the chip.
+ * with a pointer to a standard kernel bitmask of cpus.
+ * For network resources (UDN or IDN) the bitmask must physically
+ * represent a rectangular configuration on the chip.
  * The "size" is the number of bytes of cpu mask data.
  */
 #define _HARDWALL_CREATE 1
@@ -44,13 +47,7 @@
 #define HARDWALL_GET_ID \
 	_IO(HARDWALL_IOCTL_BASE, _HARDWALL_GET_ID)
 
-#ifndef __KERNEL__
-
-/* This is the canonical name expected by userspace. */
-#define HARDWALL_FILE "/dev/hardwall"
-
-#else
-
+#ifdef __KERNEL__
 /* /proc hooks for hardwall. */
 struct proc_dir_entry;
 #ifdef CONFIG_HARDWALL
@@ -59,7 +56,6 @@ int proc_pid_hardwall(struct task_struct *task, char *buffer);
 #else
 static inline void proc_tile_hardwall_init(struct proc_dir_entry *root) {}
 #endif
-
 #endif
 
 #endif /* _ASM_TILE_HARDWALL_H */
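
[A hedged userspace sketch of the ioctl flow these definitions support: create a hardwall over a cpu mask, then activate the calling task within it. The /dev/hardwall path (whose #define was just removed from this header) and the HARDWALL_ACTIVATE request from the elided portion of the header are assumptions here:]

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/hardwall.h>

int create_and_enter_hardwall(const unsigned long *mask, int mask_bytes)
{
	int fd = open("/dev/hardwall", O_RDWR);	/* assumed device node */
	if (fd < 0)
		return -1;
	/* Kernel-style cpu bitmask; for UDN/IDN it must be a rectangle. */
	if (ioctl(fd, HARDWALL_CREATE(mask_bytes), mask) < 0 ||
	    ioctl(fd, HARDWALL_ACTIVATE) < 0) {	/* assumed from full header */
		close(fd);
		return -1;
	}
	return fd;	/* keep open for as long as the hardwall is in use */
}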
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index d396d1805163..b2042380a5aa 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -106,4 +106,25 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+				       struct page *page, int writable)
+{
+	size_t pagesize = huge_page_size(hstate_vma(vma));
+	if (pagesize != PUD_SIZE && pagesize != PMD_SIZE)
+		entry = pte_mksuper(entry);
+	return entry;
+}
+#define arch_make_huge_pte arch_make_huge_pte
+
+/* Sizes to scale up page size for PTEs with HV_PTE_SUPER bit. */
+enum {
+	HUGE_SHIFT_PGDIR = 0,
+	HUGE_SHIFT_PMD = 1,
+	HUGE_SHIFT_PAGE = 2,
+	HUGE_SHIFT_ENTRIES
+};
+extern int huge_shift[HUGE_SHIFT_ENTRIES];
+#endif
+
 #endif /* _ASM_TILE_HUGETLB_H */
diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h
index 5db0ce54284d..b4e96fef2cf8 100644
--- a/arch/tile/include/asm/irqflags.h
+++ b/arch/tile/include/asm/irqflags.h
@@ -28,10 +28,10 @@
  */
 #if CHIP_HAS_AUX_PERF_COUNTERS()
 #define LINUX_MASKABLE_INTERRUPTS_HI \
 	(~(INT_MASK_HI(INT_PERF_COUNT) | INT_MASK_HI(INT_AUX_PERF_COUNT)))
 #else
 #define LINUX_MASKABLE_INTERRUPTS_HI \
 	(~(INT_MASK_HI(INT_PERF_COUNT)))
 #endif
 
 #else
@@ -90,6 +90,14 @@
 	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, (unsigned long)(__m)); \
 	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, (unsigned long)(__m>>32)); \
 } while (0)
+#define interrupt_mask_save_mask() \
+	(__insn_mfspr(SPR_INTERRUPT_MASK_SET_K_0) | \
+	 (((unsigned long long)__insn_mfspr(SPR_INTERRUPT_MASK_SET_K_1))<<32))
+#define interrupt_mask_restore_mask(mask) do { \
+	unsigned long long __m = (mask); \
+	__insn_mtspr(SPR_INTERRUPT_MASK_K_0, (unsigned long)(__m)); \
+	__insn_mtspr(SPR_INTERRUPT_MASK_K_1, (unsigned long)(__m>>32)); \
+} while (0)
 #else
 #define interrupt_mask_set(n) \
 	__insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (1UL << (n)))
@@ -101,6 +109,10 @@
 	__insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (mask))
 #define interrupt_mask_reset_mask(mask) \
 	__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (mask))
+#define interrupt_mask_save_mask() \
+	__insn_mfspr(SPR_INTERRUPT_MASK_K)
+#define interrupt_mask_restore_mask(mask) \
+	__insn_mtspr(SPR_INTERRUPT_MASK_K, (mask))
 #endif
 
 /*
@@ -122,7 +134,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 
 /* Disable all interrupts, including NMIs. */
 #define arch_local_irq_disable_all() \
-	interrupt_mask_set_mask(-1UL)
+	interrupt_mask_set_mask(-1ULL)
 
 /* Re-enable all maskable interrupts. */
 #define arch_local_irq_enable() \
@@ -179,7 +191,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 #ifdef __tilegx__
 
 #if INT_MEM_ERROR != 0
-# error Fix IRQ_DISABLED() macro
+# error Fix IRQS_DISABLED() macro
 #endif
 
 /* Return 0 or 1 to indicate whether interrupts are currently disabled. */
@@ -207,9 +219,10 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 	mtspr   SPR_INTERRUPT_MASK_SET_K, tmp
 
 /* Enable interrupts. */
-#define IRQ_ENABLE(tmp0, tmp1) \
+#define IRQ_ENABLE_LOAD(tmp0, tmp1) \
 	GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \
-	ld      tmp0, tmp0; \
+	ld      tmp0, tmp0
+#define IRQ_ENABLE_APPLY(tmp0, tmp1) \
 	mtspr   SPR_INTERRUPT_MASK_RESET_K, tmp0
 
 #else /* !__tilegx__ */
@@ -253,17 +266,22 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 	mtspr   SPR_INTERRUPT_MASK_SET_K_1, tmp
 
 /* Enable interrupts. */
-#define IRQ_ENABLE(tmp0, tmp1) \
+#define IRQ_ENABLE_LOAD(tmp0, tmp1) \
 	GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \
 	{ \
 	 lw      tmp0, tmp0; \
 	 addi    tmp1, tmp0, 4 \
 	}; \
-	lw      tmp1, tmp1; \
+	lw      tmp1, tmp1
+#define IRQ_ENABLE_APPLY(tmp0, tmp1) \
 	mtspr   SPR_INTERRUPT_MASK_RESET_K_0, tmp0; \
 	mtspr   SPR_INTERRUPT_MASK_RESET_K_1, tmp1
 #endif
 
+#define IRQ_ENABLE(tmp0, tmp1) \
+	IRQ_ENABLE_LOAD(tmp0, tmp1); \
+	IRQ_ENABLE_APPLY(tmp0, tmp1)
+
 /*
  * Do the CPU's IRQ-state tracing from assembly code. We call a
  * C function, but almost everywhere we do, we don't mind clobbering
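
[The new save/restore pair makes it possible to mask everything and then return to exactly the prior state instead of unconditionally re-enabling. An illustrative kernel-side sketch; the surrounding function is invented for illustration:]

static void example_mask_all_briefly(void)
{
	unsigned long long saved = interrupt_mask_save_mask();

	interrupt_mask_set_mask(-1ULL);		/* block even "NMI"-class interrupts */
	/* ... work that must not be interrupted at all ... */
	interrupt_mask_restore_mask(saved);	/* restore the exact previous mask */
}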
diff --git a/arch/tile/include/asm/kexec.h b/arch/tile/include/asm/kexec.h
index c11a6cc73bb8..fc98ccfc98ac 100644
--- a/arch/tile/include/asm/kexec.h
+++ b/arch/tile/include/asm/kexec.h
@@ -19,12 +19,24 @@
 
 #include <asm/page.h>
 
+#ifndef __tilegx__
 /* Maximum physical address we can use pages from. */
 #define KEXEC_SOURCE_MEMORY_LIMIT TASK_SIZE
 /* Maximum address we can reach in physical address mode. */
 #define KEXEC_DESTINATION_MEMORY_LIMIT TASK_SIZE
 /* Maximum address we can use for the control code buffer. */
 #define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+#else
+/* We need to limit the memory below PGDIR_SIZE since
+ * we only setup page table for [0, PGDIR_SIZE) before final kexec.
+ */
+/* Maximum physical address we can use pages from. */
+#define KEXEC_SOURCE_MEMORY_LIMIT PGDIR_SIZE
+/* Maximum address we can reach in physical address mode. */
+#define KEXEC_DESTINATION_MEMORY_LIMIT PGDIR_SIZE
+/* Maximum address we can use for the control code buffer. */
+#define KEXEC_CONTROL_MEMORY_LIMIT PGDIR_SIZE
+#endif
 
 #define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE
 
diff --git a/arch/tile/include/asm/kvm_para.h b/arch/tile/include/asm/kvm_para.h
new file mode 100644
index 000000000000..14fab8f0b957
--- /dev/null
+++ b/arch/tile/include/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/tile/include/asm/mmu.h b/arch/tile/include/asm/mmu.h
index 92f94c77b6e4..e2c789096795 100644
--- a/arch/tile/include/asm/mmu.h
+++ b/arch/tile/include/asm/mmu.h
@@ -21,7 +21,7 @@ struct mm_context {
 	 * Written under the mmap_sem semaphore; read without the
 	 * semaphore but atomically, but it is conservatively set.
 	 */
-	unsigned int priority_cached;
+	unsigned long priority_cached;
 };
 
 typedef struct mm_context mm_context_t;
diff --git a/arch/tile/include/asm/mmu_context.h b/arch/tile/include/asm/mmu_context.h
index 15fb24641120..37f0b741dee7 100644
--- a/arch/tile/include/asm/mmu_context.h
+++ b/arch/tile/include/asm/mmu_context.h
@@ -30,11 +30,15 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	return 0;
 }
 
-/* Note that arch/tile/kernel/head.S also calls hv_install_context() */
+/*
+ * Note that arch/tile/kernel/head_NN.S and arch/tile/mm/migrate_NN.S
+ * also call hv_install_context().
+ */
 static inline void __install_page_table(pgd_t *pgdir, int asid, pgprot_t prot)
 {
 	/* FIXME: DIRECTIO should not always be set. FIXME. */
-	int rc = hv_install_context(__pa(pgdir), prot, asid, HV_CTX_DIRECTIO);
+	int rc = hv_install_context(__pa(pgdir), prot, asid,
+				    HV_CTX_DIRECTIO | CTX_PAGE_FLAG);
 	if (rc < 0)
 		panic("hv_install_context failed: %d", rc);
 }
diff --git a/arch/tile/include/asm/module.h b/arch/tile/include/asm/module.h
new file mode 100644
index 000000000000..44ed07ccd3d2
--- /dev/null
+++ b/arch/tile/include/asm/module.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_MODULE_H
+#define _ASM_TILE_MODULE_H
+
+#include <arch/chip.h>
+
+#include <asm-generic/module.h>
+
+/* We can't use modules built with different page sizes. */
+#if defined(CONFIG_PAGE_SIZE_16KB)
+# define MODULE_PGSZ " 16KB"
+#elif defined(CONFIG_PAGE_SIZE_64KB)
+# define MODULE_PGSZ " 64KB"
+#else
+# define MODULE_PGSZ ""
+#endif
+
+/* We don't really support no-SMP so tag if someone tries. */
+#ifdef CONFIG_SMP
+#define MODULE_NOSMP ""
+#else
+#define MODULE_NOSMP " nosmp"
+#endif
+
+#define MODULE_ARCH_VERMAGIC CHIP_ARCH_NAME MODULE_PGSZ MODULE_NOSMP
+
+#endif /* _ASM_TILE_MODULE_H */
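
[A userspace restatement of the vermagic string-pasting above, to show what the module loader will compare. The literal "tilegx" stands in for CHIP_ARCH_NAME (an assumption), and the config choices are fixed for the example:]

#include <stdio.h>

#define CHIP_ARCH_NAME "tilegx"	/* assumed expansion for TILE-Gx */
#define MODULE_PGSZ " 64KB"	/* as under CONFIG_PAGE_SIZE_64KB */
#define MODULE_NOSMP ""		/* as under CONFIG_SMP */
#define MODULE_ARCH_VERMAGIC CHIP_ARCH_NAME MODULE_PGSZ MODULE_NOSMP

int main(void)
{
	puts(MODULE_ARCH_VERMAGIC);	/* prints "tilegx 64KB"; a kernel built
					   with 16KB pages expects "tilegx 16KB"
					   and so rejects this module at load */
	return 0;
}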
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index db93518fac03..9d9131e5c552 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -20,8 +20,17 @@
 #include <arch/chip.h>
 
 /* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */
-#define PAGE_SHIFT HV_LOG2_PAGE_SIZE_SMALL
-#define HPAGE_SHIFT HV_LOG2_PAGE_SIZE_LARGE
+#if defined(CONFIG_PAGE_SIZE_16KB)
+#define PAGE_SHIFT 14
+#define CTX_PAGE_FLAG HV_CTX_PG_SM_16K
+#elif defined(CONFIG_PAGE_SIZE_64KB)
+#define PAGE_SHIFT 16
+#define CTX_PAGE_FLAG HV_CTX_PG_SM_64K
+#else
+#define PAGE_SHIFT HV_LOG2_DEFAULT_PAGE_SIZE_SMALL
+#define CTX_PAGE_FLAG 0
+#endif
+#define HPAGE_SHIFT HV_LOG2_DEFAULT_PAGE_SIZE_LARGE
 
 #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
 #define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
@@ -78,8 +87,7 @@ typedef HV_PTE pgprot_t;
 /*
  * User L2 page tables are managed as one L2 page table per page,
  * because we use the page allocator for them. This keeps the allocation
- * simple and makes it potentially useful to implement HIGHPTE at some point.
- * However, it's also inefficient, since L2 page tables are much smaller
+ * simple, but it's also inefficient, since L2 page tables are much smaller
  * than pages (currently 2KB vs 64KB). So we should revisit this.
  */
 typedef struct page *pgtable_t;
@@ -128,7 +136,7 @@ static inline __attribute_const__ int get_order(unsigned long size)
 
 #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
 
-#define HUGE_MAX_HSTATE 2
+#define HUGE_MAX_HSTATE 6
 
 #ifdef CONFIG_HUGETLB_PAGE
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
diff --git a/arch/tile/include/asm/pgalloc.h b/arch/tile/include/asm/pgalloc.h
index e919c0bdc22d..1b902508b664 100644
--- a/arch/tile/include/asm/pgalloc.h
+++ b/arch/tile/include/asm/pgalloc.h
@@ -19,24 +19,24 @@
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <asm/fixmap.h>
+#include <asm/page.h>
 #include <hv/hypervisor.h>
 
 /* Bits for the size of the second-level page table. */
-#define L2_KERNEL_PGTABLE_SHIFT \
-  (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL + HV_LOG2_PTE_SIZE)
+#define L2_KERNEL_PGTABLE_SHIFT _HV_LOG2_L2_SIZE(HPAGE_SHIFT, PAGE_SHIFT)
+
+/* How big is a kernel L2 page table? */
+#define L2_KERNEL_PGTABLE_SIZE (1UL << L2_KERNEL_PGTABLE_SHIFT)
 
 /* We currently allocate user L2 page tables by page (unlike kernel L2s). */
-#if L2_KERNEL_PGTABLE_SHIFT < HV_LOG2_PAGE_SIZE_SMALL
-#define L2_USER_PGTABLE_SHIFT HV_LOG2_PAGE_SIZE_SMALL
+#if L2_KERNEL_PGTABLE_SHIFT < PAGE_SHIFT
+#define L2_USER_PGTABLE_SHIFT PAGE_SHIFT
 #else
 #define L2_USER_PGTABLE_SHIFT L2_KERNEL_PGTABLE_SHIFT
 #endif
 
 /* How many pages do we need, as an "order", for a user L2 page table? */
-#define L2_USER_PGTABLE_ORDER (L2_USER_PGTABLE_SHIFT - HV_LOG2_PAGE_SIZE_SMALL)
-
-/* How big is a kernel L2 page table? */
-#define L2_KERNEL_PGTABLE_SIZE (1 << L2_KERNEL_PGTABLE_SHIFT)
+#define L2_USER_PGTABLE_ORDER (L2_USER_PGTABLE_SHIFT - PAGE_SHIFT)
 
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
@@ -50,14 +50,14 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 static inline void pmd_populate_kernel(struct mm_struct *mm,
 				       pmd_t *pmd, pte_t *ptep)
 {
-	set_pmd(pmd, ptfn_pmd(__pa(ptep) >> HV_LOG2_PAGE_TABLE_ALIGN,
+	set_pmd(pmd, ptfn_pmd(HV_CPA_TO_PTFN(__pa(ptep)),
 			      __pgprot(_PAGE_PRESENT)));
 }
 
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 				pgtable_t page)
 {
-	set_pmd(pmd, ptfn_pmd(HV_PFN_TO_PTFN(page_to_pfn(page)),
+	set_pmd(pmd, ptfn_pmd(HV_CPA_TO_PTFN(PFN_PHYS(page_to_pfn(page))),
 			      __pgprot(_PAGE_PRESENT)));
 }
 
@@ -68,8 +68,20 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
-extern void pte_free(struct mm_struct *mm, struct page *pte);
+extern pgtable_t pgtable_alloc_one(struct mm_struct *mm, unsigned long address,
+				   int order);
+extern void pgtable_free(struct mm_struct *mm, struct page *pte, int order);
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+				      unsigned long address)
+{
+	return pgtable_alloc_one(mm, address, L2_USER_PGTABLE_ORDER);
+}
+
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
+{
+	pgtable_free(mm, pte, L2_USER_PGTABLE_ORDER);
+}
 
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
@@ -85,8 +97,13 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 	pte_free(mm, virt_to_page(pte));
 }
 
-extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
-			   unsigned long address);
+extern void __pgtable_free_tlb(struct mmu_gather *tlb, struct page *pte,
+			       unsigned long address, int order);
+static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
+				  unsigned long address)
+{
+	__pgtable_free_tlb(tlb, pte, address, L2_USER_PGTABLE_ORDER);
+}
 
 #define check_pgt_cache() do { } while (0)
 
@@ -104,19 +121,44 @@ void shatter_pmd(pmd_t *pmd);
 void shatter_huge_page(unsigned long addr);
 
 #ifdef __tilegx__
-/* We share a single page allocator for both L1 and L2 page tables. */
-#if HV_L1_SIZE != HV_L2_SIZE
-# error Rework assumption that L1 and L2 page tables are same size.
-#endif
-#define L1_USER_PGTABLE_ORDER L2_USER_PGTABLE_ORDER
+
 #define pud_populate(mm, pud, pmd) \
 	pmd_populate_kernel((mm), (pmd_t *)(pud), (pte_t *)(pmd))
-#define pmd_alloc_one(mm, addr) \
-	((pmd_t *)page_to_virt(pte_alloc_one((mm), (addr))))
-#define pmd_free(mm, pmdp) \
-	pte_free((mm), virt_to_page(pmdp))
-#define __pmd_free_tlb(tlb, pmdp, address) \
-	__pte_free_tlb((tlb), virt_to_page(pmdp), (address))
+
+/* Bits for the size of the L1 (intermediate) page table. */
+#define L1_KERNEL_PGTABLE_SHIFT _HV_LOG2_L1_SIZE(HPAGE_SHIFT)
+
+/* How big is a kernel L2 page table? */
+#define L1_KERNEL_PGTABLE_SIZE (1UL << L1_KERNEL_PGTABLE_SHIFT)
+
+/* We currently allocate L1 page tables by page. */
+#if L1_KERNEL_PGTABLE_SHIFT < PAGE_SHIFT
+#define L1_USER_PGTABLE_SHIFT PAGE_SHIFT
+#else
+#define L1_USER_PGTABLE_SHIFT L1_KERNEL_PGTABLE_SHIFT
 #endif
 
+/* How many pages do we need, as an "order", for an L1 page table? */
+#define L1_USER_PGTABLE_ORDER (L1_USER_PGTABLE_SHIFT - PAGE_SHIFT)
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	struct page *p = pgtable_alloc_one(mm, address, L1_USER_PGTABLE_ORDER);
+	return (pmd_t *)page_to_virt(p);
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
+{
+	pgtable_free(mm, virt_to_page(pmdp), L1_USER_PGTABLE_ORDER);
+}
+
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
+				  unsigned long address)
+{
+	__pgtable_free_tlb(tlb, virt_to_page(pmdp), address,
+			   L1_USER_PGTABLE_ORDER);
+}
+
+#endif /* __tilegx__ */
+
 #endif /* _ASM_TILE_PGALLOC_H */
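
[A worked example of the table-sizing arithmetic now expressed via _HV_LOG2_L2_SIZE(): with 64KB small pages (PAGE_SHIFT 16), an assumed 16MB huge page (HPAGE_SHIFT 24), and assumed 8-byte PTEs, one L2 table is 2KB -- matching the "2KB vs 64KB" remark in the page.h hunk earlier -- so a user L2 table rounds up to one whole 64KB page (order 0):]

#include <assert.h>

int main(void)
{
	int hpage_shift = 24, page_shift = 16, log2_pte_size = 3; /* assumptions */

	/* One PTE per small page across one huge-page span: */
	int l2_kernel_shift = hpage_shift - page_shift + log2_pte_size;
	assert((1UL << l2_kernel_shift) == 2048);	/* 2KB kernel L2 table */

	/* User L2 tables are allocated by page, so round up to PAGE_SHIFT: */
	int l2_user_shift =
		l2_kernel_shift < page_shift ? page_shift : l2_kernel_shift;
	assert(l2_user_shift - page_shift == 0);	/* L2_USER_PGTABLE_ORDER == 0 */
	return 0;
}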
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index 67490910774d..73b1a4c9ad03 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -27,8 +27,10 @@
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/pfn.h>
 #include <asm/processor.h>
 #include <asm/fixmap.h>
+#include <asm/page.h>
 
 struct mm_struct;
 struct vm_area_struct;
@@ -69,6 +71,7 @@ extern void set_page_homes(void);
 
 #define _PAGE_PRESENT HV_PTE_PRESENT
 #define _PAGE_HUGE_PAGE HV_PTE_PAGE
+#define _PAGE_SUPER_PAGE HV_PTE_SUPER
 #define _PAGE_READABLE HV_PTE_READABLE
 #define _PAGE_WRITABLE HV_PTE_WRITABLE
 #define _PAGE_EXECUTABLE HV_PTE_EXECUTABLE
@@ -85,6 +88,7 @@ extern void set_page_homes(void);
 #define _PAGE_ALL (\
 	_PAGE_PRESENT | \
 	_PAGE_HUGE_PAGE | \
+	_PAGE_SUPER_PAGE | \
 	_PAGE_READABLE | \
 	_PAGE_WRITABLE | \
 	_PAGE_EXECUTABLE | \
@@ -162,7 +166,7 @@ extern void set_page_homes(void);
 	(pgprot_t) { ((oldprot).val & ~_PAGE_ALL) | (newprot).val }
 
 /* Just setting the PFN to zero suffices. */
-#define pte_pgprot(x) hv_pte_set_pfn((x), 0)
+#define pte_pgprot(x) hv_pte_set_pa((x), 0)
 
 /*
  * For PTEs and PDEs, we must clear the Present bit first when
@@ -187,6 +191,7 @@ static inline void __pte_clear(pte_t *ptep)
  * Undefined behaviour if not..
  */
 #define pte_present hv_pte_get_present
+#define pte_mknotpresent hv_pte_clear_present
 #define pte_user hv_pte_get_user
 #define pte_read hv_pte_get_readable
 #define pte_dirty hv_pte_get_dirty
@@ -194,6 +199,7 @@ static inline void __pte_clear(pte_t *ptep)
 #define pte_write hv_pte_get_writable
 #define pte_exec hv_pte_get_executable
 #define pte_huge hv_pte_get_page
+#define pte_super hv_pte_get_super
 #define pte_rdprotect hv_pte_clear_readable
 #define pte_exprotect hv_pte_clear_executable
 #define pte_mkclean hv_pte_clear_dirty
@@ -206,6 +212,7 @@ static inline void __pte_clear(pte_t *ptep)
206#define pte_mkyoung hv_pte_set_accessed 212#define pte_mkyoung hv_pte_set_accessed
207#define pte_mkwrite hv_pte_set_writable 213#define pte_mkwrite hv_pte_set_writable
208#define pte_mkhuge hv_pte_set_page 214#define pte_mkhuge hv_pte_set_page
215#define pte_mksuper hv_pte_set_super
209 216
210#define pte_special(pte) 0 217#define pte_special(pte) 0
211#define pte_mkspecial(pte) (pte) 218#define pte_mkspecial(pte) (pte)
@@ -261,7 +268,7 @@ static inline int pte_none(pte_t pte)
261 268
262static inline unsigned long pte_pfn(pte_t pte) 269static inline unsigned long pte_pfn(pte_t pte)
263{ 270{
264 return hv_pte_get_pfn(pte); 271 return PFN_DOWN(hv_pte_get_pa(pte));
265} 272}
266 273
267/* Set or get the remote cache cpu in a pgprot with remote caching. */ 274/* Set or get the remote cache cpu in a pgprot with remote caching. */
@@ -270,7 +277,7 @@ extern int get_remote_cache_cpu(pgprot_t prot);
270 277
271static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) 278static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
272{ 279{
273 return hv_pte_set_pfn(prot, pfn); 280 return hv_pte_set_pa(prot, PFN_PHYS(pfn));
274} 281}
275 282
276/* Support for priority mappings. */ 283/* Support for priority mappings. */
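With PTEs now carrying a full client physical address instead of a raw PFN, pte_pfn() and pfn_pte() convert through PFN_DOWN()/PFN_PHYS() from the newly included <linux/pfn.h>. A minimal sketch of the invariant these helpers rely on (the function name is illustrative only):

static inline int pfn_pa_roundtrip_ok(unsigned long pfn, pgprot_t prot)
{
	/*
	 * PFN_PHYS() shifts left by PAGE_SHIFT and PFN_DOWN() shifts
	 * right, so pfn -> pa -> pfn is the identity for any in-range
	 * pfn, no matter which small-page size the context uses.
	 */
	return pte_pfn(pfn_pte(pfn, prot)) == pfn;
}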
@@ -312,7 +319,7 @@ extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next);
312 */ 319 */
313static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) 320static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
314{ 321{
315 return pfn_pte(hv_pte_get_pfn(pte), newprot); 322 return pfn_pte(pte_pfn(pte), newprot);
316} 323}
317 324
318/* 325/*
@@ -335,13 +342,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
335 */ 342 */
336#define pgd_offset_k(address) pgd_offset(&init_mm, address) 343#define pgd_offset_k(address) pgd_offset(&init_mm, address)
337 344
338#if defined(CONFIG_HIGHPTE)
339extern pte_t *pte_offset_map(pmd_t *, unsigned long address);
340#define pte_unmap(pte) kunmap_atomic(pte)
341#else
342#define pte_offset_map(dir, address) pte_offset_kernel(dir, address) 345#define pte_offset_map(dir, address) pte_offset_kernel(dir, address)
343#define pte_unmap(pte) do { } while (0) 346#define pte_unmap(pte) do { } while (0)
344#endif
345 347
346/* Clear a non-executable kernel PTE and flush it from the TLB. */ 348/* Clear a non-executable kernel PTE and flush it from the TLB. */
347#define kpte_clear_flush(ptep, vaddr) \ 349#define kpte_clear_flush(ptep, vaddr) \
@@ -410,6 +412,46 @@ static inline unsigned long pmd_index(unsigned long address)
410 return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); 412 return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
411} 413}
412 414
415#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
416static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
417 unsigned long address,
418 pmd_t *pmdp)
419{
420 return ptep_test_and_clear_young(vma, address, pmdp_ptep(pmdp));
421}
422
423#define __HAVE_ARCH_PMDP_SET_WRPROTECT
424static inline void pmdp_set_wrprotect(struct mm_struct *mm,
425 unsigned long address, pmd_t *pmdp)
426{
427 ptep_set_wrprotect(mm, address, pmdp_ptep(pmdp));
428}
429
430
431#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
432static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
433 unsigned long address,
434 pmd_t *pmdp)
435{
436 return pte_pmd(ptep_get_and_clear(mm, address, pmdp_ptep(pmdp)));
437}
438
439static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
440{
441 set_pte(pmdp_ptep(pmdp), pmd_pte(pmdval));
442}
443
444#define set_pmd_at(mm, addr, pmdp, pmdval) __set_pmd(pmdp, pmdval)
445
446/* Create a pmd from a PTFN. */
447static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
448{
449 return pte_pmd(hv_pte_set_ptfn(prot, ptfn));
450}
451
452/* Return the page-table frame number (ptfn) that a pmd_t points at. */
453#define pmd_ptfn(pmd) hv_pte_get_ptfn(pmd_pte(pmd))
454
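All of the pmd helpers above funnel through pmdp_ptep()/pmd_pte()/pte_pmd(), so each operation is written once against the hypervisor's pte accessors and shared by the 32-bit and 64-bit headers. As a hedged sketch (the real callers live in the mm/pgalloc code, not in this hunk), populating a pmd with a freshly allocated pte page via ptfn_pmd() would look like:

static inline void example_pmd_populate(pmd_t *pmdp, pte_t *pte_page,
					pgprot_t prot)
{
	/* HV_CPA_TO_PTFN() is retained in <hv/hypervisor.h> below. */
	unsigned long ptfn = HV_CPA_TO_PTFN(__pa(pte_page));

	__set_pmd(pmdp, ptfn_pmd(ptfn, prot));
}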
413/* 455/*
414 * A given kernel pmd_t maps to a specific virtual address (either a 456 * A given kernel pmd_t maps to a specific virtual address (either a
415 * kernel huge page or a kernel pte_t table). Since kernel pte_t 457 * kernel huge page or a kernel pte_t table). Since kernel pte_t
@@ -430,7 +472,48 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
430 * OK for pte_lockptr(), since we just end up with potentially one 472 * OK for pte_lockptr(), since we just end up with potentially one
431 * lock being used for several pte_t arrays. 473 * lock being used for several pte_t arrays.
432 */ 474 */
433#define pmd_page(pmd) pfn_to_page(HV_PTFN_TO_PFN(pmd_ptfn(pmd))) 475#define pmd_page(pmd) pfn_to_page(PFN_DOWN(HV_PTFN_TO_CPA(pmd_ptfn(pmd))))
476
477static inline void pmd_clear(pmd_t *pmdp)
478{
479 __pte_clear(pmdp_ptep(pmdp));
480}
481
482#define pmd_mknotpresent(pmd) pte_pmd(pte_mknotpresent(pmd_pte(pmd)))
483#define pmd_young(pmd) pte_young(pmd_pte(pmd))
484#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
485#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
486#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
487#define pmd_write(pmd) pte_write(pmd_pte(pmd))
488#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
489#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
490#define pmd_huge_page(pmd) pte_huge(pmd_pte(pmd))
491#define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd)))
492#define __HAVE_ARCH_PMD_WRITE
493
494#define pfn_pmd(pfn, pgprot) pte_pmd(pfn_pte((pfn), (pgprot)))
495#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
496#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
497
498static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
499{
500 return pfn_pmd(pmd_pfn(pmd), newprot);
501}
502
503#ifdef CONFIG_TRANSPARENT_HUGEPAGE
504#define has_transparent_hugepage() 1
505#define pmd_trans_huge pmd_huge_page
506
507static inline pmd_t pmd_mksplitting(pmd_t pmd)
508{
509 return pte_pmd(hv_pte_set_client2(pmd_pte(pmd)));
510}
511
512static inline int pmd_trans_splitting(pmd_t pmd)
513{
514 return hv_pte_get_client2(pmd_pte(pmd));
515}
516#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
434 517
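The splitting flag rides on HV_PTE_CLIENT2, a bit the hypervisor guarantees never to inspect or modify (see the hypervisor.h hunk below), so mark-then-test is a pure software round-trip. A hedged sketch of that invariant:

static inline int example_splitting_roundtrip(pmd_t pmd)
{
	/* Both sides read and write the same client2 bit, so this
	 * always returns 1. */
	return pmd_trans_splitting(pmd_mksplitting(pmd));
}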
435/* 518/*
436 * The pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] 519 * The pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
@@ -448,17 +531,13 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
448 return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); 531 return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
449} 532}
450 533
451static inline int pmd_huge_page(pmd_t pmd)
452{
453 return pmd_val(pmd) & _PAGE_HUGE_PAGE;
454}
455
456#include <asm-generic/pgtable.h> 534#include <asm-generic/pgtable.h>
457 535
458/* Support /proc/NN/pgtable API. */ 536/* Support /proc/NN/pgtable API. */
459struct seq_file; 537struct seq_file;
460int arch_proc_pgtable_show(struct seq_file *m, struct mm_struct *mm, 538int arch_proc_pgtable_show(struct seq_file *m, struct mm_struct *mm,
461 unsigned long vaddr, pte_t *ptep, void **datap); 539 unsigned long vaddr, unsigned long pagesize,
540 pte_t *ptep, void **datap);
462 541
463#endif /* !__ASSEMBLY__ */ 542#endif /* !__ASSEMBLY__ */
464 543
diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h
index 9f98529761fd..4ce4a7a99c24 100644
--- a/arch/tile/include/asm/pgtable_32.h
+++ b/arch/tile/include/asm/pgtable_32.h
@@ -20,11 +20,12 @@
20 * The level-1 index is defined by the huge page size. A PGD is composed 20 * The level-1 index is defined by the huge page size. A PGD is composed
21 * of PTRS_PER_PGD pgd_t's and is the top level of the page table. 21 * of PTRS_PER_PGD pgd_t's and is the top level of the page table.
22 */ 22 */
23#define PGDIR_SHIFT HV_LOG2_PAGE_SIZE_LARGE 23#define PGDIR_SHIFT HPAGE_SHIFT
24#define PGDIR_SIZE HV_PAGE_SIZE_LARGE 24#define PGDIR_SIZE HPAGE_SIZE
25#define PGDIR_MASK (~(PGDIR_SIZE-1)) 25#define PGDIR_MASK (~(PGDIR_SIZE-1))
26#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT)) 26#define PTRS_PER_PGD _HV_L1_ENTRIES(HPAGE_SHIFT)
27#define SIZEOF_PGD (PTRS_PER_PGD * sizeof(pgd_t)) 27#define PGD_INDEX(va) _HV_L1_INDEX(va, HPAGE_SHIFT)
28#define SIZEOF_PGD _HV_L1_SIZE(HPAGE_SHIFT)
28 29
29/* 30/*
30 * The level-2 index is defined by the difference between the huge 31 * The level-2 index is defined by the difference between the huge
@@ -33,8 +34,9 @@
33 * Note that the hypervisor docs use PTE for what we call pte_t, so 34 * Note that the hypervisor docs use PTE for what we call pte_t, so
34 * this nomenclature is somewhat confusing. 35 * this nomenclature is somewhat confusing.
35 */ 36 */
36#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL)) 37#define PTRS_PER_PTE _HV_L2_ENTRIES(HPAGE_SHIFT, PAGE_SHIFT)
37#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t)) 38#define PTE_INDEX(va) _HV_L2_INDEX(va, HPAGE_SHIFT, PAGE_SHIFT)
39#define SIZEOF_PTE _HV_L2_SIZE(HPAGE_SHIFT, PAGE_SHIFT)
38 40
39#ifndef __ASSEMBLY__ 41#ifndef __ASSEMBLY__
40 42
@@ -111,24 +113,14 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
111 return pte; 113 return pte;
112} 114}
113 115
114static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval) 116/*
115{ 117 * pmds are wrappers around pgds, which are the same as ptes.
116 set_pte(&pmdp->pud.pgd, pmdval.pud.pgd); 118 * It's often convenient to "cast" back and forth and use the pte methods,
117} 119 * which are the methods supplied by the hypervisor.
118 120 */
119/* Create a pmd from a PTFN. */ 121#define pmd_pte(pmd) ((pmd).pud.pgd)
120static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot) 122#define pmdp_ptep(pmdp) (&(pmdp)->pud.pgd)
121{ 123#define pte_pmd(pte) ((pmd_t){ { (pte) } })
122 return (pmd_t){ { hv_pte_set_ptfn(prot, ptfn) } };
123}
124
125/* Return the page-table frame number (ptfn) that a pmd_t points at. */
126#define pmd_ptfn(pmd) hv_pte_get_ptfn((pmd).pud.pgd)
127
128static inline void pmd_clear(pmd_t *pmdp)
129{
130 __pte_clear(&pmdp->pud.pgd);
131}
132 124
133#endif /* __ASSEMBLY__ */ 125#endif /* __ASSEMBLY__ */
134 126
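On TILEPro a pmd_t is a one-member wrapper around a pgd_t, which is itself a pte_t, so the three macros above are pure type punning and pte_pmd()/pmd_pte() invert each other. A hedged sketch of the invariant the shared code in <asm/pgtable.h> depends on:

static inline int example_pmd_cast_identity(pmd_t pmd)
{
	/* Unwrap to the underlying pte, then wrap it back up. */
	return pmd_val(pte_pmd(pmd_pte(pmd))) == pmd_val(pmd);
}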
diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h
index fd80328523b4..2492fa5478e7 100644
--- a/arch/tile/include/asm/pgtable_64.h
+++ b/arch/tile/include/asm/pgtable_64.h
@@ -21,17 +21,19 @@
21#define PGDIR_SIZE HV_L1_SPAN 21#define PGDIR_SIZE HV_L1_SPAN
22#define PGDIR_MASK (~(PGDIR_SIZE-1)) 22#define PGDIR_MASK (~(PGDIR_SIZE-1))
23#define PTRS_PER_PGD HV_L0_ENTRIES 23#define PTRS_PER_PGD HV_L0_ENTRIES
24#define SIZEOF_PGD (PTRS_PER_PGD * sizeof(pgd_t)) 24#define PGD_INDEX(va) HV_L0_INDEX(va)
25#define SIZEOF_PGD HV_L0_SIZE
25 26
26/* 27/*
27 * The level-1 index is defined by the huge page size. A PMD is composed 28 * The level-1 index is defined by the huge page size. A PMD is composed
28 * of PTRS_PER_PMD pgd_t's and is the middle level of the page table. 29 * of PTRS_PER_PMD pgd_t's and is the middle level of the page table.
29 */ 30 */
30#define PMD_SHIFT HV_LOG2_PAGE_SIZE_LARGE 31#define PMD_SHIFT HPAGE_SHIFT
31#define PMD_SIZE HV_PAGE_SIZE_LARGE 32#define PMD_SIZE HPAGE_SIZE
32#define PMD_MASK (~(PMD_SIZE-1)) 33#define PMD_MASK (~(PMD_SIZE-1))
33#define PTRS_PER_PMD (1 << (PGDIR_SHIFT - PMD_SHIFT)) 34#define PTRS_PER_PMD _HV_L1_ENTRIES(HPAGE_SHIFT)
34#define SIZEOF_PMD (PTRS_PER_PMD * sizeof(pmd_t)) 35#define PMD_INDEX(va) _HV_L1_INDEX(va, HPAGE_SHIFT)
36#define SIZEOF_PMD _HV_L1_SIZE(HPAGE_SHIFT)
35 37
36/* 38/*
37 * The level-2 index is defined by the difference between the huge 39 * The level-2 index is defined by the difference between the huge
@@ -40,17 +42,19 @@
40 * Note that the hypervisor docs use PTE for what we call pte_t, so 42 * Note that the hypervisor docs use PTE for what we call pte_t, so
41 * this nomenclature is somewhat confusing. 43 * this nomenclature is somewhat confusing.
42 */ 44 */
43#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL)) 45#define PTRS_PER_PTE _HV_L2_ENTRIES(HPAGE_SHIFT, PAGE_SHIFT)
44#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t)) 46#define PTE_INDEX(va) _HV_L2_INDEX(va, HPAGE_SHIFT, PAGE_SHIFT)
47#define SIZEOF_PTE _HV_L2_SIZE(HPAGE_SHIFT, PAGE_SHIFT)
45 48
46/* 49/*
47 * Align the vmalloc area to an L2 page table, and leave a guard page 50 * Align the vmalloc area to an L2 page table. Omit guard pages at
48 * at the beginning and end. The vmalloc code also puts in an internal 51 * the beginning and end for simplicity (particularly in the per-cpu
52 * memory allocation code). The vmalloc code puts in an internal
49 * guard page between each allocation. 53 * guard page between each allocation.
50 */ 54 */
51#define _VMALLOC_END HUGE_VMAP_BASE 55#define _VMALLOC_END HUGE_VMAP_BASE
52#define VMALLOC_END (_VMALLOC_END - PAGE_SIZE) 56#define VMALLOC_END _VMALLOC_END
53#define VMALLOC_START (_VMALLOC_START + PAGE_SIZE) 57#define VMALLOC_START _VMALLOC_START
54 58
55#define HUGE_VMAP_END (HUGE_VMAP_BASE + PGDIR_SIZE) 59#define HUGE_VMAP_END (HUGE_VMAP_BASE + PGDIR_SIZE)
56 60
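Worked example of the parameterized geometry, assuming the default shifts from <hv/hypervisor.h> below (PAGE_SHIFT == 16 for 64KB small pages, HPAGE_SHIFT == 24 for 16MB huge pages) and 8-byte PTEs:

	PTRS_PER_PTE = _HV_L2_ENTRIES(24, 16) = 1 << (24 - 16) = 256
	SIZEOF_PTE   = _HV_L2_SIZE(24, 16)    = 256 * 8        = 2KB

Installing the context with HV_CTX_PG_SM_16K instead (keeping the 16MB huge page) gives 1 << (24 - 14) = 1024 entries per level-2 table; nothing in these headers hard-codes the old 16/24 split any more.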
@@ -98,7 +102,7 @@ static inline int pud_bad(pud_t pud)
98 * A pud_t points to a pmd_t array. Since we can have multiple per 102 * A pud_t points to a pmd_t array. Since we can have multiple per
99 * page, we don't have a one-to-one mapping of pud_t's to pages. 103 * page, we don't have a one-to-one mapping of pud_t's to pages.
100 */ 104 */
101#define pud_page(pud) pfn_to_page(HV_PTFN_TO_PFN(pud_ptfn(pud))) 105#define pud_page(pud) pfn_to_page(PFN_DOWN(HV_PTFN_TO_CPA(pud_ptfn(pud))))
102 106
103static inline unsigned long pud_index(unsigned long address) 107static inline unsigned long pud_index(unsigned long address)
104{ 108{
@@ -108,28 +112,6 @@ static inline unsigned long pud_index(unsigned long address)
108#define pmd_offset(pud, address) \ 112#define pmd_offset(pud, address) \
109 ((pmd_t *)pud_page_vaddr(*(pud)) + pmd_index(address)) 113 ((pmd_t *)pud_page_vaddr(*(pud)) + pmd_index(address))
110 114
111static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
112{
113 set_pte(pmdp, pmdval);
114}
115
116/* Create a pmd from a PTFN and pgprot. */
117static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
118{
119 return hv_pte_set_ptfn(prot, ptfn);
120}
121
122/* Return the page-table frame number (ptfn) that a pmd_t points at. */
123static inline unsigned long pmd_ptfn(pmd_t pmd)
124{
125 return hv_pte_get_ptfn(pmd);
126}
127
128static inline void pmd_clear(pmd_t *pmdp)
129{
130 __pte_clear(pmdp);
131}
132
133/* Normalize an address to having the correct high bits set. */ 115/* Normalize an address to having the correct high bits set. */
134#define pgd_addr_normalize pgd_addr_normalize 116#define pgd_addr_normalize pgd_addr_normalize
135static inline unsigned long pgd_addr_normalize(unsigned long addr) 117static inline unsigned long pgd_addr_normalize(unsigned long addr)
@@ -170,6 +152,13 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
170 return hv_pte(__insn_exch(&ptep->val, 0UL)); 152 return hv_pte(__insn_exch(&ptep->val, 0UL));
171} 153}
172 154
155/*
156 * pmds are the same as pgds and ptes, so converting is a no-op.
157 */
158#define pmd_pte(pmd) (pmd)
159#define pmdp_ptep(pmdp) (pmdp)
160#define pte_pmd(pte) (pte)
161
173#endif /* __ASSEMBLY__ */ 162#endif /* __ASSEMBLY__ */
174 163
175#endif /* _ASM_TILE_PGTABLE_64_H */ 164#endif /* _ASM_TILE_PGTABLE_64_H */
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 15cd8a4a06ce..8c4dd9ff91eb 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -76,6 +76,17 @@ struct async_tlb {
76 76
77#ifdef CONFIG_HARDWALL 77#ifdef CONFIG_HARDWALL
78struct hardwall_info; 78struct hardwall_info;
79struct hardwall_task {
80 /* Which hardwall is this task tied to? (or NULL if none) */
81 struct hardwall_info *info;
82 /* Chains this task into the list at info->task_head. */
83 struct list_head list;
84};
85#ifdef __tilepro__
86#define HARDWALL_TYPES 1 /* udn */
87#else
88#define HARDWALL_TYPES 3 /* udn, idn, and ipi */
89#endif
79#endif 90#endif
80 91
81struct thread_struct { 92struct thread_struct {
@@ -116,10 +127,8 @@ struct thread_struct {
116 unsigned long dstream_pf; 127 unsigned long dstream_pf;
117#endif 128#endif
118#ifdef CONFIG_HARDWALL 129#ifdef CONFIG_HARDWALL
119 /* Is this task tied to an activated hardwall? */ 130 /* Hardwall information for various resources. */
120 struct hardwall_info *hardwall; 131 struct hardwall_task hardwall[HARDWALL_TYPES];
121 /* Chains this task into the list at hardwall->list. */
122 struct list_head hardwall_list;
123#endif 132#endif
124#if CHIP_HAS_TILE_DMA() 133#if CHIP_HAS_TILE_DMA()
125 /* Async DMA TLB fault information */ 134 /* Async DMA TLB fault information */
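With thread_struct holding one hardwall_task per resource type instead of a single pointer, code that tears down a task's hardwalls iterates the array. A hedged sketch; deactivate_one() is a hypothetical stand-in for the real logic in kernel/hardwall.c:

static inline void example_deactivate_all(struct task_struct *task)
{
	int i;

	for (i = 0; i < HARDWALL_TYPES; i++)
		if (task->thread.hardwall[i].info != NULL)
			deactivate_one(task, i);	/* hypothetical */
}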
diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h
index e58613e0752f..c67eb70ea78e 100644
--- a/arch/tile/include/asm/setup.h
+++ b/arch/tile/include/asm/setup.h
@@ -41,15 +41,15 @@ void restrict_dma_mpls(void);
41#ifdef CONFIG_HARDWALL 41#ifdef CONFIG_HARDWALL
42/* User-level network management functions */ 42/* User-level network management functions */
43void reset_network_state(void); 43void reset_network_state(void);
44void grant_network_mpls(void);
45void restrict_network_mpls(void);
46struct task_struct; 44struct task_struct;
47int hardwall_deactivate(struct task_struct *task); 45void hardwall_switch_tasks(struct task_struct *prev, struct task_struct *next);
46void hardwall_deactivate_all(struct task_struct *task);
47int hardwall_ipi_valid(int cpu);
48 48
49/* Hook hardwall code into changes in affinity. */ 49/* Hook hardwall code into changes in affinity. */
50#define arch_set_cpus_allowed(p, new_mask) do { \ 50#define arch_set_cpus_allowed(p, new_mask) do { \
51 if (p->thread.hardwall && !cpumask_equal(&p->cpus_allowed, new_mask)) \ 51 if (!cpumask_equal(&p->cpus_allowed, new_mask)) \
52 hardwall_deactivate(p); \ 52 hardwall_deactivate_all(p); \
53} while (0) 53} while (0)
54#endif 54#endif
55 55
diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h
index 3b5507c31eae..06f0464cfed9 100644
--- a/arch/tile/include/asm/syscalls.h
+++ b/arch/tile/include/asm/syscalls.h
@@ -43,7 +43,8 @@ long sys32_fadvise64(int fd, u32 offset_lo, u32 offset_hi,
43 u32 len, int advice); 43 u32 len, int advice);
44int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi, 44int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi,
45 u32 len_lo, u32 len_hi, int advice); 45 u32 len_lo, u32 len_hi, int advice);
46long sys_flush_cache(void); 46long sys_cacheflush(unsigned long addr, unsigned long len,
47 unsigned long flags);
47#ifndef __tilegx__ /* No mmap() in the 32-bit kernel. */ 48#ifndef __tilegx__ /* No mmap() in the 32-bit kernel. */
48#define sys_mmap sys_mmap 49#define sys_mmap sys_mmap
49#endif 50#endif
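From userspace, the renamed syscall takes an address range plus flags. A hedged sketch of an invocation; the legal flags values live in the new <asm/cachectl.h>, which is not shown in this hunk, so the 0 below is only a placeholder:

#include <unistd.h>
#include <sys/syscall.h>

static long example_cacheflush(void *addr, unsigned long len)
{
	/* __NR_cacheflush is defined in <asm/unistd.h>; see below. */
	return syscall(__NR_cacheflush, (unsigned long)addr, len, 0UL);
}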
diff --git a/arch/tile/include/asm/tlbflush.h b/arch/tile/include/asm/tlbflush.h
index 96199d214fb8..dcf91b25a1e5 100644
--- a/arch/tile/include/asm/tlbflush.h
+++ b/arch/tile/include/asm/tlbflush.h
@@ -38,16 +38,11 @@ DECLARE_PER_CPU(int, current_asid);
38/* The hypervisor tells us what ASIDs are available to us. */ 38/* The hypervisor tells us what ASIDs are available to us. */
39extern int min_asid, max_asid; 39extern int min_asid, max_asid;
40 40
41static inline unsigned long hv_page_size(const struct vm_area_struct *vma)
42{
43 return (vma->vm_flags & VM_HUGETLB) ? HPAGE_SIZE : PAGE_SIZE;
44}
45
46/* Pass as vma pointer for non-executable mapping, if no vma available. */ 41/* Pass as vma pointer for non-executable mapping, if no vma available. */
47#define FLUSH_NONEXEC ((const struct vm_area_struct *)-1UL) 42#define FLUSH_NONEXEC ((struct vm_area_struct *)-1UL)
48 43
49/* Flush a single user page on this cpu. */ 44/* Flush a single user page on this cpu. */
50static inline void local_flush_tlb_page(const struct vm_area_struct *vma, 45static inline void local_flush_tlb_page(struct vm_area_struct *vma,
51 unsigned long addr, 46 unsigned long addr,
52 unsigned long page_size) 47 unsigned long page_size)
53{ 48{
@@ -60,7 +55,7 @@ static inline void local_flush_tlb_page(const struct vm_area_struct *vma,
60} 55}
61 56
62/* Flush range of user pages on this cpu. */ 57/* Flush range of user pages on this cpu. */
63static inline void local_flush_tlb_pages(const struct vm_area_struct *vma, 58static inline void local_flush_tlb_pages(struct vm_area_struct *vma,
64 unsigned long addr, 59 unsigned long addr,
65 unsigned long page_size, 60 unsigned long page_size,
66 unsigned long len) 61 unsigned long len)
@@ -117,10 +112,10 @@ extern void flush_tlb_all(void);
117extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); 112extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
118extern void flush_tlb_current_task(void); 113extern void flush_tlb_current_task(void);
119extern void flush_tlb_mm(struct mm_struct *); 114extern void flush_tlb_mm(struct mm_struct *);
120extern void flush_tlb_page(const struct vm_area_struct *, unsigned long); 115extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
121extern void flush_tlb_page_mm(const struct vm_area_struct *, 116extern void flush_tlb_page_mm(struct vm_area_struct *,
122 struct mm_struct *, unsigned long); 117 struct mm_struct *, unsigned long);
123extern void flush_tlb_range(const struct vm_area_struct *, 118extern void flush_tlb_range(struct vm_area_struct *,
124 unsigned long start, unsigned long end); 119 unsigned long start, unsigned long end);
125 120
126#define flush_tlb() flush_tlb_current_task() 121#define flush_tlb() flush_tlb_current_task()
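With hv_page_size() gone, callers pass the page size explicitly, since under multiple huge page sizes it is per-VMA state rather than a compile-time HPAGE_SIZE. A hedged sketch using the generic vma_kernel_pagesize() helper from <linux/hugetlb.h>:

static inline void example_flush_one(struct vm_area_struct *vma,
				     unsigned long addr)
{
	/* vma_kernel_pagesize() returns the hugetlb page size for huge
	 * VMAs and PAGE_SIZE for everything else. */
	local_flush_tlb_page(vma, addr, vma_kernel_pagesize(vma));
}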
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
index ef34d2caa5b1..c3dd275f25e2 100644
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -114,45 +114,75 @@ struct exception_table_entry {
114extern int fixup_exception(struct pt_regs *regs); 114extern int fixup_exception(struct pt_regs *regs);
115 115
116/* 116/*
117 * We return the __get_user_N function results in a structure, 117 * Support macros for __get_user().
118 * thus in r0 and r1. If "err" is zero, "val" is the result 118 *
119 * of the read; otherwise, "err" is -EFAULT. 119 * Implementation note: The "case 8" logic of casting to the type of
120 * 120 * the result of subtracting the value from itself is basically a way
121 * We rarely need 8-byte values on a 32-bit architecture, but 121 * of keeping all integer types the same, but casting any pointers to
122 * we size the structure to accommodate. In practice, for the 122 * ptrdiff_t, i.e. also an integer type. This way there are no
123 * the smaller reads, we can zero the high word for free, and 123 * questionable casts seen by the compiler on an ILP32 platform.
124 * the caller will ignore it by virtue of casting anyway. 124 *
125 * Note that __get_user() and __put_user() assume proper alignment.
125 */ 126 */
126struct __get_user {
127 unsigned long long val;
128 int err;
129};
130 127
131/* 128#ifdef __LP64__
132 * FIXME: we should express these as inline extended assembler, since 129#define _ASM_PTR ".quad"
133 * they're fundamentally just a variable dereference and some 130#else
134 * supporting exception_table gunk. Note that (a la i386) we can 131#define _ASM_PTR ".long"
135 * extend the copy_to_user and copy_from_user routines to call into 132#endif
136 * such extended assembler routines, though we will have to use a 133
137 * different return code in that case (1, 2, or 4, rather than -EFAULT). 134#define __get_user_asm(OP, x, ptr, ret) \
138 */ 135 asm volatile("1: {" #OP " %1, %2; movei %0, 0 }\n" \
139extern struct __get_user __get_user_1(const void __user *); 136 ".pushsection .fixup,\"ax\"\n" \
140extern struct __get_user __get_user_2(const void __user *); 137 "0: { movei %1, 0; movei %0, %3 }\n" \
141extern struct __get_user __get_user_4(const void __user *); 138 "j 9f\n" \
142extern struct __get_user __get_user_8(const void __user *); 139 ".section __ex_table,\"a\"\n" \
143extern int __put_user_1(long, void __user *); 140 _ASM_PTR " 1b, 0b\n" \
144extern int __put_user_2(long, void __user *); 141 ".popsection\n" \
145extern int __put_user_4(long, void __user *); 142 "9:" \
146extern int __put_user_8(long long, void __user *); 143 : "=r" (ret), "=r" (x) \
147 144 : "r" (ptr), "i" (-EFAULT))
148/* Unimplemented routines to cause linker failures */ 145
149extern struct __get_user __get_user_bad(void); 146#ifdef __tilegx__
150extern int __put_user_bad(void); 147#define __get_user_1(x, ptr, ret) __get_user_asm(ld1u, x, ptr, ret)
148#define __get_user_2(x, ptr, ret) __get_user_asm(ld2u, x, ptr, ret)
149#define __get_user_4(x, ptr, ret) __get_user_asm(ld4u, x, ptr, ret)
150#define __get_user_8(x, ptr, ret) __get_user_asm(ld, x, ptr, ret)
151#else
152#define __get_user_1(x, ptr, ret) __get_user_asm(lb_u, x, ptr, ret)
153#define __get_user_2(x, ptr, ret) __get_user_asm(lh_u, x, ptr, ret)
154#define __get_user_4(x, ptr, ret) __get_user_asm(lw, x, ptr, ret)
155#ifdef __LITTLE_ENDIAN
156#define __lo32(a, b) a
157#define __hi32(a, b) b
158#else
159#define __lo32(a, b) b
160#define __hi32(a, b) a
161#endif
162#define __get_user_8(x, ptr, ret) \
163 ({ \
164 unsigned int __a, __b; \
165 asm volatile("1: { lw %1, %3; addi %2, %3, 4 }\n" \
166 "2: { lw %2, %2; movei %0, 0 }\n" \
167 ".pushsection .fixup,\"ax\"\n" \
168 "0: { movei %1, 0; movei %2, 0 }\n" \
169 "{ movei %0, %4; j 9f }\n" \
170 ".section __ex_table,\"a\"\n" \
171 ".word 1b, 0b\n" \
172 ".word 2b, 0b\n" \
173 ".popsection\n" \
174 "9:" \
175 : "=r" (ret), "=r" (__a), "=&r" (__b) \
176 : "r" (ptr), "i" (-EFAULT)); \
177 (x) = (__typeof(x))(__typeof((x)-(x))) \
178 (((u64)__hi32(__a, __b) << 32) | \
179 __lo32(__a, __b)); \
180 })
181#endif
182
183extern int __get_user_bad(void)
184 __attribute__((warning("sizeof __get_user argument not 1, 2, 4 or 8")));
151 185
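A worked example of the "subtract the value from itself" cast that the 32-bit __get_user_8 applies (hi and lo abbreviate the __hi32()/__lo32() register pair above): if x is declared char *p, then (p)-(p) has type ptrdiff_t, so the assignment expands to

	(p) = (__typeof(p))(__typeof((p)-(p)))(((u64)hi << 32) | lo);

which funnels the reassembled 64-bit value through ptrdiff_t before the pointer conversion, so no questionable integer-to-pointer cast is seen on ILP32. For an ordinary integer x, __typeof((x)-(x)) is just the promoted integer type and the extra cast is a no-op.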
152/*
153 * Careful: we have to cast the result to the type of the pointer
154 * for sign reasons.
155 */
156/** 186/**
157 * __get_user: - Get a simple variable from user space, with less checking. 187 * __get_user: - Get a simple variable from user space, with less checking.
158 * @x: Variable to store result. 188 * @x: Variable to store result.
@@ -174,30 +204,62 @@ extern int __put_user_bad(void);
174 * function. 204 * function.
175 */ 205 */
176#define __get_user(x, ptr) \ 206#define __get_user(x, ptr) \
177({ struct __get_user __ret; \ 207 ({ \
178 __typeof__(*(ptr)) const __user *__gu_addr = (ptr); \ 208 int __ret; \
179 __chk_user_ptr(__gu_addr); \ 209 __chk_user_ptr(ptr); \
180 switch (sizeof(*(__gu_addr))) { \ 210 switch (sizeof(*(ptr))) { \
181 case 1: \ 211 case 1: __get_user_1(x, ptr, __ret); break; \
182 __ret = __get_user_1(__gu_addr); \ 212 case 2: __get_user_2(x, ptr, __ret); break; \
183 break; \ 213 case 4: __get_user_4(x, ptr, __ret); break; \
184 case 2: \ 214 case 8: __get_user_8(x, ptr, __ret); break; \
185 __ret = __get_user_2(__gu_addr); \ 215 default: __ret = __get_user_bad(); break; \
186 break; \ 216 } \
187 case 4: \ 217 __ret; \
188 __ret = __get_user_4(__gu_addr); \ 218 })
189 break; \ 219
190 case 8: \ 220/* Support macros for __put_user(). */
191 __ret = __get_user_8(__gu_addr); \ 221
192 break; \ 222#define __put_user_asm(OP, x, ptr, ret) \
193 default: \ 223 asm volatile("1: {" #OP " %1, %2; movei %0, 0 }\n" \
194 __ret = __get_user_bad(); \ 224 ".pushsection .fixup,\"ax\"\n" \
195 break; \ 225 "0: { movei %0, %3; j 9f }\n" \
196 } \ 226 ".section __ex_table,\"a\"\n" \
197 (x) = (__typeof__(*__gu_addr)) (__typeof__(*__gu_addr - *__gu_addr)) \ 227 _ASM_PTR " 1b, 0b\n" \
198 __ret.val; \ 228 ".popsection\n" \
199 __ret.err; \ 229 "9:" \
200}) 230 : "=r" (ret) \
231 : "r" (ptr), "r" (x), "i" (-EFAULT))
232
233#ifdef __tilegx__
234#define __put_user_1(x, ptr, ret) __put_user_asm(st1, x, ptr, ret)
235#define __put_user_2(x, ptr, ret) __put_user_asm(st2, x, ptr, ret)
236#define __put_user_4(x, ptr, ret) __put_user_asm(st4, x, ptr, ret)
237#define __put_user_8(x, ptr, ret) __put_user_asm(st, x, ptr, ret)
238#else
239#define __put_user_1(x, ptr, ret) __put_user_asm(sb, x, ptr, ret)
240#define __put_user_2(x, ptr, ret) __put_user_asm(sh, x, ptr, ret)
241#define __put_user_4(x, ptr, ret) __put_user_asm(sw, x, ptr, ret)
242#define __put_user_8(x, ptr, ret) \
243 ({ \
244 u64 __x = (__typeof((x)-(x)))(x); \
245 int __lo = (int) __x, __hi = (int) (__x >> 32); \
246 asm volatile("1: { sw %1, %2; addi %0, %1, 4 }\n" \
247 "2: { sw %0, %3; movei %0, 0 }\n" \
248 ".pushsection .fixup,\"ax\"\n" \
249 "0: { movei %0, %4; j 9f }\n" \
250 ".section __ex_table,\"a\"\n" \
251 ".word 1b, 0b\n" \
252 ".word 2b, 0b\n" \
253 ".popsection\n" \
254 "9:" \
255 : "=&r" (ret) \
256 : "r" (ptr), "r" (__lo32(__lo, __hi)), \
257 "r" (__hi32(__lo, __hi)), "i" (-EFAULT)); \
258 })
259#endif
260
261extern int __put_user_bad(void)
262 __attribute__((warning("sizeof __put_user argument not 1, 2, 4 or 8")));
201 263
202/** 264/**
203 * __put_user: - Write a simple value into user space, with less checking. 265 * __put_user: - Write a simple value into user space, with less checking.
@@ -217,39 +279,19 @@ extern int __put_user_bad(void);
217 * function. 279 * function.
218 * 280 *
219 * Returns zero on success, or -EFAULT on error. 281 * Returns zero on success, or -EFAULT on error.
220 *
221 * Implementation note: The "case 8" logic of casting to the type of
222 * the result of subtracting the value from itself is basically a way
223 * of keeping all integer types the same, but casting any pointers to
224 * ptrdiff_t, i.e. also an integer type. This way there are no
225 * questionable casts seen by the compiler on an ILP32 platform.
226 */ 282 */
227#define __put_user(x, ptr) \ 283#define __put_user(x, ptr) \
228({ \ 284({ \
229 int __pu_err = 0; \ 285 int __ret; \
230 __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ 286 __chk_user_ptr(ptr); \
231 typeof(*__pu_addr) __pu_val = (x); \ 287 switch (sizeof(*(ptr))) { \
232 __chk_user_ptr(__pu_addr); \ 288 case 1: __put_user_1(x, ptr, __ret); break; \
233 switch (sizeof(__pu_val)) { \ 289 case 2: __put_user_2(x, ptr, __ret); break; \
234 case 1: \ 290 case 4: __put_user_4(x, ptr, __ret); break; \
235 __pu_err = __put_user_1((long)__pu_val, __pu_addr); \ 291 case 8: __put_user_8(x, ptr, __ret); break; \
236 break; \ 292 default: __ret = __put_user_bad(); break; \
237 case 2: \
238 __pu_err = __put_user_2((long)__pu_val, __pu_addr); \
239 break; \
240 case 4: \
241 __pu_err = __put_user_4((long)__pu_val, __pu_addr); \
242 break; \
243 case 8: \
244 __pu_err = \
245 __put_user_8((__typeof__(__pu_val - __pu_val))__pu_val,\
246 __pu_addr); \
247 break; \
248 default: \
249 __pu_err = __put_user_bad(); \
250 break; \
251 } \ 293 } \
252 __pu_err; \ 294 __ret; \
253}) 295})
254 296
255/* 297/*
@@ -378,7 +420,7 @@ static inline unsigned long __must_check copy_from_user(void *to,
378/** 420/**
379 * __copy_in_user() - copy data within user space, with less checking. 421 * __copy_in_user() - copy data within user space, with less checking.
380 * @to: Destination address, in user space. 422 * @to: Destination address, in user space.
381 * @from: Source address, in kernel space. 423 * @from: Source address, in user space.
382 * @n: Number of bytes to copy. 424 * @n: Number of bytes to copy.
383 * 425 *
384 * Context: User context only. This function may sleep. 426 * Context: User context only. This function may sleep.
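A hedged usage sketch matching the contract documented above: both macros return 0 or -EFAULT, and on a fault the __get_user() fixup path zeroes the destination before returning:

static inline int example_copy_word(u32 __user *dst, const u32 __user *src)
{
	u32 tmp;
	int err = __get_user(tmp, src);

	return err ? err : __put_user(tmp, dst);
}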
diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h
index f70bf1c541f1..a017246ca0ce 100644
--- a/arch/tile/include/asm/unistd.h
+++ b/arch/tile/include/asm/unistd.h
@@ -24,8 +24,8 @@
24#include <asm-generic/unistd.h> 24#include <asm-generic/unistd.h>
25 25
26/* Additional Tilera-specific syscalls. */ 26/* Additional Tilera-specific syscalls. */
27#define __NR_flush_cache (__NR_arch_specific_syscall + 1) 27#define __NR_cacheflush (__NR_arch_specific_syscall + 1)
28__SYSCALL(__NR_flush_cache, sys_flush_cache) 28__SYSCALL(__NR_cacheflush, sys_cacheflush)
29 29
30#ifndef __tilegx__ 30#ifndef __tilegx__
31/* "Fast" syscalls provide atomic support for 32-bit chips. */ 31/* "Fast" syscalls provide atomic support for 32-bit chips. */
diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h
index f13188ac281a..2a20b266d944 100644
--- a/arch/tile/include/hv/drv_xgbe_intf.h
+++ b/arch/tile/include/hv/drv_xgbe_intf.h
@@ -460,7 +460,7 @@ typedef void* lepp_comp_t;
460 * linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for 460 * linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for
461 * our page size of exactly 65536. We add one for a "body" fragment. 461 * our page size of exactly 65536. We add one for a "body" fragment.
462 */ 462 */
463#define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1) 463#define LEPP_MAX_FRAGS (65536 / HV_DEFAULT_PAGE_SIZE_SMALL + 2 + 1)
464 464
465/** Total number of bytes needed for an lepp_tso_cmd_t. */ 465/** Total number of bytes needed for an lepp_tso_cmd_t. */
466#define LEPP_TSO_CMD_SIZE(num_frags, header_size) \ 466#define LEPP_TSO_CMD_SIZE(num_frags, header_size) \
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index 72ec1e972f15..ccd847e2347f 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -17,8 +17,8 @@
17 * The hypervisor's public API. 17 * The hypervisor's public API.
18 */ 18 */
19 19
20#ifndef _TILE_HV_H 20#ifndef _HV_HV_H
21#define _TILE_HV_H 21#define _HV_HV_H
22 22
23#include <arch/chip.h> 23#include <arch/chip.h>
24 24
@@ -42,25 +42,45 @@
42 */ 42 */
43#define HV_L1_SPAN (__HV_SIZE_ONE << HV_LOG2_L1_SPAN) 43#define HV_L1_SPAN (__HV_SIZE_ONE << HV_LOG2_L1_SPAN)
44 44
45/** The log2 of the size of small pages, in bytes. This value should 45/** The log2 of the initial size of small pages, in bytes.
46 * be verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL). 46 * See HV_DEFAULT_PAGE_SIZE_SMALL.
47 */ 47 */
48#define HV_LOG2_PAGE_SIZE_SMALL 16 48#define HV_LOG2_DEFAULT_PAGE_SIZE_SMALL 16
49 49
50/** The size of small pages, in bytes. This value should be verified 50/** The initial size of small pages, in bytes. This value should be verified
51 * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL). 51 * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL).
52 * It may also be modified when installing a new context.
52 */ 53 */
53#define HV_PAGE_SIZE_SMALL (__HV_SIZE_ONE << HV_LOG2_PAGE_SIZE_SMALL) 54#define HV_DEFAULT_PAGE_SIZE_SMALL \
55 (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_SMALL)
54 56
55/** The log2 of the size of large pages, in bytes. This value should be 57/** The log2 of the initial size of large pages, in bytes.
56 * verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE). 58 * See HV_DEFAULT_PAGE_SIZE_LARGE.
57 */ 59 */
58#define HV_LOG2_PAGE_SIZE_LARGE 24 60#define HV_LOG2_DEFAULT_PAGE_SIZE_LARGE 24
59 61
60/** The size of large pages, in bytes. This value should be verified 62/** The initial size of large pages, in bytes. This value should be verified
61 * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE). 63 * at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE).
64 * It may also be modified when installing a new context.
62 */ 65 */
63#define HV_PAGE_SIZE_LARGE (__HV_SIZE_ONE << HV_LOG2_PAGE_SIZE_LARGE) 66#define HV_DEFAULT_PAGE_SIZE_LARGE \
67 (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_LARGE)
68
69#if CHIP_VA_WIDTH() > 32
70
71/** The log2 of the initial size of jumbo pages, in bytes.
72 * See HV_DEFAULT_PAGE_SIZE_JUMBO.
73 */
74#define HV_LOG2_DEFAULT_PAGE_SIZE_JUMBO 32
75
76/** The initial size of jumbo pages, in bytes. This value should
77 * be verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO).
78 * It may also be modified when installing a new context.
79 */
80#define HV_DEFAULT_PAGE_SIZE_JUMBO \
81 (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_JUMBO)
82
83#endif
64 84
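A hedged sketch of the runtime verification these comments call for; hv_sysconf() is declared further down this header:

static __inline int example_page_sizes_are_default(void)
{
	return hv_sysconf(HV_SYSCONF_PAGE_SIZE_SMALL) ==
			HV_DEFAULT_PAGE_SIZE_SMALL &&
	       hv_sysconf(HV_SYSCONF_PAGE_SIZE_LARGE) ==
			HV_DEFAULT_PAGE_SIZE_LARGE;
}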
65/** The log2 of the granularity at which page tables must be aligned; 85/** The log2 of the granularity at which page tables must be aligned;
66 * in other words, the CPA for a page table must have this many zero 86 * in other words, the CPA for a page table must have this many zero
@@ -280,8 +300,11 @@
280#define HV_DISPATCH_GET_IPI_PTE 56 300#define HV_DISPATCH_GET_IPI_PTE 56
281#endif 301#endif
282 302
303/** hv_set_pte_super_shift */
304#define HV_DISPATCH_SET_PTE_SUPER_SHIFT 57
305
283/** One more than the largest dispatch value */ 306/** One more than the largest dispatch value */
284#define _HV_DISPATCH_END 57 307#define _HV_DISPATCH_END 58
285 308
286 309
287#ifndef __ASSEMBLER__ 310#ifndef __ASSEMBLER__
@@ -401,7 +424,18 @@ typedef enum {
401 * that the temperature has hit an upper limit and is no longer being 424 * that the temperature has hit an upper limit and is no longer being
402 * accurately tracked. 425 * accurately tracked.
403 */ 426 */
404 HV_SYSCONF_BOARD_TEMP = 6 427 HV_SYSCONF_BOARD_TEMP = 6,
428
429 /** Legal page size bitmask for hv_install_context().
430 * For example, if 16KB and 64KB small pages are supported,
431 * it would return "HV_CTX_PG_SM_16K | HV_CTX_PG_SM_64K".
432 */
433 HV_SYSCONF_VALID_PAGE_SIZES = 7,
434
435 /** The size of jumbo pages, in bytes.
436 * If no jumbo pages are available, zero will be returned.
437 */
438 HV_SYSCONF_PAGE_SIZE_JUMBO = 8,
405 439
406} HV_SysconfQuery; 440} HV_SysconfQuery;
407 441
@@ -474,7 +508,19 @@ typedef enum {
474 HV_CONFSTR_SWITCH_CONTROL = 14, 508 HV_CONFSTR_SWITCH_CONTROL = 14,
475 509
476 /** Chip revision level. */ 510 /** Chip revision level. */
477 HV_CONFSTR_CHIP_REV = 15 511 HV_CONFSTR_CHIP_REV = 15,
512
513 /** CPU module part number. */
514 HV_CONFSTR_CPUMOD_PART_NUM = 16,
515
516 /** CPU module serial number. */
517 HV_CONFSTR_CPUMOD_SERIAL_NUM = 17,
518
519 /** CPU module revision level. */
520 HV_CONFSTR_CPUMOD_REV = 18,
521
522 /** Human-readable CPU module description. */
523 HV_CONFSTR_CPUMOD_DESC = 19
478 524
479} HV_ConfstrQuery; 525} HV_ConfstrQuery;
480 526
@@ -494,11 +540,16 @@ int hv_confstr(HV_ConfstrQuery query, HV_VirtAddr buf, int len);
494/** Tile coordinate */ 540/** Tile coordinate */
495typedef struct 541typedef struct
496{ 542{
543#ifndef __BIG_ENDIAN__
497 /** X coordinate, relative to supervisor's top-left coordinate */ 544 /** X coordinate, relative to supervisor's top-left coordinate */
498 int x; 545 int x;
499 546
500 /** Y coordinate, relative to supervisor's top-left coordinate */ 547 /** Y coordinate, relative to supervisor's top-left coordinate */
501 int y; 548 int y;
549#else
550 int y;
551 int x;
552#endif
502} HV_Coord; 553} HV_Coord;
503 554
504 555
@@ -649,6 +700,12 @@ void hv_set_rtc(HV_RTCTime time);
649 * new page table does not need to contain any mapping for the 700 * new page table does not need to contain any mapping for the
650 * hv_install_context address itself. 701 * hv_install_context address itself.
651 * 702 *
703 * At most one HV_CTX_PG_SM_* flag may be specified in "flags";
704 * if multiple flags are specified, HV_EINVAL is returned.
705 * Specifying none of the flags results in using the default page size.
706 * All cores participating in a given client must request the same
707 * page size, or the results are undefined.
708 *
652 * @param page_table Root of the page table. 709 * @param page_table Root of the page table.
653 * @param access PTE providing info on how to read the page table. This 710 * @param access PTE providing info on how to read the page table. This
654 * value must be consistent between multiple tiles sharing a page table, 711 * value must be consistent between multiple tiles sharing a page table,
@@ -667,8 +724,36 @@ int hv_install_context(HV_PhysAddr page_table, HV_PTE access, HV_ASID asid,
667#define HV_CTX_DIRECTIO 0x1 /**< Direct I/O requests are accepted from 724#define HV_CTX_DIRECTIO 0x1 /**< Direct I/O requests are accepted from
668 PL0. */ 725 PL0. */
669 726
727#define HV_CTX_PG_SM_4K 0x10 /**< Use 4K small pages, if available. */
728#define HV_CTX_PG_SM_16K 0x20 /**< Use 16K small pages, if available. */
729#define HV_CTX_PG_SM_64K 0x40 /**< Use 64K small pages, if available. */
730#define HV_CTX_PG_SM_MASK 0xf0 /**< Mask of all possible small pages. */
731
670#ifndef __ASSEMBLER__ 732#ifndef __ASSEMBLER__
671 733
734
735/** Set the number of pages ganged together by HV_PTE_SUPER at a
736 * particular level of the page table.
737 *
738 * The current TILE-Gx hardware only supports powers of four
739 * (i.e. log2_count must be a multiple of two), and the requested
740 * "super" page size must be less than the span of the next level in
741 * the page table. The largest size that can be requested is 64GB.
742 *
743 * The shift value is initially "0" for all page table levels,
744 * indicating that the HV_PTE_SUPER bit is effectively ignored.
745 *
746 * If you change the count from one non-zero value to another, the
747 * hypervisor will flush the entire TLB and TSB to avoid confusion.
748 *
749 * @param level Page table level (0, 1, or 2)
750 * @param log2_count Base-2 log of the number of pages to gang together,
751 * i.e. how far to shift the base page size left to obtain the super page size.
752 * @return Zero on success, or a hypervisor error code on failure.
753 */
754int hv_set_pte_super_shift(int level, int log2_count);
755
756
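A hedged example that honors the constraints above: log2_count must be a multiple of two (powers of four), so ganging 16 level-2 pages turns 64KB base pages into 1MB super pages, comfortably below the 16MB span of the next level:

static __inline int example_enable_1mb_super_pages(void)
{
	/* level 2, 1 << 4 = 16 pages ganged per HV_PTE_SUPER mapping */
	return hv_set_pte_super_shift(2, 4);
}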
672/** Value returned from hv_inquire_context(). */ 757/** Value returned from hv_inquire_context(). */
673typedef struct 758typedef struct
674{ 759{
@@ -986,8 +1071,13 @@ HV_VirtAddrRange hv_inquire_virtual(int idx);
986/** A range of ASID values. */ 1071/** A range of ASID values. */
987typedef struct 1072typedef struct
988{ 1073{
1074#ifndef __BIG_ENDIAN__
989 HV_ASID start; /**< First ASID in the range. */ 1075 HV_ASID start; /**< First ASID in the range. */
990 unsigned int size; /**< Number of ASIDs. Zero for an invalid range. */ 1076 unsigned int size; /**< Number of ASIDs. Zero for an invalid range. */
1077#else
1078 unsigned int size; /**< Number of ASIDs. Zero for an invalid range. */
1079 HV_ASID start; /**< First ASID in the range. */
1080#endif
991} HV_ASIDRange; 1081} HV_ASIDRange;
992 1082
993/** Returns information about a range of ASIDs. 1083/** Returns information about a range of ASIDs.
@@ -1238,11 +1328,14 @@ HV_Errno hv_set_command_line(HV_VirtAddr buf, int length);
1238 * with the existing priority pages) or "red/black" (if they don't). 1328 * with the existing priority pages) or "red/black" (if they don't).
1239 * The bitmask provides information on which parts of the cache 1329 * The bitmask provides information on which parts of the cache
1240 * have been used for pinned pages so far on this tile; if (1 << N) 1330 * have been used for pinned pages so far on this tile; if (1 << N)
1241 * appears in the bitmask, that indicates that a page has been marked 1331 * appears in the bitmask, that indicates that a 4KB region of the
1242 * "priority" whose PFN equals N, mod 8. 1332 * cache starting at (N * 4KB) is in use by a "priority" page.
1333 * The portion of cache used by a particular page can be computed
1334 * by taking the page's PA, modulo CHIP_L2_CACHE_SIZE(), and setting
1335 * all the "4KB" bits corresponding to the actual page size.
1243 * @param bitmask A bitmap of priority page set values 1336 * @param bitmask A bitmap of priority page set values
1244 */ 1337 */
1245void hv_set_caching(unsigned int bitmask); 1338void hv_set_caching(unsigned long bitmask);
1246 1339
1247 1340
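A worked example of the computation described above for a single pinned 64KB priority page (CHIP_L2_CACHE_SIZE() comes from <arch/chip.h>; the helper name is illustrative):

static __inline unsigned long example_priority_bits(HV_PhysAddr pa)
{
	/* pa is assumed 64KB-aligned, so the run of bits never wraps. */
	unsigned long first = (pa % CHIP_L2_CACHE_SIZE()) >> 12; /* 4KB units */
	unsigned long nbits = 0x10000 >> 12;                     /* 16 bits */

	return ((1UL << nbits) - 1) << first;
}

Widening the parameter from unsigned int to unsigned long gives the bitmask room for more than 32 such 4KB regions.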
1248/** Zero out a specified number of pages. 1341/** Zero out a specified number of pages.
@@ -1308,6 +1401,7 @@ typedef enum
1308/** Message recipient. */ 1401/** Message recipient. */
1309typedef struct 1402typedef struct
1310{ 1403{
1404#ifndef __BIG_ENDIAN__
1311 /** X coordinate, relative to supervisor's top-left coordinate */ 1405 /** X coordinate, relative to supervisor's top-left coordinate */
1312 unsigned int x:11; 1406 unsigned int x:11;
1313 1407
@@ -1316,6 +1410,11 @@ typedef struct
1316 1410
1317 /** Status of this recipient */ 1411 /** Status of this recipient */
1318 HV_Recip_State state:10; 1412 HV_Recip_State state:10;
1413#else /* __BIG_ENDIAN__ */
1414 HV_Recip_State state:10;
1415 unsigned int y:11;
1416 unsigned int x:11;
1417#endif
1319} HV_Recipient; 1418} HV_Recipient;
1320 1419
1321/** Send a message to a set of recipients. 1420/** Send a message to a set of recipients.
@@ -1851,12 +1950,12 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
1851#define HV_PTE_INDEX_USER 10 /**< Page is user-accessible */ 1950#define HV_PTE_INDEX_USER 10 /**< Page is user-accessible */
1852#define HV_PTE_INDEX_ACCESSED 11 /**< Page has been accessed */ 1951#define HV_PTE_INDEX_ACCESSED 11 /**< Page has been accessed */
1853#define HV_PTE_INDEX_DIRTY 12 /**< Page has been written */ 1952#define HV_PTE_INDEX_DIRTY 12 /**< Page has been written */
1854 /* Bits 13-15 are reserved for 1953 /* Bits 13-14 are reserved for
1855 future use. */ 1954 future use. */
1955#define HV_PTE_INDEX_SUPER 15 /**< Pages ganged together for TLB */
1856#define HV_PTE_INDEX_MODE 16 /**< Page mode; see HV_PTE_MODE_xxx */ 1956#define HV_PTE_INDEX_MODE 16 /**< Page mode; see HV_PTE_MODE_xxx */
1857#define HV_PTE_MODE_BITS 3 /**< Number of bits in mode */ 1957#define HV_PTE_MODE_BITS 3 /**< Number of bits in mode */
1858 /* Bit 19 is reserved for 1958#define HV_PTE_INDEX_CLIENT2 19 /**< Page client state 2 */
1859 future use. */
1860#define HV_PTE_INDEX_LOTAR 20 /**< Page's LOTAR; must be high bits 1959#define HV_PTE_INDEX_LOTAR 20 /**< Page's LOTAR; must be high bits
1861 of word */ 1960 of word */
1862#define HV_PTE_LOTAR_BITS 12 /**< Number of bits in a LOTAR */ 1961#define HV_PTE_LOTAR_BITS 12 /**< Number of bits in a LOTAR */
@@ -1869,15 +1968,6 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
1869 of word */ 1968 of word */
1870#define HV_PTE_PTFN_BITS 29 /**< Number of bits in a PTFN */ 1969#define HV_PTE_PTFN_BITS 29 /**< Number of bits in a PTFN */
1871 1970
1872/** Position of the PFN field within the PTE (subset of the PTFN). */
1873#define HV_PTE_INDEX_PFN (HV_PTE_INDEX_PTFN + (HV_LOG2_PAGE_SIZE_SMALL - \
1874 HV_LOG2_PAGE_TABLE_ALIGN))
1875
1876/** Length of the PFN field within the PTE (subset of the PTFN). */
1877#define HV_PTE_INDEX_PFN_BITS (HV_PTE_INDEX_PTFN_BITS - \
1878 (HV_LOG2_PAGE_SIZE_SMALL - \
1879 HV_LOG2_PAGE_TABLE_ALIGN))
1880
1881/* 1971/*
1882 * Legal values for the PTE's mode field 1972 * Legal values for the PTE's mode field
1883 */ 1973 */
@@ -1957,7 +2047,10 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
1957 2047
1958/** Does this PTE map a page? 2048/** Does this PTE map a page?
1959 * 2049 *
1960 * If this bit is set in the level-1 page table, the entry should be 2050 * If this bit is set in a level-0 page table, the entry should be
2051 * interpreted as a level-2 page table entry mapping a jumbo page.
2052 *
2053 * If this bit is set in a level-1 page table, the entry should be
1961 * interpreted as a level-2 page table entry mapping a large page. 2054 * interpreted as a level-2 page table entry mapping a large page.
1962 * 2055 *
1963 * This bit should not be modified by the client while PRESENT is set, as 2056 * This bit should not be modified by the client while PRESENT is set, as
@@ -1967,6 +2060,18 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
1967 */ 2060 */
1968#define HV_PTE_PAGE (__HV_PTE_ONE << HV_PTE_INDEX_PAGE) 2061#define HV_PTE_PAGE (__HV_PTE_ONE << HV_PTE_INDEX_PAGE)
1969 2062
2063/** Does this PTE implicitly reference multiple pages?
2064 *
2065 * If this bit is set in the page table (either in the level-2 page table,
2066 * or in a higher level page table in conjunction with the PAGE bit)
2067 * then the PTE specifies a range of contiguous pages, not a single page.
2068 * The hv_set_pte_super_shift() call allows you to specify the count for
2069 * each level of the page table.
2070 *
2071 * Note: this bit is not supported on TILEPro systems.
2072 */
2073#define HV_PTE_SUPER (__HV_PTE_ONE << HV_PTE_INDEX_SUPER)
2074
1970/** Is this a global (non-ASID) mapping? 2075/** Is this a global (non-ASID) mapping?
1971 * 2076 *
1972 * If this bit is set, the translations established by this PTE will 2077 * If this bit is set, the translations established by this PTE will
@@ -2046,6 +2151,13 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
2046 */ 2151 */
2047#define HV_PTE_CLIENT1 (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT1) 2152#define HV_PTE_CLIENT1 (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT1)
2048 2153
2154/** Client-private bit in PTE.
2155 *
2156 * This bit is guaranteed not to be inspected or modified by the
2157 * hypervisor.
2158 */
2159#define HV_PTE_CLIENT2 (__HV_PTE_ONE << HV_PTE_INDEX_CLIENT2)
2160
2049/** Non-coherent (NC) bit in PTE. 2161/** Non-coherent (NC) bit in PTE.
2050 * 2162 *
2051 * If this bit is set, the mapping that is set up will be non-coherent 2163 * If this bit is set, the mapping that is set up will be non-coherent
@@ -2178,8 +2290,10 @@ hv_pte_clear_##name(HV_PTE pte) \
2178 */ 2290 */
2179_HV_BIT(present, PRESENT) 2291_HV_BIT(present, PRESENT)
2180_HV_BIT(page, PAGE) 2292_HV_BIT(page, PAGE)
2293_HV_BIT(super, SUPER)
2181_HV_BIT(client0, CLIENT0) 2294_HV_BIT(client0, CLIENT0)
2182_HV_BIT(client1, CLIENT1) 2295_HV_BIT(client1, CLIENT1)
2296_HV_BIT(client2, CLIENT2)
2183_HV_BIT(migrating, MIGRATING) 2297_HV_BIT(migrating, MIGRATING)
2184_HV_BIT(nc, NC) 2298_HV_BIT(nc, NC)
2185_HV_BIT(readable, READABLE) 2299_HV_BIT(readable, READABLE)
@@ -2222,40 +2336,11 @@ hv_pte_set_mode(HV_PTE pte, unsigned int val)
2222 * 2336 *
2223 * This field contains the upper bits of the CPA (client physical 2337 * This field contains the upper bits of the CPA (client physical
2224 * address) of the target page; the complete CPA is this field with 2338 * address) of the target page; the complete CPA is this field with
2225 * HV_LOG2_PAGE_SIZE_SMALL zero bits appended to it. 2339 * HV_LOG2_PAGE_TABLE_ALIGN zero bits appended to it.
2226 *
2227 * For PTEs in a level-1 page table where the Page bit is set, the
2228 * CPA must be aligned modulo the large page size.
2229 */
2230static __inline unsigned int
2231hv_pte_get_pfn(const HV_PTE pte)
2232{
2233 return pte.val >> HV_PTE_INDEX_PFN;
2234}
2235
2236
2237/** Set the page frame number into a PTE. See hv_pte_get_pfn. */
2238static __inline HV_PTE
2239hv_pte_set_pfn(HV_PTE pte, unsigned int val)
2240{
2241 /*
2242 * Note that the use of "PTFN" in the next line is intentional; we
2243 * don't want any garbage lower bits left in that field.
2244 */
2245 pte.val &= ~(((1ULL << HV_PTE_PTFN_BITS) - 1) << HV_PTE_INDEX_PTFN);
2246 pte.val |= (__hv64) val << HV_PTE_INDEX_PFN;
2247 return pte;
2248}
2249
2250/** Get the page table frame number from the PTE.
2251 *
2252 * This field contains the upper bits of the CPA (client physical
2253 * address) of the target page table; the complete CPA is this field
2254 * with HV_PAGE_TABLE_ALIGN zero bits appended to it.
2255 * 2340 *
2256 * For PTEs in a level-1 page table when the Page bit is not set, the 2341 * For all PTEs in the lowest-level page table, and for all PTEs with
2257 * CPA must be aligned modulo the stricter of HV_PAGE_TABLE_ALIGN and 2342 * the Page bit set in all page tables, the CPA must be aligned modulo
2258 * the level-2 page table size. 2343 * the relevant page size.
2259 */ 2344 */
2260static __inline unsigned long 2345static __inline unsigned long
2261hv_pte_get_ptfn(const HV_PTE pte) 2346hv_pte_get_ptfn(const HV_PTE pte)
@@ -2263,7 +2348,6 @@ hv_pte_get_ptfn(const HV_PTE pte)
2263 return pte.val >> HV_PTE_INDEX_PTFN; 2348 return pte.val >> HV_PTE_INDEX_PTFN;
2264} 2349}
2265 2350
2266
2267/** Set the page table frame number into a PTE. See hv_pte_get_ptfn. */ 2351/** Set the page table frame number into a PTE. See hv_pte_get_ptfn. */
2268static __inline HV_PTE 2352static __inline HV_PTE
2269hv_pte_set_ptfn(HV_PTE pte, unsigned long val) 2353hv_pte_set_ptfn(HV_PTE pte, unsigned long val)
@@ -2273,6 +2357,20 @@ hv_pte_set_ptfn(HV_PTE pte, unsigned long val)
2273 return pte; 2357 return pte;
2274} 2358}
2275 2359
2360/** Get the client physical address from the PTE. See hv_pte_set_ptfn. */
2361static __inline HV_PhysAddr
2362hv_pte_get_pa(const HV_PTE pte)
2363{
2364 return (__hv64) hv_pte_get_ptfn(pte) << HV_LOG2_PAGE_TABLE_ALIGN;
2365}
2366
2367/** Set the client physical address into a PTE. See hv_pte_get_ptfn. */
2368static __inline HV_PTE
2369hv_pte_set_pa(HV_PTE pte, HV_PhysAddr pa)
2370{
2371 return hv_pte_set_ptfn(pte, pa >> HV_LOG2_PAGE_TABLE_ALIGN);
2372}
2373
2276 2374
2277/** Get the remote tile caching this page. 2375/** Get the remote tile caching this page.
2278 * 2376 *
@@ -2308,28 +2406,20 @@ hv_pte_set_lotar(HV_PTE pte, unsigned int val)
2308 2406
2309#endif /* !__ASSEMBLER__ */ 2407#endif /* !__ASSEMBLER__ */
2310 2408
2311/** Converts a client physical address to a pfn. */
2312#define HV_CPA_TO_PFN(p) ((p) >> HV_LOG2_PAGE_SIZE_SMALL)
2313
2314/** Converts a pfn to a client physical address. */
2315#define HV_PFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_SIZE_SMALL)
2316
2317/** Converts a client physical address to a ptfn. */ 2409/** Converts a client physical address to a ptfn. */
2318#define HV_CPA_TO_PTFN(p) ((p) >> HV_LOG2_PAGE_TABLE_ALIGN) 2410#define HV_CPA_TO_PTFN(p) ((p) >> HV_LOG2_PAGE_TABLE_ALIGN)
2319 2411
2320/** Converts a ptfn to a client physical address. */ 2412/** Converts a ptfn to a client physical address. */
2321#define HV_PTFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_TABLE_ALIGN) 2413#define HV_PTFN_TO_CPA(p) (((HV_PhysAddr)(p)) << HV_LOG2_PAGE_TABLE_ALIGN)
2322 2414
2323/** Converts a ptfn to a pfn. */
2324#define HV_PTFN_TO_PFN(p) \
2325 ((p) >> (HV_LOG2_PAGE_SIZE_SMALL - HV_LOG2_PAGE_TABLE_ALIGN))
2326
2327/** Converts a pfn to a ptfn. */
2328#define HV_PFN_TO_PTFN(p) \
2329 ((p) << (HV_LOG2_PAGE_SIZE_SMALL - HV_LOG2_PAGE_TABLE_ALIGN))
2330
2331#if CHIP_VA_WIDTH() > 32 2415#if CHIP_VA_WIDTH() > 32
2332 2416
2417/*
2418 * Note that we currently do not allow customizing the page size
2419 * of the L0 pages, but fix them at 4GB, so we do not use the
2420 * "_HV_xxx" nomenclature for the L0 macros.
2421 */
2422
2333/** Log number of HV_PTE entries in L0 page table */ 2423/** Log number of HV_PTE entries in L0 page table */
2334#define HV_LOG2_L0_ENTRIES (CHIP_VA_WIDTH() - HV_LOG2_L1_SPAN) 2424#define HV_LOG2_L0_ENTRIES (CHIP_VA_WIDTH() - HV_LOG2_L1_SPAN)
2335 2425
@@ -2359,69 +2449,104 @@ hv_pte_set_lotar(HV_PTE pte, unsigned int val)
2359#endif /* CHIP_VA_WIDTH() > 32 */ 2449#endif /* CHIP_VA_WIDTH() > 32 */
2360 2450
2361/** Log number of HV_PTE entries in L1 page table */ 2451/** Log number of HV_PTE entries in L1 page table */
2362#define HV_LOG2_L1_ENTRIES (HV_LOG2_L1_SPAN - HV_LOG2_PAGE_SIZE_LARGE) 2452#define _HV_LOG2_L1_ENTRIES(log2_page_size_large) \
2453 (HV_LOG2_L1_SPAN - log2_page_size_large)
2363 2454
2364/** Number of HV_PTE entries in L1 page table */ 2455/** Number of HV_PTE entries in L1 page table */
2365#define HV_L1_ENTRIES (1 << HV_LOG2_L1_ENTRIES) 2456#define _HV_L1_ENTRIES(log2_page_size_large) \
2457 (1 << _HV_LOG2_L1_ENTRIES(log2_page_size_large))
2366 2458
2367/** Log size of L1 page table in bytes */ 2459/** Log size of L1 page table in bytes */
2368#define HV_LOG2_L1_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L1_ENTRIES) 2460#define _HV_LOG2_L1_SIZE(log2_page_size_large) \
2461 (HV_LOG2_PTE_SIZE + _HV_LOG2_L1_ENTRIES(log2_page_size_large))
2369 2462
2370/** Size of L1 page table in bytes */ 2463/** Size of L1 page table in bytes */
2371#define HV_L1_SIZE (1 << HV_LOG2_L1_SIZE) 2464#define _HV_L1_SIZE(log2_page_size_large) \
2465 (1 << _HV_LOG2_L1_SIZE(log2_page_size_large))
2372 2466
2373/** Log number of HV_PTE entries in level-2 page table */ 2467/** Log number of HV_PTE entries in level-2 page table */
2374#define HV_LOG2_L2_ENTRIES (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL) 2468#define _HV_LOG2_L2_ENTRIES(log2_page_size_large, log2_page_size_small) \
2469 (log2_page_size_large - log2_page_size_small)
2375 2470
2376/** Number of HV_PTE entries in level-2 page table */ 2471/** Number of HV_PTE entries in level-2 page table */
2377#define HV_L2_ENTRIES (1 << HV_LOG2_L2_ENTRIES) 2472#define _HV_L2_ENTRIES(log2_page_size_large, log2_page_size_small) \
2473 (1 << _HV_LOG2_L2_ENTRIES(log2_page_size_large, log2_page_size_small))
2378 2474
2379/** Log size of level-2 page table in bytes */ 2475/** Log size of level-2 page table in bytes */
2380#define HV_LOG2_L2_SIZE (HV_LOG2_PTE_SIZE + HV_LOG2_L2_ENTRIES) 2476#define _HV_LOG2_L2_SIZE(log2_page_size_large, log2_page_size_small) \
2477 (HV_LOG2_PTE_SIZE + \
2478 _HV_LOG2_L2_ENTRIES(log2_page_size_large, log2_page_size_small))
2381 2479
2382/** Size of level-2 page table in bytes */ 2480/** Size of level-2 page table in bytes */
2383#define HV_L2_SIZE (1 << HV_LOG2_L2_SIZE) 2481#define _HV_L2_SIZE(log2_page_size_large, log2_page_size_small) \
2482 (1 << _HV_LOG2_L2_SIZE(log2_page_size_large, log2_page_size_small))
2384 2483
2385#ifdef __ASSEMBLER__ 2484#ifdef __ASSEMBLER__
2386 2485
2387#if CHIP_VA_WIDTH() > 32 2486#if CHIP_VA_WIDTH() > 32
2388 2487
2389/** Index in L1 for a specific VA */ 2488/** Index in L1 for a specific VA */
2390#define HV_L1_INDEX(va) \ 2489#define _HV_L1_INDEX(va, log2_page_size_large) \
2391 (((va) >> HV_LOG2_PAGE_SIZE_LARGE) & (HV_L1_ENTRIES - 1)) 2490 (((va) >> log2_page_size_large) & (_HV_L1_ENTRIES(log2_page_size_large) - 1))
2392 2491
2393#else /* CHIP_VA_WIDTH() > 32 */ 2492#else /* CHIP_VA_WIDTH() > 32 */
2394 2493
2395/** Index in L1 for a specific VA */ 2494/** Index in L1 for a specific VA */
2396#define HV_L1_INDEX(va) \ 2495#define _HV_L1_INDEX(va, log2_page_size_large) \
2397 (((va) >> HV_LOG2_PAGE_SIZE_LARGE)) 2496 (((va) >> log2_page_size_large))
2398 2497
2399#endif /* CHIP_VA_WIDTH() > 32 */ 2498#endif /* CHIP_VA_WIDTH() > 32 */
2400 2499
2401/** Index in level-2 page table for a specific VA */ 2500/** Index in level-2 page table for a specific VA */
2402#define HV_L2_INDEX(va) \ 2501#define _HV_L2_INDEX(va, log2_page_size_large, log2_page_size_small) \
2403 (((va) >> HV_LOG2_PAGE_SIZE_SMALL) & (HV_L2_ENTRIES - 1)) 2502 (((va) >> log2_page_size_small) & \
2503 (_HV_L2_ENTRIES(log2_page_size_large, log2_page_size_small) - 1))
2404 2504
2405#else /* __ASSEMBLER__ */ 2505
2406 2506
2407#if CHIP_VA_WIDTH() > 32 2507#if CHIP_VA_WIDTH() > 32
2408 2508
2409/** Index in L1 for a specific VA */ 2509/** Index in L1 for a specific VA */
2410#define HV_L1_INDEX(va) \ 2510#define _HV_L1_INDEX(va, log2_page_size_large) \
2411 (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_LARGE) & (HV_L1_ENTRIES - 1)) 2511 (((HV_VirtAddr)(va) >> log2_page_size_large) & \
2512 (_HV_L1_ENTRIES(log2_page_size_large) - 1))
2412 2513
2413#else /* CHIP_VA_WIDTH() > 32 */ 2514#else /* CHIP_VA_WIDTH() > 32 */
2414 2515
2415/** Index in L1 for a specific VA */ 2516/** Index in L1 for a specific VA */
2416#define HV_L1_INDEX(va) \ 2517#define _HV_L1_INDEX(va, log2_page_size_large) \
2417 (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_LARGE)) 2518 (((HV_VirtAddr)(va) >> log2_page_size_large))
2418 2519
2419#endif /* CHIP_VA_WIDTH() > 32 */ 2520#endif /* CHIP_VA_WIDTH() > 32 */
2420 2521
2421/** Index in level-2 page table for a specific VA */ 2522/** Index in level-2 page table for a specific VA */
2422#define HV_L2_INDEX(va) \ 2523#define _HV_L2_INDEX(va, log2_page_size_large, log2_page_size_small) \
2423 (((HV_VirtAddr)(va) >> HV_LOG2_PAGE_SIZE_SMALL) & (HV_L2_ENTRIES - 1)) 2524 (((HV_VirtAddr)(va) >> log2_page_size_small) & \
2525 (_HV_L2_ENTRIES(log2_page_size_large, log2_page_size_small) - 1))
2424 2526
2425#endif /* __ASSEMBLER__ */ 2527
2426 2528
2427#endif /* _TILE_HV_H */ 2529/** Position of the PFN field within the PTE (subset of the PTFN). */
2530#define _HV_PTE_INDEX_PFN(log2_page_size) \
2531 (HV_PTE_INDEX_PTFN + (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN))
2532
2533/** Length of the PFN field within the PTE (subset of the PTFN). */
2534#define _HV_PTE_INDEX_PFN_BITS(log2_page_size) \
2535 (HV_PTE_INDEX_PTFN_BITS - (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN))
2536
2537/** Converts a client physical address to a pfn. */
2538#define _HV_CPA_TO_PFN(p, log2_page_size) ((p) >> log2_page_size)
2539
2540/** Converts a pfn to a client physical address. */
2541#define _HV_PFN_TO_CPA(p, log2_page_size) \
2542 (((HV_PhysAddr)(p)) << log2_page_size)
2543
2544/** Converts a ptfn to a pfn. */
2545#define _HV_PTFN_TO_PFN(p, log2_page_size) \
2546 ((p) >> (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN))
2547
2548/** Converts a pfn to a ptfn. */
2549#define _HV_PFN_TO_PTFN(p, log2_page_size) \
2550 ((p) << (log2_page_size - HV_LOG2_PAGE_TABLE_ALIGN))
2551
2552#endif /* _HV_HV_H */
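
The block above replaces the fixed HV_LOG2_PAGE_SIZE_SMALL/LARGE constants with "_HV_xxx" macros that take the log2 page sizes as arguments, so one header serves every huge-page size the hypervisor supports. Below is a minimal standalone sketch of the arithmetic those macros encode; every numeric constant in it is an illustrative assumption, not the real TILE-Gx configuration:

    #include <stdint.h>
    #include <stdio.h>

    #define LOG2_PTE_SIZE          3    /* assumed: 8-byte PTEs */
    #define LOG2_PAGE_TABLE_ALIGN  13   /* assumed page-table alignment */
    #define LOG2_L1_SPAN           32   /* assumed VA span of one L1 entry */

    int main(void)
    {
            unsigned log2_small = 16, log2_large = 24;  /* 64KB / 16MB pages */

            /* _HV_L1_ENTRIES / _HV_L2_ENTRIES with the sizes plugged in. */
            unsigned l1_entries = 1u << (LOG2_L1_SPAN - log2_large);
            unsigned l2_entries = 1u << (log2_large - log2_small);

            /* _HV_L2_SIZE: entry count times PTE size. */
            unsigned l2_bytes = 1u << (LOG2_PTE_SIZE + (log2_large - log2_small));

            /* _HV_PTFN_TO_PFN: ptfns count page-table-align units, pfns pages. */
            uint64_t ptfn = 0x1000;
            uint64_t pfn = ptfn >> (log2_small - LOG2_PAGE_TABLE_ALIGN);

            printf("L1 entries %u, L2 entries %u, L2 table %u bytes\n",
                   l1_entries, l2_entries, l2_bytes);
            printf("ptfn %#llx -> pfn %#llx\n",
                   (unsigned long long)ptfn, (unsigned long long)pfn);
            return 0;
    }
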
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index 0d826faf8f35..5de99248d8df 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@ -9,10 +9,9 @@ obj-y := backtrace.o entry.o irq.o messaging.o \
9 intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o 9 intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o
10 10
11obj-$(CONFIG_HARDWALL) += hardwall.o 11obj-$(CONFIG_HARDWALL) += hardwall.o
12obj-$(CONFIG_TILEGX) += futex_64.o
13obj-$(CONFIG_COMPAT) += compat.o compat_signal.o 12obj-$(CONFIG_COMPAT) += compat.o compat_signal.o
14obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o 13obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o
15obj-$(CONFIG_MODULES) += module.o 14obj-$(CONFIG_MODULES) += module.o
16obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 15obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
17obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o 16obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel_$(BITS).o
18obj-$(CONFIG_PCI) += pci.o 17obj-$(CONFIG_PCI) += pci.o
diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S
index ec91568df880..133c4b56a99e 100644
--- a/arch/tile/kernel/entry.S
+++ b/arch/tile/kernel/entry.S
@@ -100,8 +100,9 @@ STD_ENTRY(smp_nap)
100 */ 100 */
101STD_ENTRY(_cpu_idle) 101STD_ENTRY(_cpu_idle)
102 movei r1, 1 102 movei r1, 1
103 IRQ_ENABLE_LOAD(r2, r3)
103 mtspr INTERRUPT_CRITICAL_SECTION, r1 104 mtspr INTERRUPT_CRITICAL_SECTION, r1
104 IRQ_ENABLE(r2, r3) /* unmask, but still with ICS set */ 105 IRQ_ENABLE_APPLY(r2, r3) /* unmask, but still with ICS set */
105 mtspr INTERRUPT_CRITICAL_SECTION, zero 106 mtspr INTERRUPT_CRITICAL_SECTION, zero
106 .global _cpu_idle_nap 107 .global _cpu_idle_nap
107_cpu_idle_nap: 108_cpu_idle_nap:
diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c
index 8c41891aab34..20273ee37deb 100644
--- a/arch/tile/kernel/hardwall.c
+++ b/arch/tile/kernel/hardwall.c
@@ -33,59 +33,157 @@
33 33
34 34
35/* 35/*
36 * This data structure tracks the rectangle data, etc., associated 36 * Implement a per-cpu "hardwall" resource class such as UDN or IPI.
37 * one-to-one with a "struct file *" from opening HARDWALL_FILE. 37 * We use "hardwall" nomenclature throughout for historical reasons.
38 * The lock here controls access to the list data structure as well as
39 * to the items on the list.
40 */
41struct hardwall_type {
42 int index;
43 int is_xdn;
44 int is_idn;
45 int disabled;
46 const char *name;
47 struct list_head list;
48 spinlock_t lock;
49 struct proc_dir_entry *proc_dir;
50};
51
52enum hardwall_index {
53 HARDWALL_UDN = 0,
54#ifndef __tilepro__
55 HARDWALL_IDN = 1,
56 HARDWALL_IPI = 2,
57#endif
58 _HARDWALL_TYPES
59};
60
61static struct hardwall_type hardwall_types[] = {
62 { /* user-space access to UDN */
63 0,
64 1,
65 0,
66 0,
67 "udn",
68 LIST_HEAD_INIT(hardwall_types[HARDWALL_UDN].list),
69 __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_UDN].lock),
70 NULL
71 },
72#ifndef __tilepro__
73 { /* user-space access to IDN */
74 1,
75 1,
76 1,
77 1, /* disabled pending hypervisor support */
78 "idn",
79 LIST_HEAD_INIT(hardwall_types[HARDWALL_IDN].list),
80 __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_IDN].lock),
81 NULL
82 },
83 { /* access to user-space IPI */
84 2,
85 0,
86 0,
87 0,
88 "ipi",
89 LIST_HEAD_INIT(hardwall_types[HARDWALL_IPI].list),
90 __SPIN_LOCK_INITIALIZER(hardwall_types[HARDWALL_IPI].lock),
91 NULL
92 },
93#endif
94};
95
96/*
97 * This data structure tracks the cpu data, etc., associated
98 * one-to-one with a "struct file *" from opening a hardwall device file.
38 * Note that the file's private data points back to this structure. 99 * Note that the file's private data points back to this structure.
39 */ 100 */
40struct hardwall_info { 101struct hardwall_info {
41 struct list_head list; /* "rectangles" list */ 102 struct list_head list; /* for hardwall_types.list */
42 struct list_head task_head; /* head of tasks in this hardwall */ 103 struct list_head task_head; /* head of tasks in this hardwall */
43 struct cpumask cpumask; /* cpus in the rectangle */ 104 struct hardwall_type *type; /* type of this resource */
105 struct cpumask cpumask; /* cpus reserved */
106 int id; /* integer id for this hardwall */
107 int teardown_in_progress; /* are we tearing this one down? */
108
109 /* Remaining fields only valid for user-network resources. */
44 int ulhc_x; /* upper left hand corner x coord */ 110 int ulhc_x; /* upper left hand corner x coord */
45 int ulhc_y; /* upper left hand corner y coord */ 111 int ulhc_y; /* upper left hand corner y coord */
46 int width; /* rectangle width */ 112 int width; /* rectangle width */
47 int height; /* rectangle height */ 113 int height; /* rectangle height */
48 int id; /* integer id for this hardwall */ 114#if CHIP_HAS_REV1_XDN()
49 int teardown_in_progress; /* are we tearing this one down? */ 115 atomic_t xdn_pending_count; /* cores in phase 1 of drain */
116#endif
50}; 117};
51 118
52/* Currently allocated hardwall rectangles */
53static LIST_HEAD(rectangles);
54 119
55/* /proc/tile/hardwall */ 120/* /proc/tile/hardwall */
56static struct proc_dir_entry *hardwall_proc_dir; 121static struct proc_dir_entry *hardwall_proc_dir;
57 122
58/* Functions to manage files in /proc/tile/hardwall. */ 123/* Functions to manage files in /proc/tile/hardwall. */
59static void hardwall_add_proc(struct hardwall_info *rect); 124static void hardwall_add_proc(struct hardwall_info *);
60static void hardwall_remove_proc(struct hardwall_info *rect); 125static void hardwall_remove_proc(struct hardwall_info *);
61
62/*
63 * Guard changes to the hardwall data structures.
64 * This could be finer grained (e.g. one lock for the list of hardwall
65 * rectangles, then separate embedded locks for each one's list of tasks),
66 * but there are subtle correctness issues when trying to start with
67 * a task's "hardwall" pointer and lock the correct rectangle's embedded
68 * lock in the presence of a simultaneous deactivation, so it seems
69 * easier to have a single lock, given that none of these data
70 * structures are touched very frequently during normal operation.
71 */
72static DEFINE_SPINLOCK(hardwall_lock);
73 126
74/* Allow disabling UDN access. */ 127/* Allow disabling UDN access. */
75static int udn_disabled;
76static int __init noudn(char *str) 128static int __init noudn(char *str)
77{ 129{
78 pr_info("User-space UDN access is disabled\n"); 130 pr_info("User-space UDN access is disabled\n");
79 udn_disabled = 1; 131 hardwall_types[HARDWALL_UDN].disabled = 1;
80 return 0; 132 return 0;
81} 133}
82early_param("noudn", noudn); 134early_param("noudn", noudn);
83 135
136#ifndef __tilepro__
137/* Allow disabling IDN access. */
138static int __init noidn(char *str)
139{
140 pr_info("User-space IDN access is disabled\n");
141 hardwall_types[HARDWALL_IDN].disabled = 1;
142 return 0;
143}
144early_param("noidn", noidn);
145
146/* Allow disabling IPI access. */
147static int __init noipi(char *str)
148{
149 pr_info("User-space IPI access is disabled\n");
150 hardwall_types[HARDWALL_IPI].disabled = 1;
151 return 0;
152}
153early_param("noipi", noipi);
154#endif
155
84 156
85/* 157/*
86 * Low-level primitives 158 * Low-level primitives for UDN/IDN
87 */ 159 */
88 160
161#ifdef __tilepro__
162#define mtspr_XDN(hwt, name, val) \
163 do { (void)(hwt); __insn_mtspr(SPR_UDN_##name, (val)); } while (0)
164#define mtspr_MPL_XDN(hwt, name, val) \
165 do { (void)(hwt); __insn_mtspr(SPR_MPL_UDN_##name, (val)); } while (0)
166#define mfspr_XDN(hwt, name) \
167 ((void)(hwt), __insn_mfspr(SPR_UDN_##name))
168#else
169#define mtspr_XDN(hwt, name, val) \
170 do { \
171 if ((hwt)->is_idn) \
172 __insn_mtspr(SPR_IDN_##name, (val)); \
173 else \
174 __insn_mtspr(SPR_UDN_##name, (val)); \
175 } while (0)
176#define mtspr_MPL_XDN(hwt, name, val) \
177 do { \
178 if ((hwt)->is_idn) \
179 __insn_mtspr(SPR_MPL_IDN_##name, (val)); \
180 else \
181 __insn_mtspr(SPR_MPL_UDN_##name, (val)); \
182 } while (0)
183#define mfspr_XDN(hwt, name) \
184 ((hwt)->is_idn ? __insn_mfspr(SPR_IDN_##name) : __insn_mfspr(SPR_UDN_##name))
185#endif
186
89/* Set a CPU bit if the CPU is online. */ 187/* Set a CPU bit if the CPU is online. */
90#define cpu_online_set(cpu, dst) do { \ 188#define cpu_online_set(cpu, dst) do { \
91 if (cpu_online(cpu)) \ 189 if (cpu_online(cpu)) \
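
The mtspr_XDN()/mfspr_XDN() wrappers introduced above pick the UDN or IDN SPR by token-pasting the register name onto the right prefix; on TILEPro the hwt argument is simply cast to void since only the UDN exists. A self-contained sketch of the same dispatch pattern, with made-up SPR numbers and a logging stub standing in for the real mtspr instruction:

    #include <stdio.h>

    /* Stand-in SPR numbers; the real values come from the SPR headers. */
    #define SPR_UDN_DIRECTION_PROTECT 100
    #define SPR_IDN_DIRECTION_PROTECT 200

    struct hardwall_type { int is_idn; };

    /* Stub for the mtspr instruction; just logs the write. */
    static void insn_mtspr(int spr, unsigned long val)
    {
            printf("mtspr %d <- %#lx\n", spr, val);
    }

    /* Same shape as mtspr_XDN(): paste the name onto the UDN or IDN prefix. */
    #define mtspr_XDN(hwt, name, val)                               \
            do {                                                    \
                    if ((hwt)->is_idn)                              \
                            insn_mtspr(SPR_IDN_##name, (val));      \
                    else                                            \
                            insn_mtspr(SPR_UDN_##name, (val));      \
            } while (0)

    int main(void)
    {
            struct hardwall_type udn = { .is_idn = 0 }, idn = { .is_idn = 1 };
            mtspr_XDN(&udn, DIRECTION_PROTECT, 0xf);
            mtspr_XDN(&idn, DIRECTION_PROTECT, 0xf);
            return 0;
    }
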
@@ -101,7 +199,7 @@ static int contains(struct hardwall_info *r, int x, int y)
101} 199}
102 200
103/* Compute the rectangle parameters and validate the cpumask. */ 201/* Compute the rectangle parameters and validate the cpumask. */
104static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) 202static int check_rectangle(struct hardwall_info *r, struct cpumask *mask)
105{ 203{
106 int x, y, cpu, ulhc, lrhc; 204 int x, y, cpu, ulhc, lrhc;
107 205
@@ -114,8 +212,6 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask)
114 r->ulhc_y = cpu_y(ulhc); 212 r->ulhc_y = cpu_y(ulhc);
115 r->width = cpu_x(lrhc) - r->ulhc_x + 1; 213 r->width = cpu_x(lrhc) - r->ulhc_x + 1;
116 r->height = cpu_y(lrhc) - r->ulhc_y + 1; 214 r->height = cpu_y(lrhc) - r->ulhc_y + 1;
117 cpumask_copy(&r->cpumask, mask);
118 r->id = ulhc; /* The ulhc cpu id can be the hardwall id. */
119 215
120 /* Width and height must be positive */ 216 /* Width and height must be positive */
121 if (r->width <= 0 || r->height <= 0) 217 if (r->width <= 0 || r->height <= 0)
@@ -128,7 +224,7 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask)
128 return -EINVAL; 224 return -EINVAL;
129 225
130 /* 226 /*
131 * Note that offline cpus can't be drained when this UDN 227 * Note that offline cpus can't be drained when this user network
132 * rectangle eventually closes. We used to detect this 228 * rectangle eventually closes. We used to detect this
133 * situation and print a warning, but it annoyed users and 229 * situation and print a warning, but it annoyed users and
134 * they ignored it anyway, so now we just return without a 230 * they ignored it anyway, so now we just return without a
@@ -137,16 +233,6 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask)
137 return 0; 233 return 0;
138} 234}
139 235
140/* Do the two given rectangles overlap on any cpu? */
141static int overlaps(struct hardwall_info *a, struct hardwall_info *b)
142{
143 return a->ulhc_x + a->width > b->ulhc_x && /* A not to the left */
144 b->ulhc_x + b->width > a->ulhc_x && /* B not to the left */
145 a->ulhc_y + a->height > b->ulhc_y && /* A not above */
146 b->ulhc_y + b->height > a->ulhc_y; /* B not above */
147}
148
149
150/* 236/*
151 * Hardware management of hardwall setup, teardown, trapping, 237 * Hardware management of hardwall setup, teardown, trapping,
152 * and enabling/disabling PL0 access to the networks. 238 * and enabling/disabling PL0 access to the networks.
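
The deleted overlaps() helper tested two bounding rectangles geometrically; later in this patch hardwall_create() replaces it with cpumask_intersects(), which flags a conflict exactly when two resources reserve a common cpu and also works for non-rectangular IPI ranges. A toy sketch of that check, assuming a flat 64-bit mask instead of the kernel's struct cpumask:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Toy stand-in for struct cpumask on a system with at most 64 cpus. */
    typedef uint64_t toy_cpumask_t;

    /* Same check hardwall_create() now performs via cpumask_intersects(). */
    static bool toy_cpumask_intersects(toy_cpumask_t a, toy_cpumask_t b)
    {
            return (a & b) != 0;
    }

    int main(void)
    {
            toy_cpumask_t udn_wall = 0x000f;   /* cpus 0-3 already reserved */
            toy_cpumask_t request  = 0x00f0;   /* cpus 4-7: disjoint, allowed */
            toy_cpumask_t overlap  = 0x0018;   /* cpus 3-4: conflicts */

            printf("request ok: %d\n", !toy_cpumask_intersects(udn_wall, request));
            printf("overlap ok: %d\n", !toy_cpumask_intersects(udn_wall, overlap));
            return 0;
    }
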
@@ -157,23 +243,35 @@ enum direction_protect {
157 N_PROTECT = (1 << 0), 243 N_PROTECT = (1 << 0),
158 E_PROTECT = (1 << 1), 244 E_PROTECT = (1 << 1),
159 S_PROTECT = (1 << 2), 245 S_PROTECT = (1 << 2),
160 W_PROTECT = (1 << 3) 246 W_PROTECT = (1 << 3),
247 C_PROTECT = (1 << 4),
161}; 248};
162 249
163static void enable_firewall_interrupts(void) 250static inline int xdn_which_interrupt(struct hardwall_type *hwt)
251{
252#ifndef __tilepro__
253 if (hwt->is_idn)
254 return INT_IDN_FIREWALL;
255#endif
256 return INT_UDN_FIREWALL;
257}
258
259static void enable_firewall_interrupts(struct hardwall_type *hwt)
164{ 260{
165 arch_local_irq_unmask_now(INT_UDN_FIREWALL); 261 arch_local_irq_unmask_now(xdn_which_interrupt(hwt));
166} 262}
167 263
168static void disable_firewall_interrupts(void) 264static void disable_firewall_interrupts(struct hardwall_type *hwt)
169{ 265{
170 arch_local_irq_mask_now(INT_UDN_FIREWALL); 266 arch_local_irq_mask_now(xdn_which_interrupt(hwt));
171} 267}
172 268
173/* Set up hardwall on this cpu based on the passed hardwall_info. */ 269/* Set up hardwall on this cpu based on the passed hardwall_info. */
174static void hardwall_setup_ipi_func(void *info) 270static void hardwall_setup_func(void *info)
175{ 271{
176 struct hardwall_info *r = info; 272 struct hardwall_info *r = info;
273 struct hardwall_type *hwt = r->type;
274
177 int cpu = smp_processor_id(); 275 int cpu = smp_processor_id();
178 int x = cpu % smp_width; 276 int x = cpu % smp_width;
179 int y = cpu / smp_width; 277 int y = cpu / smp_width;
@@ -187,13 +285,12 @@ static void hardwall_setup_ipi_func(void *info)
187 if (y == r->ulhc_y + r->height - 1) 285 if (y == r->ulhc_y + r->height - 1)
188 bits |= S_PROTECT; 286 bits |= S_PROTECT;
189 BUG_ON(bits == 0); 287 BUG_ON(bits == 0);
190 __insn_mtspr(SPR_UDN_DIRECTION_PROTECT, bits); 288 mtspr_XDN(hwt, DIRECTION_PROTECT, bits);
191 enable_firewall_interrupts(); 289 enable_firewall_interrupts(hwt);
192
193} 290}
194 291
195/* Set up all cpus on edge of rectangle to enable/disable hardwall SPRs. */ 292/* Set up all cpus on edge of rectangle to enable/disable hardwall SPRs. */
196static void hardwall_setup(struct hardwall_info *r) 293static void hardwall_protect_rectangle(struct hardwall_info *r)
197{ 294{
198 int x, y, cpu, delta; 295 int x, y, cpu, delta;
199 struct cpumask rect_cpus; 296 struct cpumask rect_cpus;
@@ -217,37 +314,50 @@ static void hardwall_setup(struct hardwall_info *r)
217 } 314 }
218 315
219 /* Then tell all the cpus to set up their protection SPR */ 316 /* Then tell all the cpus to set up their protection SPR */
220 on_each_cpu_mask(&rect_cpus, hardwall_setup_ipi_func, r, 1); 317 on_each_cpu_mask(&rect_cpus, hardwall_setup_func, r, 1);
221} 318}
222 319
223void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num) 320void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num)
224{ 321{
225 struct hardwall_info *rect; 322 struct hardwall_info *rect;
323 struct hardwall_type *hwt;
226 struct task_struct *p; 324 struct task_struct *p;
227 struct siginfo info; 325 struct siginfo info;
228 int x, y;
229 int cpu = smp_processor_id(); 326 int cpu = smp_processor_id();
230 int found_processes; 327 int found_processes;
231 unsigned long flags; 328 unsigned long flags;
232
233 struct pt_regs *old_regs = set_irq_regs(regs); 329 struct pt_regs *old_regs = set_irq_regs(regs);
330
234 irq_enter(); 331 irq_enter();
235 332
333 /* Figure out which network trapped. */
334 switch (fault_num) {
335#ifndef __tilepro__
336 case INT_IDN_FIREWALL:
337 hwt = &hardwall_types[HARDWALL_IDN];
338 break;
339#endif
340 case INT_UDN_FIREWALL:
341 hwt = &hardwall_types[HARDWALL_UDN];
342 break;
343 default:
344 BUG();
345 }
346 BUG_ON(hwt->disabled);
347
236 /* This tile trapped a network access; find the rectangle. */ 348 /* This tile trapped a network access; find the rectangle. */
237 x = cpu % smp_width; 349 spin_lock_irqsave(&hwt->lock, flags);
238 y = cpu / smp_width; 350 list_for_each_entry(rect, &hwt->list, list) {
239 spin_lock_irqsave(&hardwall_lock, flags); 351 if (cpumask_test_cpu(cpu, &rect->cpumask))
240 list_for_each_entry(rect, &rectangles, list) {
241 if (contains(rect, x, y))
242 break; 352 break;
243 } 353 }
244 354
245 /* 355 /*
246 * It shouldn't be possible not to find this cpu on the 356 * It shouldn't be possible not to find this cpu on the
247 * rectangle list, since only cpus in rectangles get hardwalled. 357 * rectangle list, since only cpus in rectangles get hardwalled.
248 * The hardwall is only removed after the UDN is drained. 358 * The hardwall is only removed after the user network is drained.
249 */ 359 */
250 BUG_ON(&rect->list == &rectangles); 360 BUG_ON(&rect->list == &hwt->list);
251 361
252 /* 362 /*
253 * If we already started teardown on this hardwall, don't worry; 363 * If we already started teardown on this hardwall, don't worry;
@@ -255,30 +365,32 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num)
255 * to quiesce. 365 * to quiesce.
256 */ 366 */
257 if (rect->teardown_in_progress) { 367 if (rect->teardown_in_progress) {
258 pr_notice("cpu %d: detected hardwall violation %#lx" 368 pr_notice("cpu %d: detected %s hardwall violation %#lx"
259 " while teardown already in progress\n", 369 " while teardown already in progress\n",
260 cpu, (long) __insn_mfspr(SPR_UDN_DIRECTION_PROTECT)); 370 cpu, hwt->name,
371 (long)mfspr_XDN(hwt, DIRECTION_PROTECT));
261 goto done; 372 goto done;
262 } 373 }
263 374
264 /* 375 /*
265 * Kill off any process that is activated in this rectangle. 376 * Kill off any process that is activated in this rectangle.
266 * We bypass security to deliver the signal, since it must be 377 * We bypass security to deliver the signal, since it must be
267 * one of the activated processes that generated the UDN 378 * one of the activated processes that generated the user network
268 * message that caused this trap, and all the activated 379 * message that caused this trap, and all the activated
269 * processes shared a single open file so are pretty tightly 380 * processes shared a single open file so are pretty tightly
270 * bound together from a security point of view to begin with. 381 * bound together from a security point of view to begin with.
271 */ 382 */
272 rect->teardown_in_progress = 1; 383 rect->teardown_in_progress = 1;
273 wmb(); /* Ensure visibility of rectangle before notifying processes. */ 384 wmb(); /* Ensure visibility of rectangle before notifying processes. */
274 pr_notice("cpu %d: detected hardwall violation %#lx...\n", 385 pr_notice("cpu %d: detected %s hardwall violation %#lx...\n",
275 cpu, (long) __insn_mfspr(SPR_UDN_DIRECTION_PROTECT)); 386 cpu, hwt->name, (long)mfspr_XDN(hwt, DIRECTION_PROTECT));
276 info.si_signo = SIGILL; 387 info.si_signo = SIGILL;
277 info.si_errno = 0; 388 info.si_errno = 0;
278 info.si_code = ILL_HARDWALL; 389 info.si_code = ILL_HARDWALL;
279 found_processes = 0; 390 found_processes = 0;
280 list_for_each_entry(p, &rect->task_head, thread.hardwall_list) { 391 list_for_each_entry(p, &rect->task_head,
281 BUG_ON(p->thread.hardwall != rect); 392 thread.hardwall[hwt->index].list) {
393 BUG_ON(p->thread.hardwall[hwt->index].info != rect);
282 if (!(p->flags & PF_EXITING)) { 394 if (!(p->flags & PF_EXITING)) {
283 found_processes = 1; 395 found_processes = 1;
284 pr_notice("hardwall: killing %d\n", p->pid); 396 pr_notice("hardwall: killing %d\n", p->pid);
@@ -289,7 +401,7 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num)
289 pr_notice("hardwall: no associated processes!\n"); 401 pr_notice("hardwall: no associated processes!\n");
290 402
291 done: 403 done:
292 spin_unlock_irqrestore(&hardwall_lock, flags); 404 spin_unlock_irqrestore(&hwt->lock, flags);
293 405
294 /* 406 /*
295 * We have to disable firewall interrupts now, or else when we 407 * We have to disable firewall interrupts now, or else when we
@@ -298,48 +410,87 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num)
298 * haven't yet drained the network, and that would allow packets 410 * haven't yet drained the network, and that would allow packets
299 * to cross out of the hardwall region. 411 * to cross out of the hardwall region.
300 */ 412 */
301 disable_firewall_interrupts(); 413 disable_firewall_interrupts(hwt);
302 414
303 irq_exit(); 415 irq_exit();
304 set_irq_regs(old_regs); 416 set_irq_regs(old_regs);
305} 417}
306 418
307/* Allow access from user space to the UDN. */ 419/* Allow access from user space to the user network. */
308void grant_network_mpls(void) 420void grant_hardwall_mpls(struct hardwall_type *hwt)
309{ 421{
310 __insn_mtspr(SPR_MPL_UDN_ACCESS_SET_0, 1); 422#ifndef __tilepro__
311 __insn_mtspr(SPR_MPL_UDN_AVAIL_SET_0, 1); 423 if (!hwt->is_xdn) {
312 __insn_mtspr(SPR_MPL_UDN_COMPLETE_SET_0, 1); 424 __insn_mtspr(SPR_MPL_IPI_0_SET_0, 1);
313 __insn_mtspr(SPR_MPL_UDN_TIMER_SET_0, 1); 425 return;
426 }
427#endif
428 mtspr_MPL_XDN(hwt, ACCESS_SET_0, 1);
429 mtspr_MPL_XDN(hwt, AVAIL_SET_0, 1);
430 mtspr_MPL_XDN(hwt, COMPLETE_SET_0, 1);
431 mtspr_MPL_XDN(hwt, TIMER_SET_0, 1);
314#if !CHIP_HAS_REV1_XDN() 432#if !CHIP_HAS_REV1_XDN()
315 __insn_mtspr(SPR_MPL_UDN_REFILL_SET_0, 1); 433 mtspr_MPL_XDN(hwt, REFILL_SET_0, 1);
316 __insn_mtspr(SPR_MPL_UDN_CA_SET_0, 1); 434 mtspr_MPL_XDN(hwt, CA_SET_0, 1);
317#endif 435#endif
318} 436}
319 437
320/* Deny access from user space to the UDN. */ 438/* Deny access from user space to the user network. */
321void restrict_network_mpls(void) 439void restrict_hardwall_mpls(struct hardwall_type *hwt)
322{ 440{
323 __insn_mtspr(SPR_MPL_UDN_ACCESS_SET_1, 1); 441#ifndef __tilepro__
324 __insn_mtspr(SPR_MPL_UDN_AVAIL_SET_1, 1); 442 if (!hwt->is_xdn) {
325 __insn_mtspr(SPR_MPL_UDN_COMPLETE_SET_1, 1); 443 __insn_mtspr(SPR_MPL_IPI_0_SET_1, 1);
326 __insn_mtspr(SPR_MPL_UDN_TIMER_SET_1, 1); 444 return;
445 }
446#endif
447 mtspr_MPL_XDN(hwt, ACCESS_SET_1, 1);
448 mtspr_MPL_XDN(hwt, AVAIL_SET_1, 1);
449 mtspr_MPL_XDN(hwt, COMPLETE_SET_1, 1);
450 mtspr_MPL_XDN(hwt, TIMER_SET_1, 1);
327#if !CHIP_HAS_REV1_XDN() 451#if !CHIP_HAS_REV1_XDN()
328 __insn_mtspr(SPR_MPL_UDN_REFILL_SET_1, 1); 452 mtspr_MPL_XDN(hwt, REFILL_SET_1, 1);
329 __insn_mtspr(SPR_MPL_UDN_CA_SET_1, 1); 453 mtspr_MPL_XDN(hwt, CA_SET_1, 1);
330#endif 454#endif
331} 455}
332 456
457/* Restrict or deny as necessary for the task we're switching to. */
458void hardwall_switch_tasks(struct task_struct *prev,
459 struct task_struct *next)
460{
461 int i;
462 for (i = 0; i < HARDWALL_TYPES; ++i) {
463 if (prev->thread.hardwall[i].info != NULL) {
464 if (next->thread.hardwall[i].info == NULL)
465 restrict_hardwall_mpls(&hardwall_types[i]);
466 } else if (next->thread.hardwall[i].info != NULL) {
467 grant_hardwall_mpls(&hardwall_types[i]);
468 }
469 }
470}
471
472/* Does this task have the right to IPI the given cpu? */
473int hardwall_ipi_valid(int cpu)
474{
475#ifdef __tilegx__
476 struct hardwall_info *info =
477 current->thread.hardwall[HARDWALL_IPI].info;
478 return info && cpumask_test_cpu(cpu, &info->cpumask);
479#else
480 return 0;
481#endif
482}
333 483
334/* 484/*
335 * Code to create, activate, deactivate, and destroy hardwall rectangles. 485 * Code to create, activate, deactivate, and destroy hardwall resources.
336 */ 486 */
337 487
338/* Create a hardwall for the given rectangle */ 488/* Create a hardwall for the given resource */
339static struct hardwall_info *hardwall_create( 489static struct hardwall_info *hardwall_create(struct hardwall_type *hwt,
340 size_t size, const unsigned char __user *bits) 490 size_t size,
491 const unsigned char __user *bits)
341{ 492{
342 struct hardwall_info *iter, *rect; 493 struct hardwall_info *iter, *info;
343 struct cpumask mask; 494 struct cpumask mask;
344 unsigned long flags; 495 unsigned long flags;
345 int rc; 496 int rc;
@@ -370,55 +521,62 @@ static struct hardwall_info *hardwall_create(
370 } 521 }
371 } 522 }
372 523
373 /* Allocate a new rectangle optimistically. */ 524 /* Allocate a new hardwall_info optimistically. */
374 rect = kmalloc(sizeof(struct hardwall_info), 525 info = kmalloc(sizeof(struct hardwall_info),
375 GFP_KERNEL | __GFP_ZERO); 526 GFP_KERNEL | __GFP_ZERO);
376 if (rect == NULL) 527 if (info == NULL)
377 return ERR_PTR(-ENOMEM); 528 return ERR_PTR(-ENOMEM);
378 INIT_LIST_HEAD(&rect->task_head); 529 INIT_LIST_HEAD(&info->task_head);
530 info->type = hwt;
379 531
380 /* Compute the rectangle size and validate that it's plausible. */ 532 /* Compute the rectangle size and validate that it's plausible. */
381 rc = setup_rectangle(rect, &mask); 533 cpumask_copy(&info->cpumask, &mask);
382 if (rc != 0) { 534 info->id = find_first_bit(cpumask_bits(&mask), nr_cpumask_bits);
383 kfree(rect); 535 if (hwt->is_xdn) {
384 return ERR_PTR(rc); 536 rc = check_rectangle(info, &mask);
537 if (rc != 0) {
538 kfree(info);
539 return ERR_PTR(rc);
540 }
385 } 541 }
386 542
387 /* Confirm it doesn't overlap and add it to the list. */ 543 /* Confirm it doesn't overlap and add it to the list. */
388 spin_lock_irqsave(&hardwall_lock, flags); 544 spin_lock_irqsave(&hwt->lock, flags);
389 list_for_each_entry(iter, &rectangles, list) { 545 list_for_each_entry(iter, &hwt->list, list) {
390 if (overlaps(iter, rect)) { 546 if (cpumask_intersects(&iter->cpumask, &info->cpumask)) {
391 spin_unlock_irqrestore(&hardwall_lock, flags); 547 spin_unlock_irqrestore(&hwt->lock, flags);
392 kfree(rect); 548 kfree(info);
393 return ERR_PTR(-EBUSY); 549 return ERR_PTR(-EBUSY);
394 } 550 }
395 } 551 }
396 list_add_tail(&rect->list, &rectangles); 552 list_add_tail(&info->list, &hwt->list);
397 spin_unlock_irqrestore(&hardwall_lock, flags); 553 spin_unlock_irqrestore(&hwt->lock, flags);
398 554
399 /* Set up appropriate hardwalling on all affected cpus. */ 555 /* Set up appropriate hardwalling on all affected cpus. */
400 hardwall_setup(rect); 556 if (hwt->is_xdn)
557 hardwall_protect_rectangle(info);
401 558
402 /* Create a /proc/tile/hardwall entry. */ 559 /* Create a /proc/tile/hardwall entry. */
403 hardwall_add_proc(rect); 560 hardwall_add_proc(info);
404 561
405 return rect; 562 return info;
406} 563}
407 564
408/* Activate a given hardwall on this cpu for this process. */ 565/* Activate a given hardwall on this cpu for this process. */
409static int hardwall_activate(struct hardwall_info *rect) 566static int hardwall_activate(struct hardwall_info *info)
410{ 567{
411 int cpu, x, y; 568 int cpu;
412 unsigned long flags; 569 unsigned long flags;
413 struct task_struct *p = current; 570 struct task_struct *p = current;
414 struct thread_struct *ts = &p->thread; 571 struct thread_struct *ts = &p->thread;
572 struct hardwall_type *hwt;
415 573
416 /* Require a rectangle. */ 574 /* Require a hardwall. */
417 if (rect == NULL) 575 if (info == NULL)
418 return -ENODATA; 576 return -ENODATA;
419 577
420 /* Not allowed to activate a rectangle that is being torn down. */ 578 /* Not allowed to activate a hardwall that is being torn down. */
421 if (rect->teardown_in_progress) 579 if (info->teardown_in_progress)
422 return -EINVAL; 580 return -EINVAL;
423 581
424 /* 582 /*
@@ -428,78 +586,87 @@ static int hardwall_activate(struct hardwall_info *rect)
428 if (cpumask_weight(&p->cpus_allowed) != 1) 586 if (cpumask_weight(&p->cpus_allowed) != 1)
429 return -EPERM; 587 return -EPERM;
430 588
431 /* Make sure we are bound to a cpu in this rectangle. */ 589 /* Make sure we are bound to a cpu assigned to this resource. */
432 cpu = smp_processor_id(); 590 cpu = smp_processor_id();
433 BUG_ON(cpumask_first(&p->cpus_allowed) != cpu); 591 BUG_ON(cpumask_first(&p->cpus_allowed) != cpu);
434 x = cpu_x(cpu); 592 if (!cpumask_test_cpu(cpu, &info->cpumask))
435 y = cpu_y(cpu);
436 if (!contains(rect, x, y))
437 return -EINVAL; 593 return -EINVAL;
438 594
439 /* If we are already bound to this hardwall, it's a no-op. */ 595 /* If we are already bound to this hardwall, it's a no-op. */
440 if (ts->hardwall) { 596 hwt = info->type;
441 BUG_ON(ts->hardwall != rect); 597 if (ts->hardwall[hwt->index].info) {
598 BUG_ON(ts->hardwall[hwt->index].info != info);
442 return 0; 599 return 0;
443 } 600 }
444 601
445 /* Success! This process gets to use the user networks on this cpu. */ 602 /* Success! This process gets to use the resource on this cpu. */
446 ts->hardwall = rect; 603 ts->hardwall[hwt->index].info = info;
447 spin_lock_irqsave(&hardwall_lock, flags); 604 spin_lock_irqsave(&hwt->lock, flags);
448 list_add(&ts->hardwall_list, &rect->task_head); 605 list_add(&ts->hardwall[hwt->index].list, &info->task_head);
449 spin_unlock_irqrestore(&hardwall_lock, flags); 606 spin_unlock_irqrestore(&hwt->lock, flags);
450 grant_network_mpls(); 607 grant_hardwall_mpls(hwt);
451 printk(KERN_DEBUG "Pid %d (%s) activated for hardwall: cpu %d\n", 608 printk(KERN_DEBUG "Pid %d (%s) activated for %s hardwall: cpu %d\n",
452 p->pid, p->comm, cpu); 609 p->pid, p->comm, hwt->name, cpu);
453 return 0; 610 return 0;
454} 611}
455 612
456/* 613/*
457 * Deactivate a task's hardwall. Must hold hardwall_lock. 614 * Deactivate a task's hardwall. Must hold lock for hardwall_type.
458 * This method may be called from free_task(), so we don't want to 615 * This method may be called from free_task(), so we don't want to
459 * rely on too many fields of struct task_struct still being valid. 616 * rely on too many fields of struct task_struct still being valid.
460 * We assume the cpus_allowed, pid, and comm fields are still valid. 617 * We assume the cpus_allowed, pid, and comm fields are still valid.
461 */ 618 */
462static void _hardwall_deactivate(struct task_struct *task) 619static void _hardwall_deactivate(struct hardwall_type *hwt,
620 struct task_struct *task)
463{ 621{
464 struct thread_struct *ts = &task->thread; 622 struct thread_struct *ts = &task->thread;
465 623
466 if (cpumask_weight(&task->cpus_allowed) != 1) { 624 if (cpumask_weight(&task->cpus_allowed) != 1) {
467 pr_err("pid %d (%s) releasing networks with" 625 pr_err("pid %d (%s) releasing %s hardwall with"
468 " an affinity mask containing %d cpus!\n", 626 " an affinity mask containing %d cpus!\n",
469 task->pid, task->comm, 627 task->pid, task->comm, hwt->name,
470 cpumask_weight(&task->cpus_allowed)); 628 cpumask_weight(&task->cpus_allowed));
471 BUG(); 629 BUG();
472 } 630 }
473 631
474 BUG_ON(ts->hardwall == NULL); 632 BUG_ON(ts->hardwall[hwt->index].info == NULL);
475 ts->hardwall = NULL; 633 ts->hardwall[hwt->index].info = NULL;
476 list_del(&ts->hardwall_list); 634 list_del(&ts->hardwall[hwt->index].list);
477 if (task == current) 635 if (task == current)
478 restrict_network_mpls(); 636 restrict_hardwall_mpls(hwt);
479} 637}
480 638
481/* Deactivate a task's hardwall. */ 639/* Deactivate a task's hardwall. */
482int hardwall_deactivate(struct task_struct *task) 640static int hardwall_deactivate(struct hardwall_type *hwt,
641 struct task_struct *task)
483{ 642{
484 unsigned long flags; 643 unsigned long flags;
485 int activated; 644 int activated;
486 645
487 spin_lock_irqsave(&hardwall_lock, flags); 646 spin_lock_irqsave(&hwt->lock, flags);
488 activated = (task->thread.hardwall != NULL); 647 activated = (task->thread.hardwall[hwt->index].info != NULL);
489 if (activated) 648 if (activated)
490 _hardwall_deactivate(task); 649 _hardwall_deactivate(hwt, task);
491 spin_unlock_irqrestore(&hardwall_lock, flags); 650 spin_unlock_irqrestore(&hwt->lock, flags);
492 651
493 if (!activated) 652 if (!activated)
494 return -EINVAL; 653 return -EINVAL;
495 654
496 printk(KERN_DEBUG "Pid %d (%s) deactivated for hardwall: cpu %d\n", 655 printk(KERN_DEBUG "Pid %d (%s) deactivated for %s hardwall: cpu %d\n",
497 task->pid, task->comm, smp_processor_id()); 656 task->pid, task->comm, hwt->name, smp_processor_id());
498 return 0; 657 return 0;
499} 658}
500 659
501/* Stop a UDN switch before draining the network. */ 660void hardwall_deactivate_all(struct task_struct *task)
502static void stop_udn_switch(void *ignored) 661{
662 int i;
663 for (i = 0; i < HARDWALL_TYPES; ++i)
664 if (task->thread.hardwall[i].info)
665 hardwall_deactivate(&hardwall_types[i], task);
666}
667
668/* Stop the switch before draining the network. */
669static void stop_xdn_switch(void *arg)
503{ 670{
504#if !CHIP_HAS_REV1_XDN() 671#if !CHIP_HAS_REV1_XDN()
505 /* Freeze the switch and the demux. */ 672 /* Freeze the switch and the demux. */
@@ -507,13 +674,71 @@ static void stop_udn_switch(void *ignored)
507 SPR_UDN_SP_FREEZE__SP_FRZ_MASK | 674 SPR_UDN_SP_FREEZE__SP_FRZ_MASK |
508 SPR_UDN_SP_FREEZE__DEMUX_FRZ_MASK | 675 SPR_UDN_SP_FREEZE__DEMUX_FRZ_MASK |
509 SPR_UDN_SP_FREEZE__NON_DEST_EXT_MASK); 676 SPR_UDN_SP_FREEZE__NON_DEST_EXT_MASK);
677#else
678 /*
679 * Drop all packets bound for the core or off the edge.
680 * We rely on the normal hardwall protection setup code
681 * to have set the low four bits to trigger firewall interrupts,
682 * and shift those bits up to trigger "drop on send" semantics,
683 * plus adding "drop on send to core" for all switches.
684 * In practice it seems the switches latch the DIRECTION_PROTECT
685 * SPR so they won't start dropping if they're already
686 * delivering the last message to the core, but it doesn't
687 * hurt to enable it here.
688 */
689 struct hardwall_type *hwt = arg;
690 unsigned long protect = mfspr_XDN(hwt, DIRECTION_PROTECT);
691 mtspr_XDN(hwt, DIRECTION_PROTECT, (protect | C_PROTECT) << 5);
510#endif 692#endif
511} 693}
512 694
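
The (protect | C_PROTECT) << 5 write in stop_xdn_switch() relies on DIRECTION_PROTECT holding two bit fields: the low per-direction bits raise firewall interrupts, and the same bits shifted up select drop-on-send. A tiny sketch of that encoding; the field offset of 5 is taken from the code above, the rest is assumed for illustration:

    #include <stdio.h>

    enum direction_protect {        /* mirrors the enum earlier in this patch */
            N_PROTECT = 1 << 0,
            E_PROTECT = 1 << 1,
            S_PROTECT = 1 << 2,
            W_PROTECT = 1 << 3,
            C_PROTECT = 1 << 4,
    };

    int main(void)
    {
            /* Firewall-interrupt bits for a tile on the north-west corner. */
            unsigned long protect = N_PROTECT | W_PROTECT;

            /* Shift into the (assumed) drop-on-send field, adding the core bit. */
            unsigned long drop = (protect | C_PROTECT) << 5;

            printf("interrupt bits %#lx -> drop-on-send bits %#lx\n",
                   protect, drop);
            return 0;
    }
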
695static void empty_xdn_demuxes(struct hardwall_type *hwt)
696{
697#ifndef __tilepro__
698 if (hwt->is_idn) {
699 while (__insn_mfspr(SPR_IDN_DATA_AVAIL) & (1 << 0))
700 (void) __tile_idn0_receive();
701 while (__insn_mfspr(SPR_IDN_DATA_AVAIL) & (1 << 1))
702 (void) __tile_idn1_receive();
703 return;
704 }
705#endif
706 while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 0))
707 (void) __tile_udn0_receive();
708 while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 1))
709 (void) __tile_udn1_receive();
710 while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 2))
711 (void) __tile_udn2_receive();
712 while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 3))
713 (void) __tile_udn3_receive();
714}
715
513/* Drain all the state from a stopped switch. */ 716/* Drain all the state from a stopped switch. */
514static void drain_udn_switch(void *ignored) 717static void drain_xdn_switch(void *arg)
515{ 718{
516#if !CHIP_HAS_REV1_XDN() 719 struct hardwall_info *info = arg;
720 struct hardwall_type *hwt = info->type;
721
722#if CHIP_HAS_REV1_XDN()
723 /*
724 * The switches have been configured to drop any messages
725 * destined for cores (or off the edge of the rectangle).
726 * But the current message may continue to be delivered,
727 * so we wait until all the cores have finished any pending
728 * messages before we stop draining.
729 */
730 int pending = mfspr_XDN(hwt, PENDING);
731 while (pending--) {
732 empty_xdn_demuxes(hwt);
733 if (hwt->is_idn)
734 __tile_idn_send(0);
735 else
736 __tile_udn_send(0);
737 }
738 atomic_dec(&info->xdn_pending_count);
739 while (atomic_read(&info->xdn_pending_count))
740 empty_xdn_demuxes(hwt);
741#else
517 int i; 742 int i;
518 int from_tile_words, ca_count; 743 int from_tile_words, ca_count;
519 744
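
On rev1 networks the drain in drain_xdn_switch() is a two-phase barrier: each cpu empties its demuxes once per message the switch still counts as pending, then decrements a shared counter and keeps servicing its demuxes until every cpu has finished phase one. A minimal pthread sketch of that countdown-and-keep-draining pattern; drain_once() is a stand-in for empty_xdn_demuxes():

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    #define NCPUS 4

    static atomic_int pending = NCPUS;

    /* Stand-in for empty_xdn_demuxes(): drain whatever has arrived locally. */
    static void drain_once(int cpu)
    {
            (void)cpu;  /* real code would pull packets out of the demux FIFOs */
    }

    static void *drain_cpu(void *arg)
    {
            int cpu = (int)(long)arg;
            drain_once(cpu);                     /* phase 1: local drain */
            atomic_fetch_sub(&pending, 1);       /* announce we are done */
            while (atomic_load(&pending) > 0)    /* phase 2: keep draining */
                    drain_once(cpu);             /* until all cpus finish */
            return NULL;
    }

    int main(void)
    {
            pthread_t t[NCPUS];
            for (long i = 0; i < NCPUS; i++)
                    pthread_create(&t[i], NULL, drain_cpu, (void *)i);
            for (int i = 0; i < NCPUS; i++)
                    pthread_join(t[i], NULL);
            puts("all switches drained");
            return 0;
    }
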
@@ -533,15 +758,7 @@ static void drain_udn_switch(void *ignored)
533 (void) __insn_mfspr(SPR_UDN_DEMUX_WRITE_FIFO); 758 (void) __insn_mfspr(SPR_UDN_DEMUX_WRITE_FIFO);
534 759
535 /* Empty out demuxes. */ 760 /* Empty out demuxes. */
536 while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 0)) 761 empty_xdn_demuxes(hwt);
537 (void) __tile_udn0_receive();
538 while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 1))
539 (void) __tile_udn1_receive();
540 while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 2))
541 (void) __tile_udn2_receive();
542 while (__insn_mfspr(SPR_UDN_DATA_AVAIL) & (1 << 3))
543 (void) __tile_udn3_receive();
544 BUG_ON((__insn_mfspr(SPR_UDN_DATA_AVAIL) & 0xF) != 0);
545 762
546 /* Empty out catch all. */ 763 /* Empty out catch all. */
547 ca_count = __insn_mfspr(SPR_UDN_DEMUX_CA_COUNT); 764 ca_count = __insn_mfspr(SPR_UDN_DEMUX_CA_COUNT);
@@ -563,21 +780,25 @@ static void drain_udn_switch(void *ignored)
563#endif 780#endif
564} 781}
565 782
566/* Reset random UDN state registers at boot up and during hardwall teardown. */ 783/* Reset random XDN state registers at boot up and during hardwall teardown. */
567void reset_network_state(void) 784static void reset_xdn_network_state(struct hardwall_type *hwt)
568{ 785{
569#if !CHIP_HAS_REV1_XDN() 786 if (hwt->disabled)
570 /* Reset UDN coordinates to their standard value */
571 unsigned int cpu = smp_processor_id();
572 unsigned int x = cpu % smp_width;
573 unsigned int y = cpu / smp_width;
574#endif
575
576 if (udn_disabled)
577 return; 787 return;
578 788
789 /* Clear out other random registers so we have a clean slate. */
790 mtspr_XDN(hwt, DIRECTION_PROTECT, 0);
791 mtspr_XDN(hwt, AVAIL_EN, 0);
792 mtspr_XDN(hwt, DEADLOCK_TIMEOUT, 0);
793
579#if !CHIP_HAS_REV1_XDN() 794#if !CHIP_HAS_REV1_XDN()
580 __insn_mtspr(SPR_UDN_TILE_COORD, (x << 18) | (y << 7)); 795 /* Reset UDN coordinates to their standard value */
796 {
797 unsigned int cpu = smp_processor_id();
798 unsigned int x = cpu % smp_width;
799 unsigned int y = cpu / smp_width;
800 __insn_mtspr(SPR_UDN_TILE_COORD, (x << 18) | (y << 7));
801 }
581 802
582 /* Set demux tags to predefined values and enable them. */ 803 /* Set demux tags to predefined values and enable them. */
583 __insn_mtspr(SPR_UDN_TAG_VALID, 0xf); 804 __insn_mtspr(SPR_UDN_TAG_VALID, 0xf);
@@ -585,56 +806,50 @@ void reset_network_state(void)
585 __insn_mtspr(SPR_UDN_TAG_1, (1 << 1)); 806 __insn_mtspr(SPR_UDN_TAG_1, (1 << 1));
586 __insn_mtspr(SPR_UDN_TAG_2, (1 << 2)); 807 __insn_mtspr(SPR_UDN_TAG_2, (1 << 2));
587 __insn_mtspr(SPR_UDN_TAG_3, (1 << 3)); 808 __insn_mtspr(SPR_UDN_TAG_3, (1 << 3));
588#endif
589 809
590 /* Clear out other random registers so we have a clean slate. */ 810 /* Set other rev0 random registers to a clean state. */
591 __insn_mtspr(SPR_UDN_AVAIL_EN, 0);
592 __insn_mtspr(SPR_UDN_DEADLOCK_TIMEOUT, 0);
593#if !CHIP_HAS_REV1_XDN()
594 __insn_mtspr(SPR_UDN_REFILL_EN, 0); 811 __insn_mtspr(SPR_UDN_REFILL_EN, 0);
595 __insn_mtspr(SPR_UDN_DEMUX_QUEUE_SEL, 0); 812 __insn_mtspr(SPR_UDN_DEMUX_QUEUE_SEL, 0);
596 __insn_mtspr(SPR_UDN_SP_FIFO_SEL, 0); 813 __insn_mtspr(SPR_UDN_SP_FIFO_SEL, 0);
597#endif
598 814
599 /* Start the switch and demux. */ 815 /* Start the switch and demux. */
600#if !CHIP_HAS_REV1_XDN()
601 __insn_mtspr(SPR_UDN_SP_FREEZE, 0); 816 __insn_mtspr(SPR_UDN_SP_FREEZE, 0);
602#endif 817#endif
603} 818}
604 819
605/* Restart a UDN switch after draining. */ 820void reset_network_state(void)
606static void restart_udn_switch(void *ignored)
607{ 821{
608 reset_network_state(); 822 reset_xdn_network_state(&hardwall_types[HARDWALL_UDN]);
609 823#ifndef __tilepro__
610 /* Disable firewall interrupts. */ 824 reset_xdn_network_state(&hardwall_types[HARDWALL_IDN]);
611 __insn_mtspr(SPR_UDN_DIRECTION_PROTECT, 0); 825#endif
612 disable_firewall_interrupts();
613} 826}
614 827
615/* Build a struct cpumask containing all valid tiles in bounding rectangle. */ 828/* Restart an XDN switch after draining. */
616static void fill_mask(struct hardwall_info *r, struct cpumask *result) 829static void restart_xdn_switch(void *arg)
617{ 830{
618 int x, y, cpu; 831 struct hardwall_type *hwt = arg;
619 832
620 cpumask_clear(result); 833#if CHIP_HAS_REV1_XDN()
834 /* One last drain step to avoid races with injection and draining. */
835 empty_xdn_demuxes(hwt);
836#endif
621 837
622 cpu = r->ulhc_y * smp_width + r->ulhc_x; 838 reset_xdn_network_state(hwt);
623 for (y = 0; y < r->height; ++y, cpu += smp_width - r->width) { 839
624 for (x = 0; x < r->width; ++x, ++cpu) 840 /* Disable firewall interrupts. */
625 cpu_online_set(cpu, result); 841 disable_firewall_interrupts(hwt);
626 }
627} 842}
628 843
629/* Last reference to a hardwall is gone, so clear the network. */ 844/* Last reference to a hardwall is gone, so clear the network. */
630static void hardwall_destroy(struct hardwall_info *rect) 845static void hardwall_destroy(struct hardwall_info *info)
631{ 846{
632 struct task_struct *task; 847 struct task_struct *task;
848 struct hardwall_type *hwt;
633 unsigned long flags; 849 unsigned long flags;
634 struct cpumask mask;
635 850
636 /* Make sure this file actually represents a rectangle. */ 851 /* Make sure this file actually represents a hardwall. */
637 if (rect == NULL) 852 if (info == NULL)
638 return; 853 return;
639 854
640 /* 855 /*
@@ -644,39 +859,53 @@ static void hardwall_destroy(struct hardwall_info *rect)
644 * deactivate any remaining tasks before freeing the 859 * deactivate any remaining tasks before freeing the
645 * hardwall_info object itself. 860 * hardwall_info object itself.
646 */ 861 */
647 spin_lock_irqsave(&hardwall_lock, flags); 862 hwt = info->type;
648 list_for_each_entry(task, &rect->task_head, thread.hardwall_list) 863 info->teardown_in_progress = 1;
649 _hardwall_deactivate(task); 864 spin_lock_irqsave(&hwt->lock, flags);
650 spin_unlock_irqrestore(&hardwall_lock, flags); 865 list_for_each_entry(task, &info->task_head,
651 866 thread.hardwall[hwt->index].list)
652 /* Drain the UDN. */ 867 _hardwall_deactivate(hwt, task);
653 printk(KERN_DEBUG "Clearing hardwall rectangle %dx%d %d,%d\n", 868 spin_unlock_irqrestore(&hwt->lock, flags);
654 rect->width, rect->height, rect->ulhc_x, rect->ulhc_y); 869
655 fill_mask(rect, &mask); 870 if (hwt->is_xdn) {
656 on_each_cpu_mask(&mask, stop_udn_switch, NULL, 1); 871 /* Configure the switches for draining the user network. */
657 on_each_cpu_mask(&mask, drain_udn_switch, NULL, 1); 872 printk(KERN_DEBUG
873 "Clearing %s hardwall rectangle %dx%d %d,%d\n",
874 hwt->name, info->width, info->height,
875 info->ulhc_x, info->ulhc_y);
876 on_each_cpu_mask(&info->cpumask, stop_xdn_switch, hwt, 1);
877
878 /* Drain the network. */
879#if CHIP_HAS_REV1_XDN()
880 atomic_set(&info->xdn_pending_count,
881 cpumask_weight(&info->cpumask));
882 on_each_cpu_mask(&info->cpumask, drain_xdn_switch, info, 0);
883#else
884 on_each_cpu_mask(&info->cpumask, drain_xdn_switch, info, 1);
885#endif
658 886
659 /* Restart switch and disable firewall. */ 887 /* Restart switch and disable firewall. */
660 on_each_cpu_mask(&mask, restart_udn_switch, NULL, 1); 888 on_each_cpu_mask(&info->cpumask, restart_xdn_switch, hwt, 1);
889 }
661 890
662 /* Remove the /proc/tile/hardwall entry. */ 891 /* Remove the /proc/tile/hardwall entry. */
663 hardwall_remove_proc(rect); 892 hardwall_remove_proc(info);
664 893
665 /* Now free the rectangle from the list. */ 894 /* Now free the hardwall from the list. */
666 spin_lock_irqsave(&hardwall_lock, flags); 895 spin_lock_irqsave(&hwt->lock, flags);
667 BUG_ON(!list_empty(&rect->task_head)); 896 BUG_ON(!list_empty(&info->task_head));
668 list_del(&rect->list); 897 list_del(&info->list);
669 spin_unlock_irqrestore(&hardwall_lock, flags); 898 spin_unlock_irqrestore(&hwt->lock, flags);
670 kfree(rect); 899 kfree(info);
671} 900}
672 901
673 902
674static int hardwall_proc_show(struct seq_file *sf, void *v) 903static int hardwall_proc_show(struct seq_file *sf, void *v)
675{ 904{
676 struct hardwall_info *rect = sf->private; 905 struct hardwall_info *info = sf->private;
677 char buf[256]; 906 char buf[256];
678 907
679 int rc = cpulist_scnprintf(buf, sizeof(buf), &rect->cpumask); 908 int rc = cpulist_scnprintf(buf, sizeof(buf), &info->cpumask);
680 buf[rc++] = '\n'; 909 buf[rc++] = '\n';
681 seq_write(sf, buf, rc); 910 seq_write(sf, buf, rc);
682 return 0; 911 return 0;
@@ -695,31 +924,45 @@ static const struct file_operations hardwall_proc_fops = {
695 .release = single_release, 924 .release = single_release,
696}; 925};
697 926
698static void hardwall_add_proc(struct hardwall_info *rect) 927static void hardwall_add_proc(struct hardwall_info *info)
699{ 928{
700 char buf[64]; 929 char buf[64];
701 snprintf(buf, sizeof(buf), "%d", rect->id); 930 snprintf(buf, sizeof(buf), "%d", info->id);
702 proc_create_data(buf, 0444, hardwall_proc_dir, 931 proc_create_data(buf, 0444, info->type->proc_dir,
703 &hardwall_proc_fops, rect); 932 &hardwall_proc_fops, info);
704} 933}
705 934
706static void hardwall_remove_proc(struct hardwall_info *rect) 935static void hardwall_remove_proc(struct hardwall_info *info)
707{ 936{
708 char buf[64]; 937 char buf[64];
709 snprintf(buf, sizeof(buf), "%d", rect->id); 938 snprintf(buf, sizeof(buf), "%d", info->id);
710 remove_proc_entry(buf, hardwall_proc_dir); 939 remove_proc_entry(buf, info->type->proc_dir);
711} 940}
712 941
713int proc_pid_hardwall(struct task_struct *task, char *buffer) 942int proc_pid_hardwall(struct task_struct *task, char *buffer)
714{ 943{
715 struct hardwall_info *rect = task->thread.hardwall; 944 int i;
716 return rect ? sprintf(buffer, "%d\n", rect->id) : 0; 945 int n = 0;
946 for (i = 0; i < HARDWALL_TYPES; ++i) {
947 struct hardwall_info *info = task->thread.hardwall[i].info;
948 if (info)
949 n += sprintf(&buffer[n], "%s: %d\n",
950 info->type->name, info->id);
951 }
952 return n;
717} 953}
718 954
719void proc_tile_hardwall_init(struct proc_dir_entry *root) 955void proc_tile_hardwall_init(struct proc_dir_entry *root)
720{ 956{
721 if (!udn_disabled) 957 int i;
722 hardwall_proc_dir = proc_mkdir("hardwall", root); 958 for (i = 0; i < HARDWALL_TYPES; ++i) {
959 struct hardwall_type *hwt = &hardwall_types[i];
960 if (hwt->disabled)
961 continue;
962 if (hardwall_proc_dir == NULL)
963 hardwall_proc_dir = proc_mkdir("hardwall", root);
964 hwt->proc_dir = proc_mkdir(hwt->name, hardwall_proc_dir);
965 }
723} 966}
724 967
725 968
@@ -729,34 +972,45 @@ void proc_tile_hardwall_init(struct proc_dir_entry *root)
729 972
730static long hardwall_ioctl(struct file *file, unsigned int a, unsigned long b) 973static long hardwall_ioctl(struct file *file, unsigned int a, unsigned long b)
731{ 974{
732 struct hardwall_info *rect = file->private_data; 975 struct hardwall_info *info = file->private_data;
976 int minor = iminor(file->f_mapping->host);
 977 struct hardwall_type *hwt;
733 978
734 if (_IOC_TYPE(a) != HARDWALL_IOCTL_BASE) 979 if (_IOC_TYPE(a) != HARDWALL_IOCTL_BASE)
735 return -EINVAL; 980 return -EINVAL;
736 981
982 BUILD_BUG_ON(HARDWALL_TYPES != _HARDWALL_TYPES);
983 BUILD_BUG_ON(HARDWALL_TYPES !=
984 sizeof(hardwall_types)/sizeof(hardwall_types[0]));
985
986 if (minor < 0 || minor >= HARDWALL_TYPES)
987 return -EINVAL;
988 hwt = &hardwall_types[minor];
989 WARN_ON(info && hwt != info->type);
990
737 switch (_IOC_NR(a)) { 991 switch (_IOC_NR(a)) {
738 case _HARDWALL_CREATE: 992 case _HARDWALL_CREATE:
739 if (udn_disabled) 993 if (hwt->disabled)
740 return -ENOSYS; 994 return -ENOSYS;
741 if (rect != NULL) 995 if (info != NULL)
742 return -EALREADY; 996 return -EALREADY;
743 rect = hardwall_create(_IOC_SIZE(a), 997 info = hardwall_create(hwt, _IOC_SIZE(a),
744 (const unsigned char __user *)b); 998 (const unsigned char __user *)b);
745 if (IS_ERR(rect)) 999 if (IS_ERR(info))
746 return PTR_ERR(rect); 1000 return PTR_ERR(info);
747 file->private_data = rect; 1001 file->private_data = info;
748 return 0; 1002 return 0;
749 1003
750 case _HARDWALL_ACTIVATE: 1004 case _HARDWALL_ACTIVATE:
751 return hardwall_activate(rect); 1005 return hardwall_activate(info);
752 1006
753 case _HARDWALL_DEACTIVATE: 1007 case _HARDWALL_DEACTIVATE:
754 if (current->thread.hardwall != rect) 1008 if (current->thread.hardwall[hwt->index].info != info)
755 return -EINVAL; 1009 return -EINVAL;
756 return hardwall_deactivate(current); 1010 return hardwall_deactivate(hwt, current);
757 1011
758 case _HARDWALL_GET_ID: 1012 case _HARDWALL_GET_ID:
759 return rect ? rect->id : -EINVAL; 1013 return info ? info->id : -EINVAL;
760 1014
761 default: 1015 default:
762 return -EINVAL; 1016 return -EINVAL;
@@ -775,26 +1029,28 @@ static long hardwall_compat_ioctl(struct file *file,
775/* The user process closed the file; revoke access to user networks. */ 1029/* The user process closed the file; revoke access to user networks. */
776static int hardwall_flush(struct file *file, fl_owner_t owner) 1030static int hardwall_flush(struct file *file, fl_owner_t owner)
777{ 1031{
778 struct hardwall_info *rect = file->private_data; 1032 struct hardwall_info *info = file->private_data;
779 struct task_struct *task, *tmp; 1033 struct task_struct *task, *tmp;
780 unsigned long flags; 1034 unsigned long flags;
781 1035
782 if (rect) { 1036 if (info) {
783 /* 1037 /*
784 * NOTE: if multiple threads are activated on this hardwall 1038 * NOTE: if multiple threads are activated on this hardwall
785 * file, the other threads will continue having access to the 1039 * file, the other threads will continue having access to the
786 * UDN until they are context-switched out and back in again. 1040 * user network until they are context-switched out and back
1041 * in again.
787 * 1042 *
788 * NOTE: A NULL files pointer means the task is being torn 1043 * NOTE: A NULL files pointer means the task is being torn
789 * down, so in that case we also deactivate it. 1044 * down, so in that case we also deactivate it.
790 */ 1045 */
791 spin_lock_irqsave(&hardwall_lock, flags); 1046 struct hardwall_type *hwt = info->type;
792 list_for_each_entry_safe(task, tmp, &rect->task_head, 1047 spin_lock_irqsave(&hwt->lock, flags);
793 thread.hardwall_list) { 1048 list_for_each_entry_safe(task, tmp, &info->task_head,
1049 thread.hardwall[hwt->index].list) {
794 if (task->files == owner || task->files == NULL) 1050 if (task->files == owner || task->files == NULL)
795 _hardwall_deactivate(task); 1051 _hardwall_deactivate(hwt, task);
796 } 1052 }
797 spin_unlock_irqrestore(&hardwall_lock, flags); 1053 spin_unlock_irqrestore(&hwt->lock, flags);
798 } 1054 }
799 1055
800 return 0; 1056 return 0;
@@ -824,11 +1080,11 @@ static int __init dev_hardwall_init(void)
824 int rc; 1080 int rc;
825 dev_t dev; 1081 dev_t dev;
826 1082
827 rc = alloc_chrdev_region(&dev, 0, 1, "hardwall"); 1083 rc = alloc_chrdev_region(&dev, 0, HARDWALL_TYPES, "hardwall");
828 if (rc < 0) 1084 if (rc < 0)
829 return rc; 1085 return rc;
830 cdev_init(&hardwall_dev, &dev_hardwall_fops); 1086 cdev_init(&hardwall_dev, &dev_hardwall_fops);
831 rc = cdev_add(&hardwall_dev, dev, 1); 1087 rc = cdev_add(&hardwall_dev, dev, HARDWALL_TYPES);
832 if (rc < 0) 1088 if (rc < 0)
833 return rc; 1089 return rc;
834 1090
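
With one char-dev minor per hardwall type, the user-space flow implied by hardwall_ioctl() is: open the device node for the desired type, issue a CREATE ioctl carrying a cpumask, pin the thread to one cpu in that mask, then ACTIVATE. A hedged sketch of that sequence; the device path and the numeric ioctl encodings below are assumptions modeled on the switch cases above, not the actual <asm/hardwall.h> definitions:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <sched.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    /* Assumed request encodings; the real ones live in <asm/hardwall.h>. */
    #define HARDWALL_IOCTL_BASE  0xa2
    #define HARDWALL_CREATE(sz)  _IOC(_IOC_WRITE, HARDWALL_IOCTL_BASE, 1, (sz))
    #define HARDWALL_ACTIVATE    _IO(HARDWALL_IOCTL_BASE, 2)

    int main(void)
    {
            unsigned char mask[8];  /* cpumask bits for cpus 0..63 */
            cpu_set_t one;
            int fd = open("/dev/hardwall/udn", O_RDWR);  /* assumed node name */
            if (fd < 0) { perror("open"); return 1; }

            memset(mask, 0, sizeof(mask));
            mask[0] = 0x0f;  /* cpus 0-3; must form a rectangle for UDN/IDN */
            if (ioctl(fd, HARDWALL_CREATE(sizeof(mask)), mask) < 0) {
                    perror("create");
                    return 1;
            }

            /* hardwall_activate() insists on a single-cpu affinity mask. */
            CPU_ZERO(&one);
            CPU_SET(0, &one);
            sched_setaffinity(0, sizeof(one), &one);
            if (ioctl(fd, HARDWALL_ACTIVATE) < 0) {
                    perror("activate");
                    return 1;
            }

            /* ... use the user network; closing the fd tears the wall down. */
            close(fd);
            return 0;
    }
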
diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S
index 1a39b7c1c87e..f71bfeeaf1a9 100644
--- a/arch/tile/kernel/head_32.S
+++ b/arch/tile/kernel/head_32.S
@@ -69,7 +69,7 @@ ENTRY(_start)
69 } 69 }
70 { 70 {
71 moveli lr, lo16(1f) 71 moveli lr, lo16(1f)
72 move r5, zero 72 moveli r5, CTX_PAGE_FLAG
73 } 73 }
74 { 74 {
75 auli lr, lr, ha16(1f) 75 auli lr, lr, ha16(1f)
@@ -141,11 +141,11 @@ ENTRY(empty_zero_page)
141 141
142 .macro PTE va, cpa, bits1, no_org=0 142 .macro PTE va, cpa, bits1, no_org=0
143 .ifeq \no_org 143 .ifeq \no_org
144 .org swapper_pg_dir + HV_L1_INDEX(\va) * HV_PTE_SIZE 144 .org swapper_pg_dir + PGD_INDEX(\va) * HV_PTE_SIZE
145 .endif 145 .endif
146 .word HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED | \ 146 .word HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED | \
147 (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) 147 (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE)
148 .word (\bits1) | (HV_CPA_TO_PFN(\cpa) << (HV_PTE_INDEX_PFN - 32)) 148 .word (\bits1) | (HV_CPA_TO_PTFN(\cpa) << (HV_PTE_INDEX_PTFN - 32))
149 .endm 149 .endm
150 150
151__PAGE_ALIGNED_DATA 151__PAGE_ALIGNED_DATA
@@ -166,7 +166,7 @@ ENTRY(swapper_pg_dir)
166 /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */ 166 /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */
167 PTE MEM_SV_INTRPT, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ 167 PTE MEM_SV_INTRPT, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \
168 (1 << (HV_PTE_INDEX_EXECUTABLE - 32)) 168 (1 << (HV_PTE_INDEX_EXECUTABLE - 32))
169 .org swapper_pg_dir + HV_L1_SIZE 169 .org swapper_pg_dir + PGDIR_SIZE
170 END(swapper_pg_dir) 170 END(swapper_pg_dir)
171 171
172 /* 172 /*
diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S
index 6bc3a932fe45..f9a2734f7b82 100644
--- a/arch/tile/kernel/head_64.S
+++ b/arch/tile/kernel/head_64.S
@@ -114,7 +114,7 @@ ENTRY(_start)
114 shl16insli r0, r0, hw0(swapper_pg_dir - PAGE_OFFSET) 114 shl16insli r0, r0, hw0(swapper_pg_dir - PAGE_OFFSET)
115 } 115 }
116 { 116 {
117 move r3, zero 117 moveli r3, CTX_PAGE_FLAG
118 j hv_install_context 118 j hv_install_context
119 } 119 }
1201: 1201:
@@ -210,19 +210,19 @@ ENTRY(empty_zero_page)
210 .macro PTE cpa, bits1 210 .macro PTE cpa, bits1
211 .quad HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED |\ 211 .quad HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED |\
212 HV_PTE_GLOBAL | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) |\ 212 HV_PTE_GLOBAL | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) |\
213 (\bits1) | (HV_CPA_TO_PFN(\cpa) << HV_PTE_INDEX_PFN) 213 (\bits1) | (HV_CPA_TO_PTFN(\cpa) << HV_PTE_INDEX_PTFN)
214 .endm 214 .endm
215 215
216__PAGE_ALIGNED_DATA 216__PAGE_ALIGNED_DATA
217 .align PAGE_SIZE 217 .align PAGE_SIZE
218ENTRY(swapper_pg_dir) 218ENTRY(swapper_pg_dir)
219 .org swapper_pg_dir + HV_L0_INDEX(PAGE_OFFSET) * HV_PTE_SIZE 219 .org swapper_pg_dir + PGD_INDEX(PAGE_OFFSET) * HV_PTE_SIZE
220.Lsv_data_pmd: 220.Lsv_data_pmd:
221 .quad 0 /* PTE temp_data_pmd - PAGE_OFFSET, 0 */ 221 .quad 0 /* PTE temp_data_pmd - PAGE_OFFSET, 0 */
222 .org swapper_pg_dir + HV_L0_INDEX(MEM_SV_START) * HV_PTE_SIZE 222 .org swapper_pg_dir + PGD_INDEX(MEM_SV_START) * HV_PTE_SIZE
223.Lsv_code_pmd: 223.Lsv_code_pmd:
224 .quad 0 /* PTE temp_code_pmd - PAGE_OFFSET, 0 */ 224 .quad 0 /* PTE temp_code_pmd - PAGE_OFFSET, 0 */
225 .org swapper_pg_dir + HV_L0_SIZE 225 .org swapper_pg_dir + SIZEOF_PGD
226 END(swapper_pg_dir) 226 END(swapper_pg_dir)
227 227
228 .align HV_PAGE_TABLE_ALIGN 228 .align HV_PAGE_TABLE_ALIGN
@@ -233,11 +233,11 @@ ENTRY(temp_data_pmd)
233 * permissions later. 233 * permissions later.
234 */ 234 */
235 .set addr, 0 235 .set addr, 0
236 .rept HV_L1_ENTRIES 236 .rept PTRS_PER_PMD
237 PTE addr, HV_PTE_READABLE | HV_PTE_WRITABLE 237 PTE addr, HV_PTE_READABLE | HV_PTE_WRITABLE
238 .set addr, addr + HV_PAGE_SIZE_LARGE 238 .set addr, addr + HPAGE_SIZE
239 .endr 239 .endr
240 .org temp_data_pmd + HV_L1_SIZE 240 .org temp_data_pmd + SIZEOF_PMD
241 END(temp_data_pmd) 241 END(temp_data_pmd)
242 242
243 .align HV_PAGE_TABLE_ALIGN 243 .align HV_PAGE_TABLE_ALIGN
@@ -248,11 +248,11 @@ ENTRY(temp_code_pmd)
248 * permissions later. 248 * permissions later.
249 */ 249 */
250 .set addr, 0 250 .set addr, 0
251 .rept HV_L1_ENTRIES 251 .rept PTRS_PER_PMD
252 PTE addr, HV_PTE_READABLE | HV_PTE_EXECUTABLE 252 PTE addr, HV_PTE_READABLE | HV_PTE_EXECUTABLE
253 .set addr, addr + HV_PAGE_SIZE_LARGE 253 .set addr, addr + HPAGE_SIZE
254 .endr 254 .endr
255 .org temp_code_pmd + HV_L1_SIZE 255 .org temp_code_pmd + SIZEOF_PMD
256 END(temp_code_pmd) 256 END(temp_code_pmd)
257 257
258 /* 258 /*
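Both heads now pass CTX_PAGE_FLAG rather than zero when installing the boot page table. As a hedged C sketch of the call the assembly tail-jumps into (the hv_install_context() prototype is taken from <hv/hypervisor.h>; access_pte and asid are hypothetical placeholders for the other argument registers):

	/* r0 = PA of the pgdir; the flags argument (r5 on tilepro, r3 on
	 * tilegx) now carries CTX_PAGE_FLAG so the hypervisor knows which
	 * page-size layout the kernel page tables use. */
	int rc = hv_install_context(__pa(swapper_pg_dir), /* page table */
				    access_pte,           /* hypothetical */
				    asid,                 /* hypothetical */
				    CTX_PAGE_FLAG);       /* was 0 */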
diff --git a/arch/tile/kernel/hvglue.lds b/arch/tile/kernel/hvglue.lds
index 2b7cd0a659a9..d44c5a67a1ed 100644
--- a/arch/tile/kernel/hvglue.lds
+++ b/arch/tile/kernel/hvglue.lds
@@ -55,4 +55,5 @@ hv_store_mapping = TEXT_OFFSET + 0x106a0;
55hv_inquire_realpa = TEXT_OFFSET + 0x106c0; 55hv_inquire_realpa = TEXT_OFFSET + 0x106c0;
56hv_flush_all = TEXT_OFFSET + 0x106e0; 56hv_flush_all = TEXT_OFFSET + 0x106e0;
57hv_get_ipi_pte = TEXT_OFFSET + 0x10700; 57hv_get_ipi_pte = TEXT_OFFSET + 0x10700;
58hv_glue_internals = TEXT_OFFSET + 0x10720; 58hv_set_pte_super_shift = TEXT_OFFSET + 0x10720;
 59hv_glue_internals = TEXT_OFFSET + 0x10740;
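The glue table hands out fixed 0x20-byte slots, so inserting hv_set_pte_super_shift at 0x10720 moves the hv_glue_internals marker up to 0x10740. A hedged sketch of how the kernel drives the new entry, mirroring init_super_pages() in setup.c later in this diff:

	/* Tell the hypervisor the page shift for each additional
	 * ("super") page-size slot; huge_shift[] is the kernel table. */
	for (i = 0; i < HUGE_SHIFT_ENTRIES; ++i)
		hv_set_pte_super_shift(i, huge_shift[i]);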
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 30ae76e50c44..7c06d597ffd0 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -220,7 +220,9 @@ intvec_\vecname:
220 * This routine saves just the first four registers, plus the 220 * This routine saves just the first four registers, plus the
221 * stack context so we can do proper backtracing right away, 221 * stack context so we can do proper backtracing right away,
222 * and defers to handle_interrupt to save the rest. 222 * and defers to handle_interrupt to save the rest.
223 * The backtracer needs pc, ex1, lr, sp, r52, and faultnum. 223 * The backtracer needs pc, ex1, lr, sp, r52, and faultnum,
224 * and needs sp set to its final location at the bottom of
225 * the stack frame.
224 */ 226 */
225 addli r0, r0, PTREGS_OFFSET_LR - (PTREGS_SIZE + KSTK_PTREGS_GAP) 227 addli r0, r0, PTREGS_OFFSET_LR - (PTREGS_SIZE + KSTK_PTREGS_GAP)
226 wh64 r0 /* cache line 7 */ 228 wh64 r0 /* cache line 7 */
@@ -450,23 +452,6 @@ intvec_\vecname:
450 push_reg r5, r52 452 push_reg r5, r52
451 st r52, r4 453 st r52, r4
452 454
453 /* Load tp with our per-cpu offset. */
454#ifdef CONFIG_SMP
455 {
456 mfspr r20, SPR_SYSTEM_SAVE_K_0
457 moveli r21, hw2_last(__per_cpu_offset)
458 }
459 {
460 shl16insli r21, r21, hw1(__per_cpu_offset)
461 bfextu r20, r20, 0, LOG2_THREAD_SIZE-1
462 }
463 shl16insli r21, r21, hw0(__per_cpu_offset)
464 shl3add r20, r20, r21
465 ld tp, r20
466#else
467 move tp, zero
468#endif
469
470 /* 455 /*
471 * If we will be returning to the kernel, we will need to 456 * If we will be returning to the kernel, we will need to
472 * reset the interrupt masks to the state they had before. 457 * reset the interrupt masks to the state they had before.
@@ -489,6 +474,44 @@ intvec_\vecname:
489 .endif 474 .endif
490 st r21, r32 475 st r21, r32
491 476
477 /*
 478 * We've captured enough state onto the stack (including in
479 * particular our EX_CONTEXT state) that we can now release
480 * the interrupt critical section and replace it with our
481 * standard "interrupts disabled" mask value. This allows
482 * synchronous interrupts (and profile interrupts) to punch
483 * through from this point onwards.
484 *
485 * It's important that no code before this point touch memory
486 * other than our own stack (to keep the invariant that this
487 * is all that gets touched under ICS), and that no code after
488 * this point reference any interrupt-specific SPR, in particular
489 * the EX_CONTEXT_K_ values.
490 */
491 .ifc \function,handle_nmi
492 IRQ_DISABLE_ALL(r20)
493 .else
494 IRQ_DISABLE(r20, r21)
495 .endif
496 mtspr INTERRUPT_CRITICAL_SECTION, zero
497
498 /* Load tp with our per-cpu offset. */
499#ifdef CONFIG_SMP
500 {
501 mfspr r20, SPR_SYSTEM_SAVE_K_0
502 moveli r21, hw2_last(__per_cpu_offset)
503 }
504 {
505 shl16insli r21, r21, hw1(__per_cpu_offset)
506 bfextu r20, r20, 0, LOG2_THREAD_SIZE-1
507 }
508 shl16insli r21, r21, hw0(__per_cpu_offset)
509 shl3add r20, r20, r21
510 ld tp, r20
511#else
512 move tp, zero
513#endif
514
492#ifdef __COLLECT_LINKER_FEEDBACK__ 515#ifdef __COLLECT_LINKER_FEEDBACK__
493 /* 516 /*
494 * Notify the feedback routines that we were in the 517 * Notify the feedback routines that we were in the
@@ -513,21 +536,6 @@ intvec_\vecname:
513#endif 536#endif
514 537
515 /* 538 /*
516 * we've captured enough state to the stack (including in
517 * particular our EX_CONTEXT state) that we can now release
518 * the interrupt critical section and replace it with our
519 * standard "interrupts disabled" mask value. This allows
520 * synchronous interrupts (and profile interrupts) to punch
521 * through from this point onwards.
522 */
523 .ifc \function,handle_nmi
524 IRQ_DISABLE_ALL(r20)
525 .else
526 IRQ_DISABLE(r20, r21)
527 .endif
528 mtspr INTERRUPT_CRITICAL_SECTION, zero
529
530 /*
531 * Prepare the first 256 stack bytes to be rapidly accessible 539 * Prepare the first 256 stack bytes to be rapidly accessible
532 * without having to fetch the background data. 540 * without having to fetch the background data.
533 */ 541 */
@@ -736,9 +744,10 @@ STD_ENTRY(interrupt_return)
736 beqzt r30, .Lrestore_regs 744 beqzt r30, .Lrestore_regs
737 j 3f 745 j 3f
7382: TRACE_IRQS_ON 7462: TRACE_IRQS_ON
747 IRQ_ENABLE_LOAD(r20, r21)
739 movei r0, 1 748 movei r0, 1
740 mtspr INTERRUPT_CRITICAL_SECTION, r0 749 mtspr INTERRUPT_CRITICAL_SECTION, r0
741 IRQ_ENABLE(r20, r21) 750 IRQ_ENABLE_APPLY(r20, r21)
742 beqzt r30, .Lrestore_regs 751 beqzt r30, .Lrestore_regs
7433: 7523:
744 753
@@ -755,7 +764,6 @@ STD_ENTRY(interrupt_return)
755 * that will save some cycles if this turns out to be a syscall. 764 * that will save some cycles if this turns out to be a syscall.
756 */ 765 */
757.Lrestore_regs: 766.Lrestore_regs:
758 FEEDBACK_REENTER(interrupt_return) /* called from elsewhere */
759 767
760 /* 768 /*
761 * Rotate so we have one high bit and one low bit to test. 769 * Rotate so we have one high bit and one low bit to test.
@@ -1249,7 +1257,7 @@ STD_ENTRY(fill_ra_stack)
1249 int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign 1257 int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign
1250 int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault 1258 int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault
1251 int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault 1259 int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault
1252 int_hand INT_IDN_FIREWALL, IDN_FIREWALL, bad_intr 1260 int_hand INT_IDN_FIREWALL, IDN_FIREWALL, do_hardwall_trap
1253 int_hand INT_UDN_FIREWALL, UDN_FIREWALL, do_hardwall_trap 1261 int_hand INT_UDN_FIREWALL, UDN_FIREWALL, do_hardwall_trap
1254 int_hand INT_TILE_TIMER, TILE_TIMER, do_timer_interrupt 1262 int_hand INT_TILE_TIMER, TILE_TIMER, do_timer_interrupt
1255 int_hand INT_IDN_TIMER, IDN_TIMER, bad_intr 1263 int_hand INT_IDN_TIMER, IDN_TIMER, bad_intr
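The per-cpu tp load moves below the ICS release because, per the new comment, code running under ICS may touch nothing but its own stack, and the ld of __per_cpu_offset is a non-stack memory reference. A hedged C restatement of what the relocated block computes (the intrinsic name and field width are inferred from the assembly):

	#ifdef CONFIG_SMP
		/* SPR_SYSTEM_SAVE_K_0 keeps the cpu number in its low
		 * LOG2_THREAD_SIZE bits (the bfextu above); tp then gets
		 * this cpu's slot of __per_cpu_offset (shl3add + ld). */
		unsigned long cpu = __insn_mfspr(SPR_SYSTEM_SAVE_K_0) &
				    ((1UL << LOG2_THREAD_SIZE) - 1);
		tp = __per_cpu_offset[cpu];
	#else
		tp = 0;
	#endif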
diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c
index 6255f2eab112..f0b54a934712 100644
--- a/arch/tile/kernel/machine_kexec.c
+++ b/arch/tile/kernel/machine_kexec.c
@@ -31,6 +31,8 @@
31#include <asm/pgalloc.h> 31#include <asm/pgalloc.h>
32#include <asm/cacheflush.h> 32#include <asm/cacheflush.h>
33#include <asm/checksum.h> 33#include <asm/checksum.h>
34#include <asm/tlbflush.h>
35#include <asm/homecache.h>
34#include <hv/hypervisor.h> 36#include <hv/hypervisor.h>
35 37
36 38
@@ -222,11 +224,22 @@ struct page *kimage_alloc_pages_arch(gfp_t gfp_mask, unsigned int order)
222 return alloc_pages_node(0, gfp_mask, order); 224 return alloc_pages_node(0, gfp_mask, order);
223} 225}
224 226
227/*
228 * Address range in which pa=va mapping is set in setup_quasi_va_is_pa().
 229 * For tilepro, PAGE_OFFSET is used since this is the largest possible value
 230 * for tilepro, while for tilegx, we limit it to the entire middle-level page
 231 * table, which we assume has been allocated and is undoubtedly large enough.
232 */
233#ifndef __tilegx__
234#define QUASI_VA_IS_PA_ADDR_RANGE PAGE_OFFSET
235#else
236#define QUASI_VA_IS_PA_ADDR_RANGE PGDIR_SIZE
237#endif
238
225static void setup_quasi_va_is_pa(void) 239static void setup_quasi_va_is_pa(void)
226{ 240{
227 HV_PTE *pgtable;
228 HV_PTE pte; 241 HV_PTE pte;
229 int i; 242 unsigned long i;
230 243
231 /* 244 /*
232 * Flush our TLB to prevent conflicts between the previous contents 245 * Flush our TLB to prevent conflicts between the previous contents
@@ -234,16 +247,22 @@ static void setup_quasi_va_is_pa(void)
234 */ 247 */
235 local_flush_tlb_all(); 248 local_flush_tlb_all();
236 249
237 /* setup VA is PA, at least up to PAGE_OFFSET */ 250 /*
 238 251 * Set up VA = PA, at least up to QUASI_VA_IS_PA_ADDR_RANGE.
 239 pgtable = (HV_PTE *)current->mm->pgd; 252 * Note that we assume here that each level-1 page table
 253 * entry maps HPAGE_SIZE.
254 */
240 pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE); 255 pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE);
241 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); 256 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3);
242 257 for (i = 0; i < (QUASI_VA_IS_PA_ADDR_RANGE >> HPAGE_SHIFT); i++) {
243 for (i = 0; i < pgd_index(PAGE_OFFSET); i++) { 258 unsigned long vaddr = i << HPAGE_SHIFT;
259 pgd_t *pgd = pgd_offset(current->mm, vaddr);
260 pud_t *pud = pud_offset(pgd, vaddr);
261 pte_t *ptep = (pte_t *) pmd_offset(pud, vaddr);
244 unsigned long pfn = i << (HPAGE_SHIFT - PAGE_SHIFT); 262 unsigned long pfn = i << (HPAGE_SHIFT - PAGE_SHIFT);
263
245 if (pfn_valid(pfn)) 264 if (pfn_valid(pfn))
246 __set_pte(&pgtable[i], pfn_pte(pfn, pte)); 265 __set_pte(ptep, pfn_pte(pfn, pte));
247 } 266 }
248} 267}
249 268
@@ -251,6 +270,7 @@ static void setup_quasi_va_is_pa(void)
251void machine_kexec(struct kimage *image) 270void machine_kexec(struct kimage *image)
252{ 271{
253 void *reboot_code_buffer; 272 void *reboot_code_buffer;
273 pte_t *ptep;
254 void (*rnk)(unsigned long, void *, unsigned long) 274 void (*rnk)(unsigned long, void *, unsigned long)
255 __noreturn; 275 __noreturn;
256 276
@@ -266,8 +286,10 @@ void machine_kexec(struct kimage *image)
266 */ 286 */
267 homecache_change_page_home(image->control_code_page, 0, 287 homecache_change_page_home(image->control_code_page, 0,
268 smp_processor_id()); 288 smp_processor_id());
269 reboot_code_buffer = vmap(&image->control_code_page, 1, 0, 289 reboot_code_buffer = page_address(image->control_code_page);
270 __pgprot(_PAGE_KERNEL | _PAGE_EXECUTABLE)); 290 BUG_ON(reboot_code_buffer == NULL);
291 ptep = virt_to_pte(NULL, (unsigned long)reboot_code_buffer);
292 __set_pte(ptep, pte_mkexec(*ptep));
271 memcpy(reboot_code_buffer, relocate_new_kernel, 293 memcpy(reboot_code_buffer, relocate_new_kernel,
272 relocate_new_kernel_size); 294 relocate_new_kernel_size);
273 __flush_icache_range( 295 __flush_icache_range(
diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c
index 98d476920106..001cbfa10ac6 100644
--- a/arch/tile/kernel/module.c
+++ b/arch/tile/kernel/module.c
@@ -159,7 +159,17 @@ int apply_relocate_add(Elf_Shdr *sechdrs,
159 159
160 switch (ELF_R_TYPE(rel[i].r_info)) { 160 switch (ELF_R_TYPE(rel[i].r_info)) {
161 161
162#define MUNGE(func) (*location = ((*location & ~func(-1)) | func(value))) 162#ifdef __LITTLE_ENDIAN
163# define MUNGE(func) \
164 (*location = ((*location & ~func(-1)) | func(value)))
165#else
166/*
167 * Instructions are always little-endian, so when we read them as data,
168 * we have to swap them around before and after modifying them.
169 */
170# define MUNGE(func) \
171 (*location = swab64((swab64(*location) & ~func(-1)) | func(value)))
172#endif
163 173
164#ifndef __tilegx__ 174#ifndef __tilegx__
165 case R_TILE_32: 175 case R_TILE_32:
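The big-endian MUNGE variant exists because TILE instruction bundles are always stored little-endian, so a big-endian kernel that reads a bundle as a 64-bit datum sees it byte-swapped. A hedged sketch of the round trip, with create_Imm16_X0() standing in for whichever bitfield helper a given relocation uses:

	u64 bundle = swab64(*location);    /* memory order -> bundle order */
	bundle &= ~create_Imm16_X0(-1);    /* clear the field being patched */
	bundle |= create_Imm16_X0(value);  /* insert the relocated bits */
	*location = swab64(bundle);        /* bundle order -> memory order */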
diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c
index 446a7f52cc11..dafc447b5125 100644
--- a/arch/tile/kernel/proc.c
+++ b/arch/tile/kernel/proc.c
@@ -22,6 +22,7 @@
22#include <linux/proc_fs.h> 22#include <linux/proc_fs.h>
23#include <linux/sysctl.h> 23#include <linux/sysctl.h>
24#include <linux/hardirq.h> 24#include <linux/hardirq.h>
25#include <linux/hugetlb.h>
25#include <linux/mman.h> 26#include <linux/mman.h>
26#include <asm/unaligned.h> 27#include <asm/unaligned.h>
27#include <asm/pgtable.h> 28#include <asm/pgtable.h>
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index f572c19c4082..ba1023d8a021 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -128,10 +128,10 @@ void arch_release_thread_info(struct thread_info *info)
128 * Calling deactivate here just frees up the data structures. 128 * Calling deactivate here just frees up the data structures.
129 * If the task we're freeing held the last reference to a 129 * If the task we're freeing held the last reference to a
130 * hardwall fd, it would have been released prior to this point 130 * hardwall fd, it would have been released prior to this point
131 * anyway via exit_files(), and "hardwall" would be NULL by now. 131 * anyway via exit_files(), and the hardwall_task.info pointers
132 * would be NULL by now.
132 */ 133 */
133 if (info->task->thread.hardwall) 134 hardwall_deactivate_all(info->task);
134 hardwall_deactivate(info->task);
135#endif 135#endif
136 136
137 if (step_state) { 137 if (step_state) {
@@ -245,7 +245,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
245 245
246#ifdef CONFIG_HARDWALL 246#ifdef CONFIG_HARDWALL
247 /* New thread does not own any networks. */ 247 /* New thread does not own any networks. */
248 p->thread.hardwall = NULL; 248 memset(&p->thread.hardwall[0], 0,
249 sizeof(struct hardwall_task) * HARDWALL_TYPES);
249#endif 250#endif
250 251
251 252
@@ -515,12 +516,7 @@ struct task_struct *__sched _switch_to(struct task_struct *prev,
515 516
516#ifdef CONFIG_HARDWALL 517#ifdef CONFIG_HARDWALL
517 /* Enable or disable access to the network registers appropriately. */ 518 /* Enable or disable access to the network registers appropriately. */
518 if (prev->thread.hardwall != NULL) { 519 hardwall_switch_tasks(prev, next);
519 if (next->thread.hardwall == NULL)
520 restrict_network_mpls();
521 } else if (next->thread.hardwall != NULL) {
522 grant_network_mpls();
523 }
524#endif 520#endif
525 521
526 /* 522 /*
diff --git a/arch/tile/kernel/relocate_kernel.S b/arch/tile/kernel/relocate_kernel_32.S
index 010b418515f8..010b418515f8 100644
--- a/arch/tile/kernel/relocate_kernel.S
+++ b/arch/tile/kernel/relocate_kernel_32.S
diff --git a/arch/tile/kernel/relocate_kernel_64.S b/arch/tile/kernel/relocate_kernel_64.S
new file mode 100644
index 000000000000..1c09a4f5a4ea
--- /dev/null
+++ b/arch/tile/kernel/relocate_kernel_64.S
@@ -0,0 +1,260 @@
1/*
2 * Copyright 2011 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 *
14 * copy new kernel into place and then call hv_reexec
15 *
16 */
17
18#include <linux/linkage.h>
19#include <arch/chip.h>
20#include <asm/page.h>
21#include <hv/hypervisor.h>
22
23#undef RELOCATE_NEW_KERNEL_VERBOSE
24
25STD_ENTRY(relocate_new_kernel)
26
27 move r30, r0 /* page list */
28 move r31, r1 /* address of page we are on */
29 move r32, r2 /* start address of new kernel */
30
31 shrui r1, r1, PAGE_SHIFT
32 addi r1, r1, 1
33 shli sp, r1, PAGE_SHIFT
34 addi sp, sp, -8
35 /* we now have a stack (whether we need one or not) */
36
37 moveli r40, hw2_last(hv_console_putc)
38 shl16insli r40, r40, hw1(hv_console_putc)
39 shl16insli r40, r40, hw0(hv_console_putc)
40
41#ifdef RELOCATE_NEW_KERNEL_VERBOSE
42 moveli r0, 'r'
43 jalr r40
44
45 moveli r0, '_'
46 jalr r40
47
48 moveli r0, 'n'
49 jalr r40
50
51 moveli r0, '_'
52 jalr r40
53
54 moveli r0, 'k'
55 jalr r40
56
57 moveli r0, '\n'
58 jalr r40
59#endif
60
61 /*
62 * Throughout this code r30 is pointer to the element of page
63 * list we are working on.
64 *
65 * Normally we get to the next element of the page list by
66 * incrementing r30 by eight. The exception is if the element
67 * on the page list is an IND_INDIRECTION in which case we use
68 * the element with the low bits masked off as the new value
69 * of r30.
70 *
71 * To get this started, we need the value passed to us (which
72 * will always be an IND_INDIRECTION) in memory somewhere with
73 * r30 pointing at it. To do that, we push the value passed
74 * to us on the stack and make r30 point to it.
75 */
76
77 st sp, r30
78 move r30, sp
79 addi sp, sp, -16
80
81#if CHIP_HAS_CBOX_HOME_MAP()
82 /*
83 * On TILE-GX, we need to flush all tiles' caches, since we may
84 * have been doing hash-for-home caching there. Note that we
85 * must do this _after_ we're completely done modifying any memory
86 * other than our output buffer (which we know is locally cached).
87 * We want the caches to be fully clean when we do the reexec,
88 * because the hypervisor is going to do this flush again at that
89 * point, and we don't want that second flush to overwrite any memory.
90 */
91 {
92 move r0, zero /* cache_pa */
93 moveli r1, hw2_last(HV_FLUSH_EVICT_L2)
94 }
95 {
96 shl16insli r1, r1, hw1(HV_FLUSH_EVICT_L2)
97 movei r2, -1 /* cache_cpumask; -1 means all client tiles */
98 }
99 {
100 shl16insli r1, r1, hw0(HV_FLUSH_EVICT_L2) /* cache_control */
101 move r3, zero /* tlb_va */
102 }
103 {
104 move r4, zero /* tlb_length */
105 move r5, zero /* tlb_pgsize */
106 }
107 {
108 move r6, zero /* tlb_cpumask */
109 move r7, zero /* asids */
110 }
111 {
112 moveli r20, hw2_last(hv_flush_remote)
113 move r8, zero /* asidcount */
114 }
115 shl16insli r20, r20, hw1(hv_flush_remote)
116 shl16insli r20, r20, hw0(hv_flush_remote)
117
118 jalr r20
119#endif
120
121 /* r33 is destination pointer, default to zero */
122
123 moveli r33, 0
124
125.Lloop: ld r10, r30
126
127 andi r9, r10, 0xf /* low 4 bits tell us what type it is */
128 xor r10, r10, r9 /* r10 is now value with low 4 bits stripped */
129
130 cmpeqi r0, r9, 0x1 /* IND_DESTINATION */
131 beqzt r0, .Ltry2
132
133 move r33, r10
134
135#ifdef RELOCATE_NEW_KERNEL_VERBOSE
136 moveli r0, 'd'
137 jalr r40
138#endif
139
140 addi r30, r30, 8
141 j .Lloop
142
143.Ltry2:
144 cmpeqi r0, r9, 0x2 /* IND_INDIRECTION */
145 beqzt r0, .Ltry4
146
147 move r30, r10
148
149#ifdef RELOCATE_NEW_KERNEL_VERBOSE
150 moveli r0, 'i'
151 jalr r40
152#endif
153
154 j .Lloop
155
156.Ltry4:
157 cmpeqi r0, r9, 0x4 /* IND_DONE */
158 beqzt r0, .Ltry8
159
160 mf
161
162#ifdef RELOCATE_NEW_KERNEL_VERBOSE
163 moveli r0, 'D'
164 jalr r40
165 moveli r0, '\n'
166 jalr r40
167#endif
168
169 move r0, r32
170
171 moveli r41, hw2_last(hv_reexec)
172 shl16insli r41, r41, hw1(hv_reexec)
173 shl16insli r41, r41, hw0(hv_reexec)
174
175 jalr r41
176
177 /* we should not get here */
178
179 moveli r0, '?'
180 jalr r40
181 moveli r0, '\n'
182 jalr r40
183
184 j .Lhalt
185
186.Ltry8: cmpeqi r0, r9, 0x8 /* IND_SOURCE */
187 beqz r0, .Lerr /* unknown type */
188
189 /* copy page at r10 to page at r33 */
190
191 move r11, r33
192
193 moveli r0, hw2_last(PAGE_SIZE)
194 shl16insli r0, r0, hw1(PAGE_SIZE)
195 shl16insli r0, r0, hw0(PAGE_SIZE)
196 add r33, r33, r0
197
198 /* copy word at r10 to word at r11 until r11 equals r33 */
199
 200 /* We know the page size is a multiple of 64 bytes (8 words), so we
 201 * can unroll 8 times safely without any edge-case checking.
202 *
203 * Issue a flush of the destination every 8 words to avoid
204 * incoherence when starting the new kernel. (Now this is
205 * just good paranoia because the hv_reexec call will also
206 * take care of this.)
207 */
208
2091:
210 { ld r0, r10; addi r10, r10, 8 }
211 { st r11, r0; addi r11, r11, 8 }
212 { ld r0, r10; addi r10, r10, 8 }
213 { st r11, r0; addi r11, r11, 8 }
214 { ld r0, r10; addi r10, r10, 8 }
215 { st r11, r0; addi r11, r11, 8 }
216 { ld r0, r10; addi r10, r10, 8 }
217 { st r11, r0; addi r11, r11, 8 }
218 { ld r0, r10; addi r10, r10, 8 }
219 { st r11, r0; addi r11, r11, 8 }
220 { ld r0, r10; addi r10, r10, 8 }
221 { st r11, r0; addi r11, r11, 8 }
222 { ld r0, r10; addi r10, r10, 8 }
223 { st r11, r0; addi r11, r11, 8 }
224 { ld r0, r10; addi r10, r10, 8 }
225 { st r11, r0 }
226 { flush r11 ; addi r11, r11, 8 }
227
228 cmpeq r0, r33, r11
229 beqzt r0, 1b
230
231#ifdef RELOCATE_NEW_KERNEL_VERBOSE
232 moveli r0, 's'
233 jalr r40
234#endif
235
236 addi r30, r30, 8
237 j .Lloop
238
239
240.Lerr: moveli r0, 'e'
241 jalr r40
242 moveli r0, 'r'
243 jalr r40
244 moveli r0, 'r'
245 jalr r40
246 moveli r0, '\n'
247 jalr r40
248.Lhalt:
249 moveli r41, hw2_last(hv_halt)
250 shl16insli r41, r41, hw1(hv_halt)
251 shl16insli r41, r41, hw0(hv_halt)
252
253 jalr r41
254 STD_ENDPROC(relocate_new_kernel)
255
256 .section .rodata,"a"
257
258 .globl relocate_new_kernel_size
259relocate_new_kernel_size:
260 .long .Lend_relocate_new_kernel - relocate_new_kernel
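The .Lloop/.Ltry* chain above is the usual kexec page-list interpreter. A hedged C restatement (the 0x1/0x2/0x4/0x8 tags match IND_DESTINATION, IND_INDIRECTION, IND_DONE, and IND_SOURCE from <linux/kexec.h>; error handling and the cache flushes are omitted):

	static void walk_page_list(unsigned long *entry)
	{
		void *dst = NULL;
		for (;;) {
			unsigned long e = *entry;
			switch (e & 0xf) {
			case 0x1:	/* IND_DESTINATION: set copy target */
				dst = (void *)(e & ~0xfUL);
				entry++;
				break;
			case 0x2:	/* IND_INDIRECTION: next list page */
				entry = (unsigned long *)(e & ~0xfUL);
				break;
			case 0x8:	/* IND_SOURCE: copy a page, advance */
				memcpy(dst, (void *)(e & ~0xfUL), PAGE_SIZE);
				dst += PAGE_SIZE;
				entry++;
				break;
			case 0x4:	/* IND_DONE: mf, then hv_reexec() */
				return;
			}
		}
	}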
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 98d80eb49ddb..6098ccc59be2 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -28,6 +28,7 @@
28#include <linux/highmem.h> 28#include <linux/highmem.h>
29#include <linux/smp.h> 29#include <linux/smp.h>
30#include <linux/timex.h> 30#include <linux/timex.h>
31#include <linux/hugetlb.h>
31#include <asm/setup.h> 32#include <asm/setup.h>
32#include <asm/sections.h> 33#include <asm/sections.h>
33#include <asm/cacheflush.h> 34#include <asm/cacheflush.h>
@@ -49,9 +50,6 @@ char chip_model[64] __write_once;
49struct pglist_data node_data[MAX_NUMNODES] __read_mostly; 50struct pglist_data node_data[MAX_NUMNODES] __read_mostly;
50EXPORT_SYMBOL(node_data); 51EXPORT_SYMBOL(node_data);
51 52
52/* We only create bootmem data on node 0. */
53static bootmem_data_t __initdata node0_bdata;
54
55/* Information on the NUMA nodes that we compute early */ 53/* Information on the NUMA nodes that we compute early */
56unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES]; 54unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES];
57unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES]; 55unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES];
@@ -534,37 +532,96 @@ static void __init setup_memory(void)
534#endif 532#endif
535} 533}
536 534
537static void __init setup_bootmem_allocator(void) 535/*
536 * On 32-bit machines, we only put bootmem on the low controller,
537 * since PAs > 4GB can't be used in bootmem. In principle one could
538 * imagine, e.g., multiple 1 GB controllers all of which could support
539 * bootmem, but in practice using controllers this small isn't a
540 * particularly interesting scenario, so we just keep it simple and
541 * use only the first controller for bootmem on 32-bit machines.
542 */
543static inline int node_has_bootmem(int nid)
538{ 544{
539 unsigned long bootmap_size, first_alloc_pfn, last_alloc_pfn; 545#ifdef CONFIG_64BIT
546 return 1;
547#else
548 return nid == 0;
549#endif
550}
540 551
541 /* Provide a node 0 bdata. */ 552static inline unsigned long alloc_bootmem_pfn(int nid,
542 NODE_DATA(0)->bdata = &node0_bdata; 553 unsigned long size,
554 unsigned long goal)
555{
556 void *kva = __alloc_bootmem_node(NODE_DATA(nid), size,
557 PAGE_SIZE, goal);
558 unsigned long pfn = kaddr_to_pfn(kva);
559 BUG_ON(goal && PFN_PHYS(pfn) != goal);
560 return pfn;
561}
543 562
544#ifdef CONFIG_PCI 563static void __init setup_bootmem_allocator_node(int i)
545 /* Don't let boot memory alias the PCI region. */ 564{
546 last_alloc_pfn = min(max_low_pfn, pci_reserve_start_pfn); 565 unsigned long start, end, mapsize, mapstart;
566
567 if (node_has_bootmem(i)) {
568 NODE_DATA(i)->bdata = &bootmem_node_data[i];
569 } else {
570 /* Share controller zero's bdata for now. */
571 NODE_DATA(i)->bdata = &bootmem_node_data[0];
572 return;
573 }
574
575 /* Skip up to after the bss in node 0. */
576 start = (i == 0) ? min_low_pfn : node_start_pfn[i];
577
578 /* Only lowmem, if we're a HIGHMEM build. */
579#ifdef CONFIG_HIGHMEM
580 end = node_lowmem_end_pfn[i];
547#else 581#else
548 last_alloc_pfn = max_low_pfn; 582 end = node_end_pfn[i];
549#endif 583#endif
550 584
551 /* 585 /* No memory here. */
552 * Initialize the boot-time allocator (with low memory only): 586 if (end == start)
553 * The first argument says where to put the bitmap, and the 587 return;
554 * second says where the end of allocatable memory is. 588
555 */ 589 /* Figure out where the bootmem bitmap is located. */
556 bootmap_size = init_bootmem(min_low_pfn, last_alloc_pfn); 590 mapsize = bootmem_bootmap_pages(end - start);
591 if (i == 0) {
592 /* Use some space right before the heap on node 0. */
593 mapstart = start;
594 start += mapsize;
595 } else {
596 /* Allocate bitmap on node 0 to avoid page table issues. */
597 mapstart = alloc_bootmem_pfn(0, PFN_PHYS(mapsize), 0);
598 }
557 599
600 /* Initialize a node. */
601 init_bootmem_node(NODE_DATA(i), mapstart, start, end);
602
603 /* Free all the space back into the allocator. */
604 free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start));
605
606#if defined(CONFIG_PCI)
558 /* 607 /*
559 * Let the bootmem allocator use all the space we've given it 608 * Throw away any memory aliased by the PCI region. FIXME: this
560 * except for its own bitmap. 609 * is a temporary hack to work around bug 10502, and needs to be
610 * fixed properly.
561 */ 611 */
562 first_alloc_pfn = min_low_pfn + PFN_UP(bootmap_size); 612 if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start)
563 if (first_alloc_pfn >= last_alloc_pfn) 613 reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn),
564 early_panic("Not enough memory on controller 0 for bootmem\n"); 614 PFN_PHYS(pci_reserve_end_pfn -
615 pci_reserve_start_pfn),
616 BOOTMEM_EXCLUSIVE);
617#endif
618}
565 619
566 free_bootmem(PFN_PHYS(first_alloc_pfn), 620static void __init setup_bootmem_allocator(void)
567 PFN_PHYS(last_alloc_pfn - first_alloc_pfn)); 621{
622 int i;
623 for (i = 0; i < MAX_NUMNODES; ++i)
624 setup_bootmem_allocator_node(i);
568 625
569#ifdef CONFIG_KEXEC 626#ifdef CONFIG_KEXEC
570 if (crashk_res.start != crashk_res.end) 627 if (crashk_res.start != crashk_res.end)
@@ -595,14 +652,6 @@ static int __init percpu_size(void)
595 return size; 652 return size;
596} 653}
597 654
598static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal)
599{
600 void *kva = __alloc_bootmem(size, PAGE_SIZE, goal);
601 unsigned long pfn = kaddr_to_pfn(kva);
602 BUG_ON(goal && PFN_PHYS(pfn) != goal);
603 return pfn;
604}
605
606static void __init zone_sizes_init(void) 655static void __init zone_sizes_init(void)
607{ 656{
608 unsigned long zones_size[MAX_NR_ZONES] = { 0 }; 657 unsigned long zones_size[MAX_NR_ZONES] = { 0 };
@@ -640,21 +689,22 @@ static void __init zone_sizes_init(void)
640 * though, there'll be no lowmem, so we just alloc_bootmem 689 * though, there'll be no lowmem, so we just alloc_bootmem
641 * the memmap. There will be no percpu memory either. 690 * the memmap. There will be no percpu memory either.
642 */ 691 */
643 if (__pfn_to_highbits(start) == 0) { 692 if (i != 0 && cpu_isset(i, isolnodes)) {
644 /* In low PAs, allocate via bootmem. */ 693 node_memmap_pfn[i] =
694 alloc_bootmem_pfn(0, memmap_size, 0);
695 BUG_ON(node_percpu[i] != 0);
696 } else if (node_has_bootmem(start)) {
645 unsigned long goal = 0; 697 unsigned long goal = 0;
646 node_memmap_pfn[i] = 698 node_memmap_pfn[i] =
647 alloc_bootmem_pfn(memmap_size, goal); 699 alloc_bootmem_pfn(i, memmap_size, 0);
648 if (kdata_huge) 700 if (kdata_huge)
649 goal = PFN_PHYS(lowmem_end) - node_percpu[i]; 701 goal = PFN_PHYS(lowmem_end) - node_percpu[i];
650 if (node_percpu[i]) 702 if (node_percpu[i])
651 node_percpu_pfn[i] = 703 node_percpu_pfn[i] =
652 alloc_bootmem_pfn(node_percpu[i], goal); 704 alloc_bootmem_pfn(i, node_percpu[i],
653 } else if (cpu_isset(i, isolnodes)) { 705 goal);
654 node_memmap_pfn[i] = alloc_bootmem_pfn(memmap_size, 0);
655 BUG_ON(node_percpu[i] != 0);
656 } else { 706 } else {
657 /* In high PAs, just reserve some pages. */ 707 /* In non-bootmem zones, just reserve some pages. */
658 node_memmap_pfn[i] = node_free_pfn[i]; 708 node_memmap_pfn[i] = node_free_pfn[i];
659 node_free_pfn[i] += PFN_UP(memmap_size); 709 node_free_pfn[i] += PFN_UP(memmap_size);
660 if (!kdata_huge) { 710 if (!kdata_huge) {
@@ -678,16 +728,9 @@ static void __init zone_sizes_init(void)
678 zones_size[ZONE_NORMAL] = end - start; 728 zones_size[ZONE_NORMAL] = end - start;
679#endif 729#endif
680 730
681 /* 731 /* Take zone metadata from controller 0 if we're isolnode. */
682 * Everyone shares node 0's bootmem allocator, but 732 if (node_isset(i, isolnodes))
683 * we use alloc_remap(), above, to put the actual 733 NODE_DATA(i)->bdata = &bootmem_node_data[0];
684 * struct page array on the individual controllers,
685 * which is most of the data that we actually care about.
686 * We can't place bootmem allocators on the other
687 * controllers since the bootmem allocator can only
688 * operate on 32-bit physical addresses.
689 */
690 NODE_DATA(i)->bdata = NODE_DATA(0)->bdata;
691 734
692 free_area_init_node(i, zones_size, start, NULL); 735 free_area_init_node(i, zones_size, start, NULL);
693 printk(KERN_DEBUG " Normal zone: %ld per-cpu pages\n", 736 printk(KERN_DEBUG " Normal zone: %ld per-cpu pages\n",
@@ -870,6 +913,22 @@ subsys_initcall(topology_init);
870 913
871#endif /* CONFIG_NUMA */ 914#endif /* CONFIG_NUMA */
872 915
916/*
917 * Initialize hugepage support on this cpu. We do this on all cores
918 * early in boot: before argument parsing for the boot cpu, and after
919 * argument parsing but before the init functions run on the secondaries.
920 * So the values we set up here in the hypervisor may be overridden on
921 * the boot cpu as arguments are parsed.
922 */
923static __cpuinit void init_super_pages(void)
924{
925#ifdef CONFIG_HUGETLB_SUPER_PAGES
926 int i;
927 for (i = 0; i < HUGE_SHIFT_ENTRIES; ++i)
928 hv_set_pte_super_shift(i, huge_shift[i]);
929#endif
930}
931
873/** 932/**
874 * setup_cpu() - Do all necessary per-cpu, tile-specific initialization. 933 * setup_cpu() - Do all necessary per-cpu, tile-specific initialization.
875 * @boot: Is this the boot cpu? 934 * @boot: Is this the boot cpu?
@@ -924,6 +983,8 @@ void __cpuinit setup_cpu(int boot)
924 /* Reset the network state on this cpu. */ 983 /* Reset the network state on this cpu. */
925 reset_network_state(); 984 reset_network_state();
926#endif 985#endif
986
987 init_super_pages();
927} 988}
928 989
929#ifdef CONFIG_BLK_DEV_INITRD 990#ifdef CONFIG_BLK_DEV_INITRD
@@ -1412,13 +1473,13 @@ void __init setup_per_cpu_areas(void)
1412 for (i = 0; i < size; i += PAGE_SIZE, ++pfn, ++pg) { 1473 for (i = 0; i < size; i += PAGE_SIZE, ++pfn, ++pg) {
1413 1474
1414 /* Update the vmalloc mapping and page home. */ 1475 /* Update the vmalloc mapping and page home. */
1415 pte_t *ptep = 1476 unsigned long addr = (unsigned long)ptr + i;
1416 virt_to_pte(NULL, (unsigned long)ptr + i); 1477 pte_t *ptep = virt_to_pte(NULL, addr);
1417 pte_t pte = *ptep; 1478 pte_t pte = *ptep;
1418 BUG_ON(pfn != pte_pfn(pte)); 1479 BUG_ON(pfn != pte_pfn(pte));
1419 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3); 1480 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3);
1420 pte = set_remote_cache_cpu(pte, cpu); 1481 pte = set_remote_cache_cpu(pte, cpu);
1421 set_pte(ptep, pte); 1482 set_pte_at(&init_mm, addr, ptep, pte);
1422 1483
1423 /* Update the lowmem mapping for consistency. */ 1484 /* Update the lowmem mapping for consistency. */
1424 lowmem_va = (unsigned long)pfn_to_kaddr(pfn); 1485 lowmem_va = (unsigned long)pfn_to_kaddr(pfn);
@@ -1431,7 +1492,7 @@ void __init setup_per_cpu_areas(void)
1431 BUG_ON(pte_huge(*ptep)); 1492 BUG_ON(pte_huge(*ptep));
1432 } 1493 }
1433 BUG_ON(pfn != pte_pfn(*ptep)); 1494 BUG_ON(pfn != pte_pfn(*ptep));
1434 set_pte(ptep, pte); 1495 set_pte_at(&init_mm, lowmem_va, ptep, pte);
1435 } 1496 }
1436 } 1497 }
1437 1498
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
index 89529c9f0605..27742e87e255 100644
--- a/arch/tile/kernel/single_step.c
+++ b/arch/tile/kernel/single_step.c
@@ -172,9 +172,6 @@ static tile_bundle_bits rewrite_load_store_unaligned(
172 return (tilepro_bundle_bits) 0; 172 return (tilepro_bundle_bits) 0;
173 } 173 }
174 174
175#ifndef __LITTLE_ENDIAN
176# error We assume little-endian representation with copy_xx_user size 2 here
177#endif
178 /* Handle unaligned load/store */ 175 /* Handle unaligned load/store */
179 if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) { 176 if (mem_op == MEMOP_LOAD || mem_op == MEMOP_LOAD_POSTINCR) {
180 unsigned short val_16; 177 unsigned short val_16;
@@ -195,8 +192,19 @@ static tile_bundle_bits rewrite_load_store_unaligned(
195 state->update = 1; 192 state->update = 1;
196 } 193 }
197 } else { 194 } else {
195 unsigned short val_16;
198 val = (val_reg == TREG_ZERO) ? 0 : regs->regs[val_reg]; 196 val = (val_reg == TREG_ZERO) ? 0 : regs->regs[val_reg];
199 err = copy_to_user(addr, &val, size); 197 switch (size) {
198 case 2:
199 val_16 = val;
200 err = copy_to_user(addr, &val_16, sizeof(val_16));
201 break;
202 case 4:
203 err = copy_to_user(addr, &val, sizeof(val));
204 break;
205 default:
206 BUG();
207 }
200 } 208 }
201 209
202 if (err) { 210 if (err) {
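The u16 temporary matters for the endian cleanup this diff is part of: copying two bytes straight out of a long is only correct on little-endian, which is exactly what the removed #error used to assert. A hedged illustration:

	unsigned long val = 0x1234;
	unsigned short val_16 = val;	/* 0x1234 on either endianness */
	/* copy_to_user(addr, &val, 2) would pick up the high (zero)
	 * bytes of val on big-endian; copying from val_16 is safe. */
	err = copy_to_user(addr, &val_16, sizeof(val_16));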
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index 91da0f721958..cbc73a8b8fe1 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -203,7 +203,7 @@ void __init ipi_init(void)
203 if (hv_get_ipi_pte(tile, KERNEL_PL, &pte) != 0) 203 if (hv_get_ipi_pte(tile, KERNEL_PL, &pte) != 0)
204 panic("Failed to initialize IPI for cpu %d\n", cpu); 204 panic("Failed to initialize IPI for cpu %d\n", cpu);
205 205
206 offset = hv_pte_get_pfn(pte) << PAGE_SHIFT; 206 offset = PFN_PHYS(pte_pfn(pte));
207 ipi_mappings[cpu] = ioremap_prot(offset, PAGE_SIZE, pte); 207 ipi_mappings[cpu] = ioremap_prot(offset, PAGE_SIZE, pte);
208 } 208 }
209#endif 209#endif
diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c
index cb44ba7ccd2d..b08095b402d6 100644
--- a/arch/tile/kernel/sys.c
+++ b/arch/tile/kernel/sys.c
@@ -32,11 +32,17 @@
32#include <asm/syscalls.h> 32#include <asm/syscalls.h>
33#include <asm/pgtable.h> 33#include <asm/pgtable.h>
34#include <asm/homecache.h> 34#include <asm/homecache.h>
35#include <asm/cachectl.h>
35#include <arch/chip.h> 36#include <arch/chip.h>
36 37
37SYSCALL_DEFINE0(flush_cache) 38SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, len,
39 unsigned long, flags)
38{ 40{
39 homecache_evict(cpumask_of(smp_processor_id())); 41 if (flags & DCACHE)
42 homecache_evict(cpumask_of(smp_processor_id()));
43 if (flags & ICACHE)
44 flush_remote(0, HV_FLUSH_EVICT_L1I, mm_cpumask(current->mm),
45 0, 0, 0, NULL, NULL, 0);
40 return 0; 46 return 0;
41} 47}
42 48
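A hedged user-space sketch of the widened syscall (DCACHE and ICACHE come from <asm/cachectl.h>; buf and len are hypothetical), for instance after writing freshly generated code:

	#include <unistd.h>
	#include <sys/syscall.h>
	#include <asm/cachectl.h>

	/* Push dirty data out of this cpu's cache and knock stale
	 * instruction lines out of the icache across the process. */
	syscall(__NR_cacheflush, (unsigned long)buf, len, DCACHE | ICACHE);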
diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c
index 71ae728e9d0b..e25b0a89c18f 100644
--- a/arch/tile/kernel/sysfs.c
+++ b/arch/tile/kernel/sysfs.c
@@ -93,6 +93,10 @@ HV_CONF_ATTR(mezz_part, HV_CONFSTR_MEZZ_PART_NUM)
93HV_CONF_ATTR(mezz_serial, HV_CONFSTR_MEZZ_SERIAL_NUM) 93HV_CONF_ATTR(mezz_serial, HV_CONFSTR_MEZZ_SERIAL_NUM)
94HV_CONF_ATTR(mezz_revision, HV_CONFSTR_MEZZ_REV) 94HV_CONF_ATTR(mezz_revision, HV_CONFSTR_MEZZ_REV)
95HV_CONF_ATTR(mezz_description, HV_CONFSTR_MEZZ_DESC) 95HV_CONF_ATTR(mezz_description, HV_CONFSTR_MEZZ_DESC)
96HV_CONF_ATTR(cpumod_part, HV_CONFSTR_CPUMOD_PART_NUM)
97HV_CONF_ATTR(cpumod_serial, HV_CONFSTR_CPUMOD_SERIAL_NUM)
98HV_CONF_ATTR(cpumod_revision, HV_CONFSTR_CPUMOD_REV)
 99HV_CONF_ATTR(cpumod_description, HV_CONFSTR_CPUMOD_DESC)
96HV_CONF_ATTR(switch_control, HV_CONFSTR_SWITCH_CONTROL) 100HV_CONF_ATTR(switch_control, HV_CONFSTR_SWITCH_CONTROL)
97 101
98static struct attribute *board_attrs[] = { 102static struct attribute *board_attrs[] = {
@@ -104,6 +108,10 @@ static struct attribute *board_attrs[] = {
104 &dev_attr_mezz_serial.attr, 108 &dev_attr_mezz_serial.attr,
105 &dev_attr_mezz_revision.attr, 109 &dev_attr_mezz_revision.attr,
106 &dev_attr_mezz_description.attr, 110 &dev_attr_mezz_description.attr,
111 &dev_attr_cpumod_part.attr,
112 &dev_attr_cpumod_serial.attr,
113 &dev_attr_cpumod_revision.attr,
114 &dev_attr_cpumod_description.attr,
107 &dev_attr_switch_control.attr, 115 &dev_attr_switch_control.attr,
108 NULL 116 NULL
109}; 117};
diff --git a/arch/tile/kernel/tlb.c b/arch/tile/kernel/tlb.c
index a5f241c24cac..3fd54d5bbd4c 100644
--- a/arch/tile/kernel/tlb.c
+++ b/arch/tile/kernel/tlb.c
@@ -15,6 +15,7 @@
15 15
16#include <linux/cpumask.h> 16#include <linux/cpumask.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/hugetlb.h>
18#include <asm/tlbflush.h> 19#include <asm/tlbflush.h>
19#include <asm/homecache.h> 20#include <asm/homecache.h>
20#include <hv/hypervisor.h> 21#include <hv/hypervisor.h>
@@ -49,25 +50,25 @@ void flush_tlb_current_task(void)
49 flush_tlb_mm(current->mm); 50 flush_tlb_mm(current->mm);
50} 51}
51 52
52void flush_tlb_page_mm(const struct vm_area_struct *vma, struct mm_struct *mm, 53void flush_tlb_page_mm(struct vm_area_struct *vma, struct mm_struct *mm,
53 unsigned long va) 54 unsigned long va)
54{ 55{
55 unsigned long size = hv_page_size(vma); 56 unsigned long size = vma_kernel_pagesize(vma);
56 int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; 57 int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0;
57 flush_remote(0, cache, mm_cpumask(mm), 58 flush_remote(0, cache, mm_cpumask(mm),
58 va, size, size, mm_cpumask(mm), NULL, 0); 59 va, size, size, mm_cpumask(mm), NULL, 0);
59} 60}
60 61
61void flush_tlb_page(const struct vm_area_struct *vma, unsigned long va) 62void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
62{ 63{
63 flush_tlb_page_mm(vma, vma->vm_mm, va); 64 flush_tlb_page_mm(vma, vma->vm_mm, va);
64} 65}
65EXPORT_SYMBOL(flush_tlb_page); 66EXPORT_SYMBOL(flush_tlb_page);
66 67
67void flush_tlb_range(const struct vm_area_struct *vma, 68void flush_tlb_range(struct vm_area_struct *vma,
68 unsigned long start, unsigned long end) 69 unsigned long start, unsigned long end)
69{ 70{
70 unsigned long size = hv_page_size(vma); 71 unsigned long size = vma_kernel_pagesize(vma);
71 struct mm_struct *mm = vma->vm_mm; 72 struct mm_struct *mm = vma->vm_mm;
72 int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; 73 int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0;
73 flush_remote(0, cache, mm_cpumask(mm), start, end - start, size, 74 flush_remote(0, cache, mm_cpumask(mm), start, end - start, size,
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index 73cff814ac57..5b19a23c8908 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -195,6 +195,25 @@ static int special_ill(bundle_bits bundle, int *sigp, int *codep)
195 return 1; 195 return 1;
196} 196}
197 197
198static const char *const int_name[] = {
199 [INT_MEM_ERROR] = "Memory error",
200 [INT_ILL] = "Illegal instruction",
201 [INT_GPV] = "General protection violation",
202 [INT_UDN_ACCESS] = "UDN access",
203 [INT_IDN_ACCESS] = "IDN access",
204#if CHIP_HAS_SN()
205 [INT_SN_ACCESS] = "SN access",
206#endif
207 [INT_SWINT_3] = "Software interrupt 3",
208 [INT_SWINT_2] = "Software interrupt 2",
209 [INT_SWINT_0] = "Software interrupt 0",
210 [INT_UNALIGN_DATA] = "Unaligned data",
211 [INT_DOUBLE_FAULT] = "Double fault",
212#ifdef __tilegx__
213 [INT_ILL_TRANS] = "Illegal virtual address",
214#endif
215};
216
198void __kprobes do_trap(struct pt_regs *regs, int fault_num, 217void __kprobes do_trap(struct pt_regs *regs, int fault_num,
199 unsigned long reason) 218 unsigned long reason)
200{ 219{
@@ -211,10 +230,17 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
211 * current process and hope for the best. 230 * current process and hope for the best.
212 */ 231 */
213 if (!user_mode(regs)) { 232 if (!user_mode(regs)) {
233 const char *name;
214 if (fixup_exception(regs)) /* only UNALIGN_DATA in practice */ 234 if (fixup_exception(regs)) /* only UNALIGN_DATA in practice */
215 return; 235 return;
216 pr_alert("Kernel took bad trap %d at PC %#lx\n", 236 if (fault_num >= 0 &&
217 fault_num, regs->pc); 237 fault_num < sizeof(int_name)/sizeof(int_name[0]) &&
238 int_name[fault_num] != NULL)
239 name = int_name[fault_num];
240 else
241 name = "Unknown interrupt";
242 pr_alert("Kernel took bad trap %d (%s) at PC %#lx\n",
243 fault_num, name, regs->pc);
218 if (fault_num == INT_GPV) 244 if (fault_num == INT_GPV)
219 pr_alert("GPV_REASON is %#lx\n", reason); 245 pr_alert("GPV_REASON is %#lx\n", reason);
220 show_regs(regs); 246 show_regs(regs);
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
index 771b251b409d..f5cada70c3c8 100644
--- a/arch/tile/lib/atomic_32.c
+++ b/arch/tile/lib/atomic_32.c
@@ -18,7 +18,6 @@
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/mm.h> 19#include <linux/mm.h>
20#include <linux/atomic.h> 20#include <linux/atomic.h>
21#include <asm/futex.h>
22#include <arch/chip.h> 21#include <arch/chip.h>
23 22
24/* See <asm/atomic_32.h> */ 23/* See <asm/atomic_32.h> */
@@ -50,7 +49,7 @@ int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss;
50 49
51#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ 50#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
52 51
53static inline int *__atomic_hashed_lock(volatile void *v) 52int *__atomic_hashed_lock(volatile void *v)
54{ 53{
55 /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */ 54 /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */
56#if ATOMIC_LOCKS_FOUND_VIA_TABLE() 55#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
@@ -191,47 +190,6 @@ u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
191EXPORT_SYMBOL(_atomic64_cmpxchg); 190EXPORT_SYMBOL(_atomic64_cmpxchg);
192 191
193 192
194static inline int *__futex_setup(int __user *v)
195{
196 /*
197 * Issue a prefetch to the counter to bring it into cache.
198 * As for __atomic_setup, but we can't do a read into the L1
199 * since it might fault; instead we do a prefetch into the L2.
200 */
201 __insn_prefetch(v);
202 return __atomic_hashed_lock((int __force *)v);
203}
204
205struct __get_user futex_set(u32 __user *v, int i)
206{
207 return __atomic_xchg((int __force *)v, __futex_setup(v), i);
208}
209
210struct __get_user futex_add(u32 __user *v, int n)
211{
212 return __atomic_xchg_add((int __force *)v, __futex_setup(v), n);
213}
214
215struct __get_user futex_or(u32 __user *v, int n)
216{
217 return __atomic_or((int __force *)v, __futex_setup(v), n);
218}
219
220struct __get_user futex_andn(u32 __user *v, int n)
221{
222 return __atomic_andn((int __force *)v, __futex_setup(v), n);
223}
224
225struct __get_user futex_xor(u32 __user *v, int n)
226{
227 return __atomic_xor((int __force *)v, __futex_setup(v), n);
228}
229
230struct __get_user futex_cmpxchg(u32 __user *v, int o, int n)
231{
232 return __atomic_cmpxchg((int __force *)v, __futex_setup(v), o, n);
233}
234
235/* 193/*
236 * If any of the atomic or futex routines hit a bad address (not in 194 * If any of the atomic or futex routines hit a bad address (not in
237 * the page tables at kernel PL) this routine is called. The futex 195 * the page tables at kernel PL) this routine is called. The futex
@@ -323,7 +281,4 @@ void __init __init_atomic_per_cpu(void)
323 BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE); 281 BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE);
324 282
325#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ 283#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
326
327 /* The futex code makes this assumption, so we validate it here. */
328 BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int));
329} 284}
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
index 2a81d32de0da..dd5f0a33fdaf 100644
--- a/arch/tile/lib/exports.c
+++ b/arch/tile/lib/exports.c
@@ -18,14 +18,6 @@
18 18
19/* arch/tile/lib/usercopy.S */ 19/* arch/tile/lib/usercopy.S */
20#include <linux/uaccess.h> 20#include <linux/uaccess.h>
21EXPORT_SYMBOL(__get_user_1);
22EXPORT_SYMBOL(__get_user_2);
23EXPORT_SYMBOL(__get_user_4);
24EXPORT_SYMBOL(__get_user_8);
25EXPORT_SYMBOL(__put_user_1);
26EXPORT_SYMBOL(__put_user_2);
27EXPORT_SYMBOL(__put_user_4);
28EXPORT_SYMBOL(__put_user_8);
29EXPORT_SYMBOL(strnlen_user_asm); 21EXPORT_SYMBOL(strnlen_user_asm);
30EXPORT_SYMBOL(strncpy_from_user_asm); 22EXPORT_SYMBOL(strncpy_from_user_asm);
31EXPORT_SYMBOL(clear_user_asm); 23EXPORT_SYMBOL(clear_user_asm);
diff --git a/arch/tile/lib/memchr_64.c b/arch/tile/lib/memchr_64.c
index 84fdc8d8e735..6f867dbf7c56 100644
--- a/arch/tile/lib/memchr_64.c
+++ b/arch/tile/lib/memchr_64.c
@@ -15,6 +15,7 @@
15#include <linux/types.h> 15#include <linux/types.h>
16#include <linux/string.h> 16#include <linux/string.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include "string-endian.h"
18 19
19void *memchr(const void *s, int c, size_t n) 20void *memchr(const void *s, int c, size_t n)
20{ 21{
@@ -39,11 +40,8 @@ void *memchr(const void *s, int c, size_t n)
39 40
40 /* Read the first word, but munge it so that bytes before the array 41 /* Read the first word, but munge it so that bytes before the array
41 * will not match goal. 42 * will not match goal.
42 *
43 * Note that this shift count expression works because we know
44 * shift counts are taken mod 64.
45 */ 43 */
46 before_mask = (1ULL << (s_int << 3)) - 1; 44 before_mask = MASK(s_int);
47 v = (*p | before_mask) ^ (goal & before_mask); 45 v = (*p | before_mask) ^ (goal & before_mask);
48 46
49 /* Compute the address of the last byte. */ 47 /* Compute the address of the last byte. */
@@ -65,7 +63,7 @@ void *memchr(const void *s, int c, size_t n)
65 /* We found a match, but it might be in a byte past the end 63 /* We found a match, but it might be in a byte past the end
66 * of the array. 64 * of the array.
67 */ 65 */
68 ret = ((char *)p) + (__insn_ctz(bits) >> 3); 66 ret = ((char *)p) + (CFZ(bits) >> 3);
69 return (ret <= last_byte_ptr) ? ret : NULL; 67 return (ret <= last_byte_ptr) ? ret : NULL;
70} 68}
71EXPORT_SYMBOL(memchr); 69EXPORT_SYMBOL(memchr);
diff --git a/arch/tile/lib/memcpy_64.c b/arch/tile/lib/memcpy_64.c
index 3fab9a6a2bbe..c79b8e7c6828 100644
--- a/arch/tile/lib/memcpy_64.c
+++ b/arch/tile/lib/memcpy_64.c
@@ -15,7 +15,6 @@
15#include <linux/types.h> 15#include <linux/types.h>
16#include <linux/string.h> 16#include <linux/string.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#define __memcpy memcpy
19/* EXPORT_SYMBOL() is in arch/tile/lib/exports.c since this should be asm. */ 18/* EXPORT_SYMBOL() is in arch/tile/lib/exports.c since this should be asm. */
20 19
21/* Must be 8 bytes in size. */ 20/* Must be 8 bytes in size. */
@@ -188,6 +187,7 @@ int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
188 187
189 /* n != 0 if we get here. Write out any trailing bytes. */ 188 /* n != 0 if we get here. Write out any trailing bytes. */
190 dst1 = (char *)dst8; 189 dst1 = (char *)dst8;
190#ifndef __BIG_ENDIAN__
191 if (n & 4) { 191 if (n & 4) {
192 ST4((uint32_t *)dst1, final); 192 ST4((uint32_t *)dst1, final);
193 dst1 += 4; 193 dst1 += 4;
@@ -202,11 +202,30 @@ int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
202 } 202 }
203 if (n) 203 if (n)
204 ST1((uint8_t *)dst1, final); 204 ST1((uint8_t *)dst1, final);
205#else
206 if (n & 4) {
207 ST4((uint32_t *)dst1, final >> 32);
208 dst1 += 4;
209 }
210 else
211 {
212 final >>= 32;
213 }
214 if (n & 2) {
215 ST2((uint16_t *)dst1, final >> 16);
216 dst1 += 2;
217 }
218 else
219 {
220 final >>= 16;
221 }
222 if (n & 1)
223 ST1((uint8_t *)dst1, final >> 8);
224#endif
205 225
206 return RETVAL; 226 return RETVAL;
207} 227}
208 228
209
210#ifdef USERCOPY_FUNC 229#ifdef USERCOPY_FUNC
211#undef ST1 230#undef ST1
212#undef ST2 231#undef ST2
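A worked pass through the big-endian tail path above, for n == 3 trailing bytes, where B0 is the earliest remaining byte in memory and sits in the most significant byte of final:

	/* n & 4 clear: final >>= 32, so B0..B3 land in the low 32 bits.
	 * n & 2 set:   ST2 stores bits 16..31, i.e. B0,B1; dst1 += 2.
	 * n & 1 set:   ST1 stores bits  8..15, i.e. B2.               */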
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c
index b2fe15e01075..3bc4b4e40d93 100644
--- a/arch/tile/lib/memcpy_tile64.c
+++ b/arch/tile/lib/memcpy_tile64.c
@@ -160,7 +160,7 @@ retry_source:
160 break; 160 break;
161 if (get_remote_cache_cpu(src_pte) == smp_processor_id()) 161 if (get_remote_cache_cpu(src_pte) == smp_processor_id())
162 break; 162 break;
163 src_page = pfn_to_page(hv_pte_get_pfn(src_pte)); 163 src_page = pfn_to_page(pte_pfn(src_pte));
164 get_page(src_page); 164 get_page(src_page);
165 if (pte_val(src_pte) != pte_val(*src_ptep)) { 165 if (pte_val(src_pte) != pte_val(*src_ptep)) {
166 put_page(src_page); 166 put_page(src_page);
@@ -168,7 +168,7 @@ retry_source:
168 } 168 }
169 if (pte_huge(src_pte)) { 169 if (pte_huge(src_pte)) {
170 /* Adjust the PTE to correspond to a small page */ 170 /* Adjust the PTE to correspond to a small page */
171 int pfn = hv_pte_get_pfn(src_pte); 171 int pfn = pte_pfn(src_pte);
172 pfn += (((unsigned long)source & (HPAGE_SIZE-1)) 172 pfn += (((unsigned long)source & (HPAGE_SIZE-1))
173 >> PAGE_SHIFT); 173 >> PAGE_SHIFT);
174 src_pte = pfn_pte(pfn, src_pte); 174 src_pte = pfn_pte(pfn, src_pte);
@@ -188,7 +188,7 @@ retry_dest:
188 put_page(src_page); 188 put_page(src_page);
189 break; 189 break;
190 } 190 }
191 dst_page = pfn_to_page(hv_pte_get_pfn(dst_pte)); 191 dst_page = pfn_to_page(pte_pfn(dst_pte));
192 if (dst_page == src_page) { 192 if (dst_page == src_page) {
193 /* 193 /*
194 * Source and dest are on the same page; this 194 * Source and dest are on the same page; this
@@ -206,7 +206,7 @@ retry_dest:
206 } 206 }
207 if (pte_huge(dst_pte)) { 207 if (pte_huge(dst_pte)) {
208 /* Adjust the PTE to correspond to a small page */ 208 /* Adjust the PTE to correspond to a small page */
209 int pfn = hv_pte_get_pfn(dst_pte); 209 int pfn = pte_pfn(dst_pte);
210 pfn += (((unsigned long)dest & (HPAGE_SIZE-1)) 210 pfn += (((unsigned long)dest & (HPAGE_SIZE-1))
211 >> PAGE_SHIFT); 211 >> PAGE_SHIFT);
212 dst_pte = pfn_pte(pfn, dst_pte); 212 dst_pte = pfn_pte(pfn, dst_pte);
diff --git a/arch/tile/lib/strchr_64.c b/arch/tile/lib/strchr_64.c
index 617a9273aaa8..f39f9dc422b0 100644
--- a/arch/tile/lib/strchr_64.c
+++ b/arch/tile/lib/strchr_64.c
@@ -15,8 +15,7 @@
15#include <linux/types.h> 15#include <linux/types.h>
16#include <linux/string.h> 16#include <linux/string.h>
17#include <linux/module.h> 17#include <linux/module.h>
18 18#include "string-endian.h"
19#undef strchr
20 19
21char *strchr(const char *s, int c) 20char *strchr(const char *s, int c)
22{ 21{
@@ -33,13 +32,9 @@ char *strchr(const char *s, int c)
33 * match neither zero nor goal (we make sure the high bit of each 32 * match neither zero nor goal (we make sure the high bit of each
34 * byte is 1, and the low 7 bits are all the opposite of the goal 33 * byte is 1, and the low 7 bits are all the opposite of the goal
35 * byte). 34 * byte).
36 *
37 * Note that this shift count expression works because we know shift
38 * counts are taken mod 64.
39 */ 35 */
40 const uint64_t before_mask = (1ULL << (s_int << 3)) - 1; 36 const uint64_t before_mask = MASK(s_int);
41 uint64_t v = (*p | before_mask) ^ 37 uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui(before_mask, 1));
42 (goal & __insn_v1shrsi(before_mask, 1));
43 38
44 uint64_t zero_matches, goal_matches; 39 uint64_t zero_matches, goal_matches;
45 while (1) { 40 while (1) {
@@ -55,8 +50,8 @@ char *strchr(const char *s, int c)
55 v = *++p; 50 v = *++p;
56 } 51 }
57 52
58 z = __insn_ctz(zero_matches); 53 z = CFZ(zero_matches);
59 g = __insn_ctz(goal_matches); 54 g = CFZ(goal_matches);
60 55
61 /* If we found c before '\0' we got a match. Note that if c == '\0' 56 /* If we found c before '\0' we got a match. Note that if c == '\0'
62 * then g == z, and we correctly return the address of the '\0' 57 * then g == z, and we correctly return the address of the '\0'
diff --git a/arch/tile/lib/string-endian.h b/arch/tile/lib/string-endian.h
new file mode 100644
index 000000000000..c0eed7ce69c3
--- /dev/null
+++ b/arch/tile/lib/string-endian.h
@@ -0,0 +1,33 @@
1/*
2 * Copyright 2011 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 *
14 * Provide a mask based on the pointer alignment that
15 * sets up non-zero bytes before the beginning of the string.
16 * The MASK expression works because shift counts are taken mod 64.
17 * Also, specify how to count "first" and "last" bits
18 * when the bits have been read as a word.
19 */
20
21#include <asm/byteorder.h>
22
23#ifdef __LITTLE_ENDIAN
24#define MASK(x) (__insn_shl(1ULL, (x << 3)) - 1)
25#define NULMASK(x) ((2ULL << x) - 1)
26#define CFZ(x) __insn_ctz(x)
27#define REVCZ(x) __insn_clz(x)
28#else
29#define MASK(x) (__insn_shl(-2LL, ((-x << 3) - 1)))
30#define NULMASK(x) (-2LL << (63 - x))
31#define CFZ(x) __insn_clz(x)
32#define REVCZ(x) __insn_ctz(x)
33#endif
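A worked instance of the little-endian MASK() (the big-endian form builds the mirror-image mask over the high-order bytes):

	/* For a string starting at s_int with s_int % 8 == 3: shift
	 * counts are taken mod 64, so (x << 3) acts as 24 and
	 *   MASK(s_int) == (1ULL << 24) - 1 == 0x0000000000ffffff,
	 * i.e. ones over exactly the three bytes before the string. */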
diff --git a/arch/tile/lib/strlen_64.c b/arch/tile/lib/strlen_64.c
index 1c92d46202a8..9583fc3361fa 100644
--- a/arch/tile/lib/strlen_64.c
+++ b/arch/tile/lib/strlen_64.c
@@ -15,8 +15,7 @@
15#include <linux/types.h> 15#include <linux/types.h>
16#include <linux/string.h> 16#include <linux/string.h>
17#include <linux/module.h> 17#include <linux/module.h>
18 18#include "string-endian.h"
19#undef strlen
20 19
21size_t strlen(const char *s) 20size_t strlen(const char *s)
22{ 21{
@@ -24,15 +23,13 @@ size_t strlen(const char *s)
24 const uintptr_t s_int = (uintptr_t) s; 23 const uintptr_t s_int = (uintptr_t) s;
25 const uint64_t *p = (const uint64_t *)(s_int & -8); 24 const uint64_t *p = (const uint64_t *)(s_int & -8);
26 25
27 /* Read the first word, but force bytes before the string to be nonzero. 26 /* Read and MASK the first word. */
28 * This expression works because we know shift counts are taken mod 64. 27 uint64_t v = *p | MASK(s_int);
29 */
30 uint64_t v = *p | ((1ULL << (s_int << 3)) - 1);
31 28
32 uint64_t bits; 29 uint64_t bits;
33 while ((bits = __insn_v1cmpeqi(v, 0)) == 0) 30 while ((bits = __insn_v1cmpeqi(v, 0)) == 0)
34 v = *++p; 31 v = *++p;
35 32
36 return ((const char *)p) + (__insn_ctz(bits) >> 3) - s; 33 return ((const char *)p) + (CFZ(bits) >> 3) - s;
37} 34}
38EXPORT_SYMBOL(strlen); 35EXPORT_SYMBOL(strlen);
diff --git a/arch/tile/lib/usercopy_32.S b/arch/tile/lib/usercopy_32.S
index 979f76d83746..b62d002af009 100644
--- a/arch/tile/lib/usercopy_32.S
+++ b/arch/tile/lib/usercopy_32.S
@@ -19,82 +19,6 @@
19 19
20/* Access user memory, but use MMU to avoid propagating kernel exceptions. */ 20/* Access user memory, but use MMU to avoid propagating kernel exceptions. */
21 21
22 .pushsection .fixup,"ax"
23
24get_user_fault:
25 { move r0, zero; move r1, zero }
26 { movei r2, -EFAULT; jrp lr }
27 ENDPROC(get_user_fault)
28
29put_user_fault:
30 { movei r0, -EFAULT; jrp lr }
31 ENDPROC(put_user_fault)
32
33 .popsection
34
35/*
36 * __get_user_N functions take a pointer in r0, and return 0 in r2
37 * on success, with the value in r0; or else -EFAULT in r2.
38 */
39#define __get_user_N(bytes, LOAD) \
40 STD_ENTRY(__get_user_##bytes); \
411: { LOAD r0, r0; move r1, zero; move r2, zero }; \
42 jrp lr; \
43 STD_ENDPROC(__get_user_##bytes); \
44 .pushsection __ex_table,"a"; \
45 .word 1b, get_user_fault; \
46 .popsection
47
48__get_user_N(1, lb_u)
49__get_user_N(2, lh_u)
50__get_user_N(4, lw)
51
52/*
53 * __get_user_8 takes a pointer in r0, and returns 0 in r2
54 * on success, with the value in r0/r1; or else -EFAULT in r2.
55 */
56 STD_ENTRY(__get_user_8);
571: { lw r0, r0; addi r1, r0, 4 };
582: { lw r1, r1; move r2, zero };
59 jrp lr;
60 STD_ENDPROC(__get_user_8);
61 .pushsection __ex_table,"a";
62 .word 1b, get_user_fault;
63 .word 2b, get_user_fault;
64 .popsection
65
66/*
67 * __put_user_N functions take a value in r0 and a pointer in r1,
68 * and return 0 in r0 on success or -EFAULT on failure.
69 */
70#define __put_user_N(bytes, STORE) \
71 STD_ENTRY(__put_user_##bytes); \
721: { STORE r1, r0; move r0, zero }; \
73 jrp lr; \
74 STD_ENDPROC(__put_user_##bytes); \
75 .pushsection __ex_table,"a"; \
76 .word 1b, put_user_fault; \
77 .popsection
78
79__put_user_N(1, sb)
80__put_user_N(2, sh)
81__put_user_N(4, sw)
82
83/*
84 * __put_user_8 takes a value in r0/r1 and a pointer in r2,
85 * and returns 0 in r0 on success or -EFAULT on failure.
86 */
87STD_ENTRY(__put_user_8)
881: { sw r2, r0; addi r2, r2, 4 }
892: { sw r2, r1; move r0, zero }
90 jrp lr
91 STD_ENDPROC(__put_user_8)
92 .pushsection __ex_table,"a"
93 .word 1b, put_user_fault
94 .word 2b, put_user_fault
95 .popsection
96
97
98/* 22/*
99 * strnlen_user_asm takes the pointer in r0, and the length bound in r1. 23 * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
100 * It returns the length, including the terminating NUL, or zero on exception. 24 * It returns the length, including the terminating NUL, or zero on exception.
diff --git a/arch/tile/lib/usercopy_64.S b/arch/tile/lib/usercopy_64.S
index 2ff44f87b78e..adb2dbbc70cd 100644
--- a/arch/tile/lib/usercopy_64.S
+++ b/arch/tile/lib/usercopy_64.S
@@ -19,55 +19,6 @@
19 19
20/* Access user memory, but use MMU to avoid propagating kernel exceptions. */ 20/* Access user memory, but use MMU to avoid propagating kernel exceptions. */
21 21
22 .pushsection .fixup,"ax"
23
24get_user_fault:
25 { movei r1, -EFAULT; move r0, zero }
26 jrp lr
27 ENDPROC(get_user_fault)
28
29put_user_fault:
30 { movei r0, -EFAULT; jrp lr }
31 ENDPROC(put_user_fault)
32
33 .popsection
34
35/*
36 * __get_user_N functions take a pointer in r0, and return 0 in r1
37 * on success, with the value in r0; or else -EFAULT in r1.
38 */
39#define __get_user_N(bytes, LOAD) \
40 STD_ENTRY(__get_user_##bytes); \
411: { LOAD r0, r0; move r1, zero }; \
42 jrp lr; \
43 STD_ENDPROC(__get_user_##bytes); \
44 .pushsection __ex_table,"a"; \
45 .quad 1b, get_user_fault; \
46 .popsection
47
48__get_user_N(1, ld1u)
49__get_user_N(2, ld2u)
50__get_user_N(4, ld4u)
51__get_user_N(8, ld)
52
53/*
54 * __put_user_N functions take a value in r0 and a pointer in r1,
55 * and return 0 in r0 on success or -EFAULT on failure.
56 */
57#define __put_user_N(bytes, STORE) \
58 STD_ENTRY(__put_user_##bytes); \
591: { STORE r1, r0; move r0, zero }; \
60 jrp lr; \
61 STD_ENDPROC(__put_user_##bytes); \
62 .pushsection __ex_table,"a"; \
63 .quad 1b, put_user_fault; \
64 .popsection
65
66__put_user_N(1, st1)
67__put_user_N(2, st2)
68__put_user_N(4, st4)
69__put_user_N(8, st)
70
71/* 22/*
72 * strnlen_user_asm takes the pointer in r0, and the length bound in r1. 23 * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
73 * It returns the length, including the terminating NUL, or zero on exception. 24 * It returns the length, including the terminating NUL, or zero on exception.
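
Aside on the stubs deleted from both usercopy files: each user-memory access records a (faulting PC, fixup PC) pair in __ex_table, and the kernel fault path redirects a faulting access into the -EFAULT stub instead of oopsing. A condensed sketch of that consumer side; the entry layout mirrors the .word/.quad pairs above, while the pt_regs field name is illustrative:

struct exception_table_entry {
	unsigned long insn;	/* PC of the access that may fault (label 1:) */
	unsigned long fixup;	/* PC to resume at, e.g. get_user_fault */
};

/* Called from the page-fault path for faults taken at kernel PL. */
static int fixup_exception_sketch(struct pt_regs *regs)
{
	const struct exception_table_entry *e;

	e = search_exception_tables(regs->pc);
	if (!e)
		return 0;	/* genuine kernel fault: go on to oops */
	regs->pc = e->fixup;	/* resume in the stub returning -EFAULT */
	return 1;
}
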
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 22e58f51ed23..84ce7abbf5af 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -187,7 +187,7 @@ static pgd_t *get_current_pgd(void)
187 HV_Context ctx = hv_inquire_context(); 187 HV_Context ctx = hv_inquire_context();
188 unsigned long pgd_pfn = ctx.page_table >> PAGE_SHIFT; 188 unsigned long pgd_pfn = ctx.page_table >> PAGE_SHIFT;
189 struct page *pgd_page = pfn_to_page(pgd_pfn); 189 struct page *pgd_page = pfn_to_page(pgd_pfn);
190 BUG_ON(PageHighMem(pgd_page)); /* oops, HIGHPTE? */ 190 BUG_ON(PageHighMem(pgd_page));
191 return (pgd_t *) __va(ctx.page_table); 191 return (pgd_t *) __va(ctx.page_table);
192} 192}
193 193
@@ -273,11 +273,15 @@ static int handle_page_fault(struct pt_regs *regs,
273 int si_code; 273 int si_code;
274 int is_kernel_mode; 274 int is_kernel_mode;
275 pgd_t *pgd; 275 pgd_t *pgd;
276 unsigned int flags;
276 277
277 /* on TILE, protection faults are always writes */ 278 /* on TILE, protection faults are always writes */
278 if (!is_page_fault) 279 if (!is_page_fault)
279 write = 1; 280 write = 1;
280 281
282 flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
283 (write ? FAULT_FLAG_WRITE : 0));
284
281 is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL); 285 is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL);
282 286
283 tsk = validate_current(); 287 tsk = validate_current();
@@ -382,6 +386,8 @@ static int handle_page_fault(struct pt_regs *regs,
382 vma = NULL; /* happy compiler */ 386 vma = NULL; /* happy compiler */
383 goto bad_area_nosemaphore; 387 goto bad_area_nosemaphore;
384 } 388 }
389
390retry:
385 down_read(&mm->mmap_sem); 391 down_read(&mm->mmap_sem);
386 } 392 }
387 393
@@ -429,7 +435,11 @@ good_area:
429 * make sure we exit gracefully rather than endlessly redo 435 * make sure we exit gracefully rather than endlessly redo
430 * the fault. 436 * the fault.
431 */ 437 */
432 fault = handle_mm_fault(mm, vma, address, write); 438 fault = handle_mm_fault(mm, vma, address, flags);
439
440 if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
441 return 0;
442
433 if (unlikely(fault & VM_FAULT_ERROR)) { 443 if (unlikely(fault & VM_FAULT_ERROR)) {
434 if (fault & VM_FAULT_OOM) 444 if (fault & VM_FAULT_OOM)
435 goto out_of_memory; 445 goto out_of_memory;
@@ -437,10 +447,22 @@ good_area:
437 goto do_sigbus; 447 goto do_sigbus;
438 BUG(); 448 BUG();
439 } 449 }
440 if (fault & VM_FAULT_MAJOR) 450 if (flags & FAULT_FLAG_ALLOW_RETRY) {
441 tsk->maj_flt++; 451 if (fault & VM_FAULT_MAJOR)
442 else 452 tsk->maj_flt++;
443 tsk->min_flt++; 453 else
454 tsk->min_flt++;
455 if (fault & VM_FAULT_RETRY) {
456 flags &= ~FAULT_FLAG_ALLOW_RETRY;
457
458 /*
459 * No need to up_read(&mm->mmap_sem) as we would
460 * have already released it in __lock_page_or_retry
461 * in mm/filemap.c.
462 */
463 goto retry;
464 }
465 }
444 466
445#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() 467#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
446 /* 468 /*
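
Aside: the fault.c hunks above adopt the generic mmap_sem-dropping retry protocol. Condensed to its control flow (not verbatim tile code):

	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
			     (write ? FAULT_FLAG_WRITE : 0);
retry:
	down_read(&mm->mmap_sem);
	/* ... look up and validate the vma ... */
	fault = handle_mm_fault(mm, vma, address, flags);
	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
		return 0;		/* mmap_sem was already dropped */
	if ((flags & FAULT_FLAG_ALLOW_RETRY) && (fault & VM_FAULT_RETRY)) {
		flags &= ~FAULT_FLAG_ALLOW_RETRY;	/* retry at most once */
		goto retry;	/* __lock_page_or_retry released mmap_sem */
	}

Clearing FAULT_FLAG_ALLOW_RETRY before looping guarantees the second pass either completes or fails outright rather than spinning.
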
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index 499f73770b05..dbcbdf7b8aa8 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -30,6 +30,7 @@
30#include <linux/cache.h> 30#include <linux/cache.h>
31#include <linux/smp.h> 31#include <linux/smp.h>
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/hugetlb.h>
33 34
34#include <asm/page.h> 35#include <asm/page.h>
35#include <asm/sections.h> 36#include <asm/sections.h>
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index 42cfcba4e1ef..812e2d037972 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -27,85 +27,161 @@
27#include <linux/mman.h> 27#include <linux/mman.h>
28#include <asm/tlb.h> 28#include <asm/tlb.h>
29#include <asm/tlbflush.h> 29#include <asm/tlbflush.h>
30#include <asm/setup.h>
31
32#ifdef CONFIG_HUGETLB_SUPER_PAGES
33
34/*
35 * Provide an additional huge page size (in addition to the regular default
36 * huge page size) if no "hugepagesz" arguments are specified.
37 * Note that it must be smaller than the default huge page size so
38 * that pages of this size can be allocated on demand from the buddy allocator.
39 * You can change this to 64K (on a 16K build), 256K, 1M, or 4M,
40 * or not define it at all.
41 */
42#define ADDITIONAL_HUGE_SIZE (1024 * 1024UL)
43
44/* "Extra" page-size multipliers, one per level of the page table. */
45int huge_shift[HUGE_SHIFT_ENTRIES] = {
46#ifdef ADDITIONAL_HUGE_SIZE
47#define ADDITIONAL_HUGE_SHIFT __builtin_ctzl(ADDITIONAL_HUGE_SIZE / PAGE_SIZE)
48 [HUGE_SHIFT_PAGE] = ADDITIONAL_HUGE_SHIFT
49#endif
50};
51
52/*
53 * This routine is a hybrid of pte_alloc_map() and pte_alloc_kernel().
54 * It assumes that L2 PTEs are never in HIGHMEM (we don't support that).
55 * It locks the user pagetable, and bumps up the mm->nr_ptes field,
56 * but otherwise allocates the page table using the kernel versions.
57 */
58static pte_t *pte_alloc_hugetlb(struct mm_struct *mm, pmd_t *pmd,
59 unsigned long address)
60{
61 pte_t *new;
62
63 if (pmd_none(*pmd)) {
64 new = pte_alloc_one_kernel(mm, address);
65 if (!new)
66 return NULL;
67
68 smp_wmb(); /* See comment in __pte_alloc */
69
70 spin_lock(&mm->page_table_lock);
71 if (likely(pmd_none(*pmd))) { /* Has another populated it? */
72 mm->nr_ptes++;
73 pmd_populate_kernel(mm, pmd, new);
74 new = NULL;
75 } else
76 VM_BUG_ON(pmd_trans_splitting(*pmd));
77 spin_unlock(&mm->page_table_lock);
78 if (new)
79 pte_free_kernel(mm, new);
80 }
81
82 return pte_offset_kernel(pmd, address);
83}
84#endif
30 85
31pte_t *huge_pte_alloc(struct mm_struct *mm, 86pte_t *huge_pte_alloc(struct mm_struct *mm,
32 unsigned long addr, unsigned long sz) 87 unsigned long addr, unsigned long sz)
33{ 88{
34 pgd_t *pgd; 89 pgd_t *pgd;
35 pud_t *pud; 90 pud_t *pud;
36 pte_t *pte = NULL;
37 91
38 /* We do not yet support multiple huge page sizes. */ 92 addr &= -sz; /* Mask off any low bits in the address. */
39 BUG_ON(sz != PMD_SIZE);
40 93
41 pgd = pgd_offset(mm, addr); 94 pgd = pgd_offset(mm, addr);
42 pud = pud_alloc(mm, pgd, addr); 95 pud = pud_alloc(mm, pgd, addr);
43 if (pud)
44 pte = (pte_t *) pmd_alloc(mm, pud, addr);
45 BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
46 96
47 return pte; 97#ifdef CONFIG_HUGETLB_SUPER_PAGES
98 if (sz >= PGDIR_SIZE) {
99 BUG_ON(sz != PGDIR_SIZE &&
100 sz != PGDIR_SIZE << huge_shift[HUGE_SHIFT_PGDIR]);
101 return (pte_t *)pud;
102 } else {
103 pmd_t *pmd = pmd_alloc(mm, pud, addr);
104 if (sz >= PMD_SIZE) {
105 BUG_ON(sz != PMD_SIZE &&
106 sz != (PMD_SIZE << huge_shift[HUGE_SHIFT_PMD]));
107 return (pte_t *)pmd;
108 }
109 else {
110 if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE])
111 panic("Unexpected page size %#lx\n", sz);
112 return pte_alloc_hugetlb(mm, pmd, addr);
113 }
114 }
115#else
116 BUG_ON(sz != PMD_SIZE);
117 return (pte_t *) pmd_alloc(mm, pud, addr);
118#endif
48} 119}
49 120
50pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) 121static pte_t *get_pte(pte_t *base, int index, int level)
51{ 122{
52 pgd_t *pgd; 123 pte_t *ptep = base + index;
53 pud_t *pud; 124#ifdef CONFIG_HUGETLB_SUPER_PAGES
54 pmd_t *pmd = NULL; 125 if (!pte_present(*ptep) && huge_shift[level] != 0) {
55 126 unsigned long mask = -1UL << huge_shift[level];
56 pgd = pgd_offset(mm, addr); 127 pte_t *super_ptep = base + (index & mask);
57 if (pgd_present(*pgd)) { 128 pte_t pte = *super_ptep;
58 pud = pud_offset(pgd, addr); 129 if (pte_present(pte) && pte_super(pte))
59 if (pud_present(*pud)) 130 ptep = super_ptep;
60 pmd = pmd_offset(pud, addr);
61 } 131 }
62 return (pte_t *) pmd; 132#endif
133 return ptep;
63} 134}
64 135
65#ifdef HUGETLB_TEST 136pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
66struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
67 int write)
68{ 137{
69 unsigned long start = address; 138 pgd_t *pgd;
70 int length = 1; 139 pud_t *pud;
71 int nr; 140 pmd_t *pmd;
72 struct page *page; 141#ifdef CONFIG_HUGETLB_SUPER_PAGES
73 struct vm_area_struct *vma; 142 pte_t *pte;
74 143#endif
75 vma = find_vma(mm, addr);
76 if (!vma || !is_vm_hugetlb_page(vma))
77 return ERR_PTR(-EINVAL);
78
79 pte = huge_pte_offset(mm, address);
80 144
81 /* hugetlb should be locked, and hence, prefaulted */ 145 /* Get the top-level page table entry. */
82 WARN_ON(!pte || pte_none(*pte)); 146 pgd = (pgd_t *)get_pte((pte_t *)mm->pgd, pgd_index(addr), 0);
147 if (!pgd_present(*pgd))
148 return NULL;
83 149
84 page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; 150 /* We don't have four levels. */
151 pud = pud_offset(pgd, addr);
152#ifndef __PAGETABLE_PUD_FOLDED
153# error support fourth page table level
154#endif
85 155
86 WARN_ON(!PageHead(page)); 156 /* Check for an L0 huge PTE, if we have three levels. */
157#ifndef __PAGETABLE_PMD_FOLDED
158 if (pud_huge(*pud))
159 return (pte_t *)pud;
87 160
88 return page; 161 pmd = (pmd_t *)get_pte((pte_t *)pud_page_vaddr(*pud),
89} 162 pmd_index(addr), 1);
90 163 if (!pmd_present(*pmd))
91int pmd_huge(pmd_t pmd) 164 return NULL;
92{ 165#else
93 return 0; 166 pmd = pmd_offset(pud, addr);
94} 167#endif
95 168
96int pud_huge(pud_t pud) 169 /* Check for an L1 huge PTE. */
97{ 170 if (pmd_huge(*pmd))
98 return 0; 171 return (pte_t *)pmd;
99} 172
173#ifdef CONFIG_HUGETLB_SUPER_PAGES
174 /* Check for an L2 huge PTE. */
175 pte = get_pte((pte_t *)pmd_page_vaddr(*pmd), pte_index(addr), 2);
176 if (!pte_present(*pte))
177 return NULL;
178 if (pte_super(*pte))
179 return pte;
180#endif
100 181
101struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
102 pmd_t *pmd, int write)
103{
104 return NULL; 182 return NULL;
105} 183}
106 184
107#else
108
109struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, 185struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
110 int write) 186 int write)
111{ 187{
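
Aside: get_pte() above locates a "super" PTE by rounding the index down to the start of its group. Self-contained, with an illustrative shift value:

#include <stdint.h>

/* Round a page-table index down to the start of its super-page group.
 * With huge_shift == 2 (a 4x super page): 5 -> 4, 7 -> 4, 2 -> 0. */
static inline unsigned long super_index(unsigned long index, int huge_shift)
{
	unsigned long mask = -1UL << huge_shift;
	return index & mask;
}
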
@@ -149,8 +225,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
149 return 0; 225 return 0;
150} 226}
151 227
152#endif
153
154#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA 228#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
155static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, 229static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
156 unsigned long addr, unsigned long len, 230 unsigned long addr, unsigned long len,
@@ -322,21 +396,102 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
322 return hugetlb_get_unmapped_area_topdown(file, addr, len, 396 return hugetlb_get_unmapped_area_topdown(file, addr, len,
323 pgoff, flags); 397 pgoff, flags);
324} 398}
399#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
325 400
326static __init int setup_hugepagesz(char *opt) 401#ifdef CONFIG_HUGETLB_SUPER_PAGES
402static __init int __setup_hugepagesz(unsigned long ps)
327{ 403{
328 unsigned long ps = memparse(opt, &opt); 404 int log_ps = __builtin_ctzl(ps);
329 if (ps == PMD_SIZE) { 405 int level, base_shift;
330 hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); 406
331 } else if (ps == PUD_SIZE) { 407 if ((1UL << log_ps) != ps || (log_ps & 1) != 0) {
332 hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); 408 pr_warn("Not enabling %ld byte huge pages;"
409 " must be a power of four.\n", ps);
410 return -EINVAL;
411 }
412
413 if (ps > 64*1024*1024*1024UL) {
414 pr_warn("Not enabling %ld MB huge pages;"
415 " largest legal value is 64 GB .\n", ps >> 20);
416 return -EINVAL;
417 } else if (ps >= PUD_SIZE) {
418 static long hv_jpage_size;
419 if (hv_jpage_size == 0)
420 hv_jpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO);
421 if (hv_jpage_size != PUD_SIZE) {
422 pr_warn("Not enabling >= %ld MB huge pages:"
423 " hypervisor reports size %ld\n",
424 PUD_SIZE >> 20, hv_jpage_size);
425 return -EINVAL;
426 }
427 level = 0;
428 base_shift = PUD_SHIFT;
429 } else if (ps >= PMD_SIZE) {
430 level = 1;
431 base_shift = PMD_SHIFT;
432 } else if (ps > PAGE_SIZE) {
433 level = 2;
434 base_shift = PAGE_SHIFT;
333 } else { 435 } else {
334 pr_err("hugepagesz: Unsupported page size %lu M\n", 436 pr_err("hugepagesz: huge page size %ld too small\n", ps);
335 ps >> 20); 437 return -EINVAL;
336 return 0;
337 } 438 }
338 return 1; 439
440 if (log_ps != base_shift) {
441 int shift_val = log_ps - base_shift;
442 if (huge_shift[level] != 0) {
443 int old_shift = base_shift + huge_shift[level];
444 pr_warn("Not enabling %ld MB huge pages;"
445 " already have size %ld MB.\n",
446 ps >> 20, (1UL << old_shift) >> 20);
447 return -EINVAL;
448 }
449 if (hv_set_pte_super_shift(level, shift_val) != 0) {
450 pr_warn("Not enabling %ld MB huge pages;"
451 " no hypervisor support.\n", ps >> 20);
452 return -EINVAL;
453 }
454 printk(KERN_DEBUG "Enabled %ld MB huge pages\n", ps >> 20);
455 huge_shift[level] = shift_val;
456 }
457
458 hugetlb_add_hstate(log_ps - PAGE_SHIFT);
459
460 return 0;
461}
462
463static bool saw_hugepagesz;
464
465static __init int setup_hugepagesz(char *opt)
466{
467 if (!saw_hugepagesz) {
468 saw_hugepagesz = true;
469 memset(huge_shift, 0, sizeof(huge_shift));
470 }
471 return __setup_hugepagesz(memparse(opt, NULL));
339} 472}
340__setup("hugepagesz=", setup_hugepagesz); 473__setup("hugepagesz=", setup_hugepagesz);
341 474
342#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ 475#ifdef ADDITIONAL_HUGE_SIZE
476/*
477 * Provide an additional huge page size if no "hugepagesz" args are given.
478 * In that case, all the cores have properly set up their hv super_shift
479 * already, but we need to notify the hugetlb code to enable the
480 * new huge page size from the Linux point of view.
481 */
482static __init int add_default_hugepagesz(void)
483{
484 if (!saw_hugepagesz) {
485 BUILD_BUG_ON(ADDITIONAL_HUGE_SIZE >= PMD_SIZE ||
486 ADDITIONAL_HUGE_SIZE <= PAGE_SIZE);
487 BUILD_BUG_ON((PAGE_SIZE << ADDITIONAL_HUGE_SHIFT) !=
488 ADDITIONAL_HUGE_SIZE);
489 BUILD_BUG_ON(ADDITIONAL_HUGE_SHIFT & 1);
490 hugetlb_add_hstate(ADDITIONAL_HUGE_SHIFT);
491 }
492 return 0;
493}
494arch_initcall(add_default_hugepagesz);
495#endif
496
497#endif /* CONFIG_HUGETLB_SUPER_PAGES */
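
Aside: __setup_hugepagesz() above rejects any size whose log2 is odd (the "must be a power of four" warning). The test from the hunk, lifted into a standalone predicate:

#include <stdbool.h>

/* True iff ps is a nonzero power of four. */
static bool is_power_of_four(unsigned long ps)
{
	if (ps == 0 || (ps & (ps - 1)) != 0)
		return false;			/* not a power of two */
	return (__builtin_ctzl(ps) & 1) == 0;	/* even log2: power of four */
}
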
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 6a9d20ddc34f..630dd2ce2afe 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -82,7 +82,7 @@ static int num_l2_ptes[MAX_NUMNODES];
82 82
83static void init_prealloc_ptes(int node, int pages) 83static void init_prealloc_ptes(int node, int pages)
84{ 84{
85 BUG_ON(pages & (HV_L2_ENTRIES-1)); 85 BUG_ON(pages & (PTRS_PER_PTE - 1));
86 if (pages) { 86 if (pages) {
87 num_l2_ptes[node] = pages; 87 num_l2_ptes[node] = pages;
88 l2_ptes[node] = __alloc_bootmem(pages * sizeof(pte_t), 88 l2_ptes[node] = __alloc_bootmem(pages * sizeof(pte_t),
@@ -131,14 +131,9 @@ static void __init assign_pte(pmd_t *pmd, pte_t *page_table)
131 131
132#ifdef __tilegx__ 132#ifdef __tilegx__
133 133
134#if HV_L1_SIZE != HV_L2_SIZE
135# error Rework assumption that L1 and L2 page tables are same size.
136#endif
137
138/* Since pmd_t arrays and pte_t arrays are the same size, just use casts. */
139static inline pmd_t *alloc_pmd(void) 134static inline pmd_t *alloc_pmd(void)
140{ 135{
141 return (pmd_t *)alloc_pte(); 136 return __alloc_bootmem(L1_KERNEL_PGTABLE_SIZE, HV_PAGE_TABLE_ALIGN, 0);
142} 137}
143 138
144static inline void assign_pmd(pud_t *pud, pmd_t *pmd) 139static inline void assign_pmd(pud_t *pud, pmd_t *pmd)
@@ -444,6 +439,7 @@ static pgd_t pgtables[PTRS_PER_PGD]
444 */ 439 */
445static void __init kernel_physical_mapping_init(pgd_t *pgd_base) 440static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
446{ 441{
442 unsigned long long irqmask;
447 unsigned long address, pfn; 443 unsigned long address, pfn;
448 pmd_t *pmd; 444 pmd_t *pmd;
449 pte_t *pte; 445 pte_t *pte;
@@ -633,10 +629,13 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
633 * - install pgtables[] as the real page table 629 * - install pgtables[] as the real page table
634 * - flush the TLB so the new page table takes effect 630 * - flush the TLB so the new page table takes effect
635 */ 631 */
632 irqmask = interrupt_mask_save_mask();
633 interrupt_mask_set_mask(-1ULL);
636 rc = flush_and_install_context(__pa(pgtables), 634 rc = flush_and_install_context(__pa(pgtables),
637 init_pgprot((unsigned long)pgtables), 635 init_pgprot((unsigned long)pgtables),
638 __get_cpu_var(current_asid), 636 __get_cpu_var(current_asid),
639 cpumask_bits(my_cpu_mask)); 637 cpumask_bits(my_cpu_mask));
638 interrupt_mask_restore_mask(irqmask);
640 BUG_ON(rc != 0); 639 BUG_ON(rc != 0);
641 640
642 /* Copy the page table back to the normal swapper_pg_dir. */ 641 /* Copy the page table back to the normal swapper_pg_dir. */
@@ -699,6 +698,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
699#endif /* CONFIG_HIGHMEM */ 698#endif /* CONFIG_HIGHMEM */
700 699
701 700
701#ifndef CONFIG_64BIT
702static void __init init_free_pfn_range(unsigned long start, unsigned long end) 702static void __init init_free_pfn_range(unsigned long start, unsigned long end)
703{ 703{
704 unsigned long pfn; 704 unsigned long pfn;
@@ -771,6 +771,7 @@ static void __init set_non_bootmem_pages_init(void)
771 init_free_pfn_range(start, end); 771 init_free_pfn_range(start, end);
772 } 772 }
773} 773}
774#endif
774 775
775/* 776/*
776 * paging_init() sets up the page tables - note that all of lowmem is 777 * paging_init() sets up the page tables - note that all of lowmem is
@@ -807,7 +808,7 @@ void __init paging_init(void)
807 * changing init_mm once we get up and running, and there's no 808 * changing init_mm once we get up and running, and there's no
808 * need for e.g. vmalloc_sync_all(). 809 * need for e.g. vmalloc_sync_all().
809 */ 810 */
810 BUILD_BUG_ON(pgd_index(VMALLOC_START) != pgd_index(VMALLOC_END)); 811 BUILD_BUG_ON(pgd_index(VMALLOC_START) != pgd_index(VMALLOC_END - 1));
811 pud = pud_offset(pgd_base + pgd_index(VMALLOC_START), VMALLOC_START); 812 pud = pud_offset(pgd_base + pgd_index(VMALLOC_START), VMALLOC_START);
812 assign_pmd(pud, alloc_pmd()); 813 assign_pmd(pud, alloc_pmd());
813#endif 814#endif
@@ -859,8 +860,10 @@ void __init mem_init(void)
859 /* this will put all bootmem onto the freelists */ 860 /* this will put all bootmem onto the freelists */
860 totalram_pages += free_all_bootmem(); 861 totalram_pages += free_all_bootmem();
861 862
863#ifndef CONFIG_64BIT
862 /* count all remaining LOWMEM and give all HIGHMEM to page allocator */ 864 /* count all remaining LOWMEM and give all HIGHMEM to page allocator */
863 set_non_bootmem_pages_init(); 865 set_non_bootmem_pages_init();
866#endif
864 867
865 codesize = (unsigned long)&_etext - (unsigned long)&_text; 868 codesize = (unsigned long)&_etext - (unsigned long)&_text;
866 datasize = (unsigned long)&_end - (unsigned long)&_sdata; 869 datasize = (unsigned long)&_end - (unsigned long)&_sdata;
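
Aside on the BUILD_BUG_ON change in paging_init(): VMALLOC_END is an exclusive bound, so when it falls exactly on a pgd boundary, pgd_index(VMALLOC_END) names the next, unused pgd slot and the assertion could fire spuriously. A check with illustrative numbers (the 30-bit shift is an assumption, not the tile value):

#include <assert.h>

#define PGDIR_SHIFT_EX 30
#define pgd_index_ex(va) ((va) >> PGDIR_SHIFT_EX)

int main(void)
{
	unsigned long start = 0x7fc0000000UL;	/* inside the last pgd */
	unsigned long end = 0x8000000000UL;	/* exclusive, on a boundary */

	assert(pgd_index_ex(start) != pgd_index_ex(end));	/* spurious */
	assert(pgd_index_ex(start) == pgd_index_ex(end - 1));	/* intended */
	return 0;
}
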
diff --git a/arch/tile/mm/migrate.h b/arch/tile/mm/migrate.h
index cd45a0837fa6..91683d97917e 100644
--- a/arch/tile/mm/migrate.h
+++ b/arch/tile/mm/migrate.h
@@ -24,6 +24,9 @@
24/* 24/*
25 * This function is used as a helper when setting up the initial 25 * This function is used as a helper when setting up the initial
26 * page table (swapper_pg_dir). 26 * page table (swapper_pg_dir).
27 *
28 * You must mask ALL interrupts prior to invoking this code, since
29 * you can't legally touch the stack during the cache flush.
27 */ 30 */
28extern int flush_and_install_context(HV_PhysAddr page_table, HV_PTE access, 31extern int flush_and_install_context(HV_PhysAddr page_table, HV_PTE access,
29 HV_ASID asid, 32 HV_ASID asid,
@@ -39,6 +42,9 @@ extern int flush_and_install_context(HV_PhysAddr page_table, HV_PTE access,
39 * 42 *
40 * Note that any non-NULL pointers must not point to the page that 43 * Note that any non-NULL pointers must not point to the page that
41 * is handled by the stack_pte itself. 44 * is handled by the stack_pte itself.
45 *
46 * You must mask ALL interrupts prior to invoking this code, since
47 * you can't legally touch the stack during the cache flush.
42 */ 48 */
43extern int homecache_migrate_stack_and_flush(pte_t stack_pte, unsigned long va, 49extern int homecache_migrate_stack_and_flush(pte_t stack_pte, unsigned long va,
44 size_t length, pte_t *stack_ptep, 50 size_t length, pte_t *stack_ptep,
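
Aside: with the interrupt masking moved out of the assembly (see the migrate_32.S and migrate_64.S hunks below), every caller must now bracket the call itself. The init.c hunk earlier in this diff does exactly that:

	unsigned long long irqmask;

	irqmask = interrupt_mask_save_mask();
	interrupt_mask_set_mask(-1ULL);		/* stack is untouchable here */
	rc = flush_and_install_context(__pa(pgtables),
				       init_pgprot((unsigned long)pgtables),
				       __get_cpu_var(current_asid),
				       cpumask_bits(my_cpu_mask));
	interrupt_mask_restore_mask(irqmask);
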
diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S
index ac01a7cdf77f..5305814bf187 100644
--- a/arch/tile/mm/migrate_32.S
+++ b/arch/tile/mm/migrate_32.S
@@ -40,8 +40,7 @@
40#define FRAME_R32 16 40#define FRAME_R32 16
41#define FRAME_R33 20 41#define FRAME_R33 20
42#define FRAME_R34 24 42#define FRAME_R34 24
43#define FRAME_R35 28 43#define FRAME_SIZE 28
44#define FRAME_SIZE 32
45 44
46 45
47 46
@@ -66,12 +65,11 @@
66#define r_my_cpumask r5 65#define r_my_cpumask r5
67 66
68/* Locals (callee-save); must not be more than FRAME_xxx above. */ 67/* Locals (callee-save); must not be more than FRAME_xxx above. */
69#define r_save_ics r30 68#define r_context_lo r30
70#define r_context_lo r31 69#define r_context_hi r31
71#define r_context_hi r32 70#define r_access_lo r32
72#define r_access_lo r33 71#define r_access_hi r33
73#define r_access_hi r34 72#define r_asid r34
74#define r_asid r35
75 73
76STD_ENTRY(flush_and_install_context) 74STD_ENTRY(flush_and_install_context)
77 /* 75 /*
@@ -104,11 +102,7 @@ STD_ENTRY(flush_and_install_context)
104 sw r_tmp, r33 102 sw r_tmp, r33
105 addi r_tmp, sp, FRAME_R34 103 addi r_tmp, sp, FRAME_R34
106 } 104 }
107 { 105 sw r_tmp, r34
108 sw r_tmp, r34
109 addi r_tmp, sp, FRAME_R35
110 }
111 sw r_tmp, r35
112 106
113 /* Move some arguments to callee-save registers. */ 107 /* Move some arguments to callee-save registers. */
114 { 108 {
@@ -121,13 +115,6 @@ STD_ENTRY(flush_and_install_context)
121 } 115 }
122 move r_asid, r_asid_in 116 move r_asid, r_asid_in
123 117
124 /* Disable interrupts, since we can't use our stack. */
125 {
126 mfspr r_save_ics, INTERRUPT_CRITICAL_SECTION
127 movei r_tmp, 1
128 }
129 mtspr INTERRUPT_CRITICAL_SECTION, r_tmp
130
131 /* First, flush our L2 cache. */ 118 /* First, flush our L2 cache. */
132 { 119 {
133 move r0, zero /* cache_pa */ 120 move r0, zero /* cache_pa */
@@ -163,7 +150,7 @@ STD_ENTRY(flush_and_install_context)
163 } 150 }
164 { 151 {
165 move r4, r_asid 152 move r4, r_asid
166 movei r5, HV_CTX_DIRECTIO 153 moveli r5, HV_CTX_DIRECTIO | CTX_PAGE_FLAG
167 } 154 }
168 jal hv_install_context 155 jal hv_install_context
169 bnz r0, .Ldone 156 bnz r0, .Ldone
@@ -175,9 +162,6 @@ STD_ENTRY(flush_and_install_context)
175 } 162 }
176 163
177.Ldone: 164.Ldone:
178 /* Reset interrupts back how they were before. */
179 mtspr INTERRUPT_CRITICAL_SECTION, r_save_ics
180
181 /* Restore the callee-saved registers and return. */ 165 /* Restore the callee-saved registers and return. */
182 addli lr, sp, FRAME_SIZE 166 addli lr, sp, FRAME_SIZE
183 { 167 {
@@ -202,10 +186,6 @@ STD_ENTRY(flush_and_install_context)
202 } 186 }
203 { 187 {
204 lw r34, r_tmp 188 lw r34, r_tmp
205 addli r_tmp, sp, FRAME_R35
206 }
207 {
208 lw r35, r_tmp
209 addi sp, sp, FRAME_SIZE 189 addi sp, sp, FRAME_SIZE
210 } 190 }
211 jrp lr 191 jrp lr
diff --git a/arch/tile/mm/migrate_64.S b/arch/tile/mm/migrate_64.S
index e76fea688beb..1d15b10833d1 100644
--- a/arch/tile/mm/migrate_64.S
+++ b/arch/tile/mm/migrate_64.S
@@ -38,8 +38,7 @@
38#define FRAME_R30 16 38#define FRAME_R30 16
39#define FRAME_R31 24 39#define FRAME_R31 24
40#define FRAME_R32 32 40#define FRAME_R32 32
41#define FRAME_R33 40 41#define FRAME_SIZE 40
42#define FRAME_SIZE 48
43 42
44 43
45 44
@@ -60,10 +59,9 @@
60#define r_my_cpumask r3 59#define r_my_cpumask r3
61 60
62/* Locals (callee-save); must not be more than FRAME_xxx above. */ 61/* Locals (callee-save); must not be more than FRAME_xxx above. */
63#define r_save_ics r30 62#define r_context r30
64#define r_context r31 63#define r_access r31
65#define r_access r32 64#define r_asid r32
66#define r_asid r33
67 65
68/* 66/*
69 * Caller-save locals and frame constants are the same as 67 * Caller-save locals and frame constants are the same as
@@ -93,11 +91,7 @@ STD_ENTRY(flush_and_install_context)
93 st r_tmp, r31 91 st r_tmp, r31
94 addi r_tmp, sp, FRAME_R32 92 addi r_tmp, sp, FRAME_R32
95 } 93 }
96 { 94 st r_tmp, r32
97 st r_tmp, r32
98 addi r_tmp, sp, FRAME_R33
99 }
100 st r_tmp, r33
101 95
102 /* Move some arguments to callee-save registers. */ 96 /* Move some arguments to callee-save registers. */
103 { 97 {
@@ -106,13 +100,6 @@ STD_ENTRY(flush_and_install_context)
106 } 100 }
107 move r_asid, r_asid_in 101 move r_asid, r_asid_in
108 102
109 /* Disable interrupts, since we can't use our stack. */
110 {
111 mfspr r_save_ics, INTERRUPT_CRITICAL_SECTION
112 movei r_tmp, 1
113 }
114 mtspr INTERRUPT_CRITICAL_SECTION, r_tmp
115
116 /* First, flush our L2 cache. */ 103 /* First, flush our L2 cache. */
117 { 104 {
118 move r0, zero /* cache_pa */ 105 move r0, zero /* cache_pa */
@@ -147,7 +134,7 @@ STD_ENTRY(flush_and_install_context)
147 } 134 }
148 { 135 {
149 move r2, r_asid 136 move r2, r_asid
150 movei r3, HV_CTX_DIRECTIO 137 moveli r3, HV_CTX_DIRECTIO | CTX_PAGE_FLAG
151 } 138 }
152 jal hv_install_context 139 jal hv_install_context
153 bnez r0, 1f 140 bnez r0, 1f
@@ -158,10 +145,7 @@ STD_ENTRY(flush_and_install_context)
158 jal hv_flush_all 145 jal hv_flush_all
159 } 146 }
160 147
1611: /* Reset interrupts back how they were before. */ 1481: /* Restore the callee-saved registers and return. */
162 mtspr INTERRUPT_CRITICAL_SECTION, r_save_ics
163
164 /* Restore the callee-saved registers and return. */
165 addli lr, sp, FRAME_SIZE 149 addli lr, sp, FRAME_SIZE
166 { 150 {
167 ld lr, lr 151 ld lr, lr
@@ -177,10 +161,6 @@ STD_ENTRY(flush_and_install_context)
177 } 161 }
178 { 162 {
179 ld r32, r_tmp 163 ld r32, r_tmp
180 addli r_tmp, sp, FRAME_R33
181 }
182 {
183 ld r33, r_tmp
184 addi sp, sp, FRAME_SIZE 164 addi sp, sp, FRAME_SIZE
185 } 165 }
186 jrp lr 166 jrp lr
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 2410aa899b3e..345edfed9fcd 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -132,15 +132,6 @@ void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
132 set_pte_pfn(address, phys >> PAGE_SHIFT, flags); 132 set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
133} 133}
134 134
135#if defined(CONFIG_HIGHPTE)
136pte_t *_pte_offset_map(pmd_t *dir, unsigned long address)
137{
138 pte_t *pte = kmap_atomic(pmd_page(*dir)) +
139 (pmd_ptfn(*dir) << HV_LOG2_PAGE_TABLE_ALIGN) & ~PAGE_MASK;
140 return &pte[pte_index(address)];
141}
142#endif
143
144/** 135/**
145 * shatter_huge_page() - ensure a given address is mapped by a small page. 136 * shatter_huge_page() - ensure a given address is mapped by a small page.
146 * 137 *
@@ -289,33 +280,26 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
289 280
290#define L2_USER_PGTABLE_PAGES (1 << L2_USER_PGTABLE_ORDER) 281#define L2_USER_PGTABLE_PAGES (1 << L2_USER_PGTABLE_ORDER)
291 282
292struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) 283struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address,
284 int order)
293{ 285{
294 gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO; 286 gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO;
295 struct page *p; 287 struct page *p;
296#if L2_USER_PGTABLE_ORDER > 0
297 int i; 288 int i;
298#endif
299
300#ifdef CONFIG_HIGHPTE
301 flags |= __GFP_HIGHMEM;
302#endif
303 289
304 p = alloc_pages(flags, L2_USER_PGTABLE_ORDER); 290 p = alloc_pages(flags, L2_USER_PGTABLE_ORDER);
305 if (p == NULL) 291 if (p == NULL)
306 return NULL; 292 return NULL;
307 293
308#if L2_USER_PGTABLE_ORDER > 0
309 /* 294 /*
310 * Make every page have a page_count() of one, not just the first. 295 * Make every page have a page_count() of one, not just the first.
311 * We don't use __GFP_COMP since it doesn't look like it works 296 * We don't use __GFP_COMP since it doesn't look like it works
312 * correctly with tlb_remove_page(). 297 * correctly with tlb_remove_page().
313 */ 298 */
314 for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { 299 for (i = 1; i < order; ++i) {
315 init_page_count(p+i); 300 init_page_count(p+i);
316 inc_zone_page_state(p+i, NR_PAGETABLE); 301 inc_zone_page_state(p+i, NR_PAGETABLE);
317 } 302 }
318#endif
319 303
320 pgtable_page_ctor(p); 304 pgtable_page_ctor(p);
321 return p; 305 return p;
@@ -326,28 +310,28 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
326 * process). We have to correct whatever pte_alloc_one() did before 310 * process). We have to correct whatever pte_alloc_one() did before
327 * returning the pages to the allocator. 311 * returning the pages to the allocator.
328 */ 312 */
329void pte_free(struct mm_struct *mm, struct page *p) 313void pgtable_free(struct mm_struct *mm, struct page *p, int order)
330{ 314{
331 int i; 315 int i;
332 316
333 pgtable_page_dtor(p); 317 pgtable_page_dtor(p);
334 __free_page(p); 318 __free_page(p);
335 319
336 for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { 320 for (i = 1; i < order; ++i) {
337 __free_page(p+i); 321 __free_page(p+i);
338 dec_zone_page_state(p+i, NR_PAGETABLE); 322 dec_zone_page_state(p+i, NR_PAGETABLE);
339 } 323 }
340} 324}
341 325
342void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, 326void __pgtable_free_tlb(struct mmu_gather *tlb, struct page *pte,
343 unsigned long address) 327 unsigned long address, int order)
344{ 328{
345 int i; 329 int i;
346 330
347 pgtable_page_dtor(pte); 331 pgtable_page_dtor(pte);
348 tlb_remove_page(tlb, pte); 332 tlb_remove_page(tlb, pte);
349 333
350 for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { 334 for (i = 1; i < order; ++i) {
351 tlb_remove_page(tlb, pte + i); 335 tlb_remove_page(tlb, pte + i);
352 dec_zone_page_state(pte + i, NR_PAGETABLE); 336 dec_zone_page_state(pte + i, NR_PAGETABLE);
353 } 337 }
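
Aside: the order parameter lets L1 and L2 page tables of different sizes share these routines. The old pte_* entry points presumably become thin wrappers in <asm/pgalloc.h>, along these illustrative lines:

static inline struct page *pte_alloc_one(struct mm_struct *mm,
					 unsigned long address)
{
	return pgtable_alloc_one(mm, address, L2_USER_PGTABLE_ORDER);
}

static inline void pte_free(struct mm_struct *mm, struct page *pte)
{
	pgtable_free(mm, pte, L2_USER_PGTABLE_ORDER);
}

Keeping every page's page_count() at one, rather than using __GFP_COMP, matches what tlb_remove_page() expects when the table is torn down page by page.
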
@@ -490,7 +474,7 @@ void set_pte(pte_t *ptep, pte_t pte)
490/* Can this mm load a PTE with cached_priority set? */ 474/* Can this mm load a PTE with cached_priority set? */
491static inline int mm_is_priority_cached(struct mm_struct *mm) 475static inline int mm_is_priority_cached(struct mm_struct *mm)
492{ 476{
493 return mm->context.priority_cached; 477 return mm->context.priority_cached != 0;
494} 478}
495 479
496/* 480/*
@@ -500,8 +484,8 @@ static inline int mm_is_priority_cached(struct mm_struct *mm)
500void start_mm_caching(struct mm_struct *mm) 484void start_mm_caching(struct mm_struct *mm)
501{ 485{
502 if (!mm_is_priority_cached(mm)) { 486 if (!mm_is_priority_cached(mm)) {
503 mm->context.priority_cached = -1U; 487 mm->context.priority_cached = -1UL;
504 hv_set_caching(-1U); 488 hv_set_caching(-1UL);
505 } 489 }
506} 490}
507 491
@@ -516,7 +500,7 @@ void start_mm_caching(struct mm_struct *mm)
516 * Presumably we'll come back later and have more luck and clear 500 * Presumably we'll come back later and have more luck and clear
517 * the value then; for now we'll just keep the cache marked for priority. 501 * the value then; for now we'll just keep the cache marked for priority.
518 */ 502 */
519static unsigned int update_priority_cached(struct mm_struct *mm) 503static unsigned long update_priority_cached(struct mm_struct *mm)
520{ 504{
521 if (mm->context.priority_cached && down_write_trylock(&mm->mmap_sem)) { 505 if (mm->context.priority_cached && down_write_trylock(&mm->mmap_sem)) {
522 struct vm_area_struct *vm; 506 struct vm_area_struct *vm;