vfs: make word-at-a-time accesses handle a non-existing page

It turns out that there are more cases than CONFIG_DEBUG_PAGEALLOC that can have holes in the kernel address space: it seems to happen easily with Xen, and it looks like the AMD gart64 code will also punch holes dynamically.

Actually hitting that case is still very unlikely, so just do the access, and take an exception and fix it up for the very unlikely case of it being a page-crosser with no next page.

And hey, this abstraction might even help other architectures that have other issues with unaligned word accesses than the possible missing next page. IOW, this could do the byte order magic too.

Peter Anvin fixed a thinko in the shifting for the exception case.

Reported-and-tested-by: Jana Saout <jana@saout.de>
Cc: Peter Anvin <hpa@zytor.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-03 13:16:43 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-03 17:01:40 -0400
commit: e419b4cc585680940bc42f8ca8a071d6023fb1bb (patch)
tree: 8fce0f12b7b2a0fdca7a937af137910011efa783
parent: ac001e76546523ec2ef05b2f7001d8fdc588d069 (diff)
4 files changed, 58 insertions, 7 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1d14cc6b79ad..c9866b0b77d8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -81,7 +81,7 @@ config X86
        select CLKEVT_I8253
        select ARCH_HAVE_NMI_SAFE_CMPXCHG
        select GENERIC_IOMAP
-        select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC
+        select DCACHE_WORD_ACCESS
 config INSTRUCTION_DECODER
        def_bool (KPROBES || PERF_EVENTS)
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
index 6fe6767b7124..e58f03b206c3 100644
--- a/arch/x86/include/asm/word-at-a-time.h
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -43,4 +43,37 @@ static inline unsigned long has_zero(unsigned long a)
        return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
 }
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+        unsigned long ret, dummy;       /* dummy: scratch pinned to the "c" register, used only by the fixup */
+        asm(
+                "1:\tmov %2,%0\n"       /* fast path: plain word load from addr into ret; this may fault */
+                "2:\n"                  /* resume point, reached directly or via the fixup's jmp */
+                ".section .fixup,\"ax\"\n"      /* out-of-line recovery code, kept off the fast path */
+                "3:\t"
+                "lea %2,%1\n\t"         /* dummy = addr (address of the memory operand) */
+                "and %3,%1\n\t"         /* round dummy down to a word boundary (mask is -sizeof(long)) */
+                "mov (%1),%0\n\t"       /* ret = the aligned word that contains the byte at addr */
+                "leal %2,%%ecx\n\t"     /* ecx = low 32 bits of addr; overwrites dummy, no longer needed */
+                "andl %4,%%ecx\n\t"     /* ecx = byte offset of addr within its word (mask is sizeof(long)-1) */
+                "shll $3,%%ecx\n\t"     /* convert the byte offset into a bit count */
+                "shr %%cl,%0\n\t"       /* move the byte at addr down to bit 0; vacated high bytes become zero */
+                "jmp 2b\n"              /* continue after the faulting load with the zero-padded value */
+                ".previous\n"
+                _ASM_EXTABLE(1b, 3b)    /* exception-table entry: a fault at label 1 is handled at label 3 */
+                :"=&r" (ret),"=&c" (dummy)      /* %0 = ret (early-clobber), %1 = dummy in the "c" register */
+                :"m" (*(unsigned long *)addr),  /* %2 = the word at addr, as a memory operand */
+                 "i" (-sizeof(unsigned long)),  /* %3 = immediate mask that aligns an address down to a word */
+                 "i" (sizeof(unsigned long)-1));        /* %4 = immediate mask that extracts the in-word byte offset */
+        return ret;
+}
 #endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/fs/dcache.c b/fs/dcache.c
index b60ddc41d783..b80531c91779 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -141,18 +141,29 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
 * Compare 2 name strings, return 0 if they match, otherwise non-zero.
 * The strings are both count bytes long, and count is non-zero.
 */
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+#include <asm/word-at-a-time.h>
+/*
+ * NOTE! 'cs' and 'scount' come from a dentry, so it has an
+ * aligned allocation for this particular component. We don't
+ * strictly need the load_unaligned_zeropad() safety, but it
+ * doesn't hurt either.
+ *
+ * In contrast, 'ct' and 'tcount' can be from a pathname, and do
+ * need the careful unaligned handling.
+ */
 static inline int dentry_cmp(const unsigned char *cs, size_t scount,
                                const unsigned char *ct, size_t tcount)
 {
-#ifdef CONFIG_DCACHE_WORD_ACCESS
        unsigned long a,b,mask;
        if (unlikely(scount != tcount))
                return 1;
        for (;;) {
-                a = *(unsigned long *)cs;
+                a = load_unaligned_zeropad(cs);
-                b = *(unsigned long *)ct;
+                b = load_unaligned_zeropad(ct);
                if (tcount < sizeof(unsigned long))
                        break;
                if (unlikely(a != b))
@@ -165,7 +176,13 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
        }
        mask = ~(~0ul << tcount*8);
        return unlikely(!!((a ^ b) & mask));
+}
 #else
+static inline int dentry_cmp(const unsigned char *cs, size_t scount,
+                                const unsigned char *ct, size_t tcount)
+{
        if (scount != tcount)
                return 1;
@@ -177,9 +194,10 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
                tcount--;
        } while (tcount);
        return 0;
-#endif
 }
+#endif
 static void __d_free(struct rcu_head *head)
 {
        struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
diff --git a/fs/namei.c b/fs/namei.c
index 0062dd17eb55..c42791914f82 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1429,7 +1429,7 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len)
        unsigned long hash = 0;
        for (;;) {
-                a = *(unsigned long *)name;
+                a = load_unaligned_zeropad(name);
                if (len < sizeof(unsigned long))
                        break;
                hash += a;
@@ -1459,7 +1459,7 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
        do {
                hash = (hash + a) * 9;
                len += sizeof(unsigned long);
-                a = *(unsigned long *)(name+len);
+                a = load_unaligned_zeropad(name+len);
                /* Do we have any NUL or '/' bytes in this word? */
                mask = has_zero(a) | has_zero(a ^ REPEAT_BYTE('/'));
        } while (!mask);
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-05-03 13:16:43 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-05-03 17:01:40 -0400
commit	e419b4cc585680940bc42f8ca8a071d6023fb1bb (patch)
tree	8fce0f12b7b2a0fdca7a937af137910011efa783
parent	ac001e76546523ec2ef05b2f7001d8fdc588d069 (diff)