author		Linus Torvalds <torvalds@linux-foundation.org>	2015-02-10 19:45:56 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-10 19:45:56 -0500
commit		992de5a8eca7cbd3215e3eb2c439b2c11582a58b (patch)
tree		863988f84c1dd57a02fa337ecbce49263a3b9511
parent		b2718bffb4088faf13092db30c1ebf088ddee52e (diff)
parent		d5b3cf7139b8770af4ed8bb36a1ab9d290ac39e9 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:
 "Bite-sized chunks this time, to avoid the MTA ratelimiting woes.

  - fs/notify updates

  - ocfs2

  - some of MM"

That laconic "some MM" is mainly the removal of remap_file_pages(),
which is a big simplification of the VM, and which gets rid of a *lot*
of random cruft and special cases because we no longer support the
non-linear mappings that it used.

From a user interface perspective, nothing has changed, because the
remap_file_pages() syscall still exists, it's just done by emulating the
old behavior by creating a lot of individual small mappings instead of
one non-linear one.

The emulation is slower than the old "native" non-linear mappings, but
nobody really uses or cares about remap_file_pages(), and simplifying
the VM is a big advantage.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (78 commits)
  memcg: zap memcg_slab_caches and memcg_slab_mutex
  memcg: zap memcg_name argument of memcg_create_kmem_cache
  memcg: zap __memcg_{charge,uncharge}_slab
  mm/page_alloc.c: place zone_id check before VM_BUG_ON_PAGE check
  mm: hugetlb: fix type of hugetlb_treat_as_movable variable
  mm, hugetlb: remove unnecessary lower bound on sysctl handlers"?
  mm: memory: merge shared-writable dirtying branches in do_wp_page()
  mm: memory: remove ->vm_file check on shared writable vmas
  xtensa: drop _PAGE_FILE and pte_file()-related helpers
  x86: drop _PAGE_FILE and pte_file()-related helpers
  unicore32: drop pte_file()-related helpers
  um: drop _PAGE_FILE and pte_file()-related helpers
  tile: drop pte_file()-related helpers
  sparc: drop pte_file()-related helpers
  sh: drop _PAGE_FILE and pte_file()-related helpers
  score: drop _PAGE_FILE and pte_file()-related helpers
  s390: drop pte_file()-related helpers
  parisc: drop _PAGE_FILE and pte_file()-related helpers
  openrisc: drop _PAGE_FILE and pte_file()-related helpers
  nios2: drop _PAGE_FILE and pte_file()-related helpers
  ...
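The user-visible interface being emulated is unchanged; only the implementation
behind it is. As a hypothetical illustration (the file path and page layout
below are made up, not taken from this series), this is the sort of call the
emulation now services by creating one ordinary small VMA per remapped window
instead of a single non-linear VMA:

	/* Sketch of a remap_file_pages() caller; assumes /tmp/data exists
	 * and is at least two pages long. */
	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		long psz = sysconf(_SC_PAGESIZE);
		int fd = open("/tmp/data", O_RDWR);
		char *p;

		if (fd < 0)
			return 1;
		p = mmap(NULL, 2 * psz, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, 0);
		if (p == MAP_FAILED)
			return 1;
		/* Show file page 1 at window 0 and file page 0 at window 1.
		 * Post-merge this produces two small VMAs rather than one
		 * non-linear mapping. */
		if (remap_file_pages(p, psz, 0, 1, 0) ||
		    remap_file_pages(p + psz, psz, 0, 0, 0))
			perror("remap_file_pages");
		munmap(p, 2 * psz);
		close(fd);
		return 0;
	}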
-rw-r--r--	Documentation/cachetlb.txt	8
-rw-r--r--	Documentation/filesystems/fiemap.txt	3
-rw-r--r--	Documentation/filesystems/inotify.txt	197
-rw-r--r--	Documentation/filesystems/ocfs2.txt	4
-rw-r--r--	Documentation/vm/remap_file_pages.txt	7
-rw-r--r--	arch/alpha/include/asm/pgtable.h	7
-rw-r--r--	arch/arc/include/asm/pgtable.h	13
-rw-r--r--	arch/arm/include/asm/pgtable-2level.h	1
-rw-r--r--	arch/arm/include/asm/pgtable-3level.h	1
-rw-r--r--	arch/arm/include/asm/pgtable-nommu.h	2
-rw-r--r--	arch/arm/include/asm/pgtable.h	20
-rw-r--r--	arch/arm/mm/proc-macros.S	2
-rw-r--r--	arch/arm64/include/asm/pgtable.h	22
-rw-r--r--	arch/avr32/include/asm/pgtable.h	25
-rw-r--r--	arch/blackfin/include/asm/pgtable.h	5
-rw-r--r--	arch/c6x/include/asm/pgtable.h	5
-rw-r--r--	arch/cris/include/arch-v10/arch/mmu.h	3
-rw-r--r--	arch/cris/include/arch-v32/arch/mmu.h	3
-rw-r--r--	arch/cris/include/asm/pgtable.h	4
-rw-r--r--	arch/frv/include/asm/pgtable.h	27
-rw-r--r--	arch/hexagon/include/asm/pgtable.h	60
-rw-r--r--	arch/ia64/include/asm/pgtable.h	25
-rw-r--r--	arch/m32r/include/asm/pgtable-2level.h	4
-rw-r--r--	arch/m32r/include/asm/pgtable.h	11
-rw-r--r--	arch/m68k/include/asm/mcf_pgtable.h	23
-rw-r--r--	arch/m68k/include/asm/motorola_pgtable.h	15
-rw-r--r--	arch/m68k/include/asm/pgtable_no.h	2
-rw-r--r--	arch/m68k/include/asm/sun3_pgtable.h	15
-rw-r--r--	arch/metag/include/asm/pgtable.h	6
-rw-r--r--	arch/microblaze/include/asm/pgtable.h	11
-rw-r--r--	arch/mips/include/asm/pgtable-32.h	36
-rw-r--r--	arch/mips/include/asm/pgtable-64.h	9
-rw-r--r--	arch/mips/include/asm/pgtable-bits.h	9
-rw-r--r--	arch/mips/include/asm/pgtable.h	2
-rw-r--r--	arch/mn10300/include/asm/pgtable.h	17
-rw-r--r--	arch/nios2/include/asm/pgtable-bits.h	1
-rw-r--r--	arch/nios2/include/asm/pgtable.h	10
-rw-r--r--	arch/openrisc/include/asm/pgtable.h	8
-rw-r--r--	arch/openrisc/kernel/head.S	5
-rw-r--r--	arch/parisc/include/asm/pgtable.h	10
-rw-r--r--	arch/s390/include/asm/pgtable.h	29
-rw-r--r--	arch/score/include/asm/pgtable-bits.h	1
-rw-r--r--	arch/score/include/asm/pgtable.h	18
-rw-r--r--	arch/sh/Kconfig	2
-rw-r--r--	arch/sh/boards/mach-se/7343/irq.c	3
-rw-r--r--	arch/sh/boards/mach-se/7722/irq.c	3
-rw-r--r--	arch/sh/include/asm/pgtable_32.h	30
-rw-r--r--	arch/sh/include/asm/pgtable_64.h	9
-rw-r--r--	arch/sparc/include/asm/pgtable_32.h	24
-rw-r--r--	arch/sparc/include/asm/pgtable_64.h	40
-rw-r--r--	arch/sparc/include/asm/pgtsrmmu.h	14
-rw-r--r--	arch/tile/include/asm/pgtable.h	11
-rw-r--r--	arch/tile/mm/homecache.c	4
-rw-r--r--	arch/um/include/asm/pgtable-2level.h	9
-rw-r--r--	arch/um/include/asm/pgtable-3level.h	20
-rw-r--r--	arch/um/include/asm/pgtable.h	9
-rw-r--r--	arch/unicore32/include/asm/pgtable-hwdef.h	1
-rw-r--r--	arch/unicore32/include/asm/pgtable.h	14
-rw-r--r--	arch/x86/include/asm/pgtable-2level.h	38
-rw-r--r--	arch/x86/include/asm/pgtable-3level.h	12
-rw-r--r--	arch/x86/include/asm/pgtable.h	20
-rw-r--r--	arch/x86/include/asm/pgtable_64.h	6
-rw-r--r--	arch/x86/include/asm/pgtable_types.h	3
-rw-r--r--	arch/x86/mm/hugetlbpage.c	11
-rw-r--r--	arch/xtensa/include/asm/pgtable.h	10
-rw-r--r--	drivers/gpu/drm/drm_vma_manager.c	3
-rw-r--r--	fs/9p/vfs_file.c	2
-rw-r--r--	fs/btrfs/file.c	1
-rw-r--r--	fs/ceph/addr.c	1
-rw-r--r--	fs/cifs/file.c	1
-rw-r--r--	fs/ext4/file.c	1
-rw-r--r--	fs/f2fs/file.c	1
-rw-r--r--	fs/fuse/file.c	1
-rw-r--r--	fs/gfs2/file.c	1
-rw-r--r--	fs/inode.c	1
-rw-r--r--	fs/ioctl.c	5
-rw-r--r--	fs/nfs/file.c	1
-rw-r--r--	fs/nilfs2/file.c	1
-rw-r--r--	fs/notify/fanotify/fanotify.c	2
-rw-r--r--	fs/notify/fanotify/fanotify_user.c	35
-rw-r--r--	fs/ocfs2/acl.c	14
-rw-r--r--	fs/ocfs2/alloc.c	18
-rw-r--r--	fs/ocfs2/cluster/tcp.c	3
-rw-r--r--	fs/ocfs2/cluster/tcp_internal.h	12
-rw-r--r--	fs/ocfs2/dir.c	10
-rw-r--r--	fs/ocfs2/dlm/dlmast.c	6
-rw-r--r--	fs/ocfs2/dlm/dlmdebug.c	4
-rw-r--r--	fs/ocfs2/dlm/dlmdomain.c	14
-rw-r--r--	fs/ocfs2/dlm/dlmdomain.h	1
-rw-r--r--	fs/ocfs2/dlm/dlmrecovery.c	7
-rw-r--r--	fs/ocfs2/dlmglue.c	3
-rw-r--r--	fs/ocfs2/file.c	2
-rw-r--r--	fs/ocfs2/journal.c	1
-rw-r--r--	fs/ocfs2/mmap.c	1
-rw-r--r--	fs/ocfs2/ocfs2.h	2
-rw-r--r--	fs/ocfs2/quota_local.c	6
-rw-r--r--	fs/ocfs2/refcounttree.c	2
-rw-r--r--	fs/ocfs2/reservations.c	2
-rw-r--r--	fs/ocfs2/super.c	17
-rw-r--r--	fs/ocfs2/xattr.c	10
-rw-r--r--	fs/proc/task_mmu.c	16
-rw-r--r--	fs/ubifs/file.c	1
-rw-r--r--	fs/xfs/xfs_file.c	1
-rw-r--r--	include/asm-generic/pgtable.h	15
-rw-r--r--	include/linux/fs.h	6
-rw-r--r--	include/linux/fsnotify.h	6
-rw-r--r--	include/linux/hugetlb.h	2
-rw-r--r--	include/linux/memcontrol.h	7
-rw-r--r--	include/linux/mm.h	55
-rw-r--r--	include/linux/mm_types.h	12
-rw-r--r--	include/linux/rmap.h	2
-rw-r--r--	include/linux/slab.h	7
-rw-r--r--	include/linux/swapops.h	4
-rw-r--r--	kernel/fork.c	8
-rw-r--r--	kernel/sysctl.c	3
-rw-r--r--	mm/Makefile	2
-rw-r--r--	mm/debug.c	1
-rw-r--r--	mm/filemap.c	1
-rw-r--r--	mm/filemap_xip.c	1
-rw-r--r--	mm/fremap.c	283
-rw-r--r--	mm/gup.c	2
-rw-r--r--	mm/hugetlb.c	2
-rw-r--r--	mm/interval_tree.c	34
-rw-r--r--	mm/ksm.c	2
-rw-r--r--	mm/madvise.c	13
-rw-r--r--	mm/memcontrol.c	187
-rw-r--r--	mm/memory.c	276
-rw-r--r--	mm/migrate.c	32
-rw-r--r--	mm/mincore.c	9
-rw-r--r--	mm/mmap.c	93
-rw-r--r--	mm/mprotect.c	2
-rw-r--r--	mm/mremap.c	2
-rw-r--r--	mm/msync.c	5
-rw-r--r--	mm/nommu.c	8
-rw-r--r--	mm/page_alloc.c	8
-rw-r--r--	mm/rmap.c	225
-rw-r--r--	mm/shmem.c	1
-rw-r--r--	mm/slab.h	4
-rw-r--r--	mm/slab_common.c	151
-rw-r--r--	mm/slub.c	37
-rw-r--r--	mm/swap.c	4
-rw-r--r--	mm/vmstat.c	124
142 files changed, 642 insertions, 2183 deletions
diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
index d79b008e4a32..3f9f808b5119 100644
--- a/Documentation/cachetlb.txt
+++ b/Documentation/cachetlb.txt
@@ -317,10 +317,10 @@ maps this page at its virtual address.
 	about doing this.

 	The idea is, first at flush_dcache_page() time, if
-	page->mapping->i_mmap is an empty tree and ->i_mmap_nonlinear
-	an empty list, just mark the architecture private page flag bit.
-	Later, in update_mmu_cache(), a check is made of this flag bit,
-	and if set the flush is done and the flag bit is cleared.
+	page->mapping->i_mmap is an empty tree, just mark the architecture
+	private page flag bit.  Later, in update_mmu_cache(), a check is
+	made of this flag bit, and if set the flush is done and the flag
+	bit is cleared.

 	IMPORTANT NOTE: It is often important, if you defer the flush,
 			that the actual flush occurs on the same CPU
diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt
index 1b805a0efbb0..f6d9c99103a4 100644
--- a/Documentation/filesystems/fiemap.txt
+++ b/Documentation/filesystems/fiemap.txt
@@ -196,7 +196,8 @@ struct fiemap_extent_info {
 };

 It is intended that the file system should not need to access any of this
-structure directly.
+structure directly. Filesystem handlers should be tolerant to signals and return
+EINTR once fatal signal received.


 Flag checking should be done at the beginning of the ->fiemap callback via the
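The note added above has a userspace consequence: a ->fiemap handler may now
return EINTR when a fatal signal is pending, so FIEMAP callers should treat
that errno as retryable. A small sketch of a tolerant caller (the file name is
a placeholder):

	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>
	#include <linux/fiemap.h>

	int main(void)
	{
		struct fiemap fm;
		int fd = open("somefile", O_RDONLY);

		if (fd < 0)
			return 1;
		memset(&fm, 0, sizeof(fm));
		fm.fm_length = ~0ULL;	/* whole file; count extents only */
		while (ioctl(fd, FS_IOC_FIEMAP, &fm) == -1) {
			if (errno != EINTR) {	/* EINTR means: just retry */
				perror("FS_IOC_FIEMAP");
				close(fd);
				return 1;
			}
		}
		printf("mapped extents: %u\n", fm.fm_mapped_extents);
		close(fd);
		return 0;
	}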
diff --git a/Documentation/filesystems/inotify.txt b/Documentation/filesystems/inotify.txt
index cfd02712b83e..51f61db787fb 100644
--- a/Documentation/filesystems/inotify.txt
+++ b/Documentation/filesystems/inotify.txt
@@ -4,201 +4,10 @@


 Document started 15 Mar 2005 by Robert Love <rml@novell.com>
+Document updated 4 Jan 2015 by Zhang Zhen <zhenzhang.zhang@huawei.com>
+	--Deleted obsoleted interface, just refer to manpages for user interface.

-
-(i) User Interface
-
-Inotify is controlled by a set of three system calls and normal file I/O on a
-returned file descriptor.
-
-First step in using inotify is to initialise an inotify instance:
-
-	int fd = inotify_init ();
-
-Each instance is associated with a unique, ordered queue.
-
-Change events are managed by "watches". A watch is an (object,mask) pair where
-the object is a file or directory and the mask is a bit mask of one or more
-inotify events that the application wishes to receive. See <linux/inotify.h>
-for valid events. A watch is referenced by a watch descriptor, or wd.
-
-Watches are added via a path to the file.
-
-Watches on a directory will return events on any files inside of the directory.
-
-Adding a watch is simple:
-
-	int wd = inotify_add_watch (fd, path, mask);
-
-Where "fd" is the return value from inotify_init(), path is the path to the
-object to watch, and mask is the watch mask (see <linux/inotify.h>).
-
-You can update an existing watch in the same manner, by passing in a new mask.
-
-An existing watch is removed via
-
-	int ret = inotify_rm_watch (fd, wd);
-
-Events are provided in the form of an inotify_event structure that is read(2)
-from a given inotify instance. The filename is of dynamic length and follows
-the struct. It is of size len. The filename is padded with null bytes to
-ensure proper alignment. This padding is reflected in len.
-
-You can slurp multiple events by passing a large buffer, for example
-
-	size_t len = read (fd, buf, BUF_LEN);
-
-Where "buf" is a pointer to an array of "inotify_event" structures at least
-BUF_LEN bytes in size. The above example will return as many events as are
-available and fit in BUF_LEN.
-
-Each inotify instance fd is also select()- and poll()-able.
-
-You can find the size of the current event queue via the standard FIONREAD
-ioctl on the fd returned by inotify_init().
-
-All watches are destroyed and cleaned up on close.
-
-
-(ii)
-
-Prototypes:
-
-	int inotify_init (void);
-	int inotify_add_watch (int fd, const char *path, __u32 mask);
-	int inotify_rm_watch (int fd, __u32 mask);
-
-
-(iii) Kernel Interface
-
-Inotify's kernel API consists a set of functions for managing watches and an
-event callback.
-
-To use the kernel API, you must first initialize an inotify instance with a set
-of inotify_operations. You are given an opaque inotify_handle, which you use
-for any further calls to inotify.
-
-	struct inotify_handle *ih = inotify_init(my_event_handler);
-
-You must provide a function for processing events and a function for destroying
-the inotify watch.
-
-	void handle_event(struct inotify_watch *watch, u32 wd, u32 mask,
-			  u32 cookie, const char *name, struct inode *inode)
-
-	watch - the pointer to the inotify_watch that triggered this call
-	wd - the watch descriptor
-	mask - describes the event that occurred
-	cookie - an identifier for synchronizing events
-	name - the dentry name for affected files in a directory-based event
-	inode - the affected inode in a directory-based event
-
-	void destroy_watch(struct inotify_watch *watch)
-
-You may add watches by providing a pre-allocated and initialized inotify_watch
-structure and specifying the inode to watch along with an inotify event mask.
-You must pin the inode during the call. You will likely wish to embed the
-inotify_watch structure in a structure of your own which contains other
-information about the watch. Once you add an inotify watch, it is immediately
-subject to removal depending on filesystem events. You must grab a reference if
-you depend on the watch hanging around after the call.
-
-	inotify_init_watch(&my_watch->iwatch);
-	inotify_get_watch(&my_watch->iwatch);	// optional
-	s32 wd = inotify_add_watch(ih, &my_watch->iwatch, inode, mask);
-	inotify_put_watch(&my_watch->iwatch);	// optional
-
-You may use the watch descriptor (wd) or the address of the inotify_watch for
-other inotify operations. You must not directly read or manipulate data in the
-inotify_watch. Additionally, you must not call inotify_add_watch() more than
-once for a given inotify_watch structure, unless you have first called either
-inotify_rm_watch() or inotify_rm_wd().
-
-To determine if you have already registered a watch for a given inode, you may
-call inotify_find_watch(), which gives you both the wd and the watch pointer for
-the inotify_watch, or an error if the watch does not exist.
-
-	wd = inotify_find_watch(ih, inode, &watchp);
-
-You may use container_of() on the watch pointer to access your own data
-associated with a given watch. When an existing watch is found,
-inotify_find_watch() bumps the refcount before releasing its locks. You must
-put that reference with:
-
-	put_inotify_watch(watchp);
-
-Call inotify_find_update_watch() to update the event mask for an existing watch.
-inotify_find_update_watch() returns the wd of the updated watch, or an error if
-the watch does not exist.
-
-	wd = inotify_find_update_watch(ih, inode, mask);
-
-An existing watch may be removed by calling either inotify_rm_watch() or
-inotify_rm_wd().
-
-	int ret = inotify_rm_watch(ih, &my_watch->iwatch);
-	int ret = inotify_rm_wd(ih, wd);
-
-A watch may be removed while executing your event handler with the following:
-
-	inotify_remove_watch_locked(ih, iwatch);
-
-Call inotify_destroy() to remove all watches from your inotify instance and
-release it. If there are no outstanding references, inotify_destroy() will call
-your destroy_watch op for each watch.
-
-	inotify_destroy(ih);
-
-When inotify removes a watch, it sends an IN_IGNORED event to your callback.
-You may use this event as an indication to free the watch memory. Note that
-inotify may remove a watch due to filesystem events, as well as by your request.
-If you use IN_ONESHOT, inotify will remove the watch after the first event, at
-which point you may call the final inotify_put_watch.
-
-(iv) Kernel Interface Prototypes
-
-	struct inotify_handle *inotify_init(struct inotify_operations *ops);
-
-	inotify_init_watch(struct inotify_watch *watch);
-
-	s32 inotify_add_watch(struct inotify_handle *ih,
-			      struct inotify_watch *watch,
-			      struct inode *inode, u32 mask);
-
-	s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
-			       struct inotify_watch **watchp);
-
-	s32 inotify_find_update_watch(struct inotify_handle *ih,
-				      struct inode *inode, u32 mask);
-
-	int inotify_rm_wd(struct inotify_handle *ih, u32 wd);
-
-	int inotify_rm_watch(struct inotify_handle *ih,
-			     struct inotify_watch *watch);
-
-	void inotify_remove_watch_locked(struct inotify_handle *ih,
-					 struct inotify_watch *watch);
-
-	void inotify_destroy(struct inotify_handle *ih);
-
-	void get_inotify_watch(struct inotify_watch *watch);
-	void put_inotify_watch(struct inotify_watch *watch);
-
-
-(v) Internal Kernel Implementation
-
-Each inotify instance is represented by an inotify_handle structure.
-Inotify's userspace consumers also have an inotify_device which is
-associated with the inotify_handle, and on which events are queued.
-
-Each watch is associated with an inotify_watch structure. Watches are chained
-off of each associated inotify_handle and each associated inode.
-
-See fs/notify/inotify/inotify_fsnotify.c and fs/notify/inotify/inotify_user.c
-for the locking and lifetime rules.
-
-
-(vi) Rationale
+(i) Rationale

 Q: What is the design decision behind not tying the watch to the open fd of
    the watched object?
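With the user-interface section deleted, inotify(7) and the per-syscall
manpages are the authoritative reference. For orientation only, here is a
minimal sketch of the current userspace API that the removed text used to
describe (watching /tmp is an arbitrary choice):

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/inotify.h>

	int main(void)
	{
		char buf[4096]
			__attribute__((aligned(__alignof__(struct inotify_event))));
		ssize_t len, i;
		int fd = inotify_init1(0);

		if (fd < 0 ||
		    inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE) < 0)
			return 1;
		len = read(fd, buf, sizeof(buf));	/* blocks for events */
		for (i = 0; i < len; ) {
			struct inotify_event *ev =
				(struct inotify_event *)&buf[i];

			printf("wd=%d mask=%#x name=%s\n", ev->wd, ev->mask,
			       ev->len ? ev->name : "");
			i += sizeof(*ev) + ev->len;
		}
		close(fd);
		return 0;
	}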
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt
index 7618a287aa41..28f8c08201e2 100644
--- a/Documentation/filesystems/ocfs2.txt
+++ b/Documentation/filesystems/ocfs2.txt
@@ -100,3 +100,7 @@ coherency=full (*) Disallow concurrent O_DIRECT writes, cluster inode
 coherency=buffered	Allow concurrent O_DIRECT writes without EX lock among
 			nodes, which gains high performance at risk of getting
 			stale data on other nodes.
+journal_async_commit	Commit block can be written to disk without waiting
+			for descriptor blocks. If enabled older kernels cannot
+			mount the device. This will enable 'journal_checksum'
+			internally.
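Usage-wise, journal_async_commit is just another entry in the ocfs2 mount
option string. A hypothetical invocation through mount(2), with placeholder
device and mount point:

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* journal_async_commit implies journal_checksum; a filesystem
		 * mounted with it cannot be mounted by older kernels. */
		if (mount("/dev/sdb1", "/mnt/ocfs2", "ocfs2", 0,
			  "journal_async_commit"))
			perror("mount");
		return 0;
	}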
diff --git a/Documentation/vm/remap_file_pages.txt b/Documentation/vm/remap_file_pages.txt
index 560e4363a55d..f609142f406a 100644
--- a/Documentation/vm/remap_file_pages.txt
+++ b/Documentation/vm/remap_file_pages.txt
@@ -18,10 +18,9 @@ on 32-bit systems to map files bigger than can linearly fit into 32-bit
 virtual address space. This use-case is not critical anymore since 64-bit
 systems are widely available.

-The plan is to deprecate the syscall and replace it with an emulation.
-The emulation will create new VMAs instead of nonlinear mappings. It's
-going to work slower for rare users of remap_file_pages() but ABI is
-preserved.
+The syscall is deprecated and replaced it with an emulation now. The
+emulation creates new VMAs instead of nonlinear mappings. It's going to
+work slower for rare users of remap_file_pages() but ABI is preserved.

 One side effect of emulation (apart from performance) is that user can hit
 vm.max_map_count limit more easily due to additional VMAs. See comment for
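The max_map_count side effect is easy to observe, since each emulated
remap_file_pages() window becomes a separate VMA. A sketch that reports the
current VMA count next to the limit (Linux procfs assumed):

	#include <stdio.h>

	static long count_lines(const char *path)
	{
		FILE *f = fopen(path, "r");
		long n = 0;
		int c;

		if (!f)
			return -1;
		while ((c = fgetc(f)) != EOF)
			if (c == '\n')
				n++;
		fclose(f);
		return n;
	}

	int main(void)
	{
		FILE *f = fopen("/proc/sys/vm/max_map_count", "r");
		long limit = 0;

		if (f) {
			fscanf(f, "%ld", &limit);
			fclose(f);
		}
		printf("VMAs in use: %ld, vm.max_map_count: %ld\n",
		       count_lines("/proc/self/maps"), limit);
		return 0;
	}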
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index d8f9b7e89234..fce22cf88ee9 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -73,7 +73,6 @@ struct vm_area_struct;
 /* .. and these are ours ... */
 #define _PAGE_DIRTY	0x20000
 #define _PAGE_ACCESSED	0x40000
-#define _PAGE_FILE	0x80000	/* set:pagecache, unset:swap */

 /*
  * NOTE! The "accessed" bit isn't necessarily exact: it can be kept exactly
@@ -268,7 +267,6 @@ extern inline void pgd_clear(pgd_t * pgdp) { pgd_val(*pgdp) = 0; }
 extern inline int pte_write(pte_t pte)		{ return !(pte_val(pte) & _PAGE_FOW); }
 extern inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
 extern inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
-extern inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
 extern inline int pte_special(pte_t pte)	{ return 0; }

 extern inline pte_t pte_wrprotect(pte_t pte)	{ pte_val(pte) |= _PAGE_FOW; return pte; }
@@ -345,11 +343,6 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })

-#define pte_to_pgoff(pte)	(pte_val(pte) >> 32)
-#define pgoff_to_pte(off)	((pte_t) { ((off) << 32) | _PAGE_FILE })
-
-#define PTE_FILE_MAX_BITS	32
-
 #ifndef CONFIG_DISCONTIGMEM
 #define kern_addr_valid(addr)	(1)
 #endif
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 6b0b7f7ef783..bdc8ccaf390d 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -61,7 +61,6 @@
 #define _PAGE_WRITE         (1<<4)	/* Page has user write perm (H) */
 #define _PAGE_READ          (1<<5)	/* Page has user read perm (H) */
 #define _PAGE_MODIFIED      (1<<6)	/* Page modified (dirty) (S) */
-#define _PAGE_FILE          (1<<7)	/* page cache/ swap (S) */
 #define _PAGE_GLOBAL        (1<<8)	/* Page is global (H) */
 #define _PAGE_PRESENT       (1<<10)	/* TLB entry is valid (H) */

@@ -73,7 +72,6 @@
 #define _PAGE_READ          (1<<3)	/* Page has user read perm (H) */
 #define _PAGE_ACCESSED      (1<<4)	/* Page is accessed (S) */
 #define _PAGE_MODIFIED      (1<<5)	/* Page modified (dirty) (S) */
-#define _PAGE_FILE          (1<<6)	/* page cache/ swap (S) */
 #define _PAGE_GLOBAL        (1<<8)	/* Page is global (H) */
 #define _PAGE_PRESENT       (1<<9)	/* TLB entry is valid (H) */
 #define _PAGE_SHARED_CODE   (1<<11)	/* Shared Code page with cmn vaddr
@@ -268,15 +266,6 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 	pte;					\
 })

-/* TBD: Non linear mapping stuff */
-static inline int pte_file(pte_t pte)
-{
-	return pte_val(pte) & _PAGE_FILE;
-}
-
-#define PTE_FILE_MAX_BITS	30
-#define pgoff_to_pte(x)		__pte(x)
-#define pte_to_pgoff(x)		(pte_val(x) >> 2)
 #define pte_pfn(pte)		(pte_val(pte) >> PAGE_SHIFT)
 #define pfn_pte(pfn, prot)	(__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)))
 #define __pte_index(addr)	(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
@@ -364,7 +353,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,

 /* Encode swap {type,off} tuple into PTE
  * We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that
- * both PAGE_FILE and PAGE_PRESENT are zero in a PTE holding swap "identifier"
+ * PAGE_PRESENT is zero in a PTE holding swap "identifier"
  */
 #define __swp_entry(type, off)	((swp_entry_t) { \
 					((type) & 0x1f) | ((off) << 13) })
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index f0279411847d..bcc5e300413f 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -118,7 +118,6 @@
 #define L_PTE_VALID		(_AT(pteval_t, 1) << 0)		/* Valid */
 #define L_PTE_PRESENT		(_AT(pteval_t, 1) << 0)
 #define L_PTE_YOUNG		(_AT(pteval_t, 1) << 1)
-#define L_PTE_FILE		(_AT(pteval_t, 1) << 2)	/* only when !PRESENT */
 #define L_PTE_DIRTY		(_AT(pteval_t, 1) << 6)
 #define L_PTE_RDONLY		(_AT(pteval_t, 1) << 7)
 #define L_PTE_USER		(_AT(pteval_t, 1) << 8)
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index a31ecdad4b59..18dbc82f85e5 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -77,7 +77,6 @@
  */
 #define L_PTE_VALID		(_AT(pteval_t, 1) << 0)		/* Valid */
 #define L_PTE_PRESENT		(_AT(pteval_t, 3) << 0)		/* Present */
-#define L_PTE_FILE		(_AT(pteval_t, 1) << 2)	/* only when !PRESENT */
 #define L_PTE_USER		(_AT(pteval_t, 1) << 6)		/* AP[1] */
 #define L_PTE_SHARED		(_AT(pteval_t, 3) << 8)		/* SH[1:0], inner shareable */
 #define L_PTE_YOUNG		(_AT(pteval_t, 1) << 10)	/* AF */
diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h
index 0642228ff785..c35e53ee6663 100644
--- a/arch/arm/include/asm/pgtable-nommu.h
+++ b/arch/arm/include/asm/pgtable-nommu.h
@@ -54,8 +54,6 @@

 typedef pte_t *pte_addr_t;

-static inline int pte_file(pte_t pte) { return 0; }
-
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index d5cac545ba33..f40354198bad 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -318,12 +318,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  *
  *   3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
  *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *   <--------------- offset ----------------------> < type -> 0 0 0
+ *   <--------------- offset ------------------------> < type -> 0 0
  *
- * This gives us up to 31 swap files and 64GB per swap file.  Note that
+ * This gives us up to 31 swap files and 128GB per swap file.  Note that
  * the offset field is always non-zero.
  */
-#define __SWP_TYPE_SHIFT	3
+#define __SWP_TYPE_SHIFT	2
 #define __SWP_TYPE_BITS		5
 #define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
 #define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
@@ -342,20 +342,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  */
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)

-/*
- * Encode and decode a file entry.  File entries are stored in the Linux
- * page tables as follows:
- *
- *   3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
- *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *   <----------------------- offset ------------------------> 1 0 0
- */
-#define pte_file(pte)		(pte_val(pte) & L_PTE_FILE)
-#define pte_to_pgoff(x)		(pte_val(x) >> 3)
-#define pgoff_to_pte(x)		__pte(((x) << 3) | L_PTE_FILE)
-
-#define PTE_FILE_MAX_BITS	29
-
 /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
 /* FIXME: this is not correct */
 #define kern_addr_valid(addr)	(1)
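Freeing PTE bit 2 (the old L_PTE_FILE position) is what lets __SWP_TYPE_SHIFT
drop from 3 to 2 and doubles the per-file swap offset space from 64GB to
128GB. A standalone arithmetic sketch of the new packing, mirroring the macros
above in plain C (not kernel code):

	#include <assert.h>
	#include <stdio.h>

	#define SWP_TYPE_SHIFT	 2	/* was 3 before this series */
	#define SWP_TYPE_BITS	 5
	#define SWP_TYPE_MASK	 ((1UL << SWP_TYPE_BITS) - 1)
	#define SWP_OFFSET_SHIFT (SWP_TYPE_BITS + SWP_TYPE_SHIFT)

	static unsigned long swp_entry(unsigned long type, unsigned long off)
	{
		return (type << SWP_TYPE_SHIFT) | (off << SWP_OFFSET_SHIFT);
	}

	int main(void)
	{
		unsigned long e = swp_entry(17, 0x12345);

		/* Bits 0-1 stay zero: not present; the always non-zero
		 * offset keeps the entry from looking like pte_none. */
		assert((e & 3) == 0);
		assert(((e >> SWP_TYPE_SHIFT) & SWP_TYPE_MASK) == 17);
		assert((e >> SWP_OFFSET_SHIFT) == 0x12345);
		printf("entry = %#lx\n", e);
		return 0;
	}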
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index ba1196c968d8..082b9f2f7e90 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -98,7 +98,7 @@
 #endif
 #if !defined (CONFIG_ARM_LPAE) && \
 	(L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\
-	 L_PTE_FILE+L_PTE_PRESENT) > L_PTE_SHARED
+	 L_PTE_PRESENT) > L_PTE_SHARED
 #error Invalid Linux PTE bit settings
 #endif
 #endif	/* CONFIG_MMU */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 210d632aa5ad..4c445057169d 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -25,7 +25,6 @@
  * Software defined PTE bits definition.
  */
 #define PTE_VALID		(_AT(pteval_t, 1) << 0)
-#define PTE_FILE		(_AT(pteval_t, 1) << 2)	/* only when !pte_present() */
 #define PTE_DIRTY		(_AT(pteval_t, 1) << 55)
 #define PTE_SPECIAL		(_AT(pteval_t, 1) << 56)
 #define PTE_WRITE		(_AT(pteval_t, 1) << 57)
@@ -469,13 +468,12 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
 /*
  * Encode and decode a swap entry:
  *	bits 0-1:	present (must be zero)
- *	bit 2:		PTE_FILE
- *	bits 3-8:	swap type
- *	bits 9-57:	swap offset
+ *	bits 2-7:	swap type
+ *	bits 8-57:	swap offset
  */
-#define __SWP_TYPE_SHIFT	3
+#define __SWP_TYPE_SHIFT	2
 #define __SWP_TYPE_BITS		6
-#define __SWP_OFFSET_BITS	49
+#define __SWP_OFFSET_BITS	50
 #define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
 #define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
 #define __SWP_OFFSET_MASK	((1UL << __SWP_OFFSET_BITS) - 1)
@@ -493,18 +491,6 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
  */
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)

-/*
- * Encode and decode a file entry:
- *	bits 0-1:	present (must be zero)
- *	bit  2:		PTE_FILE
- *	bits 3-57:	file offset / PAGE_SIZE
- */
-#define pte_file(pte)		(pte_val(pte) & PTE_FILE)
-#define pte_to_pgoff(x)		(pte_val(x) >> 3)
-#define pgoff_to_pte(x)		__pte(((x) << 3) | PTE_FILE)
-
-#define PTE_FILE_MAX_BITS	55
-
 extern int kern_addr_valid(unsigned long addr);

 #include <asm-generic/pgtable.h>
diff --git a/arch/avr32/include/asm/pgtable.h b/arch/avr32/include/asm/pgtable.h
index 4beff97e2033..ac7a817e2126 100644
--- a/arch/avr32/include/asm/pgtable.h
+++ b/arch/avr32/include/asm/pgtable.h
@@ -86,9 +86,6 @@ extern struct page *empty_zero_page;
 #define _PAGE_BIT_PRESENT	10
 #define _PAGE_BIT_ACCESSED	11 /* software: page was accessed */

-/* The following flags are only valid when !PRESENT */
-#define _PAGE_BIT_FILE		0 /* software: pagecache or swap? */
-
 #define _PAGE_WT		(1 << _PAGE_BIT_WT)
 #define _PAGE_DIRTY		(1 << _PAGE_BIT_DIRTY)
 #define _PAGE_EXECUTE		(1 << _PAGE_BIT_EXECUTE)
@@ -101,7 +98,6 @@ extern struct page *empty_zero_page;
 /* Software flags */
 #define _PAGE_ACCESSED		(1 << _PAGE_BIT_ACCESSED)
 #define _PAGE_PRESENT		(1 << _PAGE_BIT_PRESENT)
-#define _PAGE_FILE		(1 << _PAGE_BIT_FILE)

 /*
  * Page types, i.e. sizes. _PAGE_TYPE_NONE corresponds to what is
@@ -210,14 +206,6 @@ static inline int pte_special(pte_t pte)
 	return 0;
 }

-/*
- * The following only work if pte_present() is not true.
- */
-static inline int pte_file(pte_t pte)
-{
-	return pte_val(pte) & _PAGE_FILE;
-}
-
 /* Mutator functions for PTE bits */
 static inline pte_t pte_wrprotect(pte_t pte)
 {
@@ -329,7 +317,6 @@ extern void update_mmu_cache(struct vm_area_struct * vma,
  * Encode and decode a swap entry
  *
  * Constraints:
- *   _PAGE_FILE at bit 0
  *   _PAGE_TYPE_* at bits 2-3 (for emulating _PAGE_PROTNONE)
  *   _PAGE_PRESENT at bit 10
  *
@@ -346,18 +333,6 @@ extern void update_mmu_cache(struct vm_area_struct * vma,
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })

-/*
- * Encode and decode a nonlinear file mapping entry. We have to
- * preserve _PAGE_FILE and _PAGE_PRESENT here. _PAGE_TYPE_* isn't
- * necessary, since _PAGE_FILE implies !_PAGE_PROTNONE (?)
- */
-#define PTE_FILE_MAX_BITS	30
-#define pte_to_pgoff(pte)	(((pte_val(pte) >> 1) & 0x1ff)	\
-				 | ((pte_val(pte) >> 11) << 9))
-#define pgoff_to_pte(off)	((pte_t) { ((((off) & 0x1ff) << 1)	\
-					    | (((off) >> 9) << 11)	\
-					    | _PAGE_FILE) })
-
 typedef pte_t *pte_addr_t;

 #define kern_addr_valid(addr)	(1)
diff --git a/arch/blackfin/include/asm/pgtable.h b/arch/blackfin/include/asm/pgtable.h
index 0b049019eba7..b88a1558b0b9 100644
--- a/arch/blackfin/include/asm/pgtable.h
+++ b/arch/blackfin/include/asm/pgtable.h
@@ -45,11 +45,6 @@ extern void paging_init(void);
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })

-static inline int pte_file(pte_t pte)
-{
-	return 0;
-}
-
 #define set_pte(pteptr, pteval) (*(pteptr) = pteval)
 #define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)

diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h
index c0eed5b18860..78d4483ba40c 100644
--- a/arch/c6x/include/asm/pgtable.h
+++ b/arch/c6x/include/asm/pgtable.h
@@ -50,11 +50,6 @@ extern void paging_init(void);
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })

-static inline int pte_file(pte_t pte)
-{
-	return 0;
-}
-
 #define set_pte(pteptr, pteval) (*(pteptr) = pteval)
 #define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)

diff --git a/arch/cris/include/arch-v10/arch/mmu.h b/arch/cris/include/arch-v10/arch/mmu.h
index e829e5a37bbe..47a5dd21749d 100644
--- a/arch/cris/include/arch-v10/arch/mmu.h
+++ b/arch/cris/include/arch-v10/arch/mmu.h
@@ -58,7 +58,6 @@ typedef struct
 /* Bits the HW doesn't care about but the kernel uses them in SW */

 #define _PAGE_PRESENT   (1<<4)  /* page present in memory */
-#define _PAGE_FILE      (1<<5)  /* set: pagecache, unset: swap (when !PRESENT) */
 #define _PAGE_ACCESSED  (1<<5)  /* simulated in software using valid bit */
 #define _PAGE_MODIFIED  (1<<6)  /* simulated in software using we bit */
 #define _PAGE_READ      (1<<7)  /* read-enabled */
@@ -105,6 +104,4 @@ typedef struct
 #define __S110	PAGE_SHARED
 #define __S111	PAGE_SHARED

-#define PTE_FILE_MAX_BITS	26
-
 #endif
diff --git a/arch/cris/include/arch-v32/arch/mmu.h b/arch/cris/include/arch-v32/arch/mmu.h
index c1a13e05e963..e6db1616dee5 100644
--- a/arch/cris/include/arch-v32/arch/mmu.h
+++ b/arch/cris/include/arch-v32/arch/mmu.h
@@ -53,7 +53,6 @@ typedef struct
  * software.
  */
 #define _PAGE_PRESENT   (1 << 5)   /* Page is present in memory. */
-#define _PAGE_FILE      (1 << 6)   /* 1=pagecache, 0=swap (when !present) */
 #define _PAGE_ACCESSED  (1 << 6)   /* Simulated in software using valid bit. */
 #define _PAGE_MODIFIED  (1 << 7)   /* Simulated in software using we bit. */
 #define _PAGE_READ      (1 << 8)   /* Read enabled. */
@@ -108,6 +107,4 @@ typedef struct
 #define __S110	PAGE_SHARED_EXEC
 #define __S111	PAGE_SHARED_EXEC

-#define PTE_FILE_MAX_BITS	25
-
 #endif /* _ASM_CRIS_ARCH_MMU_H */
diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h
index 8b8c86793225..e824257971c4 100644
--- a/arch/cris/include/asm/pgtable.h
+++ b/arch/cris/include/asm/pgtable.h
@@ -114,7 +114,6 @@ extern unsigned long empty_zero_page;
 static inline int pte_write(pte_t pte)		{ return pte_val(pte) & _PAGE_WRITE; }
 static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_MODIFIED; }
 static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
 static inline int pte_special(pte_t pte)	{ return 0; }

 static inline pte_t pte_wrprotect(pte_t pte)
@@ -290,9 +289,6 @@ static inline void update_mmu_cache(struct vm_area_struct * vma,
  */
 #define pgtable_cache_init()   do { } while (0)

-#define pte_to_pgoff(x)	(pte_val(x) >> 6)
-#define pgoff_to_pte(x)	__pte(((x) << 6) | _PAGE_FILE)
-
 typedef pte_t *pte_addr_t;

 #endif /* __ASSEMBLY__ */
diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h
index eb0110acd19b..c49699d5902d 100644
--- a/arch/frv/include/asm/pgtable.h
+++ b/arch/frv/include/asm/pgtable.h
@@ -62,10 +62,6 @@ typedef pte_t *pte_addr_t;
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val })

-#ifndef __ASSEMBLY__
-static inline int pte_file(pte_t pte) { return 0; }
-#endif
-
 #define ZERO_PAGE(vaddr)	({ BUG(); NULL; })

 #define swapper_pg_dir		((pgd_t *) NULL)
@@ -298,7 +294,6 @@ static inline pmd_t *pmd_offset(pud_t *dir, unsigned long address)

 #define _PAGE_RESERVED_MASK	(xAMPRx_RESERVED8 | xAMPRx_RESERVED13)

-#define _PAGE_FILE		0x002	/* set:pagecache unset:swap */
 #define _PAGE_PROTNONE		0x000	/* If not present */

 #define _PAGE_CHG_MASK		(PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
@@ -463,27 +458,15 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  * Handle swap and file entries
  * - the PTE is encoded in the following format:
  *	bit 0:		Must be 0 (!_PAGE_PRESENT)
- *	bit 1:		Type: 0 for swap, 1 for file (_PAGE_FILE)
- *	bits 2-7:	Swap type
- *	bits 8-31:	Swap offset
- *	bits 2-31:	File pgoff
+ *	bits 1-6:	Swap type
+ *	bits 7-31:	Swap offset
  */
-#define __swp_type(x)			(((x).val >> 2) & 0x1f)
-#define __swp_offset(x)			((x).val >> 8)
-#define __swp_entry(type, offset)	((swp_entry_t) { ((type) << 2) | ((offset) << 8) })
+#define __swp_type(x)			(((x).val >> 1) & 0x1f)
+#define __swp_offset(x)			((x).val >> 7)
+#define __swp_entry(type, offset)	((swp_entry_t) { ((type) << 1) | ((offset) << 7) })
 #define __pte_to_swp_entry(_pte)	((swp_entry_t) { (_pte).pte })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val })

-static inline int pte_file(pte_t pte)
-{
-	return pte.pte & _PAGE_FILE;
-}
-
-#define PTE_FILE_MAX_BITS		29
-
-#define pte_to_pgoff(PTE)		((PTE).pte >> 2)
-#define pgoff_to_pte(off)		__pte((off) << 2 | _PAGE_FILE)
-
 /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
 #define PageSkip(page)			(0)
 #define kern_addr_valid(addr)		(1)
diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
index d8bd54fa431e..6e35e71d2aea 100644
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -62,13 +62,6 @@ extern unsigned long zero_page_mask;
 #define	_PAGE_ACCESSED	(1<<2)

 /*
- * _PAGE_FILE is only meaningful if _PAGE_PRESENT is false, while
- * _PAGE_DIRTY is only meaningful if _PAGE_PRESENT is true.
- * So we can overload the bit...
- */
-#define _PAGE_FILE	_PAGE_DIRTY /* set: pagecache, unset = swap */
-
-/*
  * For now, let's say that Valid and Present are the same thing.
  * Alternatively, we could say that it's the "or" of R, W, and X
  * permissions.
@@ -456,57 +449,36 @@ static inline int pte_exec(pte_t pte)
 #define pgtable_cache_init()    do { } while (0)

 /*
- * Swap/file PTE definitions.  If _PAGE_PRESENT is zero, the rest of the
- * PTE is interpreted as swap information.  Depending on the _PAGE_FILE
- * bit, the remaining free bits are eitehr interpreted as a file offset
- * or a swap type/offset tuple.  Rather than have the TLB fill handler
- * test _PAGE_PRESENT, we're going to reserve the permissions bits
- * and set them to all zeros for swap entries, which speeds up the
- * miss handler at the cost of 3 bits of offset.  That trade-off can
- * be revisited if necessary, but Hexagon processor architecture and
- * target applications suggest a lot of TLB misses and not much swap space.
+ * Swap/file PTE definitions.  If _PAGE_PRESENT is zero, the rest of the PTE is
+ * interpreted as swap information.  The remaining free bits are interpreted as
+ * swap type/offset tuple.  Rather than have the TLB fill handler test
+ * _PAGE_PRESENT, we're going to reserve the permissions bits and set them to
+ * all zeros for swap entries, which speeds up the miss handler at the cost of
+ * 3 bits of offset.  That trade-off can be revisited if necessary, but Hexagon
+ * processor architecture and target applications suggest a lot of TLB misses
+ * and not much swap space.
  *
  * Format of swap PTE:
  *	bit	0:	Present (zero)
- *	bit	1:	_PAGE_FILE (zero)
- *	bits	2-6:	swap type (arch independent layer uses 5 bits max)
- *	bits	7-9:	bits 2:0 of offset
- *	bits	10-12:	effectively _PAGE_PROTNONE (all zero)
- *	bits	13-31:	bits 21:3 of swap offset
- *
- * Format of file PTE:
- *	bit	0:	Present (zero)
- *	bit	1:	_PAGE_FILE (zero)
- *	bits	2-9:	bits 7:0 of offset
- *	bits	10-12:	effectively _PAGE_PROTNONE (all zero)
- *	bits	13-31:	bits 26:8 of swap offset
+ *	bits	1-5:	swap type (arch independent layer uses 5 bits max)
+ *	bits	6-9:	bits 3:0 of offset
+ *	bits	10-12:	effectively _PAGE_PROTNONE (all zero)
+ *	bits	13-31:	bits 22:4 of swap offset
  *
  * The split offset makes some of the following macros a little gnarly,
  * but there's plenty of precedent for this sort of thing.
  */
-#define PTE_FILE_MAX_BITS	27

 /* Used for swap PTEs */
-#define __swp_type(swp_pte)		(((swp_pte).val >> 2) & 0x1f)
+#define __swp_type(swp_pte)		(((swp_pte).val >> 1) & 0x1f)

 #define __swp_offset(swp_pte) \
-	((((swp_pte).val >> 7) & 0x7) | (((swp_pte).val >> 10) & 0x003ffff8))
+	((((swp_pte).val >> 6) & 0xf) | (((swp_pte).val >> 9) & 0x7ffff0))

 #define __swp_entry(type, offset) \
 	((swp_entry_t)	{ \
-		((type << 2) | \
-		 ((offset & 0x3ffff8) << 10) | ((offset & 0x7) << 7)) })
-
-/* Used for file PTEs */
-#define pte_file(pte) \
-	((pte_val(pte) & (_PAGE_FILE | _PAGE_PRESENT)) == _PAGE_FILE)
-
-#define pte_to_pgoff(pte) \
-	(((pte_val(pte) >> 2) & 0xff) | ((pte_val(pte) >> 5) & 0x07ffff00))
-
-#define pgoff_to_pte(off) \
-	((pte_t) { ((((off) & 0x7ffff00) << 5) | (((off) & 0xff) << 2)\
-	| _PAGE_FILE) })
+		((type << 1) | \
+		 ((offset & 0x7ffff0) << 9) | ((offset & 0xf) << 6)) })

 /*  Oh boy.  There are a lot of possible arch overrides found in this file.  */
 #include <asm-generic/pgtable.h>
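With the file-PTE format gone, the Hexagon swap encoding gains one bit of
offset (bits 22:4 rather than 21:3) and the type field moves down to bits 1-5.
A standalone round-trip check of that packing in plain C (mirrors the macros
above; not kernel code):

	#include <assert.h>

	static unsigned long pack(unsigned long type, unsigned long off)
	{
		return (type << 1) | ((off & 0xf) << 6) |
		       ((off & 0x7ffff0) << 9);
	}

	static unsigned long unpack_type(unsigned long e)
	{
		return (e >> 1) & 0x1f;
	}

	static unsigned long unpack_off(unsigned long e)
	{
		return ((e >> 6) & 0xf) | ((e >> 9) & 0x7ffff0);
	}

	int main(void)
	{
		unsigned long off = 0x5a5a5a;	/* any 23-bit offset */
		unsigned long e = pack(9, off);

		assert(unpack_type(e) == 9);
		assert(unpack_off(e) == off);
		assert((e & 1) == 0);		/* present bit clear */
		assert(((e >> 10) & 7) == 0);	/* PROTNONE bits clear */
		return 0;
	}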
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 7935115398a6..2f07bb3dda91 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -57,9 +57,6 @@
 #define _PAGE_ED		(__IA64_UL(1) << 52)	/* exception deferral */
 #define _PAGE_PROTNONE		(__IA64_UL(1) << 63)

-/* Valid only for a PTE with the present bit cleared: */
-#define _PAGE_FILE		(1 << 1)	/* see swap & file pte remarks below */
-
 #define _PFN_MASK		_PAGE_PPN_MASK
 /* Mask of bits which may be changed by pte_modify(); the odd bits are there for _PAGE_PROTNONE */
 #define _PAGE_CHG_MASK	(_PAGE_P | _PAGE_PROTNONE | _PAGE_PL_MASK | _PAGE_AR_MASK | _PAGE_ED)
@@ -300,7 +297,6 @@ extern unsigned long VMALLOC_END;
 #define pte_exec(pte)		((pte_val(pte) & _PAGE_AR_RX) != 0)
 #define pte_dirty(pte)		((pte_val(pte) & _PAGE_D) != 0)
 #define pte_young(pte)		((pte_val(pte) & _PAGE_A) != 0)
-#define pte_file(pte)		((pte_val(pte) & _PAGE_FILE) != 0)
 #define pte_special(pte)	0

 /*
@@ -472,27 +468,16 @@ extern void paging_init (void);
  *
  * Format of swap pte:
  *	bit   0   : present bit (must be zero)
- *	bit   1   : _PAGE_FILE (must be zero)
- *	bits  2- 8: swap-type
- *	bits  9-62: swap offset
- *	bit  63   : _PAGE_PROTNONE bit
- *
- * Format of file pte:
- *	bit   0   : present bit (must be zero)
- *	bit   1   : _PAGE_FILE (must be one)
- *	bits  2-62: file_offset/PAGE_SIZE
+ *	bits  1- 7: swap-type
+ *	bits  8-62: swap offset
  *	bit  63   : _PAGE_PROTNONE bit
  */
-#define __swp_type(entry)		(((entry).val >> 2) & 0x7f)
-#define __swp_offset(entry)		(((entry).val << 1) >> 10)
-#define __swp_entry(type,offset)	((swp_entry_t) { ((type) << 2) | ((long) (offset) << 9) })
+#define __swp_type(entry)		(((entry).val >> 1) & 0x7f)
+#define __swp_offset(entry)		(((entry).val << 1) >> 9)
+#define __swp_entry(type,offset)	((swp_entry_t) { ((type) << 1) | ((long) (offset) << 8) })
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val })

-#define PTE_FILE_MAX_BITS		61
-#define pte_to_pgoff(pte)		((pte_val(pte) << 1) >> 3)
-#define pgoff_to_pte(off)		((pte_t) { ((off) << 2) | _PAGE_FILE })
-
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
diff --git a/arch/m32r/include/asm/pgtable-2level.h b/arch/m32r/include/asm/pgtable-2level.h
index 9cdaf7350ef6..8fd8ee70266a 100644
--- a/arch/m32r/include/asm/pgtable-2level.h
+++ b/arch/m32r/include/asm/pgtable-2level.h
@@ -70,9 +70,5 @@ static inline pmd_t *pmd_offset(pgd_t * dir, unsigned long address)
 #define pfn_pte(pfn, prot)	__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
 #define pfn_pmd(pfn, prot)	__pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))

-#define PTE_FILE_MAX_BITS	29
-#define pte_to_pgoff(pte)	(((pte_val(pte) >> 2) & 0x7f) | (((pte_val(pte) >> 10)) << 7))
-#define pgoff_to_pte(off)	((pte_t) { (((off) & 0x7f) << 2) | (((off) >> 7) << 10) | _PAGE_FILE })
-
 #endif /* __KERNEL__ */
 #endif /* _ASM_M32R_PGTABLE_2LEVEL_H */
diff --git a/arch/m32r/include/asm/pgtable.h b/arch/m32r/include/asm/pgtable.h
index 103ce6710f07..050f7a686e3d 100644
--- a/arch/m32r/include/asm/pgtable.h
+++ b/arch/m32r/include/asm/pgtable.h
@@ -80,8 +80,6 @@ extern unsigned long empty_zero_page[1024];
  */

 #define _PAGE_BIT_DIRTY		0	/* software: page changed */
-#define _PAGE_BIT_FILE		0	/* when !present: nonlinear file
-					   mapping */
 #define _PAGE_BIT_PRESENT	1	/* Valid: page is valid */
 #define _PAGE_BIT_GLOBAL	2	/* Global */
 #define _PAGE_BIT_LARGE		3	/* Large */
@@ -93,7 +91,6 @@ extern unsigned long empty_zero_page[1024];
 #define _PAGE_BIT_PROTNONE	9	/* software: if not present */

 #define _PAGE_DIRTY	(1UL << _PAGE_BIT_DIRTY)
-#define _PAGE_FILE	(1UL << _PAGE_BIT_FILE)
 #define _PAGE_PRESENT	(1UL << _PAGE_BIT_PRESENT)
 #define _PAGE_GLOBAL	(1UL << _PAGE_BIT_GLOBAL)
 #define _PAGE_LARGE	(1UL << _PAGE_BIT_LARGE)
@@ -206,14 +203,6 @@ static inline int pte_write(pte_t pte)
 	return pte_val(pte) & _PAGE_WRITE;
 }

-/*
- * The following only works if pte_present() is not true.
- */
-static inline int pte_file(pte_t pte)
-{
-	return pte_val(pte) & _PAGE_FILE;
-}
-
 static inline int pte_special(pte_t pte)
 {
 	return 0;
diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h
index 3c793682e5d9..2500ce04fcc4 100644
--- a/arch/m68k/include/asm/mcf_pgtable.h
+++ b/arch/m68k/include/asm/mcf_pgtable.h
@@ -35,7 +35,6 @@
  * hitting hardware.
  */
 #define CF_PAGE_DIRTY		0x00000001
-#define CF_PAGE_FILE		0x00000200
 #define CF_PAGE_ACCESSED	0x00001000
 
 #define _PAGE_CACHE040	0x020   /* 68040 cache mode, cachable, copyback */
@@ -243,11 +242,6 @@ static inline int pte_young(pte_t pte)
 	return pte_val(pte) & CF_PAGE_ACCESSED;
 }
 
-static inline int pte_file(pte_t pte)
-{
-	return pte_val(pte) & CF_PAGE_FILE;
-}
-
 static inline int pte_special(pte_t pte)
 {
 	return 0;
@@ -391,26 +385,13 @@ static inline void cache_page(void *vaddr)
 	*ptep = pte_mkcache(*ptep);
 }
 
-#define PTE_FILE_MAX_BITS	21
-#define PTE_FILE_SHIFT		11
-
-static inline unsigned long pte_to_pgoff(pte_t pte)
-{
-	return pte_val(pte) >> PTE_FILE_SHIFT;
-}
-
-static inline pte_t pgoff_to_pte(unsigned pgoff)
-{
-	return __pte((pgoff << PTE_FILE_SHIFT) + CF_PAGE_FILE);
-}
-
 /*
  * Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e))
  */
 #define __swp_type(x)		((x).val & 0xFF)
-#define __swp_offset(x)		((x).val >> PTE_FILE_SHIFT)
+#define __swp_offset(x)		((x).val >> 11)
 #define __swp_entry(typ, off)	((swp_entry_t) { (typ) | \
-					 (off << PTE_FILE_SHIFT) })
+					 (off << 11) })
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	(__pte((x).val))
 
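To see why the ColdFire swap macros can absorb the old PTE_FILE_SHIFT as the literal 11, here is a small stand-alone model (our names; the kernel's swp_entry_t is just a word-sized value):

#include <assert.h>
#include <stdint.h>

/* type in bits 0..7, offset from bit 11 up, exactly as the macros above */
static uint32_t swp_entry(uint32_t type, uint32_t off)  { return type | (off << 11); }
static uint32_t swp_type(uint32_t val)                  { return val & 0xFF; }
static uint32_t swp_offset(uint32_t val)                { return val >> 11; }

int main(void)
{
	uint32_t e = swp_entry(0x2a, 0x12345);

	assert(swp_type(e) == 0x2a);
	assert(swp_offset(e) == 0x12345);
	return 0;
}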
diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h
index e0fdd4d08075..0085aab80e5a 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -28,7 +28,6 @@
 #define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_NOCACHE)
 
 #define _PAGE_PROTNONE	0x004
-#define _PAGE_FILE	0x008	/* pagecache or swap? */
 
 #ifndef __ASSEMBLY__
 
@@ -168,7 +167,6 @@ static inline void pgd_set(pgd_t *pgdp, pmd_t *pmdp)
 static inline int pte_write(pte_t pte)		{ return !(pte_val(pte) & _PAGE_RONLY); }
 static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
 static inline int pte_special(pte_t pte)	{ return 0; }
 
 static inline pte_t pte_wrprotect(pte_t pte)	{ pte_val(pte) |= _PAGE_RONLY; return pte; }
@@ -266,19 +264,6 @@ static inline void cache_page(void *vaddr)
 	}
 }
 
-#define PTE_FILE_MAX_BITS	28
-
-static inline unsigned long pte_to_pgoff(pte_t pte)
-{
-	return pte.pte >> 4;
-}
-
-static inline pte_t pgoff_to_pte(unsigned off)
-{
-	pte_t pte = { (off << 4) + _PAGE_FILE };
-	return pte;
-}
-
 /* Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e)) */
 #define __swp_type(x)		(((x).val >> 4) & 0xff)
 #define __swp_offset(x)		((x).val >> 12)
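The Motorola decoders above keep the swap type in bits 4..11 and the offset from bit 12 up. A matching encoder — our inference, since the __swp_entry() side falls outside this hunk — would look like:

#include <assert.h>
#include <stdint.h>

static uint32_t swp_type(uint32_t val)   { return (val >> 4) & 0xff; }
static uint32_t swp_offset(uint32_t val) { return val >> 12; }

/* hypothetical encoder matching the two decoders shown in the hunk */
static uint32_t swp_entry(uint32_t type, uint32_t off)
{
	return ((type & 0xff) << 4) | (off << 12);
}

int main(void)
{
	uint32_t e = swp_entry(0x5c, 0xabcde);

	assert(swp_type(e) == 0x5c && swp_offset(e) == 0xabcde);
	return 0;
}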
diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h
index 11859b86b1f9..ac7d87a02335 100644
--- a/arch/m68k/include/asm/pgtable_no.h
+++ b/arch/m68k/include/asm/pgtable_no.h
@@ -37,8 +37,6 @@ extern void paging_init(void);
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-static inline int pte_file(pte_t pte) { return 0; }
-
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h
index f55aa04161e8..48657f9fdece 100644
--- a/arch/m68k/include/asm/sun3_pgtable.h
+++ b/arch/m68k/include/asm/sun3_pgtable.h
@@ -38,8 +38,6 @@
 #define _PAGE_PRESENT	(SUN3_PAGE_VALID)
 #define _PAGE_ACCESSED	(SUN3_PAGE_ACCESSED)
 
-#define PTE_FILE_MAX_BITS 28
-
 /* Compound page protection values. */
 //todo: work out which ones *should* have SUN3_PAGE_NOCACHE and fix...
 // is it just PAGE_KERNEL and PAGE_SHARED?
@@ -168,7 +166,6 @@ static inline void pgd_clear (pgd_t *pgdp) {}
 static inline int pte_write(pte_t pte)		{ return pte_val(pte) & SUN3_PAGE_WRITEABLE; }
 static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & SUN3_PAGE_MODIFIED; }
 static inline int pte_young(pte_t pte)		{ return pte_val(pte) & SUN3_PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte)		{ return pte_val(pte) & SUN3_PAGE_ACCESSED; }
 static inline int pte_special(pte_t pte)	{ return 0; }
 
 static inline pte_t pte_wrprotect(pte_t pte)	{ pte_val(pte) &= ~SUN3_PAGE_WRITEABLE; return pte; }
@@ -202,18 +199,6 @@ static inline pmd_t *pmd_offset (pgd_t *pgd, unsigned long address)
 	return (pmd_t *) pgd;
 }
 
-static inline unsigned long pte_to_pgoff(pte_t pte)
-{
-	return pte.pte & SUN3_PAGE_PGNUM_MASK;
-}
-
-static inline pte_t pgoff_to_pte(unsigned off)
-{
-	pte_t pte = { off + SUN3_PAGE_ACCESSED };
-	return pte;
-}
-
-
 /* Find an entry in the third-level pagetable. */
 #define pte_index(address) ((address >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
 #define pte_offset_kernel(pmd, address) ((pte_t *) __pmd_page(*pmd) + pte_index(address))
diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h
index 0d9dc5487296..d0604c0a8702 100644
--- a/arch/metag/include/asm/pgtable.h
+++ b/arch/metag/include/asm/pgtable.h
@@ -47,7 +47,6 @@
  */
 #define _PAGE_ACCESSED		_PAGE_ALWAYS_ZERO_1
 #define _PAGE_DIRTY		_PAGE_ALWAYS_ZERO_2
-#define _PAGE_FILE		_PAGE_ALWAYS_ZERO_3
 
 /* Pages owned, and protected by, the kernel. */
 #define _PAGE_KERNEL		_PAGE_PRIV
@@ -219,7 +218,6 @@ extern unsigned long empty_zero_page;
 static inline int pte_write(pte_t pte)   { return pte_val(pte) & _PAGE_WRITE; }
 static inline int pte_dirty(pte_t pte)   { return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte)   { return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte)    { return pte_val(pte) & _PAGE_FILE; }
 static inline int pte_special(pte_t pte) { return 0; }
 
 static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= (~_PAGE_WRITE); return pte; }
@@ -327,10 +325,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-#define PTE_FILE_MAX_BITS	22
-#define pte_to_pgoff(x)		(pte_val(x) >> 10)
-#define pgoff_to_pte(x)		__pte(((x) << 10) | _PAGE_FILE)
-
 #define kern_addr_valid(addr)	(1)
 
 /*
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index df19d0c47be8..91b9b46fbb5d 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -40,10 +40,6 @@ extern int mem_init_done;
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-#ifndef __ASSEMBLY__
-static inline int pte_file(pte_t pte) { return 0; }
-#endif /* __ASSEMBLY__ */
-
 #define ZERO_PAGE(vaddr)	({ BUG(); NULL; })
 
 #define swapper_pg_dir ((pgd_t *) NULL)
@@ -207,7 +203,6 @@ static inline pte_t pte_mkspecial(pte_t pte)	{ return pte; }
 
 /* Definitions for MicroBlaze. */
 #define	_PAGE_GUARDED	0x001	/* G: page is guarded from prefetch */
-#define _PAGE_FILE	0x001	/* when !present: nonlinear file mapping */
 #define _PAGE_PRESENT	0x002	/* software: PTE contains a translation */
 #define	_PAGE_NO_CACHE	0x004	/* I: caching is inhibited */
 #define	_PAGE_WRITETHRU	0x008	/* W: caching is write-through */
@@ -337,7 +332,6 @@ static inline int pte_write(pte_t pte)  { return pte_val(pte) & _PAGE_RW; }
 static inline int pte_exec(pte_t pte)  { return pte_val(pte) & _PAGE_EXEC; }
 static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte)  { return pte_val(pte) & _PAGE_FILE; }
 
 static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; }
 static inline void pte_cache(pte_t pte)   { pte_val(pte) &= ~_PAGE_NO_CACHE; }
@@ -499,11 +493,6 @@ static inline pmd_t *pmd_offset(pgd_t *dir, unsigned long address)
 
 #define pte_unmap(pte)		kunmap_atomic(pte)
 
-/* Encode and decode a nonlinear file mapping entry */
-#define PTE_FILE_MAX_BITS	29
-#define pte_to_pgoff(pte)	(pte_val(pte) >> 3)
-#define pgoff_to_pte(off)	((pte_t) { ((off) << 3) | _PAGE_FILE })
-
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
 /*
diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h
index 68984b612f9d..16aa9f23e17b 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -161,22 +161,6 @@ pfn_pte(unsigned long pfn, pgprot_t prot)
 #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-/*
- * Encode and decode a nonlinear file mapping entry
- */
-#define pte_to_pgoff(_pte)	((((_pte).pte >> 1 ) & 0x07) | \
-				 (((_pte).pte >> 2 ) & 0x38) | \
-				 (((_pte).pte >> 10) <<  6 ))
-
-#define pgoff_to_pte(off)	((pte_t) { (((off) & 0x07) << 1 ) | \
-					   (((off) & 0x38) << 2 ) | \
-					   (((off) >>  6 ) << 10) | \
-					   _PAGE_FILE })
-
-/*
- * Bits 0, 4, 8, and 9 are taken, split up 28 bits of offset into this range:
- */
-#define PTE_FILE_MAX_BITS	28
 #else
 
 #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
@@ -188,13 +172,6 @@ pfn_pte(unsigned long pfn, pgprot_t prot)
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { (pte).pte_high })
 #define __swp_entry_to_pte(x)	((pte_t) { 0, (x).val })
 
-/*
- * Bits 0 and 1 of pte_high are taken, use the rest for the page offset...
- */
-#define pte_to_pgoff(_pte)	((_pte).pte_high >> 2)
-#define pgoff_to_pte(off)	((pte_t) { _PAGE_FILE, (off) << 2 })
-
-#define PTE_FILE_MAX_BITS	30
 #else
 /*
  * Constraints:
@@ -209,19 +186,6 @@ pfn_pte(unsigned long pfn, pgprot_t prot)
 #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-/*
- * Encode and decode a nonlinear file mapping entry
- */
-#define pte_to_pgoff(_pte)	((((_pte).pte >> 1) & 0x7) | \
-				 (((_pte).pte >> 2) & 0x8) | \
-				 (((_pte).pte >> 8) <<  4))
-
-#define pgoff_to_pte(off)	((pte_t) { (((off) & 0x7) << 1) | \
-					   (((off) & 0x8) << 2) | \
-					   (((off) >>  4) << 8) | \
-					   _PAGE_FILE })
-
-#define PTE_FILE_MAX_BITS	28
 #endif /* defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) */
 
 #endif /* defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) */
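The R3000-class encoding deleted above had to thread 28 offset bits around the in-use pte bits 0, 4, 8 and 9. A user-space sketch (our harness, with _PAGE_FILE modelled as bit 4 per the pgtable-bits.h hunk below) shows the split is lossless:

#include <assert.h>
#include <stdint.h>

#define MIPS_PAGE_FILE	(1u << 4)	/* _PAGE_FILE aliased _PAGE_MODIFIED */

static uint32_t pgoff_to_pte(uint32_t off)
{
	/* offset bits 0..2 -> pte 1..3, bits 3..5 -> pte 5..7, rest from bit 10 */
	return ((off & 0x07) << 1) | ((off & 0x38) << 2) |
	       ((off >> 6) << 10) | MIPS_PAGE_FILE;
}

static uint32_t pte_to_pgoff(uint32_t pte)
{
	return ((pte >> 1) & 0x07) | ((pte >> 2) & 0x38) | ((pte >> 10) << 6);
}

int main(void)
{
	uint32_t off;

	for (off = 0; off < (1u << 28); off += 3331)	/* PTE_FILE_MAX_BITS == 28 */
		assert(pte_to_pgoff(pgoff_to_pte(off)) == off);
	return 0;
}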
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index e1c49a96807d..1659bb91ae21 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -291,13 +291,4 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-/*
- * Bits 0, 4, 6, and 7 are taken. Let's leave bits 1, 2, 3, and 5 alone to
- * make things easier, and only use the upper 56 bits for the page offset...
- */
-#define PTE_FILE_MAX_BITS	56
-
-#define pte_to_pgoff(_pte)	((_pte).pte >> 8)
-#define pgoff_to_pte(off)	((pte_t) { ((off) << 8) | _PAGE_FILE })
-
 #endif /* _ASM_PGTABLE_64_H */
diff --git a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h
index ca11f14f40a3..fc807aa5ec8d 100644
--- a/arch/mips/include/asm/pgtable-bits.h
+++ b/arch/mips/include/asm/pgtable-bits.h
@@ -48,8 +48,6 @@
 
 /*
  * The following bits are implemented in software
- *
- * _PAGE_FILE semantics: set:pagecache unset:swap
  */
 #define _PAGE_PRESENT_SHIFT	(_CACHE_SHIFT + 3)
 #define _PAGE_PRESENT		(1 << _PAGE_PRESENT_SHIFT)
@@ -64,7 +62,6 @@
 
 #define _PAGE_SILENT_READ	_PAGE_VALID
 #define _PAGE_SILENT_WRITE	_PAGE_DIRTY
-#define _PAGE_FILE		_PAGE_MODIFIED
 
 #define _PFN_SHIFT		(PAGE_SHIFT - 12 + _CACHE_SHIFT + 3)
 
@@ -72,8 +69,6 @@
 
 /*
  * The following are implemented by software
- *
- * _PAGE_FILE semantics: set:pagecache unset:swap
  */
 #define _PAGE_PRESENT_SHIFT	0
 #define _PAGE_PRESENT		(1 << _PAGE_PRESENT_SHIFT)
@@ -85,8 +80,6 @@
 #define _PAGE_ACCESSED		(1 << _PAGE_ACCESSED_SHIFT)
 #define _PAGE_MODIFIED_SHIFT	4
 #define _PAGE_MODIFIED		(1 << _PAGE_MODIFIED_SHIFT)
-#define _PAGE_FILE_SHIFT	4
-#define _PAGE_FILE		(1 << _PAGE_FILE_SHIFT)
 
 /*
  * And these are the hardware TLB bits
@@ -116,7 +109,6 @@
  * The following bits are implemented in software
  *
  * _PAGE_READ / _PAGE_READ_SHIFT should be unused if cpu_has_rixi.
- * _PAGE_FILE semantics: set:pagecache unset:swap
  */
 #define _PAGE_PRESENT_SHIFT	(0)
 #define _PAGE_PRESENT		(1 << _PAGE_PRESENT_SHIFT)
@@ -128,7 +120,6 @@
 #define _PAGE_ACCESSED		(1 << _PAGE_ACCESSED_SHIFT)
 #define _PAGE_MODIFIED_SHIFT	(_PAGE_ACCESSED_SHIFT + 1)
 #define _PAGE_MODIFIED		(1 << _PAGE_MODIFIED_SHIFT)
-#define _PAGE_FILE		(_PAGE_MODIFIED)
 
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 /* huge tlb page */
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 62a6ba383d4f..583ff4215479 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -231,7 +231,6 @@ extern pgd_t swapper_pg_dir[];
 static inline int pte_write(pte_t pte)	{ return pte.pte_low & _PAGE_WRITE; }
 static inline int pte_dirty(pte_t pte)	{ return pte.pte_low & _PAGE_MODIFIED; }
 static inline int pte_young(pte_t pte)	{ return pte.pte_low & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte)	{ return pte.pte_low & _PAGE_FILE; }
 
 static inline pte_t pte_wrprotect(pte_t pte)
 {
@@ -287,7 +286,6 @@ static inline pte_t pte_mkyoung(pte_t pte)
 static inline int pte_write(pte_t pte)	{ return pte_val(pte) & _PAGE_WRITE; }
 static inline int pte_dirty(pte_t pte)	{ return pte_val(pte) & _PAGE_MODIFIED; }
 static inline int pte_young(pte_t pte)	{ return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte)	{ return pte_val(pte) & _PAGE_FILE; }
 
 static inline pte_t pte_wrprotect(pte_t pte)
 {
diff --git a/arch/mn10300/include/asm/pgtable.h b/arch/mn10300/include/asm/pgtable.h
index 2ddaa67e7983..629181ae111e 100644
--- a/arch/mn10300/include/asm/pgtable.h
+++ b/arch/mn10300/include/asm/pgtable.h
@@ -134,7 +134,6 @@ extern pte_t kernel_vmalloc_ptes[(VMALLOC_END - VMALLOC_START) / PAGE_SIZE];
 #define _PAGE_NX		0	/* no-execute bit */
 
 /* If _PAGE_VALID is clear, we use these: */
-#define _PAGE_FILE		xPTEL2_C	/* set:pagecache unset:swap */
 #define _PAGE_PROTNONE		0x000	/* If not present */
 
 #define __PAGE_PROT_UWAUX	0x010
@@ -241,11 +240,6 @@ static inline int pte_young(pte_t pte)	{ return pte_val(pte) & _PAGE_ACCESSED; }
 static inline int pte_write(pte_t pte)	{ return pte_val(pte) & __PAGE_PROT_WRITE; }
 static inline int pte_special(pte_t pte){ return 0; }
 
-/*
- * The following only works if pte_present() is not true.
- */
-static inline int pte_file(pte_t pte)	{ return pte_val(pte) & _PAGE_FILE; }
-
 static inline pte_t pte_rdprotect(pte_t pte)
 {
 	pte_val(pte) &= ~(__PAGE_PROT_USER|__PAGE_PROT_UWAUX); return pte;
@@ -338,16 +332,11 @@ static inline int pte_exec_kernel(pte_t pte)
 	return 1;
 }
 
-#define PTE_FILE_MAX_BITS	30
-
-#define pte_to_pgoff(pte)	(pte_val(pte) >> 2)
-#define pgoff_to_pte(off)	__pte((off) << 2 | _PAGE_FILE)
-
 /* Encode and de-code a swap entry */
-#define __swp_type(x)			(((x).val >> 2) & 0x3f)
-#define __swp_offset(x)			((x).val >> 8)
+#define __swp_type(x)			(((x).val >> 1) & 0x3f)
+#define __swp_offset(x)			((x).val >> 7)
 #define __swp_entry(type, offset) \
-	((swp_entry_t) { ((type) << 2) | ((offset) << 8) })
+	((swp_entry_t) { ((type) << 1) | ((offset) << 7) })
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	__pte((x).val)
 
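With _PAGE_FILE gone, the mn10300 swap layout compacts by one bit: the type drops from bits 2..7 to 1..6 and the offset from bit 8 to bit 7. A stand-alone model of the new macros (our harness):

#include <assert.h>
#include <stdint.h>

static uint32_t swp_entry(uint32_t type, uint32_t off) { return (type << 1) | (off << 7); }
static uint32_t swp_type(uint32_t val)                 { return (val >> 1) & 0x3f; }
static uint32_t swp_offset(uint32_t val)               { return val >> 7; }

int main(void)
{
	uint32_t e = swp_entry(0x15, 0x98765);

	assert(swp_type(e) == 0x15);
	assert(swp_offset(e) == 0x98765);
	return 0;
}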
diff --git a/arch/nios2/include/asm/pgtable-bits.h b/arch/nios2/include/asm/pgtable-bits.h
index ce9e7069aa96..bfddff383e89 100644
--- a/arch/nios2/include/asm/pgtable-bits.h
+++ b/arch/nios2/include/asm/pgtable-bits.h
@@ -30,6 +30,5 @@
 #define _PAGE_PRESENT	(1<<25)	/* PTE contains a translation */
 #define _PAGE_ACCESSED	(1<<26)	/* page referenced */
 #define _PAGE_DIRTY	(1<<27)	/* dirty page */
-#define _PAGE_FILE	(1<<28)	/* PTE used for file mapping or swap */
 
 #endif /* _ASM_NIOS2_PGTABLE_BITS_H */
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index ccbaffd47671..7b292e3a3138 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -112,8 +112,6 @@ static inline int pte_dirty(pte_t pte)	\
 	{ return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte)	\
 	{ return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte)	\
-	{ return pte_val(pte) & _PAGE_FILE; }
 static inline int pte_special(pte_t pte)	{ return 0; }
 
 #define pgprot_noncached pgprot_noncached
@@ -272,8 +270,7 @@ static inline void pte_clear(struct mm_struct *mm,
 		__FILE__, __LINE__, pgd_val(e))
 
 /*
- * Encode and decode a swap entry (must be !pte_none(pte) && !pte_present(pte)
- * && !pte_file(pte)):
+ * Encode and decode a swap entry (must be !pte_none(pte) && !pte_present(pte):
  *
  * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 ...  1  0
  *  0  0  0  0 type.  0  0  0  0  0  0 offset.........
@@ -290,11 +287,6 @@ static inline void pte_clear(struct mm_struct *mm,
 #define __swp_entry_to_pte(swp)	((pte_t) { (swp).val })
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 
-/* Encode and decode a nonlinear file mapping entry */
-#define PTE_FILE_MAX_BITS	25
-#define pte_to_pgoff(pte)	(pte_val(pte) & 0x1ffffff)
-#define pgoff_to_pte(off)	__pte(((off) & 0x1ffffff) | _PAGE_FILE)
-
 #define kern_addr_valid(addr)		(1)
 
 #include <asm-generic/pgtable.h>
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index 37bf6a3ef8f4..18994ccb1185 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -125,7 +125,6 @@ extern void paging_init(void);
 #define _PAGE_CC       0x001 /* software: pte contains a translation */
 #define _PAGE_CI       0x002 /* cache inhibit          */
 #define _PAGE_WBC      0x004 /* write back cache       */
-#define _PAGE_FILE     0x004 /* set: pagecache, unset: swap (when !PRESENT) */
 #define _PAGE_WOM      0x008 /* weakly ordered memory  */
 
 #define _PAGE_A        0x010 /* accessed               */
@@ -240,7 +239,6 @@ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; }
 static inline int pte_exec(pte_t pte)  { return pte_val(pte) & _PAGE_EXEC; }
 static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte)  { return pte_val(pte) & _PAGE_FILE; }
 static inline int pte_special(pte_t pte) { return 0; }
 static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
 
@@ -438,12 +436,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-/* Encode and decode a nonlinear file mapping entry */
-
-#define PTE_FILE_MAX_BITS               26
-#define pte_to_pgoff(x)	(pte_val(x) >> 6)
-#define pgoff_to_pte(x)	__pte(((x) << 6) | _PAGE_FILE)
-
 #define kern_addr_valid(addr)           (1)
 
 #include <asm-generic/pgtable.h>
diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S
index 1d3c9c28ac25..f14793306b03 100644
--- a/arch/openrisc/kernel/head.S
+++ b/arch/openrisc/kernel/head.S
@@ -754,11 +754,6 @@ _dc_enable:
 
 /* ===============================================[ page table masks ]=== */
 
-/* bit 4 is used in hardware as write back cache bit. we never use this bit
- * explicitly, so we can reuse it as _PAGE_FILE bit and mask it out when
- * writing into hardware pte's
- */
-
 #define DTLB_UP_CONVERT_MASK  0x3fa
 #define ITLB_UP_CONVERT_MASK  0x3a
 
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 22b89d1edba7..1d49a4a7749b 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -146,7 +146,6 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long);
 #define _PAGE_GATEWAY_BIT  28   /* (0x008) privilege promotion allowed */
 #define _PAGE_DMB_BIT      27   /* (0x010) Data Memory Break enable (B bit) */
 #define _PAGE_DIRTY_BIT    26   /* (0x020) Page Dirty (D bit) */
-#define _PAGE_FILE_BIT	_PAGE_DIRTY_BIT	/* overload this bit */
 #define _PAGE_REFTRAP_BIT  25   /* (0x040) Page Ref. Trap enable (T bit) */
 #define _PAGE_NO_CACHE_BIT 24   /* (0x080) Uncached Page (U bit) */
 #define _PAGE_ACCESSED_BIT 23   /* (0x100) Software: Page Accessed */
@@ -167,13 +166,6 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long);
 /* PFN_PTE_SHIFT defines the shift of a PTE value to access the PFN field */
 #define PFN_PTE_SHIFT	12
 
-
-/* this is how many bits may be used by the file functions */
-#define PTE_FILE_MAX_BITS	(BITS_PER_LONG - PTE_SHIFT)
-
-#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_SHIFT)
-#define pgoff_to_pte(off) ((pte_t) { ((off) << PTE_SHIFT) | _PAGE_FILE })
-
 #define _PAGE_READ     (1 << xlate_pabit(_PAGE_READ_BIT))
 #define _PAGE_WRITE    (1 << xlate_pabit(_PAGE_WRITE_BIT))
 #define _PAGE_RW       (_PAGE_READ | _PAGE_WRITE)
@@ -186,7 +178,6 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long);
 #define _PAGE_ACCESSED (1 << xlate_pabit(_PAGE_ACCESSED_BIT))
 #define _PAGE_PRESENT  (1 << xlate_pabit(_PAGE_PRESENT_BIT))
 #define _PAGE_USER     (1 << xlate_pabit(_PAGE_USER_BIT))
-#define _PAGE_FILE     (1 << xlate_pabit(_PAGE_FILE_BIT))
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED)
 #define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
@@ -344,7 +335,6 @@ static inline void pgd_clear(pgd_t * pgdp)	{ }
 static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
 static inline int pte_write(pte_t pte)		{ return pte_val(pte) & _PAGE_WRITE; }
-static inline int pte_file(pte_t pte)		{ return pte_val(pte) & _PAGE_FILE; }
 static inline int pte_special(pte_t pte)	{ return 0; }
 
 static inline pte_t pte_mkclean(pte_t pte)	{ pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 5e102422c9ab..ffb1d8ce97ae 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -249,10 +249,10 @@ static inline int is_module_addr(void *addr)
 				 _PAGE_YOUNG)
 
 /*
- * handle_pte_fault uses pte_present, pte_none and pte_file to find out the
- * pte type WITHOUT holding the page table lock. The _PAGE_PRESENT bit
- * is used to distinguish present from not-present ptes. It is changed only
- * with the page table lock held.
+ * handle_pte_fault uses pte_present and pte_none to find out the pte type
+ * WITHOUT holding the page table lock. The _PAGE_PRESENT bit is used to
+ * distinguish present from not-present ptes. It is changed only with the page
+ * table lock held.
 *
 * The following table gives the different possible bit combinations for
 * the pte hardware and software bits in the last 12 bits of a pte:
@@ -279,7 +279,6 @@ static inline int is_module_addr(void *addr)
 *
 * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001
 * pte_none    is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400
- * pte_file    is true for the bit pattern .11...xxxxx0, (pte & 0x601) == 0x600
 * pte_swap    is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402
 */
 
@@ -671,13 +670,6 @@ static inline int pte_swap(pte_t pte)
 		== (_PAGE_INVALID | _PAGE_TYPE);
 }
 
-static inline int pte_file(pte_t pte)
-{
-	/* Bit pattern: (pte & 0x601) == 0x600 */
-	return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | _PAGE_PRESENT))
-		== (_PAGE_INVALID | _PAGE_PROTECT);
-}
-
 static inline int pte_special(pte_t pte)
 {
 	return (pte_val(pte) & _PAGE_SPECIAL);
@@ -1756,19 +1748,6 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-#ifndef CONFIG_64BIT
-# define PTE_FILE_MAX_BITS	26
-#else /* CONFIG_64BIT */
-# define PTE_FILE_MAX_BITS	59
-#endif /* CONFIG_64BIT */
-
-#define pte_to_pgoff(__pte) \
-	((((__pte).pte >> 12) << 7) + (((__pte).pte >> 1) & 0x7f))
-
-#define pgoff_to_pte(__off) \
-	((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \
-		   | _PAGE_INVALID | _PAGE_PROTECT })
-
 #endif /* !__ASSEMBLY__ */
 
 #define kern_addr_valid(addr)   (1)
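The s390 comment block above spells out each type check as a literal mask/value pair, so the removed pte_file() predicate and its surviving neighbours can be checked against the table directly. A user-space rendering of those bit patterns (our helper names):

#include <assert.h>
#include <stdint.h>

/* literal mask/value pairs from the table in the comment above */
static int pte_present(uint64_t pte) { return (pte & 0x001) == 0x001; }
static int pte_none(uint64_t pte)    { return (pte & 0x603) == 0x400; }
static int pte_swap(uint64_t pte)    { return (pte & 0x603) == 0x402; }
static int pte_file(uint64_t pte)    { return (pte & 0x601) == 0x600; } /* now gone */

int main(void)
{
	assert(pte_none(0x400) && !pte_swap(0x400) && !pte_file(0x400));
	assert(pte_swap(0x402) && !pte_none(0x402));
	assert(pte_file(0x600));	/* the pattern the kernel no longer needs */
	assert(pte_present(0x001));
	return 0;
}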
diff --git a/arch/score/include/asm/pgtable-bits.h b/arch/score/include/asm/pgtable-bits.h
index 7d65a96a82e5..0e5c6f466520 100644
--- a/arch/score/include/asm/pgtable-bits.h
+++ b/arch/score/include/asm/pgtable-bits.h
@@ -6,7 +6,6 @@
 #define _PAGE_WRITE		(1<<7)	/* implemented in software */
 #define _PAGE_PRESENT		(1<<9)	/* implemented in software */
 #define _PAGE_MODIFIED		(1<<10)	/* implemented in software */
-#define _PAGE_FILE		(1<<10)
 
 #define _PAGE_GLOBAL		(1<<0)
 #define _PAGE_VALID		(1<<1)
diff --git a/arch/score/include/asm/pgtable.h b/arch/score/include/asm/pgtable.h
index db96ad9afc03..5170ffdea643 100644
--- a/arch/score/include/asm/pgtable.h
+++ b/arch/score/include/asm/pgtable.h
@@ -90,15 +90,6 @@ static inline void pmd_clear(pmd_t *pmdp)
 	((pte_t *)page_address(pmd_page(*(dir))) + __pte_offset(address))
 #define pte_unmap(pte) ((void)(pte))
 
-/*
- * Bits 9(_PAGE_PRESENT) and 10(_PAGE_FILE)are taken,
- * split up 30 bits of offset into this range:
- */
-#define PTE_FILE_MAX_BITS	30
-#define pte_to_pgoff(_pte)		\
-	(((_pte).pte & 0x1ff) | (((_pte).pte >> 11) << 9))
-#define pgoff_to_pte(off)		\
-	((pte_t) {((off) & 0x1ff) | (((off) >> 9) << 11) | _PAGE_FILE})
 #define __pte_to_swp_entry(pte)	\
 	((swp_entry_t) { pte_val(pte)})
 #define __swp_entry_to_pte(x)	((pte_t) {(x).val})
@@ -169,8 +160,8 @@ static inline pgprot_t pgprot_noncached(pgprot_t _prot)
 }
 
 #define __swp_type(x)		((x).val & 0x1f)
-#define __swp_offset(x)		((x).val >> 11)
-#define __swp_entry(type, offset) ((swp_entry_t){(type) | ((offset) << 11)})
+#define __swp_offset(x)		((x).val >> 10)
+#define __swp_entry(type, offset) ((swp_entry_t){(type) | ((offset) << 10)})
 
 extern unsigned long empty_zero_page;
 extern unsigned long zero_page_mask;
@@ -198,11 +189,6 @@ static inline int pte_young(pte_t pte)
 	return pte_val(pte) & _PAGE_ACCESSED;
 }
 
-static inline int pte_file(pte_t pte)
-{
-	return pte_val(pte) & _PAGE_FILE;
-}
-
 #define pte_special(pte)	(0)
 
 static inline pte_t pte_wrprotect(pte_t pte)
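After the score change the swap type keeps bits 0..4 and the offset starts at bit 10 instead of 11, leaving bits 5..9 — including _PAGE_PRESENT at bit 9 — untouched. Modelled stand-alone (our names):

#include <assert.h>
#include <stdint.h>

static uint32_t swp_entry(uint32_t type, uint32_t off) { return type | (off << 10); }
static uint32_t swp_type(uint32_t val)                 { return val & 0x1f; }
static uint32_t swp_offset(uint32_t val)               { return val >> 10; }

int main(void)
{
	uint32_t e = swp_entry(0x11, 0x3fffff);

	assert(swp_type(e) == 0x11);
	assert(swp_offset(e) == 0x3fffff);
	assert((e & (1u << 9)) == 0);	/* never collides with _PAGE_PRESENT */
	return 0;
}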
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 0f09f5285d5e..eb4ef274ae9b 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -1,7 +1,7 @@
 config SUPERH
 	def_bool y
 	select ARCH_MIGHT_HAVE_PC_PARPORT
-	select EXPERT
+	select HAVE_PATA_PLATFORM
 	select CLKDEV_LOOKUP
 	select HAVE_IDE if HAS_IOPORT_MAP
 	select HAVE_MEMBLOCK
diff --git a/arch/sh/boards/mach-se/7343/irq.c b/arch/sh/boards/mach-se/7343/irq.c
index 7646bf0486c2..1087dba9b015 100644
--- a/arch/sh/boards/mach-se/7343/irq.c
+++ b/arch/sh/boards/mach-se/7343/irq.c
@@ -14,9 +14,6 @@
 #define DRV_NAME "SE7343-FPGA"
 #define pr_fmt(fmt) DRV_NAME ": " fmt
 
-#define irq_reg_readl	ioread16
-#define irq_reg_writel	iowrite16
-
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
diff --git a/arch/sh/boards/mach-se/7722/irq.c b/arch/sh/boards/mach-se/7722/irq.c
index f5e2af1bf040..00e699232621 100644
--- a/arch/sh/boards/mach-se/7722/irq.c
+++ b/arch/sh/boards/mach-se/7722/irq.c
@@ -11,9 +11,6 @@
 #define DRV_NAME "SE7722-FPGA"
 #define pr_fmt(fmt) DRV_NAME ": " fmt
 
-#define irq_reg_readl	ioread16
-#define irq_reg_writel	iowrite16
-
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/interrupt.h>
diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h
index 0bce3d81569e..c646e563abce 100644
--- a/arch/sh/include/asm/pgtable_32.h
+++ b/arch/sh/include/asm/pgtable_32.h
@@ -26,8 +26,6 @@
  * and timing control which (together with bit 0) are moved into the
  * old-style PTEA on the parts that support it.
  *
- * XXX: Leave the _PAGE_FILE and _PAGE_WT overhaul for a rainy day.
- *
  * SH-X2 MMUs and extended PTEs
  *
  * SH-X2 supports an extended mode TLB with split data arrays due to the
@@ -51,7 +49,6 @@
 #define _PAGE_PRESENT	0x100		/* V-bit   : page is valid */
 #define _PAGE_PROTNONE	0x200		/* software: if not present  */
 #define _PAGE_ACCESSED	0x400		/* software: page referenced */
-#define _PAGE_FILE	_PAGE_WT	/* software: pagecache or swap? */
 #define _PAGE_SPECIAL	0x800		/* software: special page */
 
 #define _PAGE_SZ_MASK	(_PAGE_SZ0 | _PAGE_SZ1)
@@ -105,14 +102,13 @@ static inline unsigned long copy_ptea_attributes(unsigned long x)
 /* Mask which drops unused bits from the PTEL value */
 #if defined(CONFIG_CPU_SH3)
 #define _PAGE_CLEAR_FLAGS	(_PAGE_PROTNONE | _PAGE_ACCESSED| \
-				 _PAGE_FILE | _PAGE_SZ1 | \
-				 _PAGE_HW_SHARED)
+				 _PAGE_SZ1 | _PAGE_HW_SHARED)
 #elif defined(CONFIG_X2TLB)
 /* Get rid of the legacy PR/SZ bits when using extended mode */
 #define _PAGE_CLEAR_FLAGS	(_PAGE_PROTNONE | _PAGE_ACCESSED | \
-				 _PAGE_FILE | _PAGE_PR_MASK | _PAGE_SZ_MASK)
+				 _PAGE_PR_MASK | _PAGE_SZ_MASK)
 #else
-#define _PAGE_CLEAR_FLAGS	(_PAGE_PROTNONE | _PAGE_ACCESSED | _PAGE_FILE)
+#define _PAGE_CLEAR_FLAGS	(_PAGE_PROTNONE | _PAGE_ACCESSED)
 #endif
 
 #define _PAGE_FLAGS_HARDWARE_MASK	(phys_addr_mask() & ~(_PAGE_CLEAR_FLAGS))
@@ -343,7 +339,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
 #define pte_not_present(pte)	(!((pte).pte_low & _PAGE_PRESENT))
 #define pte_dirty(pte)		((pte).pte_low & _PAGE_DIRTY)
 #define pte_young(pte)		((pte).pte_low & _PAGE_ACCESSED)
-#define pte_file(pte)		((pte).pte_low & _PAGE_FILE)
 #define pte_special(pte)	((pte).pte_low & _PAGE_SPECIAL)
 
 #ifdef CONFIG_X2TLB
@@ -445,7 +440,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  * Encode and de-code a swap entry
  *
  * Constraints:
- *	_PAGE_FILE at bit 0
  *	_PAGE_PRESENT at bit 8
  *	_PAGE_PROTNONE at bit 9
  *
@@ -453,9 +447,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  * swap offset into bits 10:30. For the 64-bit PTE case, we keep the
  * preserved bits in the low 32-bits and use the upper 32 as the swap
  * offset (along with a 5-bit type), following the same approach as x86
- * PAE. This keeps the logic quite simple, and allows for a full 32
- * PTE_FILE_MAX_BITS, as opposed to the 29-bits we're constrained with
- * in the pte_low case.
+ * PAE. This keeps the logic quite simple.
  *
  * As is evident by the Alpha code, if we ever get a 64-bit unsigned
 * long (swp_entry_t) to match up with the 64-bit PTEs, this all becomes
@@ -471,13 +463,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #define __pte_to_swp_entry(pte)		((swp_entry_t){ (pte).pte_high })
 #define __swp_entry_to_pte(x)		((pte_t){ 0, (x).val })
 
-/*
- * Encode and decode a nonlinear file mapping entry
- */
-#define pte_to_pgoff(pte)		((pte).pte_high)
-#define pgoff_to_pte(off)		((pte_t) { _PAGE_FILE, (off) })
-
-#define PTE_FILE_MAX_BITS		32
 #else
 #define __swp_type(x)			((x).val & 0xff)
 #define __swp_offset(x)			((x).val >> 10)
@@ -485,13 +470,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) >> 1 })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val << 1 })
-
-/*
- * Encode and decode a nonlinear file mapping entry
- */
-#define PTE_FILE_MAX_BITS	29
-#define pte_to_pgoff(pte)	(pte_val(pte) >> 1)
-#define pgoff_to_pte(off)	((pte_t) { ((off) << 1) | _PAGE_FILE })
 #endif
 
 #endif /* __ASSEMBLY__ */
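For the 32-bit (non-X2TLB) case above, a swap pte is simply the swap entry shifted left one bit, so bit 0 — previously _PAGE_FILE — ends up unused. A sketch of that round trip (our harness; the __swp_entry() encoder sits outside the hunk and is inferred from its decoders):

#include <assert.h>
#include <stdint.h>

static uint32_t swp_entry(uint32_t type, uint32_t off) { return (type & 0xff) | (off << 10); }
static uint32_t swp_type(uint32_t val)                 { return val & 0xff; }
static uint32_t swp_offset(uint32_t val)               { return val >> 10; }

static uint32_t swp_entry_to_pte(uint32_t val)         { return val << 1; }
static uint32_t pte_to_swp_entry(uint32_t pte)         { return pte >> 1; }

int main(void)
{
	uint32_t e = swp_entry(3, 0x1fffff);
	uint32_t pte = swp_entry_to_pte(e);

	assert((pte & 1) == 0);			/* the old _PAGE_FILE slot */
	assert(pte_to_swp_entry(pte) == e);
	assert(swp_type(pte_to_swp_entry(pte)) == 3);
	assert(swp_offset(pte_to_swp_entry(pte)) == 0x1fffff);
	return 0;
}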
diff --git a/arch/sh/include/asm/pgtable_64.h b/arch/sh/include/asm/pgtable_64.h
index dda8c82601b9..07424968df62 100644
--- a/arch/sh/include/asm/pgtable_64.h
+++ b/arch/sh/include/asm/pgtable_64.h
@@ -107,7 +107,6 @@ static __inline__ void set_pte(pte_t *pteptr, pte_t pteval)
 #define _PAGE_DEVICE	0x001  /* CB0: if uncacheable, 1->device (i.e. no write-combining or reordering at bus level) */
 #define _PAGE_CACHABLE	0x002  /* CB1: uncachable/cachable */
 #define _PAGE_PRESENT	0x004  /* software: page referenced */
-#define _PAGE_FILE	0x004  /* software: only when !present */
 #define _PAGE_SIZE0	0x008  /* SZ0-bit : size of page */
 #define _PAGE_SIZE1	0x010  /* SZ1-bit : size of page */
 #define _PAGE_SHARED	0x020  /* software: reflects PTEH's SH */
@@ -129,7 +128,7 @@ static __inline__ void set_pte(pte_t *pteptr, pte_t pteval)
 #define _PAGE_WIRED	_PAGE_EXT(0x001) /* software: wire the tlb entry */
 #define _PAGE_SPECIAL	_PAGE_EXT(0x002)
 
-#define _PAGE_CLEAR_FLAGS	(_PAGE_PRESENT | _PAGE_FILE | _PAGE_SHARED | \
+#define _PAGE_CLEAR_FLAGS	(_PAGE_PRESENT | _PAGE_SHARED | \
 				 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_WIRED)
 
 /* Mask which drops software flags */
@@ -260,7 +259,6 @@ static __inline__ void set_pte(pte_t *pteptr, pte_t pteval)
  */
 static inline int pte_dirty(pte_t pte)  { return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte)  { return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte)   { return pte_val(pte) & _PAGE_FILE; }
 static inline int pte_write(pte_t pte)  { return pte_val(pte) & _PAGE_WRITE; }
 static inline int pte_special(pte_t pte){ return pte_val(pte) & _PAGE_SPECIAL; }
 
@@ -304,11 +302,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-/* Encode and decode a nonlinear file mapping entry */
-#define PTE_FILE_MAX_BITS	29
-#define pte_to_pgoff(pte)	(pte_val(pte))
-#define pgoff_to_pte(off)	((pte_t) { (off) | _PAGE_FILE })
-
 #endif /* !__ASSEMBLY__ */
 
 #define pfn_pte(pfn, prot)	__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index b9b91ae19fe1..b2f7dc46a7d1 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -221,14 +221,6 @@ static inline int pte_young(pte_t pte)
 	return pte_val(pte) & SRMMU_REF;
 }
 
-/*
- * The following only work if pte_present() is not true.
- */
-static inline int pte_file(pte_t pte)
-{
-	return pte_val(pte) & SRMMU_FILE;
-}
-
 static inline int pte_special(pte_t pte)
 {
 	return 0;
@@ -375,22 +367,6 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-/* file-offset-in-pte helpers */
-static inline unsigned long pte_to_pgoff(pte_t pte)
-{
-	return pte_val(pte) >> SRMMU_PTE_FILE_SHIFT;
-}
-
-static inline pte_t pgoff_to_pte(unsigned long pgoff)
-{
-	return __pte((pgoff << SRMMU_PTE_FILE_SHIFT) | SRMMU_FILE);
-}
-
-/*
- * This is made a constant because mm/fremap.c required a constant.
- */
-#define PTE_FILE_MAX_BITS 24
-
 static inline unsigned long
 __get_phys (unsigned long addr)
 {
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 1ff9e7864168..2ac7873ad6fd 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -137,7 +137,6 @@ bool kern_addr_valid(unsigned long addr);
 #define _PAGE_SOFT_4U	  _AC(0x0000000000001F80,UL) /* Software bits:       */
 #define _PAGE_EXEC_4U	  _AC(0x0000000000001000,UL) /* Executable SW bit    */
 #define _PAGE_MODIFIED_4U _AC(0x0000000000000800,UL) /* Modified (dirty)     */
-#define _PAGE_FILE_4U	  _AC(0x0000000000000800,UL) /* Pagecache page       */
 #define _PAGE_ACCESSED_4U _AC(0x0000000000000400,UL) /* Accessed (ref'd)     */
 #define _PAGE_READ_4U	  _AC(0x0000000000000200,UL) /* Readable SW Bit      */
 #define _PAGE_WRITE_4U	  _AC(0x0000000000000100,UL) /* Writable SW Bit      */
@@ -167,7 +166,6 @@ bool kern_addr_valid(unsigned long addr);
 #define _PAGE_EXEC_4V	  _AC(0x0000000000000080,UL) /* Executable Page      */
 #define _PAGE_W_4V	  _AC(0x0000000000000040,UL) /* Writable             */
 #define _PAGE_SOFT_4V	  _AC(0x0000000000000030,UL) /* Software bits        */
-#define _PAGE_FILE_4V	  _AC(0x0000000000000020,UL) /* Pagecache page       */
 #define _PAGE_PRESENT_4V  _AC(0x0000000000000010,UL) /* Present              */
 #define _PAGE_RESV_4V	  _AC(0x0000000000000008,UL) /* Reserved             */
 #define _PAGE_SZ16GB_4V	  _AC(0x0000000000000007,UL) /* 16GB Page            */
@@ -332,22 +330,6 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 }
 #endif
 
-static inline pte_t pgoff_to_pte(unsigned long off)
-{
-	off <<= PAGE_SHIFT;
-
-	__asm__ __volatile__(
-	"\n661:	or		%0, %2, %0\n"
-	"	.section	.sun4v_1insn_patch, \"ax\"\n"
-	"	.word		661b\n"
-	"	or		%0, %3, %0\n"
-	"	.previous\n"
-	: "=r" (off)
-	: "0" (off), "i" (_PAGE_FILE_4U), "i" (_PAGE_FILE_4V));
-
-	return __pte(off);
-}
-
 static inline pgprot_t pgprot_noncached(pgprot_t prot)
 {
 	unsigned long val = pgprot_val(prot);
@@ -609,22 +591,6 @@ static inline unsigned long pte_exec(pte_t pte)
 	return (pte_val(pte) & mask);
 }
 
-static inline unsigned long pte_file(pte_t pte)
-{
-	unsigned long val = pte_val(pte);
-
-	__asm__ __volatile__(
-	"\n661:	and		%0, %2, %0\n"
-	"	.section	.sun4v_1insn_patch, \"ax\"\n"
-	"	.word		661b\n"
-	"	and		%0, %3, %0\n"
-	"	.previous\n"
-	: "=r" (val)
-	: "0" (val), "i" (_PAGE_FILE_4U), "i" (_PAGE_FILE_4V));
-
-	return val;
-}
-
 static inline unsigned long pte_present(pte_t pte)
 {
 	unsigned long val = pte_val(pte);
@@ -971,12 +937,6 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val })
 
-/* File offset in PTE support. */
-unsigned long pte_file(pte_t);
-#define pte_to_pgoff(pte)	(pte_val(pte) >> PAGE_SHIFT)
-pte_t pgoff_to_pte(unsigned long);
-#define PTE_FILE_MAX_BITS	(64UL - PAGE_SHIFT - 1UL)
-
 int page_in_phys_avail(unsigned long paddr);
 
 /*
diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h
index 79da17866fa8..ae51a111a8c7 100644
--- a/arch/sparc/include/asm/pgtsrmmu.h
+++ b/arch/sparc/include/asm/pgtsrmmu.h
@@ -80,10 +80,6 @@
80#define SRMMU_PRIV 0x1c 80#define SRMMU_PRIV 0x1c
81#define SRMMU_PRIV_RDONLY 0x18 81#define SRMMU_PRIV_RDONLY 0x18
82 82
83#define SRMMU_FILE 0x40 /* Implemented in software */
84
85#define SRMMU_PTE_FILE_SHIFT 8 /* == 32-PTE_FILE_MAX_BITS */
86
87#define SRMMU_CHG_MASK (0xffffff00 | SRMMU_REF | SRMMU_DIRTY) 83#define SRMMU_CHG_MASK (0xffffff00 | SRMMU_REF | SRMMU_DIRTY)
88 84
89/* SRMMU swap entry encoding 85/* SRMMU swap entry encoding
@@ -94,13 +90,13 @@
94 * oooooooooooooooooootttttRRRRRRRR 90 * oooooooooooooooooootttttRRRRRRRR
95 * fedcba9876543210fedcba9876543210 91 * fedcba9876543210fedcba9876543210
96 * 92 *
97 * The bottom 8 bits are reserved for protection and status bits, especially 93 * The bottom 7 bits are reserved for protection and status bits, especially
98 * FILE and PRESENT. 94 * PRESENT.
99 */ 95 */
100#define SRMMU_SWP_TYPE_MASK 0x1f 96#define SRMMU_SWP_TYPE_MASK 0x1f
101#define SRMMU_SWP_TYPE_SHIFT SRMMU_PTE_FILE_SHIFT 97#define SRMMU_SWP_TYPE_SHIFT 7
102#define SRMMU_SWP_OFF_MASK 0x7ffff 98#define SRMMU_SWP_OFF_MASK 0xfffff
103#define SRMMU_SWP_OFF_SHIFT (SRMMU_PTE_FILE_SHIFT + 5) 99#define SRMMU_SWP_OFF_SHIFT (SRMMU_SWP_TYPE_SHIFT + 5)
104 100
105/* Some day I will implement true fine grained access bits for 101/* Some day I will implement true fine grained access bits for
106 * user pages because the SRMMU gives us the capabilities to 102 * user pages because the SRMMU gives us the capabilities to
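The pgtsrmmu.h hunk above reshapes the SRMMU swap-entry layout once the FILE bit is gone: five type bits starting at bit 7 and twenty offset bits starting at bit 12, with the low seven bits left for protection/status. A minimal standalone C sketch of that packing, using the constants from the hunk rather than the kernel's __swp_entry() helpers:

#include <assert.h>
#include <stdio.h>

/* Constants copied from the post-patch pgtsrmmu.h hunk above. */
#define SRMMU_SWP_TYPE_MASK	0x1f
#define SRMMU_SWP_TYPE_SHIFT	7
#define SRMMU_SWP_OFF_MASK	0xfffff
#define SRMMU_SWP_OFF_SHIFT	(SRMMU_SWP_TYPE_SHIFT + 5)

/* Pack a (type, offset) pair into a 32-bit software swap entry;
 * the bottom 7 bits stay clear for protection/status bits (PRESENT). */
static unsigned long srmmu_swp_entry(unsigned long type, unsigned long off)
{
	return ((type & SRMMU_SWP_TYPE_MASK) << SRMMU_SWP_TYPE_SHIFT) |
	       ((off & SRMMU_SWP_OFF_MASK) << SRMMU_SWP_OFF_SHIFT);
}

static unsigned long srmmu_swp_type(unsigned long ent)
{
	return (ent >> SRMMU_SWP_TYPE_SHIFT) & SRMMU_SWP_TYPE_MASK;
}

static unsigned long srmmu_swp_offset(unsigned long ent)
{
	return (ent >> SRMMU_SWP_OFF_SHIFT) & SRMMU_SWP_OFF_MASK;
}

int main(void)
{
	unsigned long ent = srmmu_swp_entry(3, 0x12345);

	assert(srmmu_swp_type(ent) == 3);
	assert(srmmu_swp_offset(ent) == 0x12345);
	assert((ent & 0x7f) == 0);	/* status bits untouched */
	printf("entry=%#lx\n", ent);
	return 0;
}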
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index 5d1950788c69..bc75b6ef2e79 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -285,17 +285,6 @@ extern void start_mm_caching(struct mm_struct *mm);
285extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next); 285extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next);
286 286
287/* 287/*
288 * Support non-linear file mappings (see sys_remap_file_pages).
289 * This is defined by CLIENT1 set but CLIENT0 and _PAGE_PRESENT clear, and the
290 * file offset in the 32 high bits.
291 */
292#define _PAGE_FILE HV_PTE_CLIENT1
293#define PTE_FILE_MAX_BITS 32
294#define pte_file(pte) (hv_pte_get_client1(pte) && !hv_pte_get_client0(pte))
295#define pte_to_pgoff(pte) ((pte).val >> 32)
296#define pgoff_to_pte(off) ((pte_t) { (((long long)(off)) << 32) | _PAGE_FILE })
297
298/*
299 * Encode and de-code a swap entry (see <linux/swapops.h>). 288 * Encode and de-code a swap entry (see <linux/swapops.h>).
300 * We put the swap file type+offset in the 32 high bits; 289 * We put the swap file type+offset in the 32 high bits;
301 * I believe we can just leave the low bits clear. 290 * I believe we can just leave the low bits clear.
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index cd3387370ebb..0029b3fb651b 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -263,10 +263,6 @@ static int pte_to_home(pte_t pte)
263/* Update the home of a PTE if necessary (can also be used for a pgprot_t). */ 263/* Update the home of a PTE if necessary (can also be used for a pgprot_t). */
264pte_t pte_set_home(pte_t pte, int home) 264pte_t pte_set_home(pte_t pte, int home)
265{ 265{
266 /* Check for non-linear file mapping "PTEs" and pass them through. */
267 if (pte_file(pte))
268 return pte;
269
270#if CHIP_HAS_MMIO() 266#if CHIP_HAS_MMIO()
271 /* Check for MMIO mappings and pass them through. */ 267 /* Check for MMIO mappings and pass them through. */
272 if (hv_pte_get_mode(pte) == HV_PTE_MODE_MMIO) 268 if (hv_pte_get_mode(pte) == HV_PTE_MODE_MMIO)
diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h
index f534b73e753e..7afe86035fa7 100644
--- a/arch/um/include/asm/pgtable-2level.h
+++ b/arch/um/include/asm/pgtable-2level.h
@@ -41,13 +41,4 @@ static inline void pgd_mkuptodate(pgd_t pgd) { }
41#define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot)) 41#define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot))
42#define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot)) 42#define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot))
43 43
44/*
45 * Bits 0 through 4 are taken
46 */
47#define PTE_FILE_MAX_BITS 27
48
49#define pte_to_pgoff(pte) (pte_val(pte) >> 5)
50
51#define pgoff_to_pte(off) ((pte_t) { ((off) << 5) + _PAGE_FILE })
52
53#endif 44#endif
diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h
index 0032f9212e74..344c559c0a17 100644
--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-3level.h
@@ -112,25 +112,5 @@ static inline pmd_t pfn_pmd(pfn_t page_nr, pgprot_t pgprot)
112 return __pmd((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)); 112 return __pmd((page_nr << PAGE_SHIFT) | pgprot_val(pgprot));
113} 113}
114 114
115/*
116 * Bits 0 through 3 are taken in the low part of the pte,
117 * put the 32 bits of offset into the high part.
118 */
119#define PTE_FILE_MAX_BITS 32
120
121#ifdef CONFIG_64BIT
122
123#define pte_to_pgoff(p) ((p).pte >> 32)
124
125#define pgoff_to_pte(off) ((pte_t) { ((off) << 32) | _PAGE_FILE })
126
127#else
128
129#define pte_to_pgoff(pte) ((pte).pte_high)
130
131#define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) })
132
133#endif
134
135#endif 115#endif
136 116
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index bf974f712af7..2324b624f195 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -18,7 +18,6 @@
18#define _PAGE_ACCESSED 0x080 18#define _PAGE_ACCESSED 0x080
19#define _PAGE_DIRTY 0x100 19#define _PAGE_DIRTY 0x100
20/* If _PAGE_PRESENT is clear, we use these: */ 20/* If _PAGE_PRESENT is clear, we use these: */
21#define _PAGE_FILE 0x008 /* nonlinear file mapping, saved PTE; unset:swap */
22#define _PAGE_PROTNONE 0x010 /* if the user mapped it with PROT_NONE; 21#define _PAGE_PROTNONE 0x010 /* if the user mapped it with PROT_NONE;
23 pte_present gives true */ 22 pte_present gives true */
24 23
@@ -151,14 +150,6 @@ static inline int pte_write(pte_t pte)
151 !(pte_get_bits(pte, _PAGE_PROTNONE))); 150 !(pte_get_bits(pte, _PAGE_PROTNONE)));
152} 151}
153 152
154/*
155 * The following only works if pte_present() is not true.
156 */
157static inline int pte_file(pte_t pte)
158{
159 return pte_get_bits(pte, _PAGE_FILE);
160}
161
162static inline int pte_dirty(pte_t pte) 153static inline int pte_dirty(pte_t pte)
163{ 154{
164 return pte_get_bits(pte, _PAGE_DIRTY); 155 return pte_get_bits(pte, _PAGE_DIRTY);
diff --git a/arch/unicore32/include/asm/pgtable-hwdef.h b/arch/unicore32/include/asm/pgtable-hwdef.h
index 7314e859cca0..e37fa471c2be 100644
--- a/arch/unicore32/include/asm/pgtable-hwdef.h
+++ b/arch/unicore32/include/asm/pgtable-hwdef.h
@@ -44,7 +44,6 @@
44#define PTE_TYPE_INVALID (3 << 0) 44#define PTE_TYPE_INVALID (3 << 0)
45 45
46#define PTE_PRESENT (1 << 2) 46#define PTE_PRESENT (1 << 2)
47#define PTE_FILE (1 << 3) /* only when !PRESENT */
48#define PTE_YOUNG (1 << 3) 47#define PTE_YOUNG (1 << 3)
49#define PTE_DIRTY (1 << 4) 48#define PTE_DIRTY (1 << 4)
50#define PTE_CACHEABLE (1 << 5) 49#define PTE_CACHEABLE (1 << 5)
diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h
index ed6f7d000fba..818d0f5598e3 100644
--- a/arch/unicore32/include/asm/pgtable.h
+++ b/arch/unicore32/include/asm/pgtable.h
@@ -283,20 +283,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
283#define MAX_SWAPFILES_CHECK() \ 283#define MAX_SWAPFILES_CHECK() \
284 BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS) 284 BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
285 285
286/*
287 * Encode and decode a file entry. File entries are stored in the Linux
288 * page tables as follows:
289 *
290 * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
291 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
292 * <----------------------- offset ----------------------> 1 0 0 0
293 */
294#define pte_file(pte) (pte_val(pte) & PTE_FILE)
295#define pte_to_pgoff(x) (pte_val(x) >> 4)
296#define pgoff_to_pte(x) __pte(((x) << 4) | PTE_FILE)
297
298#define PTE_FILE_MAX_BITS 28
299
300/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ 286/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
301/* FIXME: this is not correct */ 287/* FIXME: this is not correct */
302#define kern_addr_valid(addr) (1) 288#define kern_addr_valid(addr) (1)
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 206a87fdd22d..fd74a11959de 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -62,44 +62,8 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi
62 return ((value >> rightshift) & mask) << leftshift; 62 return ((value >> rightshift) & mask) << leftshift;
63} 63}
64 64
65/*
66 * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken,
67 * split up the 29 bits of offset into this range.
68 */
69#define PTE_FILE_MAX_BITS 29
70#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1)
71#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1)
72#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1)
73#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
74#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
75
76#define PTE_FILE_MASK1 ((1U << PTE_FILE_BITS1) - 1)
77#define PTE_FILE_MASK2 ((1U << PTE_FILE_BITS2) - 1)
78
79#define PTE_FILE_LSHIFT2 (PTE_FILE_BITS1)
80#define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2)
81
82static __always_inline pgoff_t pte_to_pgoff(pte_t pte)
83{
84 return (pgoff_t)
85 (pte_bitop(pte.pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) +
86 pte_bitop(pte.pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) +
87 pte_bitop(pte.pte_low, PTE_FILE_SHIFT3, -1UL, PTE_FILE_LSHIFT3));
88}
89
90static __always_inline pte_t pgoff_to_pte(pgoff_t off)
91{
92 return (pte_t){
93 .pte_low =
94 pte_bitop(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) +
95 pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) +
96 pte_bitop(off, PTE_FILE_LSHIFT3, -1UL, PTE_FILE_SHIFT3) +
97 _PAGE_FILE,
98 };
99}
100
101/* Encode and de-code a swap entry */ 65/* Encode and de-code a swap entry */
102#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) 66#define SWP_TYPE_BITS 5
103#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) 67#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
104 68
105#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) 69#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
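For reference, the pte_bitop() machinery deleted above scattered a 29-bit file offset around the PRESENT, FILE and PROTNONE bits of a 2-level pte. A standalone model of the removed round trip, assuming the usual 32-bit x86 bit positions (_PAGE_BIT_PRESENT = 0, _PAGE_BIT_FILE = _PAGE_BIT_DIRTY = 6, _PAGE_BIT_PROTNONE = _PAGE_BIT_GLOBAL = 8); it shows what the remap_file_pages() emulation no longer has to maintain:

#include <assert.h>
#include <stdint.h>

/* Bit positions assumed here: PRESENT=0, FILE=DIRTY=6, PROTNONE=GLOBAL=8. */
#define _PAGE_FILE	(1U << 6)
#define PTE_FILE_SHIFT1	1	/* _PAGE_BIT_PRESENT + 1 */
#define PTE_FILE_SHIFT2	7	/* _PAGE_BIT_FILE + 1 */
#define PTE_FILE_SHIFT3	9	/* _PAGE_BIT_PROTNONE + 1 */
#define PTE_FILE_BITS1	(PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)	/* 5 */
#define PTE_FILE_BITS2	(PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)	/* 1 */
#define PTE_FILE_MASK1	((1U << PTE_FILE_BITS1) - 1)
#define PTE_FILE_MASK2	((1U << PTE_FILE_BITS2) - 1)
#define PTE_FILE_LSHIFT2 PTE_FILE_BITS1
#define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2)

static uint32_t pte_bitop(uint32_t value, unsigned rightshift,
			  uint32_t mask, unsigned leftshift)
{
	return ((value >> rightshift) & mask) << leftshift;
}

/* Gather offset bits 1-5, 7 and 9-31 of the pte back into a pgoff. */
static uint32_t pte_to_pgoff(uint32_t pte)
{
	return pte_bitop(pte, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) +
	       pte_bitop(pte, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) +
	       pte_bitop(pte, PTE_FILE_SHIFT3, -1U, PTE_FILE_LSHIFT3);
}

/* Scatter a 29-bit pgoff around the PRESENT, FILE and PROTNONE bits. */
static uint32_t pgoff_to_pte(uint32_t off)
{
	return pte_bitop(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) +
	       pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) +
	       pte_bitop(off, PTE_FILE_LSHIFT3, -1U, PTE_FILE_SHIFT3) +
	       _PAGE_FILE;
}

int main(void)
{
	uint32_t off;

	for (off = 0; off < (1U << 29); off += 12345)
		assert(pte_to_pgoff(pgoff_to_pte(off)) == off);
	return 0;
}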
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 81bb91b49a88..cdaa58c9b39e 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -176,18 +176,6 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
176#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) 176#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
177#endif 177#endif
178 178
179/*
180 * Bits 0, 6 and 7 are taken in the low part of the pte,
181 * put the 32 bits of offset into the high part.
182 *
183 * For soft-dirty tracking 11 bit is taken from
184 * the low part of pte as well.
185 */
186#define pte_to_pgoff(pte) ((pte).pte_high)
187#define pgoff_to_pte(off) \
188 ((pte_t) { { .pte_low = _PAGE_FILE, .pte_high = (off) } })
189#define PTE_FILE_MAX_BITS 32
190
191/* Encode and de-code a swap entry */ 179/* Encode and de-code a swap entry */
192#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) 180#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
193#define __swp_type(x) (((x).val) & 0x1f) 181#define __swp_type(x) (((x).val) & 0x1f)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index e8a5454acc99..0fe03f834fb1 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -115,11 +115,6 @@ static inline int pte_write(pte_t pte)
115 return pte_flags(pte) & _PAGE_RW; 115 return pte_flags(pte) & _PAGE_RW;
116} 116}
117 117
118static inline int pte_file(pte_t pte)
119{
120 return pte_flags(pte) & _PAGE_FILE;
121}
122
123static inline int pte_huge(pte_t pte) 118static inline int pte_huge(pte_t pte)
124{ 119{
125 return pte_flags(pte) & _PAGE_PSE; 120 return pte_flags(pte) & _PAGE_PSE;
@@ -329,21 +324,6 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
329 return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); 324 return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
330} 325}
331 326
332static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
333{
334 return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
335}
336
337static inline pte_t pte_file_mksoft_dirty(pte_t pte)
338{
339 return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
340}
341
342static inline int pte_file_soft_dirty(pte_t pte)
343{
344 return pte_flags(pte) & _PAGE_SOFT_DIRTY;
345}
346
347#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ 327#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
348 328
349/* 329/*
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 4572b2f30237..e227970f983e 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -133,10 +133,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
133/* PUD - Level3 access */ 133/* PUD - Level3 access */
134 134
135/* PMD - Level 2 access */ 135/* PMD - Level 2 access */
136#define pte_to_pgoff(pte) ((pte_val((pte)) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
137#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \
138 _PAGE_FILE })
139#define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT
140 136
141/* PTE - Level 1 access. */ 137/* PTE - Level 1 access. */
142 138
@@ -145,7 +141,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
145#define pte_unmap(pte) ((void)(pte))/* NOP */ 141#define pte_unmap(pte) ((void)(pte))/* NOP */
146 142
147/* Encode and de-code a swap entry */ 143/* Encode and de-code a swap entry */
148#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) 144#define SWP_TYPE_BITS 5
149#ifdef CONFIG_NUMA_BALANCING 145#ifdef CONFIG_NUMA_BALANCING
150/* Automatic NUMA balancing needs to be distinguishable from swap entries */ 146/* Automatic NUMA balancing needs to be distinguishable from swap entries */
151#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 2) 147#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 2)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 25bcd4a89517..5185a4f599ec 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -38,8 +38,6 @@
38/* If _PAGE_BIT_PRESENT is clear, we use these: */ 38/* If _PAGE_BIT_PRESENT is clear, we use these: */
39/* - if the user mapped it with PROT_NONE; pte_present gives true */ 39/* - if the user mapped it with PROT_NONE; pte_present gives true */
40#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL 40#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
41/* - set: nonlinear file mapping, saved PTE; unset:swap */
42#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY
43 41
44#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) 42#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
45#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) 43#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW)
@@ -114,7 +112,6 @@
114#define _PAGE_NX (_AT(pteval_t, 0)) 112#define _PAGE_NX (_AT(pteval_t, 0))
115#endif 113#endif
116 114
117#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
118#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) 115#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
119 116
120#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ 117#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 8b977ebf9388..bca0aa3a003f 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -178,4 +178,15 @@ static __init int setup_hugepagesz(char *opt)
178 return 1; 178 return 1;
179} 179}
180__setup("hugepagesz=", setup_hugepagesz); 180__setup("hugepagesz=", setup_hugepagesz);
181
182#ifdef CONFIG_CMA
183static __init int gigantic_pages_init(void)
184{
185 /* With CMA we can allocate gigantic pages at runtime */
186 if (cpu_has_gbpages && !size_to_hstate(1UL << PUD_SHIFT))
187 hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
188 return 0;
189}
190arch_initcall(gigantic_pages_init);
191#endif
181#endif 192#endif
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index 872bf0194e6d..01b80dce9d65 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -89,8 +89,6 @@
89 * (PAGE_NONE)| PPN | 0 | 00 | ADW | 01 | 11 | 11 | 89 * (PAGE_NONE)| PPN | 0 | 00 | ADW | 01 | 11 | 11 |
90 * +-----------------------------------------+ 90 * +-----------------------------------------+
91 * swap | index | type | 01 | 11 | 00 | 91 * swap | index | type | 01 | 11 | 00 |
92 * +- - - - - - - - - - - - - - - - - - - - -+
93 * file | file offset | 01 | 11 | 10 |
94 * +-----------------------------------------+ 92 * +-----------------------------------------+
95 * 93 *
96 * For T1050 hardware and earlier the layout differs for present and (PAGE_NONE) 94 * For T1050 hardware and earlier the layout differs for present and (PAGE_NONE)
@@ -111,7 +109,6 @@
111 * index swap offset / PAGE_SIZE (bit 11-31: 21 bits -> 8 GB) 109 * index swap offset / PAGE_SIZE (bit 11-31: 21 bits -> 8 GB)
112 * (note that the index is always non-zero) 110 * (note that the index is always non-zero)
113 * type swap type (5 bits -> 32 types) 111 * type swap type (5 bits -> 32 types)
114 * file offset 26-bit offset into the file, in increments of PAGE_SIZE
115 * 112 *
116 * Notes: 113 * Notes:
117 * - (PROT_NONE) is a special case of 'present' but causes an exception for 114 * - (PROT_NONE) is a special case of 'present' but causes an exception for
@@ -144,7 +141,6 @@
144#define _PAGE_HW_VALID 0x00 141#define _PAGE_HW_VALID 0x00
145#define _PAGE_NONE 0x0f 142#define _PAGE_NONE 0x0f
146#endif 143#endif
147#define _PAGE_FILE (1<<1) /* file mapped page, only if !present */
148 144
149#define _PAGE_USER (1<<4) /* user access (ring=1) */ 145#define _PAGE_USER (1<<4) /* user access (ring=1) */
150 146
@@ -260,7 +256,6 @@ static inline void pgtable_cache_init(void) { }
260static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITABLE; } 256static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITABLE; }
261static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } 257static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
262static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } 258static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
263static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
264static inline int pte_special(pte_t pte) { return 0; } 259static inline int pte_special(pte_t pte) { return 0; }
265 260
266static inline pte_t pte_wrprotect(pte_t pte) 261static inline pte_t pte_wrprotect(pte_t pte)
@@ -390,11 +385,6 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
390#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) 385#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
391#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) 386#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
392 387
393#define PTE_FILE_MAX_BITS 26
394#define pte_to_pgoff(pte) (pte_val(pte) >> 6)
395#define pgoff_to_pte(off) \
396 ((pte_t) { ((off) << 6) | _PAGE_CA_INVALID | _PAGE_FILE | _PAGE_USER })
397
398#endif /* !defined (__ASSEMBLY__) */ 388#endif /* !defined (__ASSEMBLY__) */
399 389
400 390
diff --git a/drivers/gpu/drm/drm_vma_manager.c b/drivers/gpu/drm/drm_vma_manager.c
index 63b471205072..68c1f32fb086 100644
--- a/drivers/gpu/drm/drm_vma_manager.c
+++ b/drivers/gpu/drm/drm_vma_manager.c
@@ -50,8 +50,7 @@
50 * 50 *
51 * You must not use multiple offset managers on a single address_space. 51 * You must not use multiple offset managers on a single address_space.
52 * Otherwise, mm-core will be unable to tear down memory mappings as the VM will 52 * Otherwise, mm-core will be unable to tear down memory mappings as the VM will
53 * no longer be linear. Please use VM_NONLINEAR in that case and implement your 53 * no longer be linear.
54 * own offset managers.
55 * 54 *
56 * This offset manager works on page-based addresses. That is, every argument 55 * This offset manager works on page-based addresses. That is, every argument
57 * and return code (with the exception of drm_vma_node_offset_addr()) is given 56 * and return code (with the exception of drm_vma_node_offset_addr()) is given
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 5594505e6e73..b40133796b87 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -831,7 +831,6 @@ static const struct vm_operations_struct v9fs_file_vm_ops = {
831 .fault = filemap_fault, 831 .fault = filemap_fault,
832 .map_pages = filemap_map_pages, 832 .map_pages = filemap_map_pages,
833 .page_mkwrite = v9fs_vm_page_mkwrite, 833 .page_mkwrite = v9fs_vm_page_mkwrite,
834 .remap_pages = generic_file_remap_pages,
835}; 834};
836 835
837static const struct vm_operations_struct v9fs_mmap_file_vm_ops = { 836static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
@@ -839,7 +838,6 @@ static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
839 .fault = filemap_fault, 838 .fault = filemap_fault,
840 .map_pages = filemap_map_pages, 839 .map_pages = filemap_map_pages,
841 .page_mkwrite = v9fs_vm_page_mkwrite, 840 .page_mkwrite = v9fs_vm_page_mkwrite,
842 .remap_pages = generic_file_remap_pages,
843}; 841};
844 842
845 843
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e4090259569b..a606ab551296 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2081,7 +2081,6 @@ static const struct vm_operations_struct btrfs_file_vm_ops = {
2081 .fault = filemap_fault, 2081 .fault = filemap_fault,
2082 .map_pages = filemap_map_pages, 2082 .map_pages = filemap_map_pages,
2083 .page_mkwrite = btrfs_page_mkwrite, 2083 .page_mkwrite = btrfs_page_mkwrite,
2084 .remap_pages = generic_file_remap_pages,
2085}; 2084};
2086 2085
2087static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) 2086static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index c81c0e004588..24be059fd1f8 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1569,7 +1569,6 @@ out:
1569static struct vm_operations_struct ceph_vmops = { 1569static struct vm_operations_struct ceph_vmops = {
1570 .fault = ceph_filemap_fault, 1570 .fault = ceph_filemap_fault,
1571 .page_mkwrite = ceph_page_mkwrite, 1571 .page_mkwrite = ceph_page_mkwrite,
1572 .remap_pages = generic_file_remap_pages,
1573}; 1572};
1574 1573
1575int ceph_mmap(struct file *file, struct vm_area_struct *vma) 1574int ceph_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c1a86764bbf7..8fe1f7a21b3e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3236,7 +3236,6 @@ static struct vm_operations_struct cifs_file_vm_ops = {
3236 .fault = filemap_fault, 3236 .fault = filemap_fault,
3237 .map_pages = filemap_map_pages, 3237 .map_pages = filemap_map_pages,
3238 .page_mkwrite = cifs_page_mkwrite, 3238 .page_mkwrite = cifs_page_mkwrite,
3239 .remap_pages = generic_file_remap_pages,
3240}; 3239};
3241 3240
3242int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) 3241int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8131be8c0af3..7cb592386121 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -195,7 +195,6 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
195 .fault = filemap_fault, 195 .fault = filemap_fault,
196 .map_pages = filemap_map_pages, 196 .map_pages = filemap_map_pages,
197 .page_mkwrite = ext4_page_mkwrite, 197 .page_mkwrite = ext4_page_mkwrite,
198 .remap_pages = generic_file_remap_pages,
199}; 198};
200 199
201static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) 200static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3c27e0ecb3bc..5674ba13102b 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -92,7 +92,6 @@ static const struct vm_operations_struct f2fs_file_vm_ops = {
92 .fault = filemap_fault, 92 .fault = filemap_fault,
93 .map_pages = filemap_map_pages, 93 .map_pages = filemap_map_pages,
94 .page_mkwrite = f2fs_vm_page_mkwrite, 94 .page_mkwrite = f2fs_vm_page_mkwrite,
95 .remap_pages = generic_file_remap_pages,
96}; 95};
97 96
98static int get_parent_ino(struct inode *inode, nid_t *pino) 97static int get_parent_ino(struct inode *inode, nid_t *pino)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 760b2c552197..d769e594855b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2062,7 +2062,6 @@ static const struct vm_operations_struct fuse_file_vm_ops = {
2062 .fault = filemap_fault, 2062 .fault = filemap_fault,
2063 .map_pages = filemap_map_pages, 2063 .map_pages = filemap_map_pages,
2064 .page_mkwrite = fuse_page_mkwrite, 2064 .page_mkwrite = fuse_page_mkwrite,
2065 .remap_pages = generic_file_remap_pages,
2066}; 2065};
2067 2066
2068static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) 2067static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 6e600abf694a..ec9c2d33477a 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -498,7 +498,6 @@ static const struct vm_operations_struct gfs2_vm_ops = {
498 .fault = filemap_fault, 498 .fault = filemap_fault,
499 .map_pages = filemap_map_pages, 499 .map_pages = filemap_map_pages,
500 .page_mkwrite = gfs2_page_mkwrite, 500 .page_mkwrite = gfs2_page_mkwrite,
501 .remap_pages = generic_file_remap_pages,
502}; 501};
503 502
504/** 503/**
diff --git a/fs/inode.c b/fs/inode.c
index f30872ade6d7..3a53b1da3fb8 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -356,7 +356,6 @@ void address_space_init_once(struct address_space *mapping)
356 INIT_LIST_HEAD(&mapping->private_list); 356 INIT_LIST_HEAD(&mapping->private_list);
357 spin_lock_init(&mapping->private_lock); 357 spin_lock_init(&mapping->private_lock);
358 mapping->i_mmap = RB_ROOT; 358 mapping->i_mmap = RB_ROOT;
359 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
360} 359}
361EXPORT_SYMBOL(address_space_init_once); 360EXPORT_SYMBOL(address_space_init_once);
362 361
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 214c3c11fbc2..5d01d2638ca5 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -379,6 +379,11 @@ int __generic_block_fiemap(struct inode *inode,
379 past_eof = true; 379 past_eof = true;
380 } 380 }
381 cond_resched(); 381 cond_resched();
382 if (fatal_signal_pending(current)) {
383 ret = -EINTR;
384 break;
385 }
386
382 } while (1); 387 } while (1);
383 388
384 /* If ret is 1 then we just hit the end of the extent array */ 389 /* If ret is 1 then we just hit the end of the extent array */
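The __generic_block_fiemap() change above lets a long extent walk bail out once the caller has been fatally signalled, instead of grinding to completion. Userspace has no fatal_signal_pending(), but the shape of the pattern (poll a flag every iteration, abandon the loop with -EINTR) can be sketched like this:

#include <errno.h>
#include <signal.h>
#include <stdio.h>

static volatile sig_atomic_t got_signal;

static void handler(int sig)
{
	(void)sig;
	got_signal = 1;
}

/* Long-running loop that re-checks for interruption on every pass,
 * mirroring the fatal_signal_pending() check added to the fiemap walk. */
static int long_walk(void)
{
	int ret = 0;
	unsigned long i;

	for (i = 0; i < 100000000UL; i++) {
		/* ... one unit of real work per iteration ... */
		if (got_signal) {
			ret = -EINTR;
			break;
		}
	}
	return ret;
}

int main(void)
{
	struct sigaction sa = { 0 };

	sa.sa_handler = handler;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGINT, &sa, NULL);
	printf("long_walk() = %d\n", long_walk());
	return 0;
}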
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 2ab6f00dba5b..94712fc781fa 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -646,7 +646,6 @@ static const struct vm_operations_struct nfs_file_vm_ops = {
646 .fault = filemap_fault, 646 .fault = filemap_fault,
647 .map_pages = filemap_map_pages, 647 .map_pages = filemap_map_pages,
648 .page_mkwrite = nfs_vm_page_mkwrite, 648 .page_mkwrite = nfs_vm_page_mkwrite,
649 .remap_pages = generic_file_remap_pages,
650}; 649};
651 650
652static int nfs_need_sync_write(struct file *filp, struct inode *inode) 651static int nfs_need_sync_write(struct file *filp, struct inode *inode)
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 3a03e0aea1fb..a8c728acb7a8 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -128,7 +128,6 @@ static const struct vm_operations_struct nilfs_file_vm_ops = {
128 .fault = filemap_fault, 128 .fault = filemap_fault,
129 .map_pages = filemap_map_pages, 129 .map_pages = filemap_map_pages,
130 .page_mkwrite = nilfs_page_mkwrite, 130 .page_mkwrite = nilfs_page_mkwrite,
131 .remap_pages = generic_file_remap_pages,
132}; 131};
133 132
134static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) 133static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 30d3addfad75..51ceb8107284 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -140,7 +140,7 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
140 } 140 }
141 141
142 if (S_ISDIR(path->dentry->d_inode->i_mode) && 142 if (S_ISDIR(path->dentry->d_inode->i_mode) &&
143 (marks_ignored_mask & FS_ISDIR)) 143 !(marks_mask & FS_ISDIR & ~marks_ignored_mask))
144 return false; 144 return false;
145 145
146 if (event_mask & marks_mask & ~marks_ignored_mask) 146 if (event_mask & marks_mask & ~marks_ignored_mask)
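The one-line fanotify_should_send_event() change is subtle: a directory event is now dropped unless the mark's effective mask (mask & ~ignored_mask) actually contains FS_ISDIR; previously it survived whenever FS_ISDIR merely wasn't ignored. A standalone model of the new predicate, with illustrative flag values rather than the kernel's FS_* constants:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Illustrative flag values only; the kernel's FS_* constants differ. */
#define FS_ISDIR	0x1u
#define FS_OPEN		0x2u

/* Post-patch logic: an event on a directory is delivered only if the
 * effective mask (mask & ~ignored) still carries FS_ISDIR. */
static bool should_send(uint32_t event_mask, bool is_dir,
			uint32_t marks_mask, uint32_t marks_ignored_mask)
{
	if (is_dir && !(marks_mask & FS_ISDIR & ~marks_ignored_mask))
		return false;
	return event_mask & marks_mask & ~marks_ignored_mask;
}

int main(void)
{
	/* Mark listens for FS_OPEN but never asked for directory events:
	 * the old code would have delivered this, the new code drops it. */
	assert(!should_send(FS_OPEN | FS_ISDIR, true, FS_OPEN, 0));
	/* Explicitly interested in directories: still delivered. */
	assert(should_send(FS_OPEN | FS_ISDIR, true, FS_OPEN | FS_ISDIR, 0));
	return 0;
}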
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index bff8567aa42d..cf275500a665 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -487,20 +487,27 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
487 unsigned int flags, 487 unsigned int flags,
488 int *destroy) 488 int *destroy)
489{ 489{
490 __u32 oldmask; 490 __u32 oldmask = 0;
491 491
492 spin_lock(&fsn_mark->lock); 492 spin_lock(&fsn_mark->lock);
493 if (!(flags & FAN_MARK_IGNORED_MASK)) { 493 if (!(flags & FAN_MARK_IGNORED_MASK)) {
494 __u32 tmask = fsn_mark->mask & ~mask;
495
496 if (flags & FAN_MARK_ONDIR)
497 tmask &= ~FAN_ONDIR;
498
494 oldmask = fsn_mark->mask; 499 oldmask = fsn_mark->mask;
495 fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask)); 500 fsnotify_set_mark_mask_locked(fsn_mark, tmask);
496 } else { 501 } else {
497 oldmask = fsn_mark->ignored_mask; 502 __u32 tmask = fsn_mark->ignored_mask & ~mask;
498 fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask & ~mask)); 503 if (flags & FAN_MARK_ONDIR)
504 tmask &= ~FAN_ONDIR;
505
506 fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
499 } 507 }
508 *destroy = !(fsn_mark->mask | fsn_mark->ignored_mask);
500 spin_unlock(&fsn_mark->lock); 509 spin_unlock(&fsn_mark->lock);
501 510
502 *destroy = !(oldmask & ~mask);
503
504 return mask & oldmask; 511 return mask & oldmask;
505} 512}
506 513
@@ -569,20 +576,22 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
569 576
570 spin_lock(&fsn_mark->lock); 577 spin_lock(&fsn_mark->lock);
571 if (!(flags & FAN_MARK_IGNORED_MASK)) { 578 if (!(flags & FAN_MARK_IGNORED_MASK)) {
579 __u32 tmask = fsn_mark->mask | mask;
580
581 if (flags & FAN_MARK_ONDIR)
582 tmask |= FAN_ONDIR;
583
572 oldmask = fsn_mark->mask; 584 oldmask = fsn_mark->mask;
573 fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask)); 585 fsnotify_set_mark_mask_locked(fsn_mark, tmask);
574 } else { 586 } else {
575 __u32 tmask = fsn_mark->ignored_mask | mask; 587 __u32 tmask = fsn_mark->ignored_mask | mask;
588 if (flags & FAN_MARK_ONDIR)
589 tmask |= FAN_ONDIR;
590
576 fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); 591 fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
577 if (flags & FAN_MARK_IGNORED_SURV_MODIFY) 592 if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
578 fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; 593 fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
579 } 594 }
580
581 if (!(flags & FAN_MARK_ONDIR)) {
582 __u32 tmask = fsn_mark->ignored_mask | FAN_ONDIR;
583 fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
584 }
585
586 spin_unlock(&fsn_mark->lock); 595 spin_unlock(&fsn_mark->lock);
587 596
588 return mask & ~oldmask; 597 return mask & ~oldmask;
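The fanotify_user.c refactor computes each new mask up front, clears FAN_ONDIR when the caller passes FAN_MARK_ONDIR, and destroys the mark only when both the mask and the ignored mask end up empty (the old test, !(oldmask & ~mask), ignored the other mask entirely). A compact model of the removal path, again with illustrative flag values:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Illustrative flag values; the kernel's FAN_* constants differ. */
#define FAN_ONDIR		0x1u
#define FAN_MARK_ONDIR		0x2u
#define FAN_MARK_IGNORED_MASK	0x4u

struct mark {
	uint32_t mask;
	uint32_t ignored_mask;
};

/* Post-patch removal logic: compute the new mask first, optionally
 * dropping FAN_ONDIR, then destroy the mark only when *both* masks
 * are empty afterwards. */
static uint32_t mark_remove(struct mark *m, uint32_t mask,
			    unsigned flags, bool *destroy)
{
	uint32_t oldmask = 0;

	if (!(flags & FAN_MARK_IGNORED_MASK)) {
		uint32_t tmask = m->mask & ~mask;

		if (flags & FAN_MARK_ONDIR)
			tmask &= ~FAN_ONDIR;
		oldmask = m->mask;
		m->mask = tmask;
	} else {
		uint32_t tmask = m->ignored_mask & ~mask;

		if (flags & FAN_MARK_ONDIR)
			tmask &= ~FAN_ONDIR;
		m->ignored_mask = tmask;
	}
	*destroy = !(m->mask | m->ignored_mask);
	return mask & oldmask;
}

int main(void)
{
	struct mark m = { .mask = 0x8u, .ignored_mask = 0x10u };
	bool destroy;

	mark_remove(&m, 0x8u, 0, &destroy);
	assert(!destroy);	/* ignored_mask still set, keep the mark */
	mark_remove(&m, 0x10u, FAN_MARK_IGNORED_MASK, &destroy);
	assert(destroy);	/* both masks now empty */
	return 0;
}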
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 7e8282dcea2a..c58a1bcfda0f 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -245,16 +245,14 @@ int ocfs2_set_acl(handle_t *handle,
245 ret = posix_acl_equiv_mode(acl, &mode); 245 ret = posix_acl_equiv_mode(acl, &mode);
246 if (ret < 0) 246 if (ret < 0)
247 return ret; 247 return ret;
248 else {
249 if (ret == 0)
250 acl = NULL;
251 248
252 ret = ocfs2_acl_set_mode(inode, di_bh, 249 if (ret == 0)
253 handle, mode); 250 acl = NULL;
254 if (ret)
255 return ret;
256 251
257 } 252 ret = ocfs2_acl_set_mode(inode, di_bh,
253 handle, mode);
254 if (ret)
255 return ret;
258 } 256 }
259 break; 257 break;
260 case ACL_TYPE_DEFAULT: 258 case ACL_TYPE_DEFAULT:
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index fcae9ef1a328..044158bd22be 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6873,7 +6873,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6873 if (IS_ERR(handle)) { 6873 if (IS_ERR(handle)) {
6874 ret = PTR_ERR(handle); 6874 ret = PTR_ERR(handle);
6875 mlog_errno(ret); 6875 mlog_errno(ret);
6876 goto out_unlock; 6876 goto out;
6877 } 6877 }
6878 6878
6879 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, 6879 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
@@ -6931,7 +6931,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6931 if (ret) { 6931 if (ret) {
6932 mlog_errno(ret); 6932 mlog_errno(ret);
6933 need_free = 1; 6933 need_free = 1;
6934 goto out_commit; 6934 goto out_unlock;
6935 } 6935 }
6936 6936
6937 page_end = PAGE_CACHE_SIZE; 6937 page_end = PAGE_CACHE_SIZE;
@@ -6964,12 +6964,16 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6964 if (ret) { 6964 if (ret) {
6965 mlog_errno(ret); 6965 mlog_errno(ret);
6966 need_free = 1; 6966 need_free = 1;
6967 goto out_commit; 6967 goto out_unlock;
6968 } 6968 }
6969 6969
6970 inode->i_blocks = ocfs2_inode_sector_count(inode); 6970 inode->i_blocks = ocfs2_inode_sector_count(inode);
6971 } 6971 }
6972 6972
6973out_unlock:
6974 if (pages)
6975 ocfs2_unlock_and_free_pages(pages, num_pages);
6976
6973out_commit: 6977out_commit:
6974 if (ret < 0 && did_quota) 6978 if (ret < 0 && did_quota)
6975 dquot_free_space_nodirty(inode, 6979 dquot_free_space_nodirty(inode,
@@ -6989,15 +6993,11 @@ out_commit:
6989 6993
6990 ocfs2_commit_trans(osb, handle); 6994 ocfs2_commit_trans(osb, handle);
6991 6995
6992out_unlock: 6996out:
6993 if (data_ac) 6997 if (data_ac)
6994 ocfs2_free_alloc_context(data_ac); 6998 ocfs2_free_alloc_context(data_ac);
6995 6999 if (pages)
6996out:
6997 if (pages) {
6998 ocfs2_unlock_and_free_pages(pages, num_pages);
6999 kfree(pages); 7000 kfree(pages);
7000 }
7001 7001
7002 return ret; 7002 return ret;
7003} 7003}
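The ocfs2_convert_inline_data_to_extents() reshuffle above is a goto-unwind ordering fix: pages must be unlocked before the transaction commits and the allocation context is freed, so the labels now run out_unlock, then out_commit, then out. The idiom in miniature, as a simplified sketch rather than the ocfs2 code itself:

#include <stdio.h>
#include <stdlib.h>

/* Release resources in the reverse order they were taken; each failure
 * jumps to the label that undoes exactly what has been acquired so far. */
static int do_operation(int simulate_error)
{
	int ret = 0;
	char *ctx = NULL, *handle = NULL, *pages = NULL;

	ctx = malloc(16);		/* like the alloc context */
	if (!ctx)
		return -1;

	handle = malloc(16);		/* like the journal handle */
	if (!handle) {
		ret = -1;
		goto out;		/* no pages locked, nothing to commit */
	}

	pages = malloc(16);		/* like the locked page array */
	if (!pages) {
		ret = -1;
		goto out_commit;
	}

	if (simulate_error) {		/* mid-transaction failure */
		ret = -1;
		goto out_unlock;
	}

out_unlock:
	free(pages);			/* unlock pages before committing */
out_commit:
	free(handle);			/* then commit/abort the transaction */
out:
	free(ctx);			/* finally drop the alloc context */
	return ret;
}

int main(void)
{
	printf("%d %d\n", do_operation(0), do_operation(1));
	return 0;
}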
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 2e355e0f8335..56c403a563bc 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1016,7 +1016,8 @@ void o2net_fill_node_map(unsigned long *map, unsigned bytes)
1016 1016
1017 memset(map, 0, bytes); 1017 memset(map, 0, bytes);
1018 for (node = 0; node < O2NM_MAX_NODES; ++node) { 1018 for (node = 0; node < O2NM_MAX_NODES; ++node) {
1019 o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret); 1019 if (!o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret))
1020 continue;
1020 if (!ret) { 1021 if (!ret) {
1021 set_bit(node, map); 1022 set_bit(node, map);
1022 sc_put(sc); 1023 sc_put(sc);
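The o2net_fill_node_map() fix checks o2net_tx_can_proceed()'s return value before trusting its out-parameters, which are only meaningful when the call reports success. That contract is easy to violate; a minimal illustration with hypothetical helper names, not the o2net API:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical lookup: fills *value and returns true only when the
 * node is configured; on false, *value is left untouched. */
static bool node_status(int node, int *value)
{
	if (node % 2)
		return false;	/* unconfigured: *value stays stale */
	*value = node * 10;
	return true;
}

int main(void)
{
	int node, value = -1;

	for (node = 0; node < 4; node++) {
		/* Mirror the fix: skip the node unless the call succeeded,
		 * and never read 'value' from a failed call. */
		if (!node_status(node, &value))
			continue;
		printf("node %d -> %d\n", node, value);
	}
	return 0;
}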
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index dc024367110a..b95e7df5b76a 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -107,12 +107,12 @@ struct o2net_node {
107 struct list_head nn_status_list; 107 struct list_head nn_status_list;
108 108
109 /* connects are attempted from when heartbeat comes up until either hb 109 /* connects are attempted from when heartbeat comes up until either hb
110 * goes down, the node is unconfigured, no connect attempts succeed 110 * goes down, the node is unconfigured, or a connect succeeds.
111 * before O2NET_CONN_IDLE_DELAY, or a connect succeeds. connect_work 111 * connect_work is queued from set_nn_state both from hb up and from
112 * is queued from set_nn_state both from hb up and from itself if a 112 * itself if a connect attempt fails and so can be self-arming.
113 * connect attempt fails and so can be self-arming. shutdown is 113 * shutdown is careful to first mark the nn such that no connects will
114 * careful to first mark the nn such that no connects will be attempted 114 * be attempted before canceling delayed connect work and flushing the
115 * before canceling delayed connect work and flushing the queue. */ 115 * queue. */
116 struct delayed_work nn_connect_work; 116 struct delayed_work nn_connect_work;
117 unsigned long nn_last_connect_attempt; 117 unsigned long nn_last_connect_attempt;
118 118
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 319e786175af..b08050bd3f2e 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -3456,10 +3456,8 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
3456 int blocksize = dir->i_sb->s_blocksize; 3456 int blocksize = dir->i_sb->s_blocksize;
3457 3457
3458 status = ocfs2_read_dir_block(dir, 0, &bh, 0); 3458 status = ocfs2_read_dir_block(dir, 0, &bh, 0);
3459 if (status) { 3459 if (status)
3460 mlog_errno(status);
3461 goto bail; 3460 goto bail;
3462 }
3463 3461
3464 rec_len = OCFS2_DIR_REC_LEN(namelen); 3462 rec_len = OCFS2_DIR_REC_LEN(namelen);
3465 offset = 0; 3463 offset = 0;
@@ -3480,10 +3478,9 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
3480 status = ocfs2_read_dir_block(dir, 3478 status = ocfs2_read_dir_block(dir,
3481 offset >> sb->s_blocksize_bits, 3479 offset >> sb->s_blocksize_bits,
3482 &bh, 0); 3480 &bh, 0);
3483 if (status) { 3481 if (status)
3484 mlog_errno(status);
3485 goto bail; 3482 goto bail;
3486 } 3483
3487 /* move to next block */ 3484 /* move to next block */
3488 de = (struct ocfs2_dir_entry *) bh->b_data; 3485 de = (struct ocfs2_dir_entry *) bh->b_data;
3489 } 3486 }
@@ -3513,7 +3510,6 @@ next:
3513 de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len)); 3510 de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len));
3514 } 3511 }
3515 3512
3516 status = 0;
3517bail: 3513bail:
3518 brelse(bh); 3514 brelse(bh);
3519 if (status) 3515 if (status)
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index b46278f9ae44..fd6bbbbd7d78 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -385,8 +385,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
385 head = &res->granted; 385 head = &res->granted;
386 386
387 list_for_each_entry(lock, head, list) { 387 list_for_each_entry(lock, head, list) {
388 if (lock->ml.cookie == cookie) 388 /* if lock is found but unlock is pending ignore the bast */
389 if (lock->ml.cookie == cookie) {
390 if (lock->unlock_pending)
391 break;
389 goto do_ast; 392 goto do_ast;
393 }
390 } 394 }
391 395
392 mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, " 396 mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, "
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 149eb556b8c6..825136070d2c 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -406,7 +406,7 @@ static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len)
406 } 406 }
407 spin_unlock(&dlm->spinlock); 407 spin_unlock(&dlm->spinlock);
408 408
409 out += snprintf(buf + out, len - out, "Total on list: %ld\n", total); 409 out += snprintf(buf + out, len - out, "Total on list: %lu\n", total);
410 410
411 return out; 411 return out;
412} 412}
@@ -464,7 +464,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len)
464 spin_unlock(&dlm->master_lock); 464 spin_unlock(&dlm->master_lock);
465 465
466 out += snprintf(buf + out, len - out, 466 out += snprintf(buf + out, len - out,
467 "Total: %ld, Longest: %ld\n", total, longest); 467 "Total: %lu, Longest: %lu\n", total, longest);
468 return out; 468 return out;
469} 469}
470 470
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 50a59d2337b2..7df88a6dd626 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -674,20 +674,6 @@ static void dlm_leave_domain(struct dlm_ctxt *dlm)
674 spin_unlock(&dlm->spinlock); 674 spin_unlock(&dlm->spinlock);
675} 675}
676 676
677int dlm_joined(struct dlm_ctxt *dlm)
678{
679 int ret = 0;
680
681 spin_lock(&dlm_domain_lock);
682
683 if (dlm->dlm_state == DLM_CTXT_JOINED)
684 ret = 1;
685
686 spin_unlock(&dlm_domain_lock);
687
688 return ret;
689}
690
691int dlm_shutting_down(struct dlm_ctxt *dlm) 677int dlm_shutting_down(struct dlm_ctxt *dlm)
692{ 678{
693 int ret = 0; 679 int ret = 0;
diff --git a/fs/ocfs2/dlm/dlmdomain.h b/fs/ocfs2/dlm/dlmdomain.h
index 2f7f60bfeb3b..fd6122a38dbd 100644
--- a/fs/ocfs2/dlm/dlmdomain.h
+++ b/fs/ocfs2/dlm/dlmdomain.h
@@ -28,7 +28,6 @@
28extern spinlock_t dlm_domain_lock; 28extern spinlock_t dlm_domain_lock;
29extern struct list_head dlm_domains; 29extern struct list_head dlm_domains;
30 30
31int dlm_joined(struct dlm_ctxt *dlm);
32int dlm_shutting_down(struct dlm_ctxt *dlm); 31int dlm_shutting_down(struct dlm_ctxt *dlm);
33void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, 32void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm,
34 int node_num); 33 int node_num);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index cecd875653e4..ce12e0b1a31f 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1070,6 +1070,9 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
1070 dead_node, dlm->name); 1070 dead_node, dlm->name);
1071 list_del_init(&lock->list); 1071 list_del_init(&lock->list);
1072 dlm_lock_put(lock); 1072 dlm_lock_put(lock);
1073 /* Can't schedule DLM_UNLOCK_FREE_LOCK
1074 * - do manually */
1075 dlm_lock_put(lock);
1073 break; 1076 break;
1074 } 1077 }
1075 } 1078 }
@@ -2346,6 +2349,10 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
2346 dead_node, dlm->name); 2349 dead_node, dlm->name);
2347 list_del_init(&lock->list); 2350 list_del_init(&lock->list);
2348 dlm_lock_put(lock); 2351 dlm_lock_put(lock);
2352 /* Can't schedule
2353 * DLM_UNLOCK_FREE_LOCK
2354 * - do manually */
2355 dlm_lock_put(lock);
2349 break; 2356 break;
2350 } 2357 }
2351 } 2358 }
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 1c423af04c69..11849a44dc5a 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3750,6 +3750,9 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
3750 break; 3750 break;
3751 spin_unlock(&dentry_attach_lock); 3751 spin_unlock(&dentry_attach_lock);
3752 3752
3753 if (S_ISDIR(dl->dl_inode->i_mode))
3754 shrink_dcache_parent(dentry);
3755
3753 mlog(0, "d_delete(%pd);\n", dentry); 3756 mlog(0, "d_delete(%pd);\n", dentry);
3754 3757
3755 /* 3758 /*
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 3950693dd0f6..245db4f504da 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -569,7 +569,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
569 handle_t *handle = NULL; 569 handle_t *handle = NULL;
570 struct ocfs2_alloc_context *data_ac = NULL; 570 struct ocfs2_alloc_context *data_ac = NULL;
571 struct ocfs2_alloc_context *meta_ac = NULL; 571 struct ocfs2_alloc_context *meta_ac = NULL;
572 enum ocfs2_alloc_restarted why; 572 enum ocfs2_alloc_restarted why = RESTART_NONE;
573 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 573 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
574 struct ocfs2_extent_tree et; 574 struct ocfs2_extent_tree et;
575 int did_quota = 0; 575 int did_quota = 0;
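The __ocfs2_extend_allocation() one-liner initializes 'why', presumably because the callee writes it only on some paths, and reading an indeterminate enum later is undefined behavior (and a compiler warning). The hazard in miniature, with hypothetical names:

#include <errno.h>
#include <stdio.h>

enum restart_kind { RESTART_NONE = 0, RESTART_TRANS, RESTART_META };

/* Hypothetical allocator: writes *why only when it needs a restart,
 * the way an out-parameter may be filled on just some return paths. */
static int add_data(int need_restart, enum restart_kind *why)
{
	if (need_restart) {
		*why = RESTART_TRANS;
		return -EAGAIN;
	}
	return 0;	/* *why deliberately untouched */
}

int main(void)
{
	/* Initializing at declaration keeps the later read defined even
	 * when the callee returns without writing the out-parameter. */
	enum restart_kind why = RESTART_NONE;
	int status = add_data(0, &why);

	printf("status=%d why=%d\n", status, why);
	return 0;
}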
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 4f502382180f..d10860fde165 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1447,7 +1447,6 @@ bail:
1447 * requires that we call do_exit(). And it isn't exported, but 1447 * requires that we call do_exit(). And it isn't exported, but
1448 * complete_and_exit() seems to be a minimal wrapper around it. */ 1448 * complete_and_exit() seems to be a minimal wrapper around it. */
1449 complete_and_exit(NULL, status); 1449 complete_and_exit(NULL, status);
1450 return status;
1451} 1450}
1452 1451
1453void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) 1452void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
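The journal.c hunk drops a 'return status;' that could never run: complete_and_exit() terminates the thread and does not come back, as the comment above it notes. C11 can encode that contract so compilers flag such dead tails; a tiny userspace illustration:

#include <stdio.h>
#include <stdlib.h>
#include <stdnoreturn.h>

/* Userspace stand-in for complete_and_exit(): declared noreturn, so
 * any statement placed after a call to it is provably unreachable. */
static noreturn void thread_exit_with(int status)
{
	fprintf(stderr, "exiting with %d\n", status);
	exit(status);
}

static int worker(void)
{
	int status = 0;

	/* ... thread body ... */
	thread_exit_with(status);
	/* return status;  <- dead code, like the line removed above */
}

int main(void)
{
	return worker();
}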
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 10d66c75cecb..9581d190f6e1 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -173,7 +173,6 @@ out:
173static const struct vm_operations_struct ocfs2_file_vm_ops = { 173static const struct vm_operations_struct ocfs2_file_vm_ops = {
174 .fault = ocfs2_fault, 174 .fault = ocfs2_fault,
175 .page_mkwrite = ocfs2_page_mkwrite, 175 .page_mkwrite = ocfs2_page_mkwrite,
176 .remap_pages = generic_file_remap_pages,
177}; 176};
178 177
179int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) 178int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 7d6b7d090452..fdbcbfed529e 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -279,6 +279,8 @@ enum ocfs2_mount_options
279 writes */ 279 writes */
280 OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */ 280 OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */
281 OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ 281 OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */
282
283 OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
282}; 284};
283 285
284#define OCFS2_OSB_SOFT_RO 0x0001 286#define OCFS2_OSB_SOFT_RO 0x0001
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 89c0b2620814..3d0b63d34225 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -73,12 +73,6 @@ static loff_t ol_dqblk_off(struct super_block *sb, int c, int off)
73 ol_dqblk_block_off(sb, c, off); 73 ol_dqblk_block_off(sb, c, off);
74} 74}
75 75
76/* Compute block number from given offset */
77static inline unsigned int ol_dqblk_file_block(struct super_block *sb, loff_t off)
78{
79 return off >> sb->s_blocksize_bits;
80}
81
82static inline unsigned int ol_dqblk_block_offset(struct super_block *sb, loff_t off) 76static inline unsigned int ol_dqblk_block_offset(struct super_block *sb, loff_t off)
83{ 77{
84 return off & ((1 << sb->s_blocksize_bits) - 1); 78 return off & ((1 << sb->s_blocksize_bits) - 1);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index d81f6e2a97f5..ee541f92dab4 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2428,8 +2428,6 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
2428 get_bh(prev_bh); 2428 get_bh(prev_bh);
2429 } 2429 }
2430 2430
2431 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
2432
2433 trace_ocfs2_calc_refcount_meta_credits_iterate( 2431 trace_ocfs2_calc_refcount_meta_credits_iterate(
2434 recs_add, (unsigned long long)cpos, clusters, 2432 recs_add, (unsigned long long)cpos, clusters,
2435 (unsigned long long)le64_to_cpu(rec.r_cpos), 2433 (unsigned long long)le64_to_cpu(rec.r_cpos),
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index 41ffd36c689c..6a348b0294ab 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -39,7 +39,7 @@
39#define OCFS2_CHECK_RESERVATIONS 39#define OCFS2_CHECK_RESERVATIONS
40#endif 40#endif
41 41
42DEFINE_SPINLOCK(resv_lock); 42static DEFINE_SPINLOCK(resv_lock);
43 43
44#define OCFS2_MIN_RESV_WINDOW_BITS 8 44#define OCFS2_MIN_RESV_WINDOW_BITS 8
45#define OCFS2_MAX_RESV_WINDOW_BITS 1024 45#define OCFS2_MAX_RESV_WINDOW_BITS 1024
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 706c71c2955d..87a1f7679d9b 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -191,6 +191,7 @@ enum {
191 Opt_coherency_full, 191 Opt_coherency_full,
192 Opt_resv_level, 192 Opt_resv_level,
193 Opt_dir_resv_level, 193 Opt_dir_resv_level,
194 Opt_journal_async_commit,
194 Opt_err, 195 Opt_err,
195}; 196};
196 197
@@ -222,6 +223,7 @@ static const match_table_t tokens = {
222 {Opt_coherency_full, "coherency=full"}, 223 {Opt_coherency_full, "coherency=full"},
223 {Opt_resv_level, "resv_level=%u"}, 224 {Opt_resv_level, "resv_level=%u"},
224 {Opt_dir_resv_level, "dir_resv_level=%u"}, 225 {Opt_dir_resv_level, "dir_resv_level=%u"},
226 {Opt_journal_async_commit, "journal_async_commit"},
225 {Opt_err, NULL} 227 {Opt_err, NULL}
226}; 228};
227 229
@@ -1470,6 +1472,9 @@ static int ocfs2_parse_options(struct super_block *sb,
1470 option < OCFS2_MAX_RESV_LEVEL) 1472 option < OCFS2_MAX_RESV_LEVEL)
1471 mopt->dir_resv_level = option; 1473 mopt->dir_resv_level = option;
1472 break; 1474 break;
1475 case Opt_journal_async_commit:
1476 mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
1477 break;
1473 default: 1478 default:
1474 mlog(ML_ERROR, 1479 mlog(ML_ERROR,
1475 "Unrecognized mount option \"%s\" " 1480 "Unrecognized mount option \"%s\" "
@@ -1576,6 +1581,9 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
1576 if (osb->osb_dir_resv_level != osb->osb_resv_level) 1581 if (osb->osb_dir_resv_level != osb->osb_resv_level)
1577 seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level); 1582 seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level);
1578 1583
1584 if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
1585 seq_printf(s, ",journal_async_commit");
1586
1579 return 0; 1587 return 0;
1580} 1588}
1581 1589
@@ -2445,6 +2453,15 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
2445 goto finally; 2453 goto finally;
2446 } 2454 }
2447 2455
2456 if (osb->s_mount_opt & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
2457 jbd2_journal_set_features(osb->journal->j_journal,
2458 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2459 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2460 else
2461 jbd2_journal_clear_features(osb->journal->j_journal,
2462 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2463 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2464
2448 if (dirty) { 2465 if (dirty) {
2449 /* recover my local alloc if we didn't unmount cleanly. */ 2466 /* recover my local alloc if we didn't unmount cleanly. */
2450 status = ocfs2_begin_local_alloc_recovery(osb, 2467 status = ocfs2_begin_local_alloc_recovery(osb,
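Wiring up a mount option like journal_async_commit touches four spots in the super.c hunks above: the token enum, the match table, the parser switch, and show_options. A compressed standalone model of that flag round trip, with strcmp() standing in for the kernel's match_token() lookup:

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define MOUNT_JOURNAL_ASYNC_COMMIT	(1u << 15)

/* Parse one option token into the mount flags word; returns 0 on
 * success, -1 for an unrecognized option. */
static int parse_option(const char *opt, unsigned *mount_opt)
{
	if (!strcmp(opt, "journal_async_commit")) {
		*mount_opt |= MOUNT_JOURNAL_ASYNC_COMMIT;
		return 0;
	}
	return -1;
}

/* Emit the active options back out, as ocfs2_show_options() does. */
static void show_options(unsigned mount_opt, FILE *s)
{
	if (mount_opt & MOUNT_JOURNAL_ASYNC_COMMIT)
		fprintf(s, ",journal_async_commit");
	fprintf(s, "\n");
}

int main(void)
{
	unsigned mount_opt = 0;

	assert(parse_option("journal_async_commit", &mount_opt) == 0);
	assert(parse_option("bogus", &mount_opt) == -1);
	show_options(mount_opt, stdout);
	return 0;
}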
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 662f8dee149f..85b190dc132f 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -5334,16 +5334,6 @@ out:
5334 return ret; 5334 return ret;
5335} 5335}
5336 5336
5337static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
5338 struct ocfs2_xattr_bucket *bucket,
5339 int offs)
5340{
5341 int block_off = offs >> inode->i_sb->s_blocksize_bits;
5342
5343 offs = offs % inode->i_sb->s_blocksize;
5344 return bucket_block(bucket, block_off) + offs;
5345}
5346
5347/* 5337/*
5348 * Truncate the specified xe_off entry in xattr bucket. 5338 * Truncate the specified xe_off entry in xattr bucket.
5349 * bucket is indicated by header_bh and len is the new length. 5339 * bucket is indicated by header_bh and len is the new length.
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 246eae84b13b..6396f88c6687 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -443,7 +443,6 @@ struct mem_size_stats {
443 unsigned long anonymous; 443 unsigned long anonymous;
444 unsigned long anonymous_thp; 444 unsigned long anonymous_thp;
445 unsigned long swap; 445 unsigned long swap;
446 unsigned long nonlinear;
447 u64 pss; 446 u64 pss;
448}; 447};
449 448
@@ -484,7 +483,6 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
484{ 483{
485 struct mem_size_stats *mss = walk->private; 484 struct mem_size_stats *mss = walk->private;
486 struct vm_area_struct *vma = mss->vma; 485 struct vm_area_struct *vma = mss->vma;
487 pgoff_t pgoff = linear_page_index(vma, addr);
488 struct page *page = NULL; 486 struct page *page = NULL;
489 487
490 if (pte_present(*pte)) { 488 if (pte_present(*pte)) {
@@ -496,17 +494,10 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
496 mss->swap += PAGE_SIZE; 494 mss->swap += PAGE_SIZE;
497 else if (is_migration_entry(swpent)) 495 else if (is_migration_entry(swpent))
498 page = migration_entry_to_page(swpent); 496 page = migration_entry_to_page(swpent);
499 } else if (pte_file(*pte)) {
500 if (pte_to_pgoff(*pte) != pgoff)
501 mss->nonlinear += PAGE_SIZE;
502 } 497 }
503 498
504 if (!page) 499 if (!page)
505 return; 500 return;
506
507 if (page->index != pgoff)
508 mss->nonlinear += PAGE_SIZE;
509
510 smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte)); 501 smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte));
511} 502}
512 503
@@ -596,7 +587,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
596 [ilog2(VM_ACCOUNT)] = "ac", 587 [ilog2(VM_ACCOUNT)] = "ac",
597 [ilog2(VM_NORESERVE)] = "nr", 588 [ilog2(VM_NORESERVE)] = "nr",
598 [ilog2(VM_HUGETLB)] = "ht", 589 [ilog2(VM_HUGETLB)] = "ht",
599 [ilog2(VM_NONLINEAR)] = "nl",
600 [ilog2(VM_ARCH_1)] = "ar", 590 [ilog2(VM_ARCH_1)] = "ar",
601 [ilog2(VM_DONTDUMP)] = "dd", 591 [ilog2(VM_DONTDUMP)] = "dd",
602#ifdef CONFIG_MEM_SOFT_DIRTY 592#ifdef CONFIG_MEM_SOFT_DIRTY
@@ -668,10 +658,6 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
668 (vma->vm_flags & VM_LOCKED) ? 658 (vma->vm_flags & VM_LOCKED) ?
669 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); 659 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
670 660
671 if (vma->vm_flags & VM_NONLINEAR)
672 seq_printf(m, "Nonlinear: %8lu kB\n",
673 mss.nonlinear >> 10);
674
675 show_smap_vma_flags(m, vma); 661 show_smap_vma_flags(m, vma);
676 m_cache_vma(m, vma); 662 m_cache_vma(m, vma);
677 return 0; 663 return 0;
@@ -772,8 +758,6 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
772 ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); 758 ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
773 } else if (is_swap_pte(ptent)) { 759 } else if (is_swap_pte(ptent)) {
774 ptent = pte_swp_clear_soft_dirty(ptent); 760 ptent = pte_swp_clear_soft_dirty(ptent);
775 } else if (pte_file(ptent)) {
776 ptent = pte_file_clear_soft_dirty(ptent);
777 } 761 }
778 762
779 set_pte_at(vma->vm_mm, addr, pte, ptent); 763 set_pte_at(vma->vm_mm, addr, pte, ptent);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 538519ee37d9..035e51011444 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1536,7 +1536,6 @@ static const struct vm_operations_struct ubifs_file_vm_ops = {
1536 .fault = filemap_fault, 1536 .fault = filemap_fault,
1537 .map_pages = filemap_map_pages, 1537 .map_pages = filemap_map_pages,
1538 .page_mkwrite = ubifs_vm_page_mkwrite, 1538 .page_mkwrite = ubifs_vm_page_mkwrite,
1539 .remap_pages = generic_file_remap_pages,
1540}; 1539};
1541 1540
1542static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) 1541static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 712d312d8e3e..f2d05a19d68c 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1404,5 +1404,4 @@ static const struct vm_operations_struct xfs_file_vm_ops = {
1404 .fault = filemap_fault, 1404 .fault = filemap_fault,
1405 .map_pages = filemap_map_pages, 1405 .map_pages = filemap_map_pages,
1406 .page_mkwrite = xfs_vm_page_mkwrite, 1406 .page_mkwrite = xfs_vm_page_mkwrite,
1407 .remap_pages = generic_file_remap_pages,
1408}; 1407};
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 177d5973b132..129de9204d18 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -474,21 +474,6 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
474{ 474{
475 return pte; 475 return pte;
476} 476}
477
478static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
479{
480 return pte;
481}
482
483static inline pte_t pte_file_mksoft_dirty(pte_t pte)
484{
485 return pte;
486}
487
488static inline int pte_file_soft_dirty(pte_t pte)
489{
490 return 0;
491}
492#endif 477#endif
493 478
494#ifndef __HAVE_PFNMAP_TRACKING 479#ifndef __HAVE_PFNMAP_TRACKING
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ddd2fa7cefd3..f125b88443bd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -401,7 +401,6 @@ struct address_space {
401 spinlock_t tree_lock; /* and lock protecting it */ 401 spinlock_t tree_lock; /* and lock protecting it */
402 atomic_t i_mmap_writable;/* count VM_SHARED mappings */ 402 atomic_t i_mmap_writable;/* count VM_SHARED mappings */
403 struct rb_root i_mmap; /* tree of private and shared mappings */ 403 struct rb_root i_mmap; /* tree of private and shared mappings */
404 struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
405 struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */ 404 struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */
406 /* Protected by tree_lock together with the radix tree */ 405 /* Protected by tree_lock together with the radix tree */
407 unsigned long nrpages; /* number of total pages */ 406 unsigned long nrpages; /* number of total pages */
@@ -493,8 +492,7 @@ static inline void i_mmap_unlock_read(struct address_space *mapping)
493 */ 492 */
494static inline int mapping_mapped(struct address_space *mapping) 493static inline int mapping_mapped(struct address_space *mapping)
495{ 494{
496 return !RB_EMPTY_ROOT(&mapping->i_mmap) || 495 return !RB_EMPTY_ROOT(&mapping->i_mmap);
497 !list_empty(&mapping->i_mmap_nonlinear);
498} 496}
499 497
500/* 498/*
@@ -2501,8 +2499,6 @@ extern int sb_min_blocksize(struct super_block *, int);
2501 2499
2502extern int generic_file_mmap(struct file *, struct vm_area_struct *); 2500extern int generic_file_mmap(struct file *, struct vm_area_struct *);
2503extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); 2501extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
2504extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
2505 unsigned long size, pgoff_t pgoff);
2506int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); 2502int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
2507extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); 2503extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
2508extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); 2504extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 1c804b057fb1..7ee1774edee5 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -101,8 +101,10 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
101 new_dir_mask |= FS_ISDIR; 101 new_dir_mask |= FS_ISDIR;
102 } 102 }
103 103
104 fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name, fs_cookie); 104 fsnotify(old_dir, old_dir_mask, source, FSNOTIFY_EVENT_INODE, old_name,
105 fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name, fs_cookie); 105 fs_cookie);
106 fsnotify(new_dir, new_dir_mask, source, FSNOTIFY_EVENT_INODE, new_name,
107 fs_cookie);
106 108
107 if (target) 109 if (target)
108 fsnotify_link_count(target); 110 fsnotify_link_count(target);
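
The fsnotify_move() change above alters which inode is passed as the event data: the moved object itself (source) for both halves of the rename, rather than the two directories. For orientation, here is a minimal userspace view of such a rename through the standard inotify API, with the two halves paired by cookie. This is an illustrative sketch only, not part of the diff; the watch path is an assumption and error handling is abbreviated.

/* Hedged demo: watch a directory and pair rename events by cookie. */
#include <stdio.h>
#include <sys/inotify.h>
#include <unistd.h>

int main(void)
{
	char buf[4096] __attribute__((aligned(8)));
	int fd = inotify_init1(0);

	if (fd < 0 || inotify_add_watch(fd, "/tmp", IN_MOVE) < 0) {
		perror("inotify");
		return 1;
	}
	/* rename /tmp/a to /tmp/b in another shell, then: */
	ssize_t len = read(fd, buf, sizeof(buf));

	if (len <= 0)
		return 1;
	for (char *p = buf; p < buf + len;) {
		struct inotify_event *ev = (struct inotify_event *)p;

		printf("%s cookie=%u name=%s\n",
		       (ev->mask & IN_MOVED_FROM) ? "MOVED_FROM" : "MOVED_TO",
		       ev->cookie, ev->len ? ev->name : "");
		p += sizeof(*ev) + ev->len;
	}
	return 0;
}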
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 431b7fc605c9..7d7856359920 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -86,7 +86,7 @@ void free_huge_page(struct page *page);
86pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); 86pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
87#endif 87#endif
88 88
89extern unsigned long hugepages_treat_as_movable; 89extern int hugepages_treat_as_movable;
90extern int sysctl_hugetlb_shm_group; 90extern int sysctl_hugetlb_shm_group;
91extern struct list_head huge_boot_pages; 91extern struct list_head huge_boot_pages;
92 92
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 7c95af8d552c..fb212e1d700d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -403,10 +403,9 @@ void memcg_update_array_size(int num_groups);
403struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep); 403struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep);
404void __memcg_kmem_put_cache(struct kmem_cache *cachep); 404void __memcg_kmem_put_cache(struct kmem_cache *cachep);
405 405
406int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order); 406int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
407void __memcg_uncharge_slab(struct kmem_cache *cachep, int order); 407 unsigned long nr_pages);
408 408void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages);
409int __memcg_cleanup_cache_params(struct kmem_cache *s);
410 409
411/** 410/**
412 * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. 411 * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 237b3ba29225..65db4aee738a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -138,7 +138,6 @@ extern unsigned int kobjsize(const void *objp);
138#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ 138#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
139#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ 139#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */
140#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ 140#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
141#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
142#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ 141#define VM_ARCH_1 0x01000000 /* Architecture-specific flag */
143#define VM_ARCH_2 0x02000000 142#define VM_ARCH_2 0x02000000
144#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ 143#define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */
@@ -206,21 +205,19 @@ extern unsigned int kobjsize(const void *objp);
206extern pgprot_t protection_map[16]; 205extern pgprot_t protection_map[16];
207 206
208#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ 207#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */
209#define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ 208#define FAULT_FLAG_MKWRITE 0x02 /* Fault was mkwrite of existing pte */
210#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ 209#define FAULT_FLAG_ALLOW_RETRY 0x04 /* Retry fault if blocking */
211#define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */ 210#define FAULT_FLAG_RETRY_NOWAIT 0x08 /* Don't drop mmap_sem and wait when retrying */
212#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */ 211#define FAULT_FLAG_KILLABLE 0x10 /* The fault task is in SIGKILL killable region */
213#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ 212#define FAULT_FLAG_TRIED 0x20 /* Second try */
214#define FAULT_FLAG_TRIED 0x40 /* second try */ 213#define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */
215#define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */
216 214
217/* 215/*
218 * vm_fault is filled by the pagefault handler and passed to the vma's 216 * vm_fault is filled by the pagefault handler and passed to the vma's
219 * ->fault function. The vma's ->fault is responsible for returning a bitmask 217 * ->fault function. The vma's ->fault is responsible for returning a bitmask
220 * of VM_FAULT_xxx flags that give details about how the fault was handled. 218 * of VM_FAULT_xxx flags that give details about how the fault was handled.
221 * 219 *
222 * pgoff should be used in favour of virtual_address, if possible. If pgoff 220 * pgoff should be used in favour of virtual_address, if possible.
223 * is used, one may implement ->remap_pages to get nonlinear mapping support.
224 */ 221 */
225struct vm_fault { 222struct vm_fault {
226 unsigned int flags; /* FAULT_FLAG_xxx flags */ 223 unsigned int flags; /* FAULT_FLAG_xxx flags */
@@ -287,10 +284,6 @@ struct vm_operations_struct {
287 struct mempolicy *(*get_policy)(struct vm_area_struct *vma, 284 struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
288 unsigned long addr); 285 unsigned long addr);
289#endif 286#endif
290 /* called by sys_remap_file_pages() to populate non-linear mapping */
291 int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,
292 unsigned long size, pgoff_t pgoff);
293
294 /* 287 /*
295 * Called by vm_normal_page() for special PTEs to find the 288 * Called by vm_normal_page() for special PTEs to find the
296 * page for @addr. This is useful if the default behavior 289 * page for @addr. This is useful if the default behavior
@@ -454,6 +447,12 @@ static inline struct page *compound_head_by_tail(struct page *tail)
454 return tail; 447 return tail;
455} 448}
456 449
450/*
451 * Since a compound page could be dismantled asynchronously by THP,
452 * or we may access an arbitrarily positioned struct page asynchronously,
453 * there is a tail-flag race. To handle this race, we should call
454 * smp_rmb() before checking the tail flag; compound_head_by_tail() does so.
455 */
457static inline struct page *compound_head(struct page *page) 456static inline struct page *compound_head(struct page *page)
458{ 457{
459 if (unlikely(PageTail(page))) 458 if (unlikely(PageTail(page)))
@@ -462,6 +461,18 @@ static inline struct page *compound_head(struct page *page)
462} 461}
463 462
464/* 463/*
464 * If we access a compound page synchronously, such as access to an
465 * allocated page, there is no need to handle the tail-flag race, so we
466 * can check the tail flag directly without any synchronization primitive.
467 */
468static inline struct page *compound_head_fast(struct page *page)
469{
470 if (unlikely(PageTail(page)))
471 return page->first_page;
472 return page;
473}
474
475/*
465 * The atomic page->_mapcount, starts from -1: so that transitions 476 * The atomic page->_mapcount, starts from -1: so that transitions
466 * both from it and to it can be tracked, using atomic_inc_and_test 477 * both from it and to it can be tracked, using atomic_inc_and_test
467 * and atomic_add_negative(-1). 478 * and atomic_add_negative(-1).
@@ -539,7 +550,14 @@ static inline void get_page(struct page *page)
539static inline struct page *virt_to_head_page(const void *x) 550static inline struct page *virt_to_head_page(const void *x)
540{ 551{
541 struct page *page = virt_to_page(x); 552 struct page *page = virt_to_page(x);
542 return compound_head(page); 553
554 /*
555 * We don't need to worry about synchronization of the tail flag
556 * when we call virt_to_head_page(), since it is only called for an
557 * already-allocated page that won't be freed until this
558 * virt_to_head_page() call has finished. So use the _fast variant.
559 */
560 return compound_head_fast(page);
543} 561}
544 562
545/* 563/*
@@ -1129,7 +1147,6 @@ extern void user_shm_unlock(size_t, struct user_struct *);
1129 * Parameter block passed down to zap_pte_range in exceptional cases. 1147 * Parameter block passed down to zap_pte_range in exceptional cases.
1130 */ 1148 */
1131struct zap_details { 1149struct zap_details {
1132 struct vm_area_struct *nonlinear_vma; /* Check page->index if set */
1133 struct address_space *check_mapping; /* Check page->mapping if set */ 1150 struct address_space *check_mapping; /* Check page->mapping if set */
1134 pgoff_t first_index; /* Lowest page->index to unmap */ 1151 pgoff_t first_index; /* Lowest page->index to unmap */
1135 pgoff_t last_index; /* Highest page->index to unmap */ 1152 pgoff_t last_index; /* Highest page->index to unmap */
@@ -1785,12 +1802,6 @@ struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
1785 for (vma = vma_interval_tree_iter_first(root, start, last); \ 1802 for (vma = vma_interval_tree_iter_first(root, start, last); \
1786 vma; vma = vma_interval_tree_iter_next(vma, start, last)) 1803 vma; vma = vma_interval_tree_iter_next(vma, start, last))
1787 1804
1788static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
1789 struct list_head *list)
1790{
1791 list_add_tail(&vma->shared.nonlinear, list);
1792}
1793
1794void anon_vma_interval_tree_insert(struct anon_vma_chain *node, 1805void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
1795 struct rb_root *root); 1806 struct rb_root *root);
1796void anon_vma_interval_tree_remove(struct anon_vma_chain *node, 1807void anon_vma_interval_tree_remove(struct anon_vma_chain *node,
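
As background for compound_head() versus compound_head_fast() above: the safe lookup must tolerate a THP split racing with it, so compound_head_by_tail() loads first_page and then re-checks the tail flag behind smp_rmb(), while the fast variant may check the flag directly because its caller pins the page. Below is a rough userspace model of the two flavours using C11 atomics in place of the kernel primitives; the toy types and names are invented for illustration and this is not kernel code.

#include <stdatomic.h>

struct toy_page {
	atomic_bool tail;		/* models PageTail() */
	struct toy_page *first_page;	/* models page->first_page */
};

/* Models compound_head(): safe against a concurrent "split". */
static struct toy_page *toy_head_safe(struct toy_page *p)
{
	struct toy_page *head = p->first_page;

	/* Models smp_rmb(): order the head load before the re-check. */
	atomic_thread_fence(memory_order_acquire);
	if (atomic_load_explicit(&p->tail, memory_order_relaxed))
		return head;
	return p;
}

/* Models compound_head_fast(): caller pins the page, no race to handle. */
static struct toy_page *toy_head_fast(struct toy_page *p)
{
	if (atomic_load_explicit(&p->tail, memory_order_relaxed))
		return p->first_page;
	return p;
}

int main(void)
{
	struct toy_page head = { 0 };
	struct toy_page tail = { 0 };

	atomic_init(&tail.tail, 1);
	tail.first_page = &head;

	return (toy_head_safe(&tail) == &head &&
		toy_head_fast(&tail) == &head) ? 0 : 1;
}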
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6d34aa266a8c..07c8bd3f7b48 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -273,15 +273,11 @@ struct vm_area_struct {
273 273
274 /* 274 /*
275 * For areas with an address space and backing store, 275 * For areas with an address space and backing store,
276 * linkage into the address_space->i_mmap interval tree, or 276 * linkage into the address_space->i_mmap interval tree.
277 * linkage of vma in the address_space->i_mmap_nonlinear list.
278 */ 277 */
279 union { 278 struct {
280 struct { 279 struct rb_node rb;
281 struct rb_node rb; 280 unsigned long rb_subtree_last;
282 unsigned long rb_subtree_last;
283 } linear;
284 struct list_head nonlinear;
285 } shared; 281 } shared;
286 282
287 /* 283 /*
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index d9d7e7e56352..b38f559130d5 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -246,7 +246,6 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
246 * arg: passed to rmap_one() and invalid_vma() 246 * arg: passed to rmap_one() and invalid_vma()
247 * rmap_one: executed on each vma where page is mapped 247 * rmap_one: executed on each vma where page is mapped
248 * done: for checking the traversal termination condition 248 * done: for checking the traversal termination condition
249 * file_nonlinear: for handling file nonlinear mapping
250 * anon_lock: for taking anon_lock in an optimized way rather than the default 249 * anon_lock: for taking anon_lock in an optimized way rather than the default
251 * invalid_vma: for skipping uninteresting vmas 250 * invalid_vma: for skipping uninteresting vmas
252 */ 251 */
@@ -255,7 +254,6 @@ struct rmap_walk_control {
255 int (*rmap_one)(struct page *page, struct vm_area_struct *vma, 254 int (*rmap_one)(struct page *page, struct vm_area_struct *vma,
256 unsigned long addr, void *arg); 255 unsigned long addr, void *arg);
257 int (*done)(struct page *page); 256 int (*done)(struct page *page);
258 int (*file_nonlinear)(struct page *, struct address_space *, void *arg);
259 struct anon_vma *(*anon_lock)(struct page *page); 257 struct anon_vma *(*anon_lock)(struct page *page);
260 bool (*invalid_vma)(struct vm_area_struct *vma, void *arg); 258 bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
261}; 259};
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 9a139b637069..2e3b448cfa2d 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -116,9 +116,8 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
116 unsigned long, 116 unsigned long,
117 void (*)(void *)); 117 void (*)(void *));
118#ifdef CONFIG_MEMCG_KMEM 118#ifdef CONFIG_MEMCG_KMEM
119struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *, 119void memcg_create_kmem_cache(struct mem_cgroup *, struct kmem_cache *);
120 struct kmem_cache *, 120void memcg_destroy_kmem_caches(struct mem_cgroup *);
121 const char *);
122#endif 121#endif
123void kmem_cache_destroy(struct kmem_cache *); 122void kmem_cache_destroy(struct kmem_cache *);
124int kmem_cache_shrink(struct kmem_cache *); 123int kmem_cache_shrink(struct kmem_cache *);
@@ -491,7 +490,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
491 * Child caches will hold extra metadata needed for its operation. Fields are: 490 * Child caches will hold extra metadata needed for its operation. Fields are:
492 * 491 *
493 * @memcg: pointer to the memcg this cache belongs to 492 * @memcg: pointer to the memcg this cache belongs to
494 * @list: list_head for the list of all caches in this memcg
495 * @root_cache: pointer to the global, root cache, this cache was derived from 493 * @root_cache: pointer to the global, root cache, this cache was derived from
496 */ 494 */
497struct memcg_cache_params { 495struct memcg_cache_params {
@@ -503,7 +501,6 @@ struct memcg_cache_params {
503 }; 501 };
504 struct { 502 struct {
505 struct mem_cgroup *memcg; 503 struct mem_cgroup *memcg;
506 struct list_head list;
507 struct kmem_cache *root_cache; 504 struct kmem_cache *root_cache;
508 }; 505 };
509 }; 506 };
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 6adfb7bfbf44..50cbc876be56 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -54,7 +54,7 @@ static inline pgoff_t swp_offset(swp_entry_t entry)
54/* check whether a pte points to a swap entry */ 54/* check whether a pte points to a swap entry */
55static inline int is_swap_pte(pte_t pte) 55static inline int is_swap_pte(pte_t pte)
56{ 56{
57 return !pte_none(pte) && !pte_present_nonuma(pte) && !pte_file(pte); 57 return !pte_none(pte) && !pte_present_nonuma(pte);
58} 58}
59#endif 59#endif
60 60
@@ -66,7 +66,6 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
66{ 66{
67 swp_entry_t arch_entry; 67 swp_entry_t arch_entry;
68 68
69 BUG_ON(pte_file(pte));
70 if (pte_swp_soft_dirty(pte)) 69 if (pte_swp_soft_dirty(pte))
71 pte = pte_swp_clear_soft_dirty(pte); 70 pte = pte_swp_clear_soft_dirty(pte);
72 arch_entry = __pte_to_swp_entry(pte); 71 arch_entry = __pte_to_swp_entry(pte);
@@ -82,7 +81,6 @@ static inline pte_t swp_entry_to_pte(swp_entry_t entry)
82 swp_entry_t arch_entry; 81 swp_entry_t arch_entry;
83 82
84 arch_entry = __swp_entry(swp_type(entry), swp_offset(entry)); 83 arch_entry = __swp_entry(swp_type(entry), swp_offset(entry));
85 BUG_ON(pte_file(__swp_entry_to_pte(arch_entry)));
86 return __swp_entry_to_pte(arch_entry); 84 return __swp_entry_to_pte(arch_entry);
87} 85}
88 86
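
With pte_file() gone, a pte that is neither none nor present can only be a swap-style entry, which is why is_swap_pte() above reduces to two checks. Such an entry simply packs a type and an offset into the pte bits. The following toy model shows the packing idea with a made-up layout; the real encodings are arch-specific and live behind __swp_entry() and __pte_to_swp_entry(), so treat every constant here as an assumption.

#include <assert.h>
#include <stdio.h>

#define TOY_TYPE_BITS	5UL
#define TOY_TYPE_SHIFT	(sizeof(unsigned long) * 8 - TOY_TYPE_BITS)
#define TOY_OFFSET_MASK	(~0UL >> TOY_TYPE_BITS)

static unsigned long toy_swp_entry(unsigned long type, unsigned long offset)
{
	return (type << TOY_TYPE_SHIFT) | (offset & TOY_OFFSET_MASK);
}

static unsigned long toy_swp_type(unsigned long entry)
{
	return entry >> TOY_TYPE_SHIFT;
}

static unsigned long toy_swp_offset(unsigned long entry)
{
	return entry & TOY_OFFSET_MASK;
}

int main(void)
{
	unsigned long e = toy_swp_entry(3, 0x12345);

	assert(toy_swp_type(e) == 3 && toy_swp_offset(e) == 0x12345);
	printf("entry=%#lx type=%lu offset=%#lx\n",
	       e, toy_swp_type(e), toy_swp_offset(e));
	return 0;
}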
diff --git a/kernel/fork.c b/kernel/fork.c
index 4dc2ddade9f1..b379d9abddc7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -438,12 +438,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
438 atomic_inc(&mapping->i_mmap_writable); 438 atomic_inc(&mapping->i_mmap_writable);
439 flush_dcache_mmap_lock(mapping); 439 flush_dcache_mmap_lock(mapping);
440 /* insert tmp into the share list, just after mpnt */ 440 /* insert tmp into the share list, just after mpnt */
441 if (unlikely(tmp->vm_flags & VM_NONLINEAR)) 441 vma_interval_tree_insert_after(tmp, mpnt,
442 vma_nonlinear_insert(tmp, 442 &mapping->i_mmap);
443 &mapping->i_mmap_nonlinear);
444 else
445 vma_interval_tree_insert_after(tmp, mpnt,
446 &mapping->i_mmap);
447 flush_dcache_mmap_unlock(mapping); 443 flush_dcache_mmap_unlock(mapping);
448 i_mmap_unlock_write(mapping); 444 i_mmap_unlock_write(mapping);
449 } 445 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 137c7f69b264..88ea2d6e0031 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1248,7 +1248,6 @@ static struct ctl_table vm_table[] = {
1248 .maxlen = sizeof(unsigned long), 1248 .maxlen = sizeof(unsigned long),
1249 .mode = 0644, 1249 .mode = 0644,
1250 .proc_handler = hugetlb_sysctl_handler, 1250 .proc_handler = hugetlb_sysctl_handler,
1251 .extra1 = &zero,
1252 }, 1251 },
1253#ifdef CONFIG_NUMA 1252#ifdef CONFIG_NUMA
1254 { 1253 {
@@ -1257,7 +1256,6 @@ static struct ctl_table vm_table[] = {
1257 .maxlen = sizeof(unsigned long), 1256 .maxlen = sizeof(unsigned long),
1258 .mode = 0644, 1257 .mode = 0644,
1259 .proc_handler = &hugetlb_mempolicy_sysctl_handler, 1258 .proc_handler = &hugetlb_mempolicy_sysctl_handler,
1260 .extra1 = &zero,
1261 }, 1259 },
1262#endif 1260#endif
1263 { 1261 {
@@ -1280,7 +1278,6 @@ static struct ctl_table vm_table[] = {
1280 .maxlen = sizeof(unsigned long), 1278 .maxlen = sizeof(unsigned long),
1281 .mode = 0644, 1279 .mode = 0644,
1282 .proc_handler = hugetlb_overcommit_handler, 1280 .proc_handler = hugetlb_overcommit_handler,
1283 .extra1 = &zero,
1284 }, 1281 },
1285#endif 1282#endif
1286 { 1283 {
diff --git a/mm/Makefile b/mm/Makefile
index 4bf586e66378..3548460ab7b6 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -3,7 +3,7 @@
3# 3#
4 4
5mmu-y := nommu.o 5mmu-y := nommu.o
6mmu-$(CONFIG_MMU) := fremap.o gup.o highmem.o memory.o mincore.o \ 6mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \
7 mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ 7 mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
8 vmalloc.o pagewalk.o pgtable-generic.o 8 vmalloc.o pagewalk.o pgtable-generic.o
9 9
diff --git a/mm/debug.c b/mm/debug.c
index 0e58f3211f89..d69cb5a7ba9a 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -130,7 +130,6 @@ static const struct trace_print_flags vmaflags_names[] = {
130 {VM_ACCOUNT, "account" }, 130 {VM_ACCOUNT, "account" },
131 {VM_NORESERVE, "noreserve" }, 131 {VM_NORESERVE, "noreserve" },
132 {VM_HUGETLB, "hugetlb" }, 132 {VM_HUGETLB, "hugetlb" },
133 {VM_NONLINEAR, "nonlinear" },
134#if defined(CONFIG_X86) 133#if defined(CONFIG_X86)
135 {VM_PAT, "pat" }, 134 {VM_PAT, "pat" },
136#elif defined(CONFIG_PPC) 135#elif defined(CONFIG_PPC)
diff --git a/mm/filemap.c b/mm/filemap.c
index 673e4581a2e5..bf7a27142704 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2087,7 +2087,6 @@ const struct vm_operations_struct generic_file_vm_ops = {
2087 .fault = filemap_fault, 2087 .fault = filemap_fault,
2088 .map_pages = filemap_map_pages, 2088 .map_pages = filemap_map_pages,
2089 .page_mkwrite = filemap_page_mkwrite, 2089 .page_mkwrite = filemap_page_mkwrite,
2090 .remap_pages = generic_file_remap_pages,
2091}; 2090};
2092 2091
2093/* This is used for a general mmap of a disk file */ 2092/* This is used for a general mmap of a disk file */
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 0d105aeff82f..70c09da1a419 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -301,7 +301,6 @@ out:
301static const struct vm_operations_struct xip_file_vm_ops = { 301static const struct vm_operations_struct xip_file_vm_ops = {
302 .fault = xip_file_fault, 302 .fault = xip_file_fault,
303 .page_mkwrite = filemap_page_mkwrite, 303 .page_mkwrite = filemap_page_mkwrite,
304 .remap_pages = generic_file_remap_pages,
305}; 304};
306 305
307int xip_file_mmap(struct file * file, struct vm_area_struct * vma) 306int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
diff --git a/mm/fremap.c b/mm/fremap.c
deleted file mode 100644
index 2805d71cf476..000000000000
--- a/mm/fremap.c
+++ /dev/null
@@ -1,283 +0,0 @@
1/*
2 * linux/mm/fremap.c
3 *
4 * Explicit pagetable population and nonlinear (random) mappings support.
5 *
6 * started by Ingo Molnar, Copyright (C) 2002, 2003
7 */
8#include <linux/export.h>
9#include <linux/backing-dev.h>
10#include <linux/mm.h>
11#include <linux/swap.h>
12#include <linux/file.h>
13#include <linux/mman.h>
14#include <linux/pagemap.h>
15#include <linux/swapops.h>
16#include <linux/rmap.h>
17#include <linux/syscalls.h>
18#include <linux/mmu_notifier.h>
19
20#include <asm/mmu_context.h>
21#include <asm/cacheflush.h>
22#include <asm/tlbflush.h>
23
24#include "internal.h"
25
26static int mm_counter(struct page *page)
27{
28 return PageAnon(page) ? MM_ANONPAGES : MM_FILEPAGES;
29}
30
31static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
32 unsigned long addr, pte_t *ptep)
33{
34 pte_t pte = *ptep;
35 struct page *page;
36 swp_entry_t entry;
37
38 if (pte_present(pte)) {
39 flush_cache_page(vma, addr, pte_pfn(pte));
40 pte = ptep_clear_flush_notify(vma, addr, ptep);
41 page = vm_normal_page(vma, addr, pte);
42 if (page) {
43 if (pte_dirty(pte))
44 set_page_dirty(page);
45 update_hiwater_rss(mm);
46 dec_mm_counter(mm, mm_counter(page));
47 page_remove_rmap(page);
48 page_cache_release(page);
49 }
50 } else { /* zap_pte() is not called when pte_none() */
51 if (!pte_file(pte)) {
52 update_hiwater_rss(mm);
53 entry = pte_to_swp_entry(pte);
54 if (non_swap_entry(entry)) {
55 if (is_migration_entry(entry)) {
56 page = migration_entry_to_page(entry);
57 dec_mm_counter(mm, mm_counter(page));
58 }
59 } else {
60 free_swap_and_cache(entry);
61 dec_mm_counter(mm, MM_SWAPENTS);
62 }
63 }
64 pte_clear_not_present_full(mm, addr, ptep, 0);
65 }
66}
67
68/*
69 * Install a file pte to a given virtual memory address, release any
70 * previously existing mapping.
71 */
72static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
73 unsigned long addr, unsigned long pgoff, pgprot_t prot)
74{
75 int err = -ENOMEM;
76 pte_t *pte, ptfile;
77 spinlock_t *ptl;
78
79 pte = get_locked_pte(mm, addr, &ptl);
80 if (!pte)
81 goto out;
82
83 ptfile = pgoff_to_pte(pgoff);
84
85 if (!pte_none(*pte))
86 zap_pte(mm, vma, addr, pte);
87
88 set_pte_at(mm, addr, pte, pte_file_mksoft_dirty(ptfile));
89 /*
90 * We don't need to run update_mmu_cache() here because the "file pte"
91 * being installed by install_file_pte() is not a real pte - it's a
92 * non-present entry (like a swap entry), noting what file offset should
93 * be mapped there when there's a fault (in a non-linear vma where
94 * that's not obvious).
95 */
96 pte_unmap_unlock(pte, ptl);
97 err = 0;
98out:
99 return err;
100}
101
102int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr,
103 unsigned long size, pgoff_t pgoff)
104{
105 struct mm_struct *mm = vma->vm_mm;
106 int err;
107
108 do {
109 err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot);
110 if (err)
111 return err;
112
113 size -= PAGE_SIZE;
114 addr += PAGE_SIZE;
115 pgoff++;
116 } while (size);
117
118 return 0;
119}
120EXPORT_SYMBOL(generic_file_remap_pages);
121
122/**
123 * sys_remap_file_pages - remap arbitrary pages of an existing VM_SHARED vma
124 * @start: start of the remapped virtual memory range
125 * @size: size of the remapped virtual memory range
126 * @prot: new protection bits of the range (see NOTE)
127 * @pgoff: to-be-mapped page of the backing store file
129 * @flags: 0 or MAP_NONBLOCK - the latter will cause no IO.
129 *
130 * sys_remap_file_pages remaps arbitrary pages of an existing VM_SHARED vma
131 * (shared backing store file).
132 *
133 * This syscall works purely via pagetables, so it's the most efficient
134 * way to map the same (large) file into a given virtual window. Unlike
135 * mmap()/mremap() it does not create any new vmas. The new mappings are
136 * also safe across swapout.
137 *
138 * NOTE: the @prot parameter right now is ignored (but must be zero),
139 * and the vma's default protection is used. Arbitrary protections
140 * might be implemented in the future.
141 */
142SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
143 unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
144{
145 struct mm_struct *mm = current->mm;
146 struct address_space *mapping;
147 struct vm_area_struct *vma;
148 int err = -EINVAL;
149 int has_write_lock = 0;
150 vm_flags_t vm_flags = 0;
151
152 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
153 "See Documentation/vm/remap_file_pages.txt.\n",
154 current->comm, current->pid);
155
156 if (prot)
157 return err;
158 /*
159 * Sanitize the syscall parameters:
160 */
161 start = start & PAGE_MASK;
162 size = size & PAGE_MASK;
163
164 /* Does the address range wrap, or is the span zero-sized? */
165 if (start + size <= start)
166 return err;
167
168 /* Does pgoff wrap? */
169 if (pgoff + (size >> PAGE_SHIFT) < pgoff)
170 return err;
171
172 /* Can we represent this offset inside this architecture's pte's? */
173#if PTE_FILE_MAX_BITS < BITS_PER_LONG
174 if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS))
175 return err;
176#endif
177
178 /* We need down_write() to change vma->vm_flags. */
179 down_read(&mm->mmap_sem);
180 retry:
181 vma = find_vma(mm, start);
182
183 /*
184 * Make sure the vma is shared, that it supports prefaulting,
185 * and that the remapped range is valid and fully within
186 * the single existing vma.
187 */
188 if (!vma || !(vma->vm_flags & VM_SHARED))
189 goto out;
190
191 if (!vma->vm_ops || !vma->vm_ops->remap_pages)
192 goto out;
193
194 if (start < vma->vm_start || start + size > vma->vm_end)
195 goto out;
196
197 /* Must set VM_NONLINEAR before any pages are populated. */
198 if (!(vma->vm_flags & VM_NONLINEAR)) {
199 /*
200 * vm_private_data is used as a swapout cursor
201 * in a VM_NONLINEAR vma.
202 */
203 if (vma->vm_private_data)
204 goto out;
205
206 /* Don't need a nonlinear mapping, exit success */
207 if (pgoff == linear_page_index(vma, start)) {
208 err = 0;
209 goto out;
210 }
211
212 if (!has_write_lock) {
213get_write_lock:
214 up_read(&mm->mmap_sem);
215 down_write(&mm->mmap_sem);
216 has_write_lock = 1;
217 goto retry;
218 }
219 mapping = vma->vm_file->f_mapping;
220 /*
221 * page_mkclean doesn't work on nonlinear vmas, so if
222 * dirty pages need to be accounted, emulate with linear
223 * vmas.
224 */
225 if (mapping_cap_account_dirty(mapping)) {
226 unsigned long addr;
227 struct file *file = get_file(vma->vm_file);
228 /* mmap_region may free vma; grab the info now */
229 vm_flags = vma->vm_flags;
230
231 addr = mmap_region(file, start, size, vm_flags, pgoff);
232 fput(file);
233 if (IS_ERR_VALUE(addr)) {
234 err = addr;
235 } else {
236 BUG_ON(addr != start);
237 err = 0;
238 }
239 goto out_freed;
240 }
241 i_mmap_lock_write(mapping);
242 flush_dcache_mmap_lock(mapping);
243 vma->vm_flags |= VM_NONLINEAR;
244 vma_interval_tree_remove(vma, &mapping->i_mmap);
245 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
246 flush_dcache_mmap_unlock(mapping);
247 i_mmap_unlock_write(mapping);
248 }
249
250 if (vma->vm_flags & VM_LOCKED) {
251 /*
252 * drop PG_Mlocked flag for over-mapped range
253 */
254 if (!has_write_lock)
255 goto get_write_lock;
256 vm_flags = vma->vm_flags;
257 munlock_vma_pages_range(vma, start, start + size);
258 vma->vm_flags = vm_flags;
259 }
260
261 mmu_notifier_invalidate_range_start(mm, start, start + size);
262 err = vma->vm_ops->remap_pages(vma, start, size, pgoff);
263 mmu_notifier_invalidate_range_end(mm, start, start + size);
264
265 /*
266 * We can't clear VM_NONLINEAR because we'd have to do
267 * it after ->populate completes, and that would prevent
268 * downgrading the lock. (Locks can't be upgraded).
269 */
270
271out:
272 if (vma)
273 vm_flags = vma->vm_flags;
274out_freed:
275 if (likely(!has_write_lock))
276 up_read(&mm->mmap_sem);
277 else
278 up_write(&mm->mmap_sem);
279 if (!err && ((vm_flags & VM_LOCKED) || !(flags & MAP_NONBLOCK)))
280 mm_populate(start, size);
281
282 return err;
283}
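
For reference, here is what the syscall implemented by the deleted file looks like from userspace: one VM_SHARED window whose virtual pages are pointed at out-of-order file pages, the pattern the kerneldoc block above describes. This is a hedged sketch using the glibc wrapper; the temp-file path is an assumption and error handling is abbreviated.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	int fd = open("/tmp/remap-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
	char *win;

	if (fd < 0 || ftruncate(fd, 2 * psz) < 0)
		return 1;
	pwrite(fd, "page0", 6, 0);
	pwrite(fd, "page1", 6, psz);

	win = mmap(NULL, 2 * psz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (win == MAP_FAILED)
		return 1;

	/* Cross-map: first virtual page -> file page 1, second -> page 0.
	 * prot must be 0, as the NOTE in the deleted kerneldoc says. */
	remap_file_pages(win, psz, 0, 1, 0);
	remap_file_pages(win + psz, psz, 0, 0, 0);

	printf("%s %s\n", win, win + psz);	/* expect: page1 page0 */
	munmap(win, 2 * psz);
	close(fd);
	return 0;
}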
diff --git a/mm/gup.c b/mm/gup.c
index 8dd50ce6326f..12bc2bc33da7 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -55,7 +55,7 @@ retry:
55 */ 55 */
56 if (likely(!(flags & FOLL_MIGRATION))) 56 if (likely(!(flags & FOLL_MIGRATION)))
57 goto no_page; 57 goto no_page;
58 if (pte_none(pte) || pte_file(pte)) 58 if (pte_none(pte))
59 goto no_page; 59 goto no_page;
60 entry = pte_to_swp_entry(pte); 60 entry = pte_to_swp_entry(pte);
61 if (!is_migration_entry(entry)) 61 if (!is_migration_entry(entry))
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 85032de5e20f..be0e5d0db5ec 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -35,7 +35,7 @@
35#include <linux/node.h> 35#include <linux/node.h>
36#include "internal.h" 36#include "internal.h"
37 37
38unsigned long hugepages_treat_as_movable; 38int hugepages_treat_as_movable;
39 39
40int hugetlb_max_hstate __read_mostly; 40int hugetlb_max_hstate __read_mostly;
41unsigned int default_hstate_idx; 41unsigned int default_hstate_idx;
diff --git a/mm/interval_tree.c b/mm/interval_tree.c
index 8da581fa9060..f2c2492681bf 100644
--- a/mm/interval_tree.c
+++ b/mm/interval_tree.c
@@ -21,8 +21,8 @@ static inline unsigned long vma_last_pgoff(struct vm_area_struct *v)
21 return v->vm_pgoff + ((v->vm_end - v->vm_start) >> PAGE_SHIFT) - 1; 21 return v->vm_pgoff + ((v->vm_end - v->vm_start) >> PAGE_SHIFT) - 1;
22} 22}
23 23
24INTERVAL_TREE_DEFINE(struct vm_area_struct, shared.linear.rb, 24INTERVAL_TREE_DEFINE(struct vm_area_struct, shared.rb,
25 unsigned long, shared.linear.rb_subtree_last, 25 unsigned long, shared.rb_subtree_last,
26 vma_start_pgoff, vma_last_pgoff,, vma_interval_tree) 26 vma_start_pgoff, vma_last_pgoff,, vma_interval_tree)
27 27
28/* Insert node immediately after prev in the interval tree */ 28/* Insert node immediately after prev in the interval tree */
@@ -36,26 +36,26 @@ void vma_interval_tree_insert_after(struct vm_area_struct *node,
36 36
37 VM_BUG_ON_VMA(vma_start_pgoff(node) != vma_start_pgoff(prev), node); 37 VM_BUG_ON_VMA(vma_start_pgoff(node) != vma_start_pgoff(prev), node);
38 38
39 if (!prev->shared.linear.rb.rb_right) { 39 if (!prev->shared.rb.rb_right) {
40 parent = prev; 40 parent = prev;
41 link = &prev->shared.linear.rb.rb_right; 41 link = &prev->shared.rb.rb_right;
42 } else { 42 } else {
43 parent = rb_entry(prev->shared.linear.rb.rb_right, 43 parent = rb_entry(prev->shared.rb.rb_right,
44 struct vm_area_struct, shared.linear.rb); 44 struct vm_area_struct, shared.rb);
45 if (parent->shared.linear.rb_subtree_last < last) 45 if (parent->shared.rb_subtree_last < last)
46 parent->shared.linear.rb_subtree_last = last; 46 parent->shared.rb_subtree_last = last;
47 while (parent->shared.linear.rb.rb_left) { 47 while (parent->shared.rb.rb_left) {
48 parent = rb_entry(parent->shared.linear.rb.rb_left, 48 parent = rb_entry(parent->shared.rb.rb_left,
49 struct vm_area_struct, shared.linear.rb); 49 struct vm_area_struct, shared.rb);
50 if (parent->shared.linear.rb_subtree_last < last) 50 if (parent->shared.rb_subtree_last < last)
51 parent->shared.linear.rb_subtree_last = last; 51 parent->shared.rb_subtree_last = last;
52 } 52 }
53 link = &parent->shared.linear.rb.rb_left; 53 link = &parent->shared.rb.rb_left;
54 } 54 }
55 55
56 node->shared.linear.rb_subtree_last = last; 56 node->shared.rb_subtree_last = last;
57 rb_link_node(&node->shared.linear.rb, &parent->shared.linear.rb, link); 57 rb_link_node(&node->shared.rb, &parent->shared.rb, link);
58 rb_insert_augmented(&node->shared.linear.rb, root, 58 rb_insert_augmented(&node->shared.rb, root,
59 &vma_interval_tree_augment); 59 &vma_interval_tree_augment);
60} 60}
61 61
diff --git a/mm/ksm.c b/mm/ksm.c
index 15647fb0394f..4162dce2eb44 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1748,7 +1748,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
1748 */ 1748 */
1749 if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | 1749 if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE |
1750 VM_PFNMAP | VM_IO | VM_DONTEXPAND | 1750 VM_PFNMAP | VM_IO | VM_DONTEXPAND |
1751 VM_HUGETLB | VM_NONLINEAR | VM_MIXEDMAP)) 1751 VM_HUGETLB | VM_MIXEDMAP))
1752 return 0; /* just ignore the advice */ 1752 return 0; /* just ignore the advice */
1753 1753
1754#ifdef VM_SAO 1754#ifdef VM_SAO
diff --git a/mm/madvise.c b/mm/madvise.c
index a271adc93289..d79fb5e8f80a 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -155,7 +155,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
155 pte = *(orig_pte + ((index - start) / PAGE_SIZE)); 155 pte = *(orig_pte + ((index - start) / PAGE_SIZE));
156 pte_unmap_unlock(orig_pte, ptl); 156 pte_unmap_unlock(orig_pte, ptl);
157 157
158 if (pte_present(pte) || pte_none(pte) || pte_file(pte)) 158 if (pte_present(pte) || pte_none(pte))
159 continue; 159 continue;
160 entry = pte_to_swp_entry(pte); 160 entry = pte_to_swp_entry(pte);
161 if (unlikely(non_swap_entry(entry))) 161 if (unlikely(non_swap_entry(entry)))
@@ -278,14 +278,7 @@ static long madvise_dontneed(struct vm_area_struct *vma,
278 if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)) 278 if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
279 return -EINVAL; 279 return -EINVAL;
280 280
281 if (unlikely(vma->vm_flags & VM_NONLINEAR)) { 281 zap_page_range(vma, start, end - start, NULL);
282 struct zap_details details = {
283 .nonlinear_vma = vma,
284 .last_index = ULONG_MAX,
285 };
286 zap_page_range(vma, start, end - start, &details);
287 } else
288 zap_page_range(vma, start, end - start, NULL);
289 return 0; 282 return 0;
290} 283}
291 284
@@ -303,7 +296,7 @@ static long madvise_remove(struct vm_area_struct *vma,
303 296
304 *prev = NULL; /* tell sys_madvise we drop mmap_sem */ 297 *prev = NULL; /* tell sys_madvise we drop mmap_sem */
305 298
306 if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB)) 299 if (vma->vm_flags & (VM_LOCKED | VM_HUGETLB))
307 return -EINVAL; 300 return -EINVAL;
308 301
309 f = vma->vm_file; 302 f = vma->vm_file;
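
With the VM_NONLINEAR branch gone, madvise_dontneed() always takes the plain zap_page_range() path, and the userspace contract is unchanged: after MADV_DONTNEED, anonymous private memory reads back as zeroes. A small illustrative demo of that contract (not part of this diff):

#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, psz, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	assert(p != MAP_FAILED);
	memset(p, 0xaa, psz);
	assert(madvise(p, psz, MADV_DONTNEED) == 0);
	assert(p[0] == 0);	/* zapped; next fault maps a zero page */
	printf("MADV_DONTNEED dropped the page as expected\n");
	return munmap(p, psz);
}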
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2f6893c2f01b..f3f8a4f52a0c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -343,9 +343,6 @@ struct mem_cgroup {
343 struct cg_proto tcp_mem; 343 struct cg_proto tcp_mem;
344#endif 344#endif
345#if defined(CONFIG_MEMCG_KMEM) 345#if defined(CONFIG_MEMCG_KMEM)
346 /* analogous to slab_common's slab_caches list, but per-memcg;
347 * protected by memcg_slab_mutex */
348 struct list_head memcg_slab_caches;
349 /* Index in the kmem_cache->memcg_params->memcg_caches array */ 346 /* Index in the kmem_cache->memcg_params->memcg_caches array */
350 int kmemcg_id; 347 int kmemcg_id;
351#endif 348#endif
@@ -2476,27 +2473,8 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
2476} 2473}
2477 2474
2478#ifdef CONFIG_MEMCG_KMEM 2475#ifdef CONFIG_MEMCG_KMEM
2479/* 2476int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
2480 * The memcg_slab_mutex is held whenever a per memcg kmem cache is created or 2477 unsigned long nr_pages)
2481 * destroyed. It protects memcg_caches arrays and memcg_slab_caches lists.
2482 */
2483static DEFINE_MUTEX(memcg_slab_mutex);
2484
2485/*
2486 * This is a bit cumbersome, but it is rarely used and avoids a backpointer
2487 * in the memcg_cache_params struct.
2488 */
2489static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
2490{
2491 struct kmem_cache *cachep;
2492
2493 VM_BUG_ON(p->is_root_cache);
2494 cachep = p->root_cache;
2495 return cache_from_memcg_idx(cachep, memcg_cache_id(p->memcg));
2496}
2497
2498static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
2499 unsigned long nr_pages)
2500{ 2478{
2501 struct page_counter *counter; 2479 struct page_counter *counter;
2502 int ret = 0; 2480 int ret = 0;
@@ -2533,8 +2511,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
2533 return ret; 2511 return ret;
2534} 2512}
2535 2513
2536static void memcg_uncharge_kmem(struct mem_cgroup *memcg, 2514void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages)
2537 unsigned long nr_pages)
2538{ 2515{
2539 page_counter_uncharge(&memcg->memory, nr_pages); 2516 page_counter_uncharge(&memcg->memory, nr_pages);
2540 if (do_swap_account) 2517 if (do_swap_account)
@@ -2579,10 +2556,7 @@ static int memcg_alloc_cache_id(void)
2579 else if (size > MEMCG_CACHES_MAX_SIZE) 2556 else if (size > MEMCG_CACHES_MAX_SIZE)
2580 size = MEMCG_CACHES_MAX_SIZE; 2557 size = MEMCG_CACHES_MAX_SIZE;
2581 2558
2582 mutex_lock(&memcg_slab_mutex);
2583 err = memcg_update_all_caches(size); 2559 err = memcg_update_all_caches(size);
2584 mutex_unlock(&memcg_slab_mutex);
2585
2586 if (err) { 2560 if (err) {
2587 ida_simple_remove(&kmem_limited_groups, id); 2561 ida_simple_remove(&kmem_limited_groups, id);
2588 return err; 2562 return err;
@@ -2605,123 +2579,20 @@ void memcg_update_array_size(int num)
2605 memcg_limited_groups_array_size = num; 2579 memcg_limited_groups_array_size = num;
2606} 2580}
2607 2581
2608static void memcg_register_cache(struct mem_cgroup *memcg, 2582struct memcg_kmem_cache_create_work {
2609 struct kmem_cache *root_cache)
2610{
2611 static char memcg_name_buf[NAME_MAX + 1]; /* protected by
2612 memcg_slab_mutex */
2613 struct kmem_cache *cachep;
2614 int id;
2615
2616 lockdep_assert_held(&memcg_slab_mutex);
2617
2618 id = memcg_cache_id(memcg);
2619
2620 /*
2621 * Since per-memcg caches are created asynchronously on first
2622 * allocation (see memcg_kmem_get_cache()), several threads can try to
2623 * create the same cache, but only one of them may succeed.
2624 */
2625 if (cache_from_memcg_idx(root_cache, id))
2626 return;
2627
2628 cgroup_name(memcg->css.cgroup, memcg_name_buf, NAME_MAX + 1);
2629 cachep = memcg_create_kmem_cache(memcg, root_cache, memcg_name_buf);
2630 /*
2631 * If we could not create a memcg cache, do not complain, because
2632 * that's not critical at all as we can always proceed with the root
2633 * cache.
2634 */
2635 if (!cachep)
2636 return;
2637
2638 list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
2639
2640 /*
2641 * Since readers won't lock (see cache_from_memcg_idx()), we need a
2642 * barrier here to ensure nobody will see the kmem_cache partially
2643 * initialized.
2644 */
2645 smp_wmb();
2646
2647 BUG_ON(root_cache->memcg_params->memcg_caches[id]);
2648 root_cache->memcg_params->memcg_caches[id] = cachep;
2649}
2650
2651static void memcg_unregister_cache(struct kmem_cache *cachep)
2652{
2653 struct kmem_cache *root_cache;
2654 struct mem_cgroup *memcg;
2655 int id;
2656
2657 lockdep_assert_held(&memcg_slab_mutex);
2658
2659 BUG_ON(is_root_cache(cachep));
2660
2661 root_cache = cachep->memcg_params->root_cache;
2662 memcg = cachep->memcg_params->memcg;
2663 id = memcg_cache_id(memcg);
2664
2665 BUG_ON(root_cache->memcg_params->memcg_caches[id] != cachep);
2666 root_cache->memcg_params->memcg_caches[id] = NULL;
2667
2668 list_del(&cachep->memcg_params->list);
2669
2670 kmem_cache_destroy(cachep);
2671}
2672
2673int __memcg_cleanup_cache_params(struct kmem_cache *s)
2674{
2675 struct kmem_cache *c;
2676 int i, failed = 0;
2677
2678 mutex_lock(&memcg_slab_mutex);
2679 for_each_memcg_cache_index(i) {
2680 c = cache_from_memcg_idx(s, i);
2681 if (!c)
2682 continue;
2683
2684 memcg_unregister_cache(c);
2685
2686 if (cache_from_memcg_idx(s, i))
2687 failed++;
2688 }
2689 mutex_unlock(&memcg_slab_mutex);
2690 return failed;
2691}
2692
2693static void memcg_unregister_all_caches(struct mem_cgroup *memcg)
2694{
2695 struct kmem_cache *cachep;
2696 struct memcg_cache_params *params, *tmp;
2697
2698 if (!memcg_kmem_is_active(memcg))
2699 return;
2700
2701 mutex_lock(&memcg_slab_mutex);
2702 list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
2703 cachep = memcg_params_to_cache(params);
2704 memcg_unregister_cache(cachep);
2705 }
2706 mutex_unlock(&memcg_slab_mutex);
2707}
2708
2709struct memcg_register_cache_work {
2710 struct mem_cgroup *memcg; 2583 struct mem_cgroup *memcg;
2711 struct kmem_cache *cachep; 2584 struct kmem_cache *cachep;
2712 struct work_struct work; 2585 struct work_struct work;
2713}; 2586};
2714 2587
2715static void memcg_register_cache_func(struct work_struct *w) 2588static void memcg_kmem_cache_create_func(struct work_struct *w)
2716{ 2589{
2717 struct memcg_register_cache_work *cw = 2590 struct memcg_kmem_cache_create_work *cw =
2718 container_of(w, struct memcg_register_cache_work, work); 2591 container_of(w, struct memcg_kmem_cache_create_work, work);
2719 struct mem_cgroup *memcg = cw->memcg; 2592 struct mem_cgroup *memcg = cw->memcg;
2720 struct kmem_cache *cachep = cw->cachep; 2593 struct kmem_cache *cachep = cw->cachep;
2721 2594
2722 mutex_lock(&memcg_slab_mutex); 2595 memcg_create_kmem_cache(memcg, cachep);
2723 memcg_register_cache(memcg, cachep);
2724 mutex_unlock(&memcg_slab_mutex);
2725 2596
2726 css_put(&memcg->css); 2597 css_put(&memcg->css);
2727 kfree(cw); 2598 kfree(cw);
@@ -2730,10 +2601,10 @@ static void memcg_register_cache_func(struct work_struct *w)
2730/* 2601/*
2731 * Enqueue the creation of a per-memcg kmem_cache. 2602 * Enqueue the creation of a per-memcg kmem_cache.
2732 */ 2603 */
2733static void __memcg_schedule_register_cache(struct mem_cgroup *memcg, 2604static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
2734 struct kmem_cache *cachep) 2605 struct kmem_cache *cachep)
2735{ 2606{
2736 struct memcg_register_cache_work *cw; 2607 struct memcg_kmem_cache_create_work *cw;
2737 2608
2738 cw = kmalloc(sizeof(*cw), GFP_NOWAIT); 2609 cw = kmalloc(sizeof(*cw), GFP_NOWAIT);
2739 if (!cw) 2610 if (!cw)
@@ -2743,18 +2614,18 @@ static void __memcg_schedule_register_cache(struct mem_cgroup *memcg,
2743 2614
2744 cw->memcg = memcg; 2615 cw->memcg = memcg;
2745 cw->cachep = cachep; 2616 cw->cachep = cachep;
2617 INIT_WORK(&cw->work, memcg_kmem_cache_create_func);
2746 2618
2747 INIT_WORK(&cw->work, memcg_register_cache_func);
2748 schedule_work(&cw->work); 2619 schedule_work(&cw->work);
2749} 2620}
2750 2621
2751static void memcg_schedule_register_cache(struct mem_cgroup *memcg, 2622static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
2752 struct kmem_cache *cachep) 2623 struct kmem_cache *cachep)
2753{ 2624{
2754 /* 2625 /*
2755 * We need to stop accounting when we kmalloc, because if the 2626 * We need to stop accounting when we kmalloc, because if the
2756 * corresponding kmalloc cache is not yet created, the first allocation 2627 * corresponding kmalloc cache is not yet created, the first allocation
2757 * in __memcg_schedule_register_cache will recurse. 2628 * in __memcg_schedule_kmem_cache_create will recurse.
2758 * 2629 *
2759 * However, it is better to enclose the whole function. Depending on 2630 * However, it is better to enclose the whole function. Depending on
2760 * the debugging options enabled, INIT_WORK(), for instance, can 2631 * the debugging options enabled, INIT_WORK(), for instance, can
@@ -2763,24 +2634,10 @@ static void memcg_schedule_register_cache(struct mem_cgroup *memcg,
2763 * the safest choice is to do it like this, wrapping the whole function. 2634 * the safest choice is to do it like this, wrapping the whole function.
2764 */ 2635 */
2765 current->memcg_kmem_skip_account = 1; 2636 current->memcg_kmem_skip_account = 1;
2766 __memcg_schedule_register_cache(memcg, cachep); 2637 __memcg_schedule_kmem_cache_create(memcg, cachep);
2767 current->memcg_kmem_skip_account = 0; 2638 current->memcg_kmem_skip_account = 0;
2768} 2639}
2769 2640
2770int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order)
2771{
2772 unsigned int nr_pages = 1 << order;
2773
2774 return memcg_charge_kmem(cachep->memcg_params->memcg, gfp, nr_pages);
2775}
2776
2777void __memcg_uncharge_slab(struct kmem_cache *cachep, int order)
2778{
2779 unsigned int nr_pages = 1 << order;
2780
2781 memcg_uncharge_kmem(cachep->memcg_params->memcg, nr_pages);
2782}
2783
2784/* 2641/*
2785 * Return the kmem_cache we're supposed to use for a slab allocation. 2642 * Return the kmem_cache we're supposed to use for a slab allocation.
2786 * We try to use the current memcg's version of the cache. 2643 * We try to use the current memcg's version of the cache.
@@ -2825,7 +2682,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep)
2825 * could happen with the slab_mutex held. So it's better to 2682 * could happen with the slab_mutex held. So it's better to
2826 * defer everything. 2683 * defer everything.
2827 */ 2684 */
2828 memcg_schedule_register_cache(memcg, cachep); 2685 memcg_schedule_kmem_cache_create(memcg, cachep);
2829out: 2686out:
2830 css_put(&memcg->css); 2687 css_put(&memcg->css);
2831 return cachep; 2688 return cachep;
@@ -4154,7 +4011,7 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
4154 4011
4155static void memcg_destroy_kmem(struct mem_cgroup *memcg) 4012static void memcg_destroy_kmem(struct mem_cgroup *memcg)
4156{ 4013{
4157 memcg_unregister_all_caches(memcg); 4014 memcg_destroy_kmem_caches(memcg);
4158 mem_cgroup_sockets_destroy(memcg); 4015 mem_cgroup_sockets_destroy(memcg);
4159} 4016}
4160#else 4017#else
@@ -4682,7 +4539,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
4682 spin_lock_init(&memcg->event_list_lock); 4539 spin_lock_init(&memcg->event_list_lock);
4683#ifdef CONFIG_MEMCG_KMEM 4540#ifdef CONFIG_MEMCG_KMEM
4684 memcg->kmemcg_id = -1; 4541 memcg->kmemcg_id = -1;
4685 INIT_LIST_HEAD(&memcg->memcg_slab_caches);
4686#endif 4542#endif
4687 4543
4688 return &memcg->css; 4544 return &memcg->css;
@@ -4926,10 +4782,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
4926 return NULL; 4782 return NULL;
4927 4783
4928 mapping = vma->vm_file->f_mapping; 4784 mapping = vma->vm_file->f_mapping;
4929 if (pte_none(ptent)) 4785 pgoff = linear_page_index(vma, addr);
4930 pgoff = linear_page_index(vma, addr);
4931 else /* pte_file(ptent) is true */
4932 pgoff = pte_to_pgoff(ptent);
4933 4786
4934 /* page is moved even if it's not RSS of this task(page-faulted). */ 4787 /* page is moved even if it's not RSS of this task(page-faulted). */
4935#ifdef CONFIG_SWAP 4788#ifdef CONFIG_SWAP
@@ -4961,7 +4814,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
4961 page = mc_handle_present_pte(vma, addr, ptent); 4814 page = mc_handle_present_pte(vma, addr, ptent);
4962 else if (is_swap_pte(ptent)) 4815 else if (is_swap_pte(ptent))
4963 page = mc_handle_swap_pte(vma, addr, ptent, &ent); 4816 page = mc_handle_swap_pte(vma, addr, ptent, &ent);
4964 else if (pte_none(ptent) || pte_file(ptent)) 4817 else if (pte_none(ptent))
4965 page = mc_handle_file_pte(vma, addr, ptent, &ent); 4818 page = mc_handle_file_pte(vma, addr, ptent, &ent);
4966 4819
4967 if (!page && !ent.val) 4820 if (!page && !ent.val)
diff --git a/mm/memory.c b/mm/memory.c
index d707c4dfbbb4..d63849b5188f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -813,42 +813,40 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
813 813
814 /* pte contains position in swap or file, so copy. */ 814 /* pte contains position in swap or file, so copy. */
815 if (unlikely(!pte_present(pte))) { 815 if (unlikely(!pte_present(pte))) {
816 if (!pte_file(pte)) { 816 swp_entry_t entry = pte_to_swp_entry(pte);
817 swp_entry_t entry = pte_to_swp_entry(pte); 817
818 818 if (likely(!non_swap_entry(entry))) {
819 if (likely(!non_swap_entry(entry))) { 819 if (swap_duplicate(entry) < 0)
820 if (swap_duplicate(entry) < 0) 820 return entry.val;
821 return entry.val; 821
822 822 /* make sure dst_mm is on swapoff's mmlist. */
823 /* make sure dst_mm is on swapoff's mmlist. */ 823 if (unlikely(list_empty(&dst_mm->mmlist))) {
824 if (unlikely(list_empty(&dst_mm->mmlist))) { 824 spin_lock(&mmlist_lock);
825 spin_lock(&mmlist_lock); 825 if (list_empty(&dst_mm->mmlist))
826 if (list_empty(&dst_mm->mmlist)) 826 list_add(&dst_mm->mmlist,
827 list_add(&dst_mm->mmlist, 827 &src_mm->mmlist);
828 &src_mm->mmlist); 828 spin_unlock(&mmlist_lock);
829 spin_unlock(&mmlist_lock); 829 }
830 } 830 rss[MM_SWAPENTS]++;
831 rss[MM_SWAPENTS]++; 831 } else if (is_migration_entry(entry)) {
832 } else if (is_migration_entry(entry)) { 832 page = migration_entry_to_page(entry);
833 page = migration_entry_to_page(entry); 833
834 834 if (PageAnon(page))
835 if (PageAnon(page)) 835 rss[MM_ANONPAGES]++;
836 rss[MM_ANONPAGES]++; 836 else
837 else 837 rss[MM_FILEPAGES]++;
838 rss[MM_FILEPAGES]++; 838
839 839 if (is_write_migration_entry(entry) &&
840 if (is_write_migration_entry(entry) && 840 is_cow_mapping(vm_flags)) {
841 is_cow_mapping(vm_flags)) { 841 /*
842 /* 842 * COW mappings require pages in both
843 * COW mappings require pages in both 843 * parent and child to be set to read.
844 * parent and child to be set to read. 844 */
845 */ 845 make_migration_entry_read(&entry);
846 make_migration_entry_read(&entry); 846 pte = swp_entry_to_pte(entry);
847 pte = swp_entry_to_pte(entry); 847 if (pte_swp_soft_dirty(*src_pte))
848 if (pte_swp_soft_dirty(*src_pte)) 848 pte = pte_swp_mksoft_dirty(pte);
849 pte = pte_swp_mksoft_dirty(pte); 849 set_pte_at(src_mm, addr, src_pte, pte);
850 set_pte_at(src_mm, addr, src_pte, pte);
851 }
852 } 850 }
853 } 851 }
854 goto out_set_pte; 852 goto out_set_pte;
@@ -1022,11 +1020,9 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
1022 * readonly mappings. The tradeoff is that copy_page_range is more 1020 * readonly mappings. The tradeoff is that copy_page_range is more
1023 * efficient than faulting. 1021 * efficient than faulting.
1024 */ 1022 */
1025 if (!(vma->vm_flags & (VM_HUGETLB | VM_NONLINEAR | 1023 if (!(vma->vm_flags & (VM_HUGETLB | VM_PFNMAP | VM_MIXEDMAP)) &&
1026 VM_PFNMAP | VM_MIXEDMAP))) { 1024 !vma->anon_vma)
1027 if (!vma->anon_vma) 1025 return 0;
1028 return 0;
1029 }
1030 1026
1031 if (is_vm_hugetlb_page(vma)) 1027 if (is_vm_hugetlb_page(vma))
1032 return copy_hugetlb_page_range(dst_mm, src_mm, vma); 1028 return copy_hugetlb_page_range(dst_mm, src_mm, vma);
@@ -1084,6 +1080,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
1084 spinlock_t *ptl; 1080 spinlock_t *ptl;
1085 pte_t *start_pte; 1081 pte_t *start_pte;
1086 pte_t *pte; 1082 pte_t *pte;
1083 swp_entry_t entry;
1087 1084
1088again: 1085again:
1089 init_rss_vec(rss); 1086 init_rss_vec(rss);
@@ -1109,28 +1106,12 @@ again:
1109 if (details->check_mapping && 1106 if (details->check_mapping &&
1110 details->check_mapping != page->mapping) 1107 details->check_mapping != page->mapping)
1111 continue; 1108 continue;
1112 /*
1113 * Each page->index must be checked when
1114 * invalidating or truncating nonlinear.
1115 */
1116 if (details->nonlinear_vma &&
1117 (page->index < details->first_index ||
1118 page->index > details->last_index))
1119 continue;
1120 } 1109 }
1121 ptent = ptep_get_and_clear_full(mm, addr, pte, 1110 ptent = ptep_get_and_clear_full(mm, addr, pte,
1122 tlb->fullmm); 1111 tlb->fullmm);
1123 tlb_remove_tlb_entry(tlb, pte, addr); 1112 tlb_remove_tlb_entry(tlb, pte, addr);
1124 if (unlikely(!page)) 1113 if (unlikely(!page))
1125 continue; 1114 continue;
1126 if (unlikely(details) && details->nonlinear_vma
1127 && linear_page_index(details->nonlinear_vma,
1128 addr) != page->index) {
1129 pte_t ptfile = pgoff_to_pte(page->index);
1130 if (pte_soft_dirty(ptent))
1131 ptfile = pte_file_mksoft_dirty(ptfile);
1132 set_pte_at(mm, addr, pte, ptfile);
1133 }
1134 if (PageAnon(page)) 1115 if (PageAnon(page))
1135 rss[MM_ANONPAGES]--; 1116 rss[MM_ANONPAGES]--;
1136 else { 1117 else {
@@ -1153,33 +1134,25 @@ again:
1153 } 1134 }
1154 continue; 1135 continue;
1155 } 1136 }
1156 /* 1137 /* If details->check_mapping, we leave swap entries. */
1157 * If details->check_mapping, we leave swap entries;
1158 * if details->nonlinear_vma, we leave file entries.
1159 */
1160 if (unlikely(details)) 1138 if (unlikely(details))
1161 continue; 1139 continue;
1162 if (pte_file(ptent)) {
1163 if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
1164 print_bad_pte(vma, addr, ptent, NULL);
1165 } else {
1166 swp_entry_t entry = pte_to_swp_entry(ptent);
1167 1140
1168 if (!non_swap_entry(entry)) 1141 entry = pte_to_swp_entry(ptent);
1169 rss[MM_SWAPENTS]--; 1142 if (!non_swap_entry(entry))
1170 else if (is_migration_entry(entry)) { 1143 rss[MM_SWAPENTS]--;
1171 struct page *page; 1144 else if (is_migration_entry(entry)) {
1145 struct page *page;
1172 1146
1173 page = migration_entry_to_page(entry); 1147 page = migration_entry_to_page(entry);
1174 1148
1175 if (PageAnon(page)) 1149 if (PageAnon(page))
1176 rss[MM_ANONPAGES]--; 1150 rss[MM_ANONPAGES]--;
1177 else 1151 else
1178 rss[MM_FILEPAGES]--; 1152 rss[MM_FILEPAGES]--;
1179 }
1180 if (unlikely(!free_swap_and_cache(entry)))
1181 print_bad_pte(vma, addr, ptent, NULL);
1182 } 1153 }
1154 if (unlikely(!free_swap_and_cache(entry)))
1155 print_bad_pte(vma, addr, ptent, NULL);
1183 pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); 1156 pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
1184 } while (pte++, addr += PAGE_SIZE, addr != end); 1157 } while (pte++, addr += PAGE_SIZE, addr != end);
1185 1158
@@ -1279,7 +1252,7 @@ static void unmap_page_range(struct mmu_gather *tlb,
1279 pgd_t *pgd; 1252 pgd_t *pgd;
1280 unsigned long next; 1253 unsigned long next;
1281 1254
1282 if (details && !details->check_mapping && !details->nonlinear_vma) 1255 if (details && !details->check_mapping)
1283 details = NULL; 1256 details = NULL;
1284 1257
1285 BUG_ON(addr >= end); 1258 BUG_ON(addr >= end);
@@ -1373,7 +1346,7 @@ void unmap_vmas(struct mmu_gather *tlb,
1373 * @vma: vm_area_struct holding the applicable pages 1346 * @vma: vm_area_struct holding the applicable pages
1374 * @start: starting address of pages to zap 1347 * @start: starting address of pages to zap
1375 * @size: number of bytes to zap 1348 * @size: number of bytes to zap
1376 * @details: details of nonlinear truncation or shared cache invalidation 1349 * @details: details of shared cache invalidation
1377 * 1350 *
1378 * Caller must protect the VMA list 1351 * Caller must protect the VMA list
1379 */ 1352 */
@@ -1399,7 +1372,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
1399 * @vma: vm_area_struct holding the applicable pages 1372 * @vma: vm_area_struct holding the applicable pages
1400 * @address: starting address of pages to zap 1373 * @address: starting address of pages to zap
1401 * @size: number of bytes to zap 1374 * @size: number of bytes to zap
1402 * @details: details of nonlinear truncation or shared cache invalidation 1375 * @details: details of shared cache invalidation
1403 * 1376 *
1404 * The range must fit into one VMA. 1377 * The range must fit into one VMA.
1405 */ 1378 */
@@ -1924,12 +1897,11 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
1924EXPORT_SYMBOL_GPL(apply_to_page_range); 1897EXPORT_SYMBOL_GPL(apply_to_page_range);
1925 1898
1926/* 1899/*
1927 * handle_pte_fault chooses page fault handler according to an entry 1900 * handle_pte_fault chooses page fault handler according to an entry which was
1928 * which was read non-atomically. Before making any commitment, on 1901 * read non-atomically. Before making any commitment, on those architectures
1929 * those architectures or configurations (e.g. i386 with PAE) which 1902 * or configurations (e.g. i386 with PAE) which might give a mix of unmatched
1930 * might give a mix of unmatched parts, do_swap_page and do_nonlinear_fault 1903 * parts, do_swap_page must check under lock before unmapping the pte and
1931 * must check under lock before unmapping the pte and proceeding 1904 * proceeding (but do_wp_page is only called after already making such a check;
1932 * (but do_wp_page is only called after already making such a check;
1933 * and do_anonymous_page can safely check later on). 1905 * and do_anonymous_page can safely check later on).
1934 */ 1906 */
1935static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd, 1907static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
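For reference, the helper this comment introduces looked roughly like the following in kernels of this vintage (a sketch from memory, not part of this diff): the expensive re-check is only compiled in where a pte is wider than a word and therefore cannot be read atomically.

    static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
                                    pte_t *page_table, pte_t orig_pte)
    {
            int same = 1;
    #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
            if (sizeof(pte_t) > sizeof(unsigned long)) {
                    /* e.g. i386 PAE: the earlier read was not atomic, so
                     * re-read under the page table lock before trusting it */
                    spinlock_t *ptl = pte_lockptr(mm, pmd);
                    spin_lock(ptl);
                    same = pte_same(*page_table, orig_pte);
                    spin_unlock(ptl);
            }
    #endif
            pte_unmap(page_table);
            return same;
    }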
@@ -2035,7 +2007,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
2035 pte_t entry; 2007 pte_t entry;
2036 int ret = 0; 2008 int ret = 0;
2037 int page_mkwrite = 0; 2009 int page_mkwrite = 0;
2038 struct page *dirty_page = NULL; 2010 bool dirty_shared = false;
2039 unsigned long mmun_start = 0; /* For mmu_notifiers */ 2011 unsigned long mmun_start = 0; /* For mmu_notifiers */
2040 unsigned long mmun_end = 0; /* For mmu_notifiers */ 2012 unsigned long mmun_end = 0; /* For mmu_notifiers */
2041 struct mem_cgroup *memcg; 2013 struct mem_cgroup *memcg;
@@ -2086,6 +2058,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
2086 unlock_page(old_page); 2058 unlock_page(old_page);
2087 } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == 2059 } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
2088 (VM_WRITE|VM_SHARED))) { 2060 (VM_WRITE|VM_SHARED))) {
2061 page_cache_get(old_page);
2089 /* 2062 /*
2090 * Only catch write-faults on shared writable pages, 2063 * Only catch write-faults on shared writable pages,
2091 * read-only shared pages can get COWed by 2064 * read-only shared pages can get COWed by
@@ -2093,7 +2066,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
2093 */ 2066 */
2094 if (vma->vm_ops && vma->vm_ops->page_mkwrite) { 2067 if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
2095 int tmp; 2068 int tmp;
2096 page_cache_get(old_page); 2069
2097 pte_unmap_unlock(page_table, ptl); 2070 pte_unmap_unlock(page_table, ptl);
2098 tmp = do_page_mkwrite(vma, old_page, address); 2071 tmp = do_page_mkwrite(vma, old_page, address);
2099 if (unlikely(!tmp || (tmp & 2072 if (unlikely(!tmp || (tmp &
@@ -2113,11 +2086,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
2113 unlock_page(old_page); 2086 unlock_page(old_page);
2114 goto unlock; 2087 goto unlock;
2115 } 2088 }
2116
2117 page_mkwrite = 1; 2089 page_mkwrite = 1;
2118 } 2090 }
2119 dirty_page = old_page; 2091
2120 get_page(dirty_page); 2092 dirty_shared = true;
2121 2093
2122reuse: 2094reuse:
2123 /* 2095 /*
@@ -2136,20 +2108,20 @@ reuse:
2136 pte_unmap_unlock(page_table, ptl); 2108 pte_unmap_unlock(page_table, ptl);
2137 ret |= VM_FAULT_WRITE; 2109 ret |= VM_FAULT_WRITE;
2138 2110
2139 if (!dirty_page) 2111 if (dirty_shared) {
2140 return ret;
2141
2142 if (!page_mkwrite) {
2143 struct address_space *mapping; 2112 struct address_space *mapping;
2144 int dirtied; 2113 int dirtied;
2145 2114
2146 lock_page(dirty_page); 2115 if (!page_mkwrite)
2147 dirtied = set_page_dirty(dirty_page); 2116 lock_page(old_page);
2148 VM_BUG_ON_PAGE(PageAnon(dirty_page), dirty_page);
2149 mapping = dirty_page->mapping;
2150 unlock_page(dirty_page);
2151 2117
2152 if (dirtied && mapping) { 2118 dirtied = set_page_dirty(old_page);
2119 VM_BUG_ON_PAGE(PageAnon(old_page), old_page);
2120 mapping = old_page->mapping;
2121 unlock_page(old_page);
2122 page_cache_release(old_page);
2123
2124 if ((dirtied || page_mkwrite) && mapping) {
2153 /* 2125 /*
2154 * Some device drivers do not set page.mapping 2126 * Some device drivers do not set page.mapping
2155 * but still dirty their pages 2127 * but still dirty their pages
@@ -2157,25 +2129,9 @@ reuse:
2157 balance_dirty_pages_ratelimited(mapping); 2129 balance_dirty_pages_ratelimited(mapping);
2158 } 2130 }
2159 2131
2160 /* file_update_time outside page_lock */ 2132 if (!page_mkwrite)
2161 if (vma->vm_file)
2162 file_update_time(vma->vm_file); 2133 file_update_time(vma->vm_file);
2163 } 2134 }
2164 put_page(dirty_page);
2165 if (page_mkwrite) {
2166 struct address_space *mapping = dirty_page->mapping;
2167
2168 set_page_dirty(dirty_page);
2169 unlock_page(dirty_page);
2170 page_cache_release(dirty_page);
2171 if (mapping) {
2172 /*
2173 * Some device drivers do not set page.mapping
2174 * but still dirty their pages
2175 */
2176 balance_dirty_pages_ratelimited(mapping);
2177 }
2178 }
2179 2135
2180 return ret; 2136 return ret;
2181 } 2137 }
@@ -2333,25 +2289,11 @@ static inline void unmap_mapping_range_tree(struct rb_root *root,
2333 } 2289 }
2334} 2290}
2335 2291
2336static inline void unmap_mapping_range_list(struct list_head *head,
2337 struct zap_details *details)
2338{
2339 struct vm_area_struct *vma;
2340
2341 /*
2342 * In nonlinear VMAs there is no correspondence between virtual address
2343 * offset and file offset. So we must perform an exhaustive search
2344 * across *all* the pages in each nonlinear VMA, not just the pages
2345 * whose virtual address lies outside the file truncation point.
2346 */
2347 list_for_each_entry(vma, head, shared.nonlinear) {
2348 details->nonlinear_vma = vma;
2349 unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details);
2350 }
2351}
2352
2353/** 2292/**
2354 * unmap_mapping_range - unmap the portion of all mmaps in the specified address_space corresponding to the specified page range in the underlying file. 2293 * unmap_mapping_range - unmap the portion of all mmaps in the specified
2294 * address_space corresponding to the specified page range in the underlying
2295 * file.
2296 *
2355 * @mapping: the address space containing mmaps to be unmapped. 2297 * @mapping: the address space containing mmaps to be unmapped.
2356 * @holebegin: byte in first page to unmap, relative to the start of 2298 * @holebegin: byte in first page to unmap, relative to the start of
2357 * the underlying file. This will be rounded down to a PAGE_SIZE 2299 * the underlying file. This will be rounded down to a PAGE_SIZE
@@ -2380,7 +2322,6 @@ void unmap_mapping_range(struct address_space *mapping,
2380 } 2322 }
2381 2323
2382 details.check_mapping = even_cows? NULL: mapping; 2324 details.check_mapping = even_cows? NULL: mapping;
2383 details.nonlinear_vma = NULL;
2384 details.first_index = hba; 2325 details.first_index = hba;
2385 details.last_index = hba + hlen - 1; 2326 details.last_index = hba + hlen - 1;
2386 if (details.last_index < details.first_index) 2327 if (details.last_index < details.first_index)
@@ -2390,8 +2331,6 @@ void unmap_mapping_range(struct address_space *mapping,
2390 i_mmap_lock_write(mapping); 2331 i_mmap_lock_write(mapping);
2391 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap))) 2332 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
2392 unmap_mapping_range_tree(&mapping->i_mmap, &details); 2333 unmap_mapping_range_tree(&mapping->i_mmap, &details);
2393 if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
2394 unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
2395 i_mmap_unlock_write(mapping); 2334 i_mmap_unlock_write(mapping);
2396} 2335}
2397EXPORT_SYMBOL(unmap_mapping_range); 2336EXPORT_SYMBOL(unmap_mapping_range);
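With the nonlinear list gone, every caller of unmap_mapping_range() goes through the interval tree alone. The canonical caller is truncation; below is a sketch modeled on truncate_pagecache() of this era (call sequence reproduced from memory, so treat it as illustrative):

    void truncate_pagecache_sketch(struct inode *inode, loff_t newsize)
    {
            struct address_space *mapping = inode->i_mapping;
            /* round up: a partial page at the new EOF stays mapped */
            loff_t holebegin = round_up(newsize, PAGE_SIZE);

            /* holelen == 0 means "to the end"; even_cows == 1 also zaps
             * private COWed copies of the truncated pages */
            unmap_mapping_range(mapping, holebegin, 0, 1);
            truncate_inode_pages(mapping, newsize);
            /* unmap again: a racing fault may have remapped a page between
             * the first unmap and the pagecache truncation */
            unmap_mapping_range(mapping, holebegin, 0, 1);
    }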
@@ -2752,8 +2691,6 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
2752 entry = mk_pte(page, vma->vm_page_prot); 2691 entry = mk_pte(page, vma->vm_page_prot);
2753 if (write) 2692 if (write)
2754 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 2693 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2755 else if (pte_file(*pte) && pte_file_soft_dirty(*pte))
2756 entry = pte_mksoft_dirty(entry);
2757 if (anon) { 2694 if (anon) {
2758 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); 2695 inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
2759 page_add_new_anon_rmap(page, vma, address); 2696 page_add_new_anon_rmap(page, vma, address);
@@ -2888,8 +2825,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2888 * if page by the offset is not ready to be mapped (cold cache or 2825 * if page by the offset is not ready to be mapped (cold cache or
2889 * something). 2826 * something).
2890 */ 2827 */
2891 if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) && 2828 if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
2892 fault_around_bytes >> PAGE_SHIFT > 1) {
2893 pte = pte_offset_map_lock(mm, pmd, address, &ptl); 2829 pte = pte_offset_map_lock(mm, pmd, address, &ptl);
2894 do_fault_around(vma, address, pte, pgoff, flags); 2830 do_fault_around(vma, address, pte, pgoff, flags);
2895 if (!pte_same(*pte, orig_pte)) 2831 if (!pte_same(*pte, orig_pte))
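fault_around_bytes is the tunable window that do_fault_around() populates speculatively around the faulting address; the check above only asks whether that window spans more than one page. A simplified sketch of the window computation (fault_around_start() is a hypothetical helper; in the real code this arithmetic lives inline in do_fault_around()):

    static unsigned long fault_around_start(struct vm_area_struct *vma,
                                            unsigned long address)
    {
            unsigned long nr_pages = fault_around_bytes >> PAGE_SHIFT;
            unsigned long mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;

            /* align down to the window, but never before the vma start */
            return max(address & mask, vma->vm_start);
    }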
@@ -3021,8 +2957,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3021 balance_dirty_pages_ratelimited(mapping); 2957 balance_dirty_pages_ratelimited(mapping);
3022 } 2958 }
3023 2959
3024 /* file_update_time outside page_lock */ 2960 if (!vma->vm_ops->page_mkwrite)
3025 if (vma->vm_file && !vma->vm_ops->page_mkwrite)
3026 file_update_time(vma->vm_file); 2961 file_update_time(vma->vm_file);
3027 2962
3028 return ret; 2963 return ret;
@@ -3034,7 +2969,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3034 * The mmap_sem may have been released depending on flags and our 2969 * The mmap_sem may have been released depending on flags and our
3035 * return value. See filemap_fault() and __lock_page_or_retry(). 2970 * return value. See filemap_fault() and __lock_page_or_retry().
3036 */ 2971 */
3037static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, 2972static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3038 unsigned long address, pte_t *page_table, pmd_t *pmd, 2973 unsigned long address, pte_t *page_table, pmd_t *pmd,
3039 unsigned int flags, pte_t orig_pte) 2974 unsigned int flags, pte_t orig_pte)
3040{ 2975{
@@ -3051,46 +2986,6 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3051 return do_shared_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); 2986 return do_shared_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
3052} 2987}
3053 2988
3054/*
3055 * Fault of a previously existing named mapping. Repopulate the pte
3056 * from the encoded file_pte if possible. This enables swappable
3057 * nonlinear vmas.
3058 *
3059 * We enter with non-exclusive mmap_sem (to exclude vma changes,
3060 * but allow concurrent faults), and pte mapped but not yet locked.
3061 * We return with pte unmapped and unlocked.
3062 * The mmap_sem may have been released depending on flags and our
3063 * return value. See filemap_fault() and __lock_page_or_retry().
3064 */
3065static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3066 unsigned long address, pte_t *page_table, pmd_t *pmd,
3067 unsigned int flags, pte_t orig_pte)
3068{
3069 pgoff_t pgoff;
3070
3071 flags |= FAULT_FLAG_NONLINEAR;
3072
3073 if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
3074 return 0;
3075
3076 if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
3077 /*
3078 * Page table corrupted: show pte and kill process.
3079 */
3080 print_bad_pte(vma, address, orig_pte, NULL);
3081 return VM_FAULT_SIGBUS;
3082 }
3083
3084 pgoff = pte_to_pgoff(orig_pte);
3085 if (!(flags & FAULT_FLAG_WRITE))
3086 return do_read_fault(mm, vma, address, pmd, pgoff, flags,
3087 orig_pte);
3088 if (!(vma->vm_flags & VM_SHARED))
3089 return do_cow_fault(mm, vma, address, pmd, pgoff, flags,
3090 orig_pte);
3091 return do_shared_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
3092}
3093
3094static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, 2989static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
3095 unsigned long addr, int page_nid, 2990 unsigned long addr, int page_nid,
3096 int *flags) 2991 int *flags)
@@ -3218,15 +3113,12 @@ static int handle_pte_fault(struct mm_struct *mm,
3218 if (pte_none(entry)) { 3113 if (pte_none(entry)) {
3219 if (vma->vm_ops) { 3114 if (vma->vm_ops) {
3220 if (likely(vma->vm_ops->fault)) 3115 if (likely(vma->vm_ops->fault))
3221 return do_linear_fault(mm, vma, address, 3116 return do_fault(mm, vma, address, pte,
3222 pte, pmd, flags, entry); 3117 pmd, flags, entry);
3223 } 3118 }
3224 return do_anonymous_page(mm, vma, address, 3119 return do_anonymous_page(mm, vma, address,
3225 pte, pmd, flags); 3120 pte, pmd, flags);
3226 } 3121 }
3227 if (pte_file(entry))
3228 return do_nonlinear_fault(mm, vma, address,
3229 pte, pmd, flags, entry);
3230 return do_swap_page(mm, vma, address, 3122 return do_swap_page(mm, vma, address,
3231 pte, pmd, flags, entry); 3123 pte, pmd, flags, entry);
3232 } 3124 }
diff --git a/mm/migrate.c b/mm/migrate.c
index 344cdf692fc8..6e284bcca8bb 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -179,37 +179,6 @@ out:
179} 179}
180 180
181/* 181/*
182 * Congratulations to trinity for discovering this bug.
183 * mm/fremap.c's remap_file_pages() accepts any range within a single vma to
184 * convert that vma to VM_NONLINEAR; and generic_file_remap_pages() will then
185 * replace the specified range by file ptes throughout (maybe populated after).
186 * If page migration finds a page within that range, while it's still located
187 * by vma_interval_tree rather than lost to i_mmap_nonlinear list, no problem:
188 * zap_pte() clears the temporary migration entry before mmap_sem is dropped.
189 * But if the migrating page is in a part of the vma outside the range to be
190 * remapped, then it will not be cleared, and remove_migration_ptes() needs to
191 * deal with it. Fortunately, this part of the vma is of course still linear,
192 * so we just need to use linear location on the nonlinear list.
193 */
194static int remove_linear_migration_ptes_from_nonlinear(struct page *page,
195 struct address_space *mapping, void *arg)
196{
197 struct vm_area_struct *vma;
198 /* hugetlbfs does not support remap_pages, so no huge pgoff worries */
199 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
200 unsigned long addr;
201
202 list_for_each_entry(vma,
203 &mapping->i_mmap_nonlinear, shared.nonlinear) {
204
205 addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
206 if (addr >= vma->vm_start && addr < vma->vm_end)
207 remove_migration_pte(page, vma, addr, arg);
208 }
209 return SWAP_AGAIN;
210}
211
212/*
213 * Get rid of all migration entries and replace them by 182 * Get rid of all migration entries and replace them by
214 * references to the indicated page. 183 * references to the indicated page.
215 */ 184 */
@@ -218,7 +187,6 @@ static void remove_migration_ptes(struct page *old, struct page *new)
218 struct rmap_walk_control rwc = { 187 struct rmap_walk_control rwc = {
219 .rmap_one = remove_migration_pte, 188 .rmap_one = remove_migration_pte,
220 .arg = old, 189 .arg = old,
221 .file_nonlinear = remove_linear_migration_ptes_from_nonlinear,
222 }; 190 };
223 191
224 rmap_walk(new, &rwc); 192 rmap_walk(new, &rwc);
diff --git a/mm/mincore.c b/mm/mincore.c
index c8c528b36641..46527c023e0c 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -124,17 +124,13 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
124 ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 124 ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
125 do { 125 do {
126 pte_t pte = *ptep; 126 pte_t pte = *ptep;
127 pgoff_t pgoff;
128 127
129 next = addr + PAGE_SIZE; 128 next = addr + PAGE_SIZE;
130 if (pte_none(pte)) 129 if (pte_none(pte))
131 mincore_unmapped_range(vma, addr, next, vec); 130 mincore_unmapped_range(vma, addr, next, vec);
132 else if (pte_present(pte)) 131 else if (pte_present(pte))
133 *vec = 1; 132 *vec = 1;
134 else if (pte_file(pte)) { 133 else { /* pte is a swap entry */
135 pgoff = pte_to_pgoff(pte);
136 *vec = mincore_page(vma->vm_file->f_mapping, pgoff);
137 } else { /* pte is a swap entry */
138 swp_entry_t entry = pte_to_swp_entry(pte); 134 swp_entry_t entry = pte_to_swp_entry(pte);
139 135
140 if (non_swap_entry(entry)) { 136 if (non_swap_entry(entry)) {
@@ -145,9 +141,8 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
145 *vec = 1; 141 *vec = 1;
146 } else { 142 } else {
147#ifdef CONFIG_SWAP 143#ifdef CONFIG_SWAP
148 pgoff = entry.val;
149 *vec = mincore_page(swap_address_space(entry), 144 *vec = mincore_page(swap_address_space(entry),
150 pgoff); 145 entry.val);
151#else 146#else
152 WARN_ON(1); 147 WARN_ON(1);
153 *vec = 1; 148 *vec = 1;
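Userspace is unaffected: mincore() still reports per-page residency, it just no longer has a nonlinear case that consulted the file offset encoded in the pte. A minimal usage sketch (hypothetical program, standard mincore(2) API):

    #define _DEFAULT_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
            long pagesz = sysconf(_SC_PAGESIZE);
            size_t len = 4 * pagesz;
            unsigned char vec[4];
            int fd, i;
            void *p;

            if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
                    return 1;
            p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
            if (p == MAP_FAILED || mincore(p, len, vec))
                    return 1;
            for (i = 0; i < 4; i++)
                    printf("page %d: %sresident\n", i, (vec[i] & 1) ? "" : "not ");
            return 0;
    }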
diff --git a/mm/mmap.c b/mm/mmap.c
index 7f684d5a8087..14d84666e8ba 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -243,10 +243,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
243 mapping_unmap_writable(mapping); 243 mapping_unmap_writable(mapping);
244 244
245 flush_dcache_mmap_lock(mapping); 245 flush_dcache_mmap_lock(mapping);
246 if (unlikely(vma->vm_flags & VM_NONLINEAR)) 246 vma_interval_tree_remove(vma, &mapping->i_mmap);
247 list_del_init(&vma->shared.nonlinear);
248 else
249 vma_interval_tree_remove(vma, &mapping->i_mmap);
250 flush_dcache_mmap_unlock(mapping); 247 flush_dcache_mmap_unlock(mapping);
251} 248}
252 249
@@ -649,10 +646,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
649 atomic_inc(&mapping->i_mmap_writable); 646 atomic_inc(&mapping->i_mmap_writable);
650 647
651 flush_dcache_mmap_lock(mapping); 648 flush_dcache_mmap_lock(mapping);
652 if (unlikely(vma->vm_flags & VM_NONLINEAR)) 649 vma_interval_tree_insert(vma, &mapping->i_mmap);
653 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
654 else
655 vma_interval_tree_insert(vma, &mapping->i_mmap);
656 flush_dcache_mmap_unlock(mapping); 650 flush_dcache_mmap_unlock(mapping);
657 } 651 }
658} 652}
@@ -789,14 +783,11 @@ again: remove_next = 1 + (end > next->vm_end);
789 783
790 if (file) { 784 if (file) {
791 mapping = file->f_mapping; 785 mapping = file->f_mapping;
792 if (!(vma->vm_flags & VM_NONLINEAR)) { 786 root = &mapping->i_mmap;
793 root = &mapping->i_mmap; 787 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
794 uprobe_munmap(vma, vma->vm_start, vma->vm_end);
795 788
796 if (adjust_next) 789 if (adjust_next)
797 uprobe_munmap(next, next->vm_start, 790 uprobe_munmap(next, next->vm_start, next->vm_end);
798 next->vm_end);
799 }
800 791
801 i_mmap_lock_write(mapping); 792 i_mmap_lock_write(mapping);
802 if (insert) { 793 if (insert) {
@@ -2634,6 +2625,75 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2634 return vm_munmap(addr, len); 2625 return vm_munmap(addr, len);
2635} 2626}
2636 2627
2628
2629/*
2630 * Emulation of deprecated remap_file_pages() syscall.
2631 */
2632SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
2633 unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
2634{
2635
2636 struct mm_struct *mm = current->mm;
2637 struct vm_area_struct *vma;
2638 unsigned long populate = 0;
2639 unsigned long ret = -EINVAL;
2640 struct file *file;
2641
2642 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
2643 "See Documentation/vm/remap_file_pages.txt.\n",
2644 current->comm, current->pid);
2645
2646 if (prot)
2647 return ret;
2648 start = start & PAGE_MASK;
2649 size = size & PAGE_MASK;
2650
2651 if (start + size <= start)
2652 return ret;
2653
2654 /* Does pgoff wrap? */
2655 if (pgoff + (size >> PAGE_SHIFT) < pgoff)
2656 return ret;
2657
2658 down_write(&mm->mmap_sem);
2659 vma = find_vma(mm, start);
2660
2661 if (!vma || !(vma->vm_flags & VM_SHARED))
2662 goto out;
2663
2664 if (start < vma->vm_start || start + size > vma->vm_end)
2665 goto out;
2666
2667 if (pgoff == linear_page_index(vma, start)) {
2668 ret = 0;
2669 goto out;
2670 }
2671
2672 prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
2673 prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
2674 prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
2675
2676 flags &= MAP_NONBLOCK;
2677 flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
2678 if (vma->vm_flags & VM_LOCKED) {
2679 flags |= MAP_LOCKED;
2680 /* drop PG_Mlocked flag for over-mapped range */
2681 munlock_vma_pages_range(vma, start, start + size);
2682 }
2683
2684 file = get_file(vma->vm_file);
2685 ret = do_mmap_pgoff(vma->vm_file, start, size,
2686 prot, flags, pgoff, &populate);
2687 fput(file);
2688out:
2689 up_write(&mm->mmap_sem);
2690 if (populate)
2691 mm_populate(ret, populate);
2692 if (!IS_ERR_VALUE(ret))
2693 ret = 0;
2694 return ret;
2695}
2696
2637static inline void verify_mm_writelocked(struct mm_struct *mm) 2697static inline void verify_mm_writelocked(struct mm_struct *mm)
2638{ 2698{
2639#ifdef CONFIG_DEBUG_VM 2699#ifdef CONFIG_DEBUG_VM
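The effect of the emulation is visible from userspace: each remap_file_pages() call is now an ordinary MAP_FIXED mmap() of the same range at the requested offset, so old binaries keep working, only via many small linear vmas instead of one nonlinear one. A hedged sketch of the two equivalent forms (remap_example() is hypothetical; it assumes addr already lies inside a MAP_SHARED, PROT_READ|PROT_WRITE mapping of fd):

    #define _GNU_SOURCE
    #include <sys/mman.h>

    /* both calls end up mapping file page 2 at 'addr' */
    void remap_example(void *addr, int fd, long pagesz)
    {
            /* deprecated interface, now emulated by the code above
             * (prot must be 0, pgoff is in pages): */
            remap_file_pages(addr, pagesz, 0, 2, 0);

            /* what the emulation boils down to (offset in bytes): */
            mmap(addr, pagesz, PROT_READ | PROT_WRITE,
                 MAP_SHARED | MAP_FIXED | MAP_POPULATE, fd, 2 * pagesz);
    }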
@@ -3108,8 +3168,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
3108 * 3168 *
3109 * mmap_sem in write mode is required in order to block all operations 3169 * mmap_sem in write mode is required in order to block all operations
3110 * that could modify pagetables and free pages without need of 3170 * that could modify pagetables and free pages without need of
3111 * altering the vma layout (for example populate_range() with 3171 * altering the vma layout. It's also needed in write mode to avoid new
3112 * nonlinear vmas). It's also needed in write mode to avoid new
3113 * anon_vmas to be associated with existing vmas. 3172 * anon_vmas to be associated with existing vmas.
3114 * 3173 *
3115 * A single task can't take more than one mm_take_all_locks() in a row 3174 * A single task can't take more than one mm_take_all_locks() in a row
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ace93454ce8e..33121662f08b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -105,7 +105,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
105 } 105 }
106 if (updated) 106 if (updated)
107 pages++; 107 pages++;
108 } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) { 108 } else if (IS_ENABLED(CONFIG_MIGRATION)) {
109 swp_entry_t entry = pte_to_swp_entry(oldpte); 109 swp_entry_t entry = pte_to_swp_entry(oldpte);
110 110
111 if (is_write_migration_entry(entry)) { 111 if (is_write_migration_entry(entry)) {
diff --git a/mm/mremap.c b/mm/mremap.c
index 17fa018f5f39..57dadc025c64 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -81,8 +81,6 @@ static pte_t move_soft_dirty_pte(pte_t pte)
81 pte = pte_mksoft_dirty(pte); 81 pte = pte_mksoft_dirty(pte);
82 else if (is_swap_pte(pte)) 82 else if (is_swap_pte(pte))
83 pte = pte_swp_mksoft_dirty(pte); 83 pte = pte_swp_mksoft_dirty(pte);
84 else if (pte_file(pte))
85 pte = pte_file_mksoft_dirty(pte);
86#endif 84#endif
87 return pte; 85 return pte;
88} 86}
diff --git a/mm/msync.c b/mm/msync.c
index 992a1673d488..bb04d53ae852 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -86,10 +86,7 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
86 (vma->vm_flags & VM_SHARED)) { 86 (vma->vm_flags & VM_SHARED)) {
87 get_file(file); 87 get_file(file);
88 up_read(&mm->mmap_sem); 88 up_read(&mm->mmap_sem);
89 if (vma->vm_flags & VM_NONLINEAR) 89 error = vfs_fsync_range(file, fstart, fend, 1);
90 error = vfs_fsync(file, 1);
91 else
92 error = vfs_fsync_range(file, fstart, fend, 1);
93 fput(file); 90 fput(file);
94 if (error || start >= end) 91 if (error || start >= end)
95 goto out; 92 goto out;
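With the VM_NONLINEAR branch gone, msync(MS_SYNC) on a shared file mapping always becomes a ranged fsync of exactly the bytes asked for, never a whole-file vfs_fsync(). A minimal sketch (sync_one_page() is hypothetical; map is assumed to be a page-aligned MAP_SHARED mapping):

    #include <string.h>
    #include <sys/mman.h>

    /* dirty the first page of a shared file mapping, then write back
     * exactly that byte range */
    int sync_one_page(void *map, long pagesz)
    {
            memset(map, 0xff, pagesz);
            /* now always backed by vfs_fsync_range() on [0, pagesz) */
            return msync(map, pagesz, MS_SYNC);
    }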
diff --git a/mm/nommu.c b/mm/nommu.c
index 28bd8c4dff6f..541bed64e348 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1984,14 +1984,6 @@ void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
1984} 1984}
1985EXPORT_SYMBOL(filemap_map_pages); 1985EXPORT_SYMBOL(filemap_map_pages);
1986 1986
1987int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr,
1988 unsigned long size, pgoff_t pgoff)
1989{
1990 BUG();
1991 return 0;
1992}
1993EXPORT_SYMBOL(generic_file_remap_pages);
1994
1995static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, 1987static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
1996 unsigned long addr, void *buf, int len, int write) 1988 unsigned long addr, void *buf, int len, int write)
1997{ 1989{
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8e20f9c2fa5a..f121050e8530 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -552,17 +552,15 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
552 return 0; 552 return 0;
553 553
554 if (page_is_guard(buddy) && page_order(buddy) == order) { 554 if (page_is_guard(buddy) && page_order(buddy) == order) {
555 VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
556
557 if (page_zone_id(page) != page_zone_id(buddy)) 555 if (page_zone_id(page) != page_zone_id(buddy))
558 return 0; 556 return 0;
559 557
558 VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
559
560 return 1; 560 return 1;
561 } 561 }
562 562
563 if (PageBuddy(buddy) && page_order(buddy) == order) { 563 if (PageBuddy(buddy) && page_order(buddy) == order) {
564 VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
565
566 /* 564 /*
567 * zone check is done late to avoid uselessly 565 * zone check is done late to avoid uselessly
568 * calculating zone/node ids for pages that could 566 * calculating zone/node ids for pages that could
@@ -571,6 +569,8 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
571 if (page_zone_id(page) != page_zone_id(buddy)) 569 if (page_zone_id(page) != page_zone_id(buddy))
572 return 0; 570 return 0;
573 571
572 VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
573
574 return 1; 574 return 1;
575 } 575 }
576 return 0; 576 return 0;
diff --git a/mm/rmap.c b/mm/rmap.c
index 71cd5bd0c17d..70b32498d4f2 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -590,9 +590,8 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
590 if (!vma->anon_vma || !page__anon_vma || 590 if (!vma->anon_vma || !page__anon_vma ||
591 vma->anon_vma->root != page__anon_vma->root) 591 vma->anon_vma->root != page__anon_vma->root)
592 return -EFAULT; 592 return -EFAULT;
593 } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) { 593 } else if (page->mapping) {
594 if (!vma->vm_file || 594 if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
595 vma->vm_file->f_mapping != page->mapping)
596 return -EFAULT; 595 return -EFAULT;
597 } else 596 } else
598 return -EFAULT; 597 return -EFAULT;
@@ -1274,7 +1273,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
1274 if (pte_soft_dirty(pteval)) 1273 if (pte_soft_dirty(pteval))
1275 swp_pte = pte_swp_mksoft_dirty(swp_pte); 1274 swp_pte = pte_swp_mksoft_dirty(swp_pte);
1276 set_pte_at(mm, address, pte, swp_pte); 1275 set_pte_at(mm, address, pte, swp_pte);
1277 BUG_ON(pte_file(*pte));
1278 } else if (IS_ENABLED(CONFIG_MIGRATION) && 1276 } else if (IS_ENABLED(CONFIG_MIGRATION) &&
1279 (flags & TTU_MIGRATION)) { 1277 (flags & TTU_MIGRATION)) {
1280 /* Establish migration entry for a file page */ 1278 /* Establish migration entry for a file page */
@@ -1316,211 +1314,6 @@ out_mlock:
1316 return ret; 1314 return ret;
1317} 1315}
1318 1316
1319/*
1320 * objrmap doesn't work for nonlinear VMAs because the assumption that
1321 * offset-into-file correlates with offset-into-virtual-addresses does not hold.
1322 * Consequently, given a particular page and its ->index, we cannot locate the
1323 * ptes which are mapping that page without an exhaustive linear search.
1324 *
1325 * So what this code does is a mini "virtual scan" of each nonlinear VMA which
1326 * maps the file to which the target page belongs. The ->vm_private_data field
1327 * holds the current cursor into that scan. Successive searches will circulate
1328 * around the vma's virtual address space.
1329 *
1330 * So as more replacement pressure is applied to the pages in a nonlinear VMA,
1331 * more scanning pressure is placed against them as well. Eventually pages
1332 * will become fully unmapped and are eligible for eviction.
1333 *
1334 * For very sparsely populated VMAs this is a little inefficient - chances are
1335 * there won't be many ptes located within the scan cluster. In this case
1336 * maybe we could scan further - to the end of the pte page, perhaps.
1337 *
1338 * Mlocked pages: check VM_LOCKED under mmap_sem held for read, if we can
1339 * acquire it without blocking. If vma locked, mlock the pages in the cluster,
1340 * rather than unmapping them. If we encounter the "check_page" that vmscan is
1341 * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
1342 */
1343#define CLUSTER_SIZE min(32*PAGE_SIZE, PMD_SIZE)
1344#define CLUSTER_MASK (~(CLUSTER_SIZE - 1))
1345
1346static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
1347 struct vm_area_struct *vma, struct page *check_page)
1348{
1349 struct mm_struct *mm = vma->vm_mm;
1350 pmd_t *pmd;
1351 pte_t *pte;
1352 pte_t pteval;
1353 spinlock_t *ptl;
1354 struct page *page;
1355 unsigned long address;
1356 unsigned long mmun_start; /* For mmu_notifiers */
1357 unsigned long mmun_end; /* For mmu_notifiers */
1358 unsigned long end;
1359 int ret = SWAP_AGAIN;
1360 int locked_vma = 0;
1361
1362 address = (vma->vm_start + cursor) & CLUSTER_MASK;
1363 end = address + CLUSTER_SIZE;
1364 if (address < vma->vm_start)
1365 address = vma->vm_start;
1366 if (end > vma->vm_end)
1367 end = vma->vm_end;
1368
1369 pmd = mm_find_pmd(mm, address);
1370 if (!pmd)
1371 return ret;
1372
1373 mmun_start = address;
1374 mmun_end = end;
1375 mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
1376
1377 /*
1378 * If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
1379 * keep the sem while scanning the cluster for mlocking pages.
1380 */
1381 if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
1382 locked_vma = (vma->vm_flags & VM_LOCKED);
1383 if (!locked_vma)
1384 up_read(&vma->vm_mm->mmap_sem); /* don't need it */
1385 }
1386
1387 pte = pte_offset_map_lock(mm, pmd, address, &ptl);
1388
1389 /* Update high watermark before we lower rss */
1390 update_hiwater_rss(mm);
1391
1392 for (; address < end; pte++, address += PAGE_SIZE) {
1393 if (!pte_present(*pte))
1394 continue;
1395 page = vm_normal_page(vma, address, *pte);
1396 BUG_ON(!page || PageAnon(page));
1397
1398 if (locked_vma) {
1399 if (page == check_page) {
1400 /* we know we have check_page locked */
1401 mlock_vma_page(page);
1402 ret = SWAP_MLOCK;
1403 } else if (trylock_page(page)) {
1404 /*
1405 * If we can lock the page, perform mlock.
1406 * Otherwise leave the page alone, it will be
1407 * eventually encountered again later.
1408 */
1409 mlock_vma_page(page);
1410 unlock_page(page);
1411 }
1412 continue; /* don't unmap */
1413 }
1414
1415 /*
1416 * No need for _notify because we're within an
1417 * mmu_notifier_invalidate_range_ {start|end} scope.
1418 */
1419 if (ptep_clear_flush_young(vma, address, pte))
1420 continue;
1421
1422 /* Nuke the page table entry. */
1423 flush_cache_page(vma, address, pte_pfn(*pte));
1424 pteval = ptep_clear_flush_notify(vma, address, pte);
1425
1426 /* If nonlinear, store the file page offset in the pte. */
1427 if (page->index != linear_page_index(vma, address)) {
1428 pte_t ptfile = pgoff_to_pte(page->index);
1429 if (pte_soft_dirty(pteval))
1430 ptfile = pte_file_mksoft_dirty(ptfile);
1431 set_pte_at(mm, address, pte, ptfile);
1432 }
1433
1434 /* Move the dirty bit to the physical page now the pte is gone. */
1435 if (pte_dirty(pteval))
1436 set_page_dirty(page);
1437
1438 page_remove_rmap(page);
1439 page_cache_release(page);
1440 dec_mm_counter(mm, MM_FILEPAGES);
1441 (*mapcount)--;
1442 }
1443 pte_unmap_unlock(pte - 1, ptl);
1444 mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
1445 if (locked_vma)
1446 up_read(&vma->vm_mm->mmap_sem);
1447 return ret;
1448}
1449
1450static int try_to_unmap_nonlinear(struct page *page,
1451 struct address_space *mapping, void *arg)
1452{
1453 struct vm_area_struct *vma;
1454 int ret = SWAP_AGAIN;
1455 unsigned long cursor;
1456 unsigned long max_nl_cursor = 0;
1457 unsigned long max_nl_size = 0;
1458 unsigned int mapcount;
1459
1460 list_for_each_entry(vma,
1461 &mapping->i_mmap_nonlinear, shared.nonlinear) {
1462
1463 cursor = (unsigned long) vma->vm_private_data;
1464 if (cursor > max_nl_cursor)
1465 max_nl_cursor = cursor;
1466 cursor = vma->vm_end - vma->vm_start;
1467 if (cursor > max_nl_size)
1468 max_nl_size = cursor;
1469 }
1470
1471 if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */
1472 return SWAP_FAIL;
1473 }
1474
1475 /*
1476 * We don't try to search for this page in the nonlinear vmas,
1477 * and page_referenced wouldn't have found it anyway. Instead
1478 * just walk the nonlinear vmas trying to age and unmap some.
1479 * The mapcount of the page we came in with is irrelevant,
1480 * but even so use it as a guide to how hard we should try?
1481 */
1482 mapcount = page_mapcount(page);
1483 if (!mapcount)
1484 return ret;
1485
1486 cond_resched();
1487
1488 max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
1489 if (max_nl_cursor == 0)
1490 max_nl_cursor = CLUSTER_SIZE;
1491
1492 do {
1493 list_for_each_entry(vma,
1494 &mapping->i_mmap_nonlinear, shared.nonlinear) {
1495
1496 cursor = (unsigned long) vma->vm_private_data;
1497 while (cursor < max_nl_cursor &&
1498 cursor < vma->vm_end - vma->vm_start) {
1499 if (try_to_unmap_cluster(cursor, &mapcount,
1500 vma, page) == SWAP_MLOCK)
1501 ret = SWAP_MLOCK;
1502 cursor += CLUSTER_SIZE;
1503 vma->vm_private_data = (void *) cursor;
1504 if ((int)mapcount <= 0)
1505 return ret;
1506 }
1507 vma->vm_private_data = (void *) max_nl_cursor;
1508 }
1509 cond_resched();
1510 max_nl_cursor += CLUSTER_SIZE;
1511 } while (max_nl_cursor <= max_nl_size);
1512
1513 /*
1514 * Don't loop forever (perhaps all the remaining pages are
1515 * in locked vmas). Reset cursor on all unreserved nonlinear
1516 * vmas, now forgetting on which ones it had fallen behind.
1517 */
1518 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
1519 vma->vm_private_data = NULL;
1520
1521 return ret;
1522}
1523
1524bool is_vma_temporary_stack(struct vm_area_struct *vma) 1317bool is_vma_temporary_stack(struct vm_area_struct *vma)
1525{ 1318{
1526 int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); 1319 int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
@@ -1566,7 +1359,6 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
1566 .rmap_one = try_to_unmap_one, 1359 .rmap_one = try_to_unmap_one,
1567 .arg = (void *)flags, 1360 .arg = (void *)flags,
1568 .done = page_not_mapped, 1361 .done = page_not_mapped,
1569 .file_nonlinear = try_to_unmap_nonlinear,
1570 .anon_lock = page_lock_anon_vma_read, 1362 .anon_lock = page_lock_anon_vma_read,
1571 }; 1363 };
1572 1364
@@ -1612,12 +1404,6 @@ int try_to_munlock(struct page *page)
1612 .rmap_one = try_to_unmap_one, 1404 .rmap_one = try_to_unmap_one,
1613 .arg = (void *)TTU_MUNLOCK, 1405 .arg = (void *)TTU_MUNLOCK,
1614 .done = page_not_mapped, 1406 .done = page_not_mapped,
1615 /*
1616 * We don't bother to try to find the munlocked page in
1617 * nonlinears. It's costly. Instead, later, page reclaim logic
1618 * may call try_to_unmap() and recover PG_mlocked lazily.
1619 */
1620 .file_nonlinear = NULL,
1621 .anon_lock = page_lock_anon_vma_read, 1407 .anon_lock = page_lock_anon_vma_read,
1622 1408
1623 }; 1409 };
@@ -1748,13 +1534,6 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
1748 goto done; 1534 goto done;
1749 } 1535 }
1750 1536
1751 if (!rwc->file_nonlinear)
1752 goto done;
1753
1754 if (list_empty(&mapping->i_mmap_nonlinear))
1755 goto done;
1756
1757 ret = rwc->file_nonlinear(page, mapping, rwc->arg);
1758done: 1537done:
1759 i_mmap_unlock_read(mapping); 1538 i_mmap_unlock_read(mapping);
1760 return ret; 1539 return ret;
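After this removal rmap_walk_file() has a single strategy left: since file offset and virtual address now always correspond linearly, the interval tree locates every mapping of a page directly. A simplified sketch of the remaining walk (reconstructed from memory; compound-page and locking details elided):

    static int rmap_walk_file_sketch(struct page *page,
                                     struct rmap_walk_control *rwc)
    {
            struct address_space *mapping = page->mapping;
            pgoff_t pgoff = page->index;    /* linear offset into the file */
            struct vm_area_struct *vma;
            int ret = SWAP_AGAIN;

            i_mmap_lock_read(mapping);
            vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
                    unsigned long address = vma_address(page, vma);

                    ret = rwc->rmap_one(page, vma, address, rwc->arg);
                    if (ret != SWAP_AGAIN)
                            break;
            }
            i_mmap_unlock_read(mapping);
            return ret;
    }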
diff --git a/mm/shmem.c b/mm/shmem.c
index 993e6ba689cc..b3e403181981 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3201,7 +3201,6 @@ static const struct vm_operations_struct shmem_vm_ops = {
3201 .set_policy = shmem_set_policy, 3201 .set_policy = shmem_set_policy,
3202 .get_policy = shmem_get_policy, 3202 .get_policy = shmem_get_policy,
3203#endif 3203#endif
3204 .remap_pages = generic_file_remap_pages,
3205}; 3204};
3206 3205
3207static struct dentry *shmem_mount(struct file_system_type *fs_type, 3206static struct dentry *shmem_mount(struct file_system_type *fs_type,
diff --git a/mm/slab.h b/mm/slab.h
index 1cf4005482dd..90430d6f665e 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -235,7 +235,7 @@ static __always_inline int memcg_charge_slab(struct kmem_cache *s,
235 return 0; 235 return 0;
236 if (is_root_cache(s)) 236 if (is_root_cache(s))
237 return 0; 237 return 0;
238 return __memcg_charge_slab(s, gfp, order); 238 return memcg_charge_kmem(s->memcg_params->memcg, gfp, 1 << order);
239} 239}
240 240
241static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order) 241static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
@@ -244,7 +244,7 @@ static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
244 return; 244 return;
245 if (is_root_cache(s)) 245 if (is_root_cache(s))
246 return; 246 return;
247 __memcg_uncharge_slab(s, order); 247 memcg_uncharge_kmem(s->memcg_params->memcg, 1 << order);
248} 248}
249#else 249#else
250static inline bool is_root_cache(struct kmem_cache *s) 250static inline bool is_root_cache(struct kmem_cache *s)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index e03dd6f2a272..6e1e4cf65836 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -331,7 +331,7 @@ out:
331 331
332out_free_cache: 332out_free_cache:
333 memcg_free_cache_params(s); 333 memcg_free_cache_params(s);
334 kfree(s); 334 kmem_cache_free(kmem_cache, s);
335 goto out; 335 goto out;
336} 336}
337 337
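The kfree() -> kmem_cache_free() change on the out_free_cache path matters because the struct kmem_cache being torn down was not kmalloc'ed: it came from the 'kmem_cache' cache of caches, and an object must be returned to the cache it was allocated from. Sketch of the pairing (allocation line as in do_kmem_cache_create()):

    struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
    /* ... setup fails ... */
    kmem_cache_free(kmem_cache, s);         /* not kfree(s) */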
@@ -425,21 +425,64 @@ out_unlock:
425} 425}
426EXPORT_SYMBOL(kmem_cache_create); 426EXPORT_SYMBOL(kmem_cache_create);
427 427
428static int do_kmem_cache_shutdown(struct kmem_cache *s,
429 struct list_head *release, bool *need_rcu_barrier)
430{
431 if (__kmem_cache_shutdown(s) != 0) {
432 printk(KERN_ERR "kmem_cache_destroy %s: "
433 "Slab cache still has objects\n", s->name);
434 dump_stack();
435 return -EBUSY;
436 }
437
438 if (s->flags & SLAB_DESTROY_BY_RCU)
439 *need_rcu_barrier = true;
440
441#ifdef CONFIG_MEMCG_KMEM
442 if (!is_root_cache(s)) {
443 struct kmem_cache *root_cache = s->memcg_params->root_cache;
444 int memcg_id = memcg_cache_id(s->memcg_params->memcg);
445
446 BUG_ON(root_cache->memcg_params->memcg_caches[memcg_id] != s);
447 root_cache->memcg_params->memcg_caches[memcg_id] = NULL;
448 }
449#endif
450 list_move(&s->list, release);
451 return 0;
452}
453
454static void do_kmem_cache_release(struct list_head *release,
455 bool need_rcu_barrier)
456{
457 struct kmem_cache *s, *s2;
458
459 if (need_rcu_barrier)
460 rcu_barrier();
461
462 list_for_each_entry_safe(s, s2, release, list) {
463#ifdef SLAB_SUPPORTS_SYSFS
464 sysfs_slab_remove(s);
465#else
466 slab_kmem_cache_release(s);
467#endif
468 }
469}
470
428#ifdef CONFIG_MEMCG_KMEM 471#ifdef CONFIG_MEMCG_KMEM
429/* 472/*
430 * memcg_create_kmem_cache - Create a cache for a memory cgroup. 473 * memcg_create_kmem_cache - Create a cache for a memory cgroup.
431 * @memcg: The memory cgroup the new cache is for. 474 * @memcg: The memory cgroup the new cache is for.
432 * @root_cache: The parent of the new cache. 475 * @root_cache: The parent of the new cache.
433 * @memcg_name: The name of the memory cgroup (used for naming the new cache).
434 * 476 *
435 * This function attempts to create a kmem cache that will serve allocation 477 * This function attempts to create a kmem cache that will serve allocation
436 * requests going from @memcg to @root_cache. The new cache inherits properties 478 * requests going from @memcg to @root_cache. The new cache inherits properties
437 * from its parent. 479 * from its parent.
438 */ 480 */
439struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, 481void memcg_create_kmem_cache(struct mem_cgroup *memcg,
440 struct kmem_cache *root_cache, 482 struct kmem_cache *root_cache)
441 const char *memcg_name)
442{ 483{
484 static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
485 int memcg_id = memcg_cache_id(memcg);
443 struct kmem_cache *s = NULL; 486 struct kmem_cache *s = NULL;
444 char *cache_name; 487 char *cache_name;
445 488
@@ -448,8 +491,18 @@ struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
448 491
449 mutex_lock(&slab_mutex); 492 mutex_lock(&slab_mutex);
450 493
494 /*
495 * Since per-memcg caches are created asynchronously on first
496 * allocation (see memcg_kmem_get_cache()), several threads can try to
497 * create the same cache, but only one of them may succeed.
498 */
499 if (cache_from_memcg_idx(root_cache, memcg_id))
500 goto out_unlock;
501
502 cgroup_name(mem_cgroup_css(memcg)->cgroup,
503 memcg_name_buf, sizeof(memcg_name_buf));
451 cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name, 504 cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
452 memcg_cache_id(memcg), memcg_name); 505 memcg_cache_id(memcg), memcg_name_buf);
453 if (!cache_name) 506 if (!cache_name)
454 goto out_unlock; 507 goto out_unlock;
455 508
@@ -457,49 +510,73 @@ struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
457 root_cache->size, root_cache->align, 510 root_cache->size, root_cache->align,
458 root_cache->flags, root_cache->ctor, 511 root_cache->flags, root_cache->ctor,
459 memcg, root_cache); 512 memcg, root_cache);
513 /*
514 * If we could not create a memcg cache, do not complain, because
515 * that's not critical at all as we can always proceed with the root
516 * cache.
517 */
460 if (IS_ERR(s)) { 518 if (IS_ERR(s)) {
461 kfree(cache_name); 519 kfree(cache_name);
462 s = NULL; 520 goto out_unlock;
463 } 521 }
464 522
523 /*
524 * Since readers won't lock (see cache_from_memcg_idx()), we need a
525 * barrier here to ensure nobody will see the kmem_cache partially
526 * initialized.
527 */
528 smp_wmb();
529 root_cache->memcg_params->memcg_caches[memcg_id] = s;
530
465out_unlock: 531out_unlock:
466 mutex_unlock(&slab_mutex); 532 mutex_unlock(&slab_mutex);
467 533
468 put_online_mems(); 534 put_online_mems();
469 put_online_cpus(); 535 put_online_cpus();
470
471 return s;
472} 536}
473 537
474static int memcg_cleanup_cache_params(struct kmem_cache *s) 538void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
475{ 539{
476 int rc; 540 LIST_HEAD(release);
541 bool need_rcu_barrier = false;
542 struct kmem_cache *s, *s2;
477 543
478 if (!s->memcg_params || 544 get_online_cpus();
479 !s->memcg_params->is_root_cache) 545 get_online_mems();
480 return 0;
481 546
482 mutex_unlock(&slab_mutex);
483 rc = __memcg_cleanup_cache_params(s);
484 mutex_lock(&slab_mutex); 547 mutex_lock(&slab_mutex);
548 list_for_each_entry_safe(s, s2, &slab_caches, list) {
549 if (is_root_cache(s) || s->memcg_params->memcg != memcg)
550 continue;
551 /*
552 * The cgroup is about to be freed and therefore has no charges
553 * left. Hence, all its caches must be empty by now.
554 */
555 BUG_ON(do_kmem_cache_shutdown(s, &release, &need_rcu_barrier));
556 }
557 mutex_unlock(&slab_mutex);
485 558
486 return rc; 559 put_online_mems();
487} 560 put_online_cpus();
488#else 561
489static int memcg_cleanup_cache_params(struct kmem_cache *s) 562 do_kmem_cache_release(&release, need_rcu_barrier);
490{
491 return 0;
492} 563}
493#endif /* CONFIG_MEMCG_KMEM */ 564#endif /* CONFIG_MEMCG_KMEM */
494 565
495void slab_kmem_cache_release(struct kmem_cache *s) 566void slab_kmem_cache_release(struct kmem_cache *s)
496{ 567{
568 memcg_free_cache_params(s);
497 kfree(s->name); 569 kfree(s->name);
498 kmem_cache_free(kmem_cache, s); 570 kmem_cache_free(kmem_cache, s);
499} 571}
500 572
501void kmem_cache_destroy(struct kmem_cache *s) 573void kmem_cache_destroy(struct kmem_cache *s)
502{ 574{
575 int i;
576 LIST_HEAD(release);
577 bool need_rcu_barrier = false;
578 bool busy = false;
579
503 get_online_cpus(); 580 get_online_cpus();
504 get_online_mems(); 581 get_online_mems();
505 582
@@ -509,35 +586,23 @@ void kmem_cache_destroy(struct kmem_cache *s)
509 if (s->refcount) 586 if (s->refcount)
510 goto out_unlock; 587 goto out_unlock;
511 588
512 if (memcg_cleanup_cache_params(s) != 0) 589 for_each_memcg_cache_index(i) {
513 goto out_unlock; 590 struct kmem_cache *c = cache_from_memcg_idx(s, i);
514 591
515 if (__kmem_cache_shutdown(s) != 0) { 592 if (c && do_kmem_cache_shutdown(c, &release, &need_rcu_barrier))
516 printk(KERN_ERR "kmem_cache_destroy %s: " 593 busy = true;
517 "Slab cache still has objects\n", s->name);
518 dump_stack();
519 goto out_unlock;
520 } 594 }
521 595
522 list_del(&s->list); 596 if (!busy)
523 597 do_kmem_cache_shutdown(s, &release, &need_rcu_barrier);
524 mutex_unlock(&slab_mutex);
525 if (s->flags & SLAB_DESTROY_BY_RCU)
526 rcu_barrier();
527
528 memcg_free_cache_params(s);
529#ifdef SLAB_SUPPORTS_SYSFS
530 sysfs_slab_remove(s);
531#else
532 slab_kmem_cache_release(s);
533#endif
534 goto out;
535 598
536out_unlock: 599out_unlock:
537 mutex_unlock(&slab_mutex); 600 mutex_unlock(&slab_mutex);
538out: 601
539 put_online_mems(); 602 put_online_mems();
540 put_online_cpus(); 603 put_online_cpus();
604
605 do_kmem_cache_release(&release, need_rcu_barrier);
541} 606}
542EXPORT_SYMBOL(kmem_cache_destroy); 607EXPORT_SYMBOL(kmem_cache_destroy);
543 608
diff --git a/mm/slub.c b/mm/slub.c
index fe376fe1f4fe..8b8508adf9c2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2398,13 +2398,24 @@ redo:
2398 * reading from one cpu area. That does not matter as long 2398 * reading from one cpu area. That does not matter as long
2399 * as we end up on the original cpu again when doing the cmpxchg. 2399 * as we end up on the original cpu again when doing the cmpxchg.
2400 * 2400 *
2401 * Preemption is disabled for the retrieval of the tid because that 2401 * We must guarantee that tid and the cpu slab pointer are fetched on
2402 * must occur from the current processor. We cannot allow rescheduling 2402 * the same cpu. Under CONFIG_PREEMPT they may come from different cpus,
2403 * on a different processor between the determination of the pointer 2403 * so we must check that they match and retry until they do.
2404 * and the retrieval of the tid.
2405 */ 2404 */
2406 preempt_disable(); 2405 do {
2407 c = this_cpu_ptr(s->cpu_slab); 2406 tid = this_cpu_read(s->cpu_slab->tid);
2407 c = raw_cpu_ptr(s->cpu_slab);
2408 } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid));
2409
2410 /*
2411 * Irqless object alloc/free algorithm used here depends on sequence
2412 * of fetching cpu_slab's data. tid should be fetched before anything
2413 * on c to guarantee that object and page associated with previous tid
2414 * won't be used with the current tid. If we fetch tid first, object and
2415 * page may belong to the next tid, in which case our alloc/free
2416 * request fails the cmpxchg and we simply retry. So, no problem.
2417 */
2418 barrier();
2408 2419
2409 /* 2420 /*
2410 * The transaction ids are globally unique per cpu and per operation on 2421 * The transaction ids are globally unique per cpu and per operation on
@@ -2412,8 +2423,6 @@ redo:
2412 * occurs on the right processor and that there was no operation on the 2423 * occurs on the right processor and that there was no operation on the
2413 * linked list in between. 2424 * linked list in between.
2414 */ 2425 */
2415 tid = c->tid;
2416 preempt_enable();
2417 2426
2418 object = c->freelist; 2427 object = c->freelist;
2419 page = c->page; 2428 page = c->page;
@@ -2512,7 +2521,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
2512#endif 2521#endif
2513 2522
2514/* 2523/*
2515 * Slow patch handling. This may still be called frequently since objects 2524 * Slow path handling. This may still be called frequently since objects
2516 * have a longer lifetime than the cpu slabs in most processing loads. 2525 * have a longer lifetime than the cpu slabs in most processing loads.
2517 * 2526 *
2518 * So we still attempt to reduce cache line usage. Just take the slab 2527 * So we still attempt to reduce cache line usage. Just take the slab
@@ -2659,11 +2668,13 @@ redo:
2659 * data is retrieved via this pointer. If we are on the same cpu 2668 * data is retrieved via this pointer. If we are on the same cpu
2660 * during the cmpxchg then the free will succeed. 2669 * during the cmpxchg then the free will succeed.
2661 */ 2670 */
2662 preempt_disable(); 2671 do {
2663 c = this_cpu_ptr(s->cpu_slab); 2672 tid = this_cpu_read(s->cpu_slab->tid);
2673 c = raw_cpu_ptr(s->cpu_slab);
2674 } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid));
2664 2675
2665 tid = c->tid; 2676 /* Same with comment on barrier() in slab_alloc_node() */
2666 preempt_enable(); 2677 barrier();
2667 2678
2668 if (likely(page == c->page)) { 2679 if (likely(page == c->page)) {
2669 set_freepointer(s, object, c->freelist); 2680 set_freepointer(s, object, c->freelist);
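Both loops feed the same lockless protocol: read tid, read the cpu slab, retry until they agree, then commit with one cmpxchg_double over (freelist, tid). If anything ran on this cpu in between, tid has advanced and the cmpxchg fails harmlessly. A condensed sketch of the allocation fast path (helper names as used in slub.c, but declarations and slow-path details are compressed):

    static void *slab_alloc_sketch(struct kmem_cache *s, gfp_t gfpflags,
                                   int node, unsigned long addr)
    {
            struct kmem_cache_cpu *c;
            unsigned long tid;
            void *object;
            struct page *page;
    redo:
            do {
                    tid = this_cpu_read(s->cpu_slab->tid);
                    c = raw_cpu_ptr(s->cpu_slab);
            } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid));
            barrier();      /* fetch tid before freelist/page, as above */

            object = c->freelist;
            page = c->page;
            if (unlikely(!object || !node_match(page, node)))
                    return __slab_alloc(s, gfpflags, node, addr, c);
            if (unlikely(!this_cpu_cmpxchg_double(
                            s->cpu_slab->freelist, s->cpu_slab->tid,
                            object, tid,
                            get_freepointer_safe(s, object), next_tid(tid))))
                    goto redo;      /* raced: tid moved on, retry */
            return object;
    }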
diff --git a/mm/swap.c b/mm/swap.c
index 8a12b33936b4..5b3087228b99 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -1140,10 +1140,8 @@ void __init swap_setup(void)
1140 1140
1141 if (bdi_init(swapper_spaces[0].backing_dev_info)) 1141 if (bdi_init(swapper_spaces[0].backing_dev_info))
1142 panic("Failed to init swap bdi"); 1142 panic("Failed to init swap bdi");
1143 for (i = 0; i < MAX_SWAPFILES; i++) { 1143 for (i = 0; i < MAX_SWAPFILES; i++)
1144 spin_lock_init(&swapper_spaces[i].tree_lock); 1144 spin_lock_init(&swapper_spaces[i].tree_lock);
1145 INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear);
1146 }
1147#endif 1145#endif
1148 1146
1149 /* Use a smaller cluster for small-memory machines */ 1147 /* Use a smaller cluster for small-memory machines */
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 1284f89fca08..9943e5fd74e6 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -17,6 +17,9 @@
17#include <linux/cpu.h> 17#include <linux/cpu.h>
18#include <linux/cpumask.h> 18#include <linux/cpumask.h>
19#include <linux/vmstat.h> 19#include <linux/vmstat.h>
20#include <linux/proc_fs.h>
21#include <linux/seq_file.h>
22#include <linux/debugfs.h>
20#include <linux/sched.h> 23#include <linux/sched.h>
21#include <linux/math64.h> 24#include <linux/math64.h>
22#include <linux/writeback.h> 25#include <linux/writeback.h>
@@ -670,66 +673,6 @@ int fragmentation_index(struct zone *zone, unsigned int order)
670} 673}
671#endif 674#endif
672 675
673#if defined(CONFIG_PROC_FS) || defined(CONFIG_COMPACTION)
674#include <linux/proc_fs.h>
675#include <linux/seq_file.h>
676
677static char * const migratetype_names[MIGRATE_TYPES] = {
678 "Unmovable",
679 "Reclaimable",
680 "Movable",
681 "Reserve",
682#ifdef CONFIG_CMA
683 "CMA",
684#endif
685#ifdef CONFIG_MEMORY_ISOLATION
686 "Isolate",
687#endif
688};
689
690static void *frag_start(struct seq_file *m, loff_t *pos)
691{
692 pg_data_t *pgdat;
693 loff_t node = *pos;
694 for (pgdat = first_online_pgdat();
695 pgdat && node;
696 pgdat = next_online_pgdat(pgdat))
697 --node;
698
699 return pgdat;
700}
701
702static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
703{
704 pg_data_t *pgdat = (pg_data_t *)arg;
705
706 (*pos)++;
707 return next_online_pgdat(pgdat);
708}
709
710static void frag_stop(struct seq_file *m, void *arg)
711{
712}
713
714/* Walk all the zones in a node and print using a callback */
715static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
716 void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
717{
718 struct zone *zone;
719 struct zone *node_zones = pgdat->node_zones;
720 unsigned long flags;
721
722 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
723 if (!populated_zone(zone))
724 continue;
725
726 spin_lock_irqsave(&zone->lock, flags);
727 print(m, pgdat, zone);
728 spin_unlock_irqrestore(&zone->lock, flags);
729 }
730}
731#endif
732
733#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA) 676#if defined(CONFIG_PROC_FS) || defined(CONFIG_SYSFS) || defined(CONFIG_NUMA)
734#ifdef CONFIG_ZONE_DMA 677#ifdef CONFIG_ZONE_DMA
735#define TEXT_FOR_DMA(xx) xx "_dma", 678#define TEXT_FOR_DMA(xx) xx "_dma",
@@ -907,7 +850,66 @@ const char * const vmstat_text[] = {
907#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */ 850#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
908 851
909 852
853#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \
854 defined(CONFIG_PROC_FS)
855static void *frag_start(struct seq_file *m, loff_t *pos)
856{
857 pg_data_t *pgdat;
858 loff_t node = *pos;
859
860 for (pgdat = first_online_pgdat();
861 pgdat && node;
862 pgdat = next_online_pgdat(pgdat))
863 --node;
864
865 return pgdat;
866}
867
868static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
869{
870 pg_data_t *pgdat = (pg_data_t *)arg;
871
872 (*pos)++;
873 return next_online_pgdat(pgdat);
874}
875
876static void frag_stop(struct seq_file *m, void *arg)
877{
878}
879
880/* Walk all the zones in a node and print using a callback */
881static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
882 void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
883{
884 struct zone *zone;
885 struct zone *node_zones = pgdat->node_zones;
886 unsigned long flags;
887
888 for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
889 if (!populated_zone(zone))
890 continue;
891
892 spin_lock_irqsave(&zone->lock, flags);
893 print(m, pgdat, zone);
894 spin_unlock_irqrestore(&zone->lock, flags);
895 }
896}
897#endif
898
910#ifdef CONFIG_PROC_FS 899#ifdef CONFIG_PROC_FS
900static char * const migratetype_names[MIGRATE_TYPES] = {
901 "Unmovable",
902 "Reclaimable",
903 "Movable",
904 "Reserve",
905#ifdef CONFIG_CMA
906 "CMA",
907#endif
908#ifdef CONFIG_MEMORY_ISOLATION
909 "Isolate",
910#endif
911};
912
911static void frag_show_print(struct seq_file *m, pg_data_t *pgdat, 913static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
912 struct zone *zone) 914 struct zone *zone)
913{ 915{
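frag_show_print() is what backs /proc/buddyinfo: one line per (node, zone) listing free-block counts for each order. A trivial hypothetical reader:

    #include <stdio.h>

    /* dump the per-order free block counts frag_show_print() emits */
    int main(void)
    {
            char line[256];
            FILE *f = fopen("/proc/buddyinfo", "r");

            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
            return 0;
    }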
@@ -1536,8 +1538,6 @@ static int __init setup_vmstat(void)
1536module_init(setup_vmstat) 1538module_init(setup_vmstat)
1537 1539
1538#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION) 1540#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)
1539#include <linux/debugfs.h>
1540
1541 1541
1542/* 1542/*
1543 * Return an index indicating how much of the available free memory is 1543 * Return an index indicating how much of the available free memory is