path: root/fs
author	Steve French <sfrench@us.ibm.com>	2008-02-06 11:04:00 -0500
committer	Steve French <sfrench@us.ibm.com>	2008-02-06 11:04:00 -0500
commit	f315ccb3e679f271583f2a4f463ad9b65665b751 (patch)
tree	44eb52102587d7b0bb592464cef6ec04bcac8b90 /fs
parent	ead03e30b050d6dda769e7e9b071c5fa720bf8d2 (diff)
parent	551e4fb2465b87de9d4aa1669b27d624435443bb (diff)
Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
-rw-r--r--	fs/Kconfig	28
-rw-r--r--	fs/buffer.c	62
-rw-r--r--	fs/cifs/inode.c	2
-rw-r--r--	fs/compat.c	32
-rw-r--r--	fs/direct-io.c	4
-rw-r--r--	fs/ecryptfs/mmap.c	5
-rw-r--r--	fs/eventpoll.c	2
-rw-r--r--	fs/exec.c	16
-rw-r--r--	fs/ext3/inode.c	4
-rw-r--r--	fs/ext4/inode.c	4
-rw-r--r--	fs/fs-writeback.c	31
-rw-r--r--	fs/gfs2/bmap.c	2
-rw-r--r--	fs/gfs2/ops_address.c	2
-rw-r--r--	fs/hostfs/hostfs_user.c	8
-rw-r--r--	fs/hugetlbfs/inode.c	2
-rw-r--r--	fs/libfs.c	11
-rw-r--r--	fs/mpage.c	7
-rw-r--r--	fs/nfs/read.c	10
-rw-r--r--	fs/nfs/write.c	4
-rw-r--r--	fs/nfsd/auth.c	10
-rw-r--r--	fs/ntfs/aops.c	20
-rw-r--r--	fs/ntfs/compress.c	2
-rw-r--r--	fs/ntfs/file.c	32
-rw-r--r--	fs/ntfs/malloc.h	3
-rw-r--r--	fs/ocfs2/alloc.c	2
-rw-r--r--	fs/ocfs2/aops.c	6
-rw-r--r--	fs/proc/array.c	21
-rw-r--r--	fs/proc/base.c	51
-rw-r--r--	fs/proc/internal.h	8
-rw-r--r--	fs/proc/kcore.c	2
-rw-r--r--	fs/proc/proc_misc.c	136
-rw-r--r--	fs/proc/task_mmu.c	676
-rw-r--r--	fs/reiserfs/inode.c	4
-rw-r--r--	fs/smbfs/inode.c	7
-rw-r--r--	fs/timerfd.c	207
-rw-r--r--	fs/xattr.c	45
-rw-r--r--	fs/xfs/linux-2.6/kmem.c	3
-rw-r--r--	fs/xfs/linux-2.6/xfs_buf.c	3
-rw-r--r--	fs/xfs/linux-2.6/xfs_lrw.c	2
39 files changed, 943 insertions(+), 533 deletions(-)
diff --git a/fs/Kconfig b/fs/Kconfig
index 987b5d7cb21a..ea5b35947623 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1152,7 +1152,7 @@ config BEFS_DEBUG
 	depends on BEFS_FS
 	help
 	  If you say Y here, you can use the 'debug' mount option to enable
 	  debugging output from the driver.
 
 config BFS_FS
 	tristate "BFS file system support (EXPERIMENTAL)"
@@ -1263,7 +1263,7 @@ config JFFS2_FS_XATTR
 	  Extended attributes are name:value pairs associated with inodes by
 	  the kernel or by users (see the attr(5) manual page, or visit
 	  <http://acl.bestbits.at/> for details).
 
 	  If unsure, say N.
 
 config JFFS2_FS_POSIX_ACL
@@ -1274,10 +1274,10 @@ config JFFS2_FS_POSIX_ACL
 	help
 	  Posix Access Control Lists (ACLs) support permissions for users and
 	  groups beyond the owner/group/world scheme.
 
 	  To learn more about Access Control Lists, visit the Posix ACLs for
 	  Linux website <http://acl.bestbits.at/>.
 
 	  If you don't know what Access Control Lists are, say N
 
 config JFFS2_FS_SECURITY
@@ -1289,7 +1289,7 @@ config JFFS2_FS_SECURITY
 	  implemented by security modules like SELinux. This option
 	  enables an extended attribute handler for file security
 	  labels in the jffs2 filesystem.
 
 	  If you are not using a security module that requires using
 	  extended attributes for file security labels, say N.
 
@@ -1835,7 +1835,7 @@ config RPCSEC_GSS_SPKM3
 	  If unsure, say N.
 
 config SMB_FS
-	tristate "SMB file system support (to mount Windows shares etc.)"
+	tristate "SMB file system support (OBSOLETE, please use CIFS)"
 	depends on INET
 	select NLS
 	help
@@ -1858,8 +1858,8 @@ config SMB_FS
 	  General information about how to connect Linux, Windows machines and
 	  Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.
 
-	  To compile the SMB support as a module, choose M here: the module will
-	  be called smbfs. Most people say N, however.
+	  To compile the SMB support as a module, choose M here:
+	  the module will be called smbfs. Most people say N, however.
 
 config SMB_NLS_DEFAULT
 	bool "Use a default NLS"
@@ -1891,7 +1891,7 @@ config SMB_NLS_REMOTE
 	  smbmount from samba 2.2.0 or later supports this.
 
 config CIFS
-	tristate "CIFS support (advanced network filesystem for Samba, Window and other CIFS compliant servers)"
+	tristate "CIFS support (advanced network filesystem, SMBFS successor)"
 	depends on INET
 	select NLS
 	help
@@ -1949,16 +1949,16 @@ config CIFS_WEAK_PW_HASH
 	  LANMAN based servers such as OS/2 and Windows 95, but such
 	  mounts may be less secure than mounts using NTLM or more recent
 	  security mechanisms if you are on a public network. Unless you
 	  have a need to access old SMB servers (and are on a private
 	  network) you probably want to say N. Even if this support
 	  is enabled in the kernel build, LANMAN authentication will not be
 	  used automatically. At runtime LANMAN mounts are disabled but
 	  can be set to required (or optional) either in
 	  /proc/fs/cifs (see fs/cifs/README for more detail) or via an
 	  option on the mount command. This support is disabled by
 	  default in order to reduce the possibility of a downgrade
 	  attack.
 
 	  If unsure, say N.
 
 config CIFS_XATTR
@@ -1999,7 +1999,7 @@ config CIFS_DEBUG2
 	  messages in some error paths, slowing performance. This
 	  option can be turned off unless you are debugging
 	  cifs problems. If unsure, say N.
 
 config CIFS_EXPERIMENTAL
 	bool "CIFS Experimental Features (EXPERIMENTAL)"
 	depends on CIFS && EXPERIMENTAL
@@ -2090,7 +2090,7 @@ config CODA_FS_OLD_API
 	  However this new API is not backward compatible with older
 	  clients. If you really need to run the old Coda userspace
 	  cache manager then say Y.
 
 	  For most cases you probably want to say N.
 
 config AFS_FS
diff --git a/fs/buffer.c b/fs/buffer.c
index 456c9ab7705b..826baf4f04bc 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1798,7 +1798,7 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
 					start = max(from, block_start);
 					size = min(to, block_end) - start;
 
-					zero_user_page(page, start, size, KM_USER0);
+					zero_user(page, start, size);
 					set_buffer_uptodate(bh);
 				}
 
@@ -1861,19 +1861,10 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 				mark_buffer_dirty(bh);
 				continue;
 			}
-			if (block_end > to || block_start < from) {
-				void *kaddr;
-
-				kaddr = kmap_atomic(page, KM_USER0);
-				if (block_end > to)
-					memset(kaddr+to, 0,
-						block_end-to);
-				if (block_start < from)
-					memset(kaddr+block_start,
-						0, from-block_start);
-				flush_dcache_page(page);
-				kunmap_atomic(kaddr, KM_USER0);
-			}
+			if (block_end > to || block_start < from)
+				zero_user_segments(page,
+					to, block_end,
+					block_start, from);
 			continue;
 		}
 	}
@@ -2104,8 +2095,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 				SetPageError(page);
 			}
 			if (!buffer_mapped(bh)) {
-				zero_user_page(page, i * blocksize, blocksize,
-						KM_USER0);
+				zero_user(page, i * blocksize, blocksize);
 				if (!err)
 					set_buffer_uptodate(bh);
 				continue;
@@ -2218,7 +2208,7 @@ int cont_expand_zero(struct file *file, struct address_space *mapping,
 						&page, &fsdata);
 		if (err)
 			goto out;
-		zero_user_page(page, zerofrom, len, KM_USER0);
+		zero_user(page, zerofrom, len);
 		err = pagecache_write_end(file, mapping, curpos, len, len,
 						page, fsdata);
 		if (err < 0)
@@ -2245,7 +2235,7 @@ int cont_expand_zero(struct file *file, struct address_space *mapping,
 						&page, &fsdata);
 		if (err)
 			goto out;
-		zero_user_page(page, zerofrom, len, KM_USER0);
+		zero_user(page, zerofrom, len);
 		err = pagecache_write_end(file, mapping, curpos, len, len,
 						page, fsdata);
 		if (err < 0)
@@ -2422,7 +2412,6 @@ int nobh_write_begin(struct file *file, struct address_space *mapping,
 	unsigned block_in_page;
 	unsigned block_start, block_end;
 	sector_t block_in_file;
-	char *kaddr;
 	int nr_reads = 0;
 	int ret = 0;
 	int is_mapped_to_disk = 1;
@@ -2493,13 +2482,8 @@ int nobh_write_begin(struct file *file, struct address_space *mapping,
 			continue;
 		}
 		if (buffer_new(bh) || !buffer_mapped(bh)) {
-			kaddr = kmap_atomic(page, KM_USER0);
-			if (block_start < from)
-				memset(kaddr+block_start, 0, from-block_start);
-			if (block_end > to)
-				memset(kaddr + to, 0, block_end - to);
-			flush_dcache_page(page);
-			kunmap_atomic(kaddr, KM_USER0);
+			zero_user_segments(page, block_start, from,
+							to, block_end);
 			continue;
 		}
 		if (buffer_uptodate(bh))
@@ -2636,7 +2620,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
 	 * the  page size, the remaining memory is zeroed when mapped, and
 	 * writes to that region are not written out to the file."
 	 */
-	zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0);
+	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
 out:
 	ret = mpage_writepage(page, get_block, wbc);
 	if (ret == -EAGAIN)
@@ -2709,7 +2693,7 @@ has_buffers:
 		if (page_has_buffers(page))
 			goto has_buffers;
 	}
-	zero_user_page(page, offset, length, KM_USER0);
+	zero_user(page, offset, length);
 	set_page_dirty(page);
 	err = 0;
 
@@ -2785,7 +2769,7 @@ int block_truncate_page(struct address_space *mapping,
 			goto unlock;
 	}
 
-	zero_user_page(page, offset, length, KM_USER0);
+	zero_user(page, offset, length);
 	mark_buffer_dirty(bh);
 	err = 0;
 
@@ -2831,7 +2815,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
 	 * the  page size, the remaining memory is zeroed when mapped, and
 	 * writes to that region are not written out to the file."
 	 */
-	zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0);
+	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
 	return __block_write_full_page(inode, page, get_block, wbc);
 }
 
@@ -3169,7 +3153,7 @@ static void recalc_bh_state(void)
 
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
 {
-	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep,
+	struct buffer_head *ret = kmem_cache_alloc(bh_cachep,
 				set_migrateflags(gfp_flags, __GFP_RECLAIMABLE));
 	if (ret) {
 		INIT_LIST_HEAD(&ret->b_assoc_buffers);
@@ -3257,12 +3241,24 @@ int bh_submit_read(struct buffer_head *bh)
 }
 EXPORT_SYMBOL(bh_submit_read);
 
+static void
+init_buffer_head(struct kmem_cache *cachep, void *data)
+{
+	struct buffer_head *bh = data;
+
+	memset(bh, 0, sizeof(*bh));
+	INIT_LIST_HEAD(&bh->b_assoc_buffers);
+}
+
 void __init buffer_init(void)
 {
 	int nrpages;
 
-	bh_cachep = KMEM_CACHE(buffer_head,
-			SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
+	bh_cachep = kmem_cache_create("buffer_head",
+			sizeof(struct buffer_head), 0,
+				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
+				SLAB_MEM_SPREAD),
+				init_buffer_head);
 
 	/*
 	 * Limit the bh occupancy to 10% of ZONE_NORMAL
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index d9567ba2960b..47f2621001e4 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1386,7 +1386,7 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
 	if (!page)
 		return -ENOMEM;
 
-	zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0);
+	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
 	unlock_page(page);
 	page_cache_release(page);
 	return rc;
diff --git a/fs/compat.c b/fs/compat.c
index 5216c3fd7517..69baca5ad608 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -2206,19 +2206,41 @@ asmlinkage long compat_sys_signalfd(int ufd,
 
 #ifdef CONFIG_TIMERFD
 
-asmlinkage long compat_sys_timerfd(int ufd, int clockid, int flags,
-				   const struct compat_itimerspec __user *utmr)
+asmlinkage long compat_sys_timerfd_settime(int ufd, int flags,
+				   const struct compat_itimerspec __user *utmr,
+				   struct compat_itimerspec __user *otmr)
 {
+	int error;
 	struct itimerspec t;
 	struct itimerspec __user *ut;
 
 	if (get_compat_itimerspec(&t, utmr))
 		return -EFAULT;
-	ut = compat_alloc_user_space(sizeof(*ut));
-	if (copy_to_user(ut, &t, sizeof(t)))
+	ut = compat_alloc_user_space(2 * sizeof(struct itimerspec));
+	if (copy_to_user(&ut[0], &t, sizeof(t)))
 		return -EFAULT;
+	error = sys_timerfd_settime(ufd, flags, &ut[0], &ut[1]);
+	if (!error && otmr)
+		error = (copy_from_user(&t, &ut[1], sizeof(struct itimerspec)) ||
+			 put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0;
+
+	return error;
+}
+
+asmlinkage long compat_sys_timerfd_gettime(int ufd,
+				   struct compat_itimerspec __user *otmr)
+{
+	int error;
+	struct itimerspec t;
+	struct itimerspec __user *ut;
 
-	return sys_timerfd(ufd, clockid, flags, ut);
+	ut = compat_alloc_user_space(sizeof(struct itimerspec));
+	error = sys_timerfd_gettime(ufd, ut);
+	if (!error)
+		error = (copy_from_user(&t, ut, sizeof(struct itimerspec)) ||
+			 put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0;
+
+	return error;
 }
 
 #endif /* CONFIG_TIMERFD */
diff --git a/fs/direct-io.c b/fs/direct-io.c
index acf0da1bd257..9e81addbd6ea 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -878,8 +878,8 @@ do_holes:
 					page_cache_release(page);
 					goto out;
 				}
-				zero_user_page(page, block_in_page << blkbits,
-						1 << blkbits, KM_USER0);
+				zero_user(page, block_in_page << blkbits,
+						1 << blkbits);
 				dio->block_in_file++;
 				block_in_page++;
 				goto next_block;
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 32c5711d79a3..0535412d8c64 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -257,8 +257,7 @@ static int fill_zeros_to_end_of_page(struct page *page, unsigned int to)
 	end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE;
 	if (to > end_byte_in_page)
 		end_byte_in_page = to;
-	zero_user_page(page, end_byte_in_page,
-		       PAGE_CACHE_SIZE - end_byte_in_page, KM_USER0);
+	zero_user_segment(page, end_byte_in_page, PAGE_CACHE_SIZE);
 out:
 	return 0;
 }
@@ -307,7 +306,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
 	 */
 	if ((i_size_read(page->mapping->host) == prev_page_end_size) &&
 	    (from != 0)) {
-		zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+		zero_user(page, 0, PAGE_CACHE_SIZE);
 	}
 out:
 	return rc;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 81c04abfb1aa..a415f42d32cf 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -353,7 +353,7 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq)
 	spin_unlock_irqrestore(&psw->lock, flags);
 
 	/* Do really wake up now */
-	wake_up(wq);
+	wake_up_nested(wq, 1 + wake_nests);
 
 	/* Remove the current task from the list */
 	spin_lock_irqsave(&psw->lock, flags);
diff --git a/fs/exec.c b/fs/exec.c
index 282240afe99e..be923e4bc389 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -760,7 +760,7 @@ static int de_thread(struct task_struct *tsk)
 	 */
 	read_lock(&tasklist_lock);
 	spin_lock_irq(lock);
-	if (sig->flags & SIGNAL_GROUP_EXIT) {
+	if (signal_group_exit(sig)) {
 		/*
 		 * Another group action in progress, just
 		 * return so that the signal is processed.
@@ -778,6 +778,7 @@ static int de_thread(struct task_struct *tsk)
 	if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
 		task_active_pid_ns(tsk)->child_reaper = tsk;
 
+	sig->group_exit_task = tsk;
 	zap_other_threads(tsk);
 	read_unlock(&tasklist_lock);
 
@@ -802,7 +803,6 @@ static int de_thread(struct task_struct *tsk)
 	}
 
 	sig->notify_count = count;
-	sig->group_exit_task = tsk;
 	while (atomic_read(&sig->count) > count) {
 		__set_current_state(TASK_UNINTERRUPTIBLE);
 		spin_unlock_irq(lock);
@@ -871,15 +871,10 @@ static int de_thread(struct task_struct *tsk)
 		leader->exit_state = EXIT_DEAD;
 
 		write_unlock_irq(&tasklist_lock);
 	}
 
 	sig->group_exit_task = NULL;
 	sig->notify_count = 0;
-	/*
-	 * There may be one thread left which is just exiting,
-	 * but it's safe to stop telling the group to kill themselves.
-	 */
-	sig->flags = 0;
 
 no_thread_group:
 	exit_itimers(sig);
@@ -947,12 +942,13 @@ static void flush_old_files(struct files_struct * files)
 	spin_unlock(&files->file_lock);
 }
 
-void get_task_comm(char *buf, struct task_struct *tsk)
+char *get_task_comm(char *buf, struct task_struct *tsk)
 {
 	/* buf must be at least sizeof(tsk->comm) in size */
 	task_lock(tsk);
 	strncpy(buf, tsk->comm, sizeof(tsk->comm));
 	task_unlock(tsk);
+	return buf;
 }
 
 void set_task_comm(struct task_struct *tsk, char *buf)
@@ -1548,7 +1544,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 	int err = -EAGAIN;
 
 	spin_lock_irq(&tsk->sighand->siglock);
-	if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) {
+	if (!signal_group_exit(tsk->signal)) {
 		tsk->signal->group_exit_code = exit_code;
 		zap_process(tsk);
 		err = 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 9b162cd6c16c..077535439288 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1845,7 +1845,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
 	 */
 	if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
 	     ext3_should_writeback_data(inode) && PageUptodate(page)) {
-		zero_user_page(page, offset, length, KM_USER0);
+		zero_user(page, offset, length);
 		set_page_dirty(page);
 		goto unlock;
 	}
@@ -1898,7 +1898,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
 		goto unlock;
 	}
 
-	zero_user_page(page, offset, length, KM_USER0);
+	zero_user(page, offset, length);
 	BUFFER_TRACE(bh, "zeroed end of block");
 
 	err = 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index bb717cbb749c..05c4145dd27d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1840,7 +1840,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
 	 */
 	if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
 	     ext4_should_writeback_data(inode) && PageUptodate(page)) {
-		zero_user_page(page, offset, length, KM_USER0);
+		zero_user(page, offset, length);
 		set_page_dirty(page);
 		goto unlock;
 	}
@@ -1893,7 +1893,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
 		goto unlock;
 	}
 
-	zero_user_page(page, offset, length, KM_USER0);
+	zero_user(page, offset, length);
 
 	BUFFER_TRACE(bh, "zeroed end of block");
 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 300324bd563c..0b3064079fa5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -284,7 +284,17 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
 			 * soon as the queue becomes uncongested.
 			 */
 			inode->i_state |= I_DIRTY_PAGES;
-			requeue_io(inode);
+			if (wbc->nr_to_write <= 0) {
+				/*
+				 * slice used up: queue for next turn
+				 */
+				requeue_io(inode);
+			} else {
+				/*
+				 * somehow blocked: retry later
+				 */
+				redirty_tail(inode);
+			}
 		} else {
 			/*
 			 * Otherwise fully redirty the inode so that
@@ -334,9 +344,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	WARN_ON(inode->i_state & I_WILL_FREE);
 
 	if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) {
-		struct address_space *mapping = inode->i_mapping;
-		int ret;
-
 		/*
 		 * We're skipping this inode because it's locked, and we're not
 		 * doing writeback-for-data-integrity. Move it to s_more_io so
@@ -345,15 +352,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 		 * completed a full scan of s_io.
 		 */
 		requeue_io(inode);
-
-		/*
-		 * Even if we don't actually write the inode itself here,
-		 * we can at least start some of the data writeout..
-		 */
-		spin_unlock(&inode_lock);
-		ret = do_writepages(mapping, wbc);
-		spin_lock(&inode_lock);
-		return ret;
+		return 0;
 	}
 
 	/*
@@ -479,8 +478,12 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
 		iput(inode);
 		cond_resched();
 		spin_lock(&inode_lock);
-		if (wbc->nr_to_write <= 0)
+		if (wbc->nr_to_write <= 0) {
+			wbc->more_io = 1;
 			break;
+		}
+		if (!list_empty(&sb->s_more_io))
+			wbc->more_io = 1;
 	}
 	return;		/* Leave any unwritten inodes on s_io */
 }
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index e4effc47abfc..e9456ebd3bb6 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -932,7 +932,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
 	if (!gfs2_is_writeback(ip))
 		gfs2_trans_add_bh(ip->i_gl, bh, 0);
 
-	zero_user_page(page, offset, length, KM_USER0);
+	zero_user(page, offset, length);
 
 unlock:
 	unlock_page(page);
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 38dbe99a30ed..ac772b6d9dbb 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -446,7 +446,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
 	 * so we need to supply one here. It doesn't happen often.
 	 */
 	if (unlikely(page->index)) {
-		zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+		zero_user(page, 0, PAGE_CACHE_SIZE);
 		return 0;
 	}
 
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 35c1a9f33f47..53fd0a67c11a 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -285,17 +285,17 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
 			return err;
 
 	times[0].tv_sec = atime_ts.tv_sec;
-	times[0].tv_usec = atime_ts.tv_nsec * 1000;
+	times[0].tv_usec = atime_ts.tv_nsec / 1000;
 	times[1].tv_sec = mtime_ts.tv_sec;
-	times[1].tv_usec = mtime_ts.tv_nsec * 1000;
+	times[1].tv_usec = mtime_ts.tv_nsec / 1000;
 
 	if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) {
 		times[0].tv_sec = attrs->ia_atime.tv_sec;
-		times[0].tv_usec = attrs->ia_atime.tv_nsec * 1000;
+		times[0].tv_usec = attrs->ia_atime.tv_nsec / 1000;
 	}
 	if (attrs->ia_valid & HOSTFS_ATTR_MTIME_SET) {
 		times[1].tv_sec = attrs->ia_mtime.tv_sec;
-		times[1].tv_usec = attrs->ia_mtime.tv_nsec * 1000;
+		times[1].tv_usec = attrs->ia_mtime.tv_nsec / 1000;
 	}
 
 	if (fd >= 0) {
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 09ee07f02663..3b3cc28cdefc 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -768,7 +768,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
 		case Opt_mode:
 			if (match_octal(&args[0], &option))
 				goto bad_val;
-			pconfig->mode = option & 0777U;
+			pconfig->mode = option & 01777U;
 			break;
 
 		case Opt_size: {
diff --git a/fs/libfs.c b/fs/libfs.c
index 6e68b700958d..5523bde96387 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -341,13 +341,10 @@ int simple_prepare_write(struct file *file, struct page *page,
 			unsigned from, unsigned to)
 {
 	if (!PageUptodate(page)) {
-		if (to - from != PAGE_CACHE_SIZE) {
-			void *kaddr = kmap_atomic(page, KM_USER0);
-			memset(kaddr, 0, from);
-			memset(kaddr + to, 0, PAGE_CACHE_SIZE - to);
-			flush_dcache_page(page);
-			kunmap_atomic(kaddr, KM_USER0);
-		}
+		if (to - from != PAGE_CACHE_SIZE)
+			zero_user_segments(page,
+				0, from,
+				to, PAGE_CACHE_SIZE);
 	}
 	return 0;
 }
diff --git a/fs/mpage.c b/fs/mpage.c
index d54f8f897224..5df564366f36 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -276,9 +276,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
 	}
 
 	if (first_hole != blocks_per_page) {
-		zero_user_page(page, first_hole << blkbits,
-				PAGE_CACHE_SIZE - (first_hole << blkbits),
-				KM_USER0);
+		zero_user_segment(page, first_hole << blkbits, PAGE_CACHE_SIZE);
 		if (first_hole == 0) {
 			SetPageUptodate(page);
 			unlock_page(page);
@@ -571,8 +569,7 @@ page_is_mapped:
 
 		if (page->index > end_index || !offset)
 			goto confused;
-		zero_user_page(page, offset, PAGE_CACHE_SIZE - offset,
-				KM_USER0);
+		zero_user_segment(page, offset, PAGE_CACHE_SIZE);
 	}
 
 	/*
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 8fd6dfbe1bc3..3d7d9631e125 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -79,7 +79,7 @@ void nfs_readdata_release(void *data)
 static
 int nfs_return_empty_page(struct page *page)
 {
-	zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+	zero_user(page, 0, PAGE_CACHE_SIZE);
 	SetPageUptodate(page);
 	unlock_page(page);
 	return 0;
@@ -103,10 +103,10 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
 		pglen = PAGE_CACHE_SIZE - base;
 		for (;;) {
 			if (remainder <= pglen) {
-				zero_user_page(*pages, base, remainder, KM_USER0);
+				zero_user(*pages, base, remainder);
 				break;
 			}
-			zero_user_page(*pages, base, pglen, KM_USER0);
+			zero_user(*pages, base, pglen);
 			pages++;
 			remainder -= pglen;
 			pglen = PAGE_CACHE_SIZE;
@@ -130,7 +130,7 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 		return PTR_ERR(new);
 	}
 	if (len < PAGE_CACHE_SIZE)
-		zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
+		zero_user_segment(page, len, PAGE_CACHE_SIZE);
 
 	nfs_list_add_request(new, &one_request);
 	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
@@ -532,7 +532,7 @@ readpage_async_filler(void *data, struct page *page)
 		goto out_error;
 
 	if (len < PAGE_CACHE_SIZE)
-		zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
+		zero_user_segment(page, len, PAGE_CACHE_SIZE);
 	nfs_pageio_add_request(desc->pgio, new);
 	return 0;
 out_error:
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 522efff3e2c5..b144b1957dd9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -665,9 +665,7 @@ zero_page:
 	 * then we need to zero any uninitalised data. */
 	if (req->wb_pgbase == 0 && req->wb_bytes != PAGE_CACHE_SIZE
 			&& !PageUptodate(req->wb_page))
-		zero_user_page(req->wb_page, req->wb_bytes,
-				PAGE_CACHE_SIZE - req->wb_bytes,
-				KM_USER0);
+		zero_user_segment(req->wb_page, req->wb_bytes, PAGE_CACHE_SIZE);
 	return req;
 }
 
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 21928056e35e..d13403e33622 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -11,8 +11,6 @@
 #include <linux/nfsd/nfsd.h>
 #include <linux/nfsd/export.h>
 
-#define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE))
-
 int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
 {
 	struct exp_flavor_info *f;
@@ -69,10 +67,12 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
 	ret = set_current_groups(cred.cr_group_info);
 	put_group_info(cred.cr_group_info);
 	if ((cred.cr_uid)) {
-		cap_t(current->cap_effective) &= ~CAP_NFSD_MASK;
+		current->cap_effective =
+			cap_drop_nfsd_set(current->cap_effective);
 	} else {
-		cap_t(current->cap_effective) |= (CAP_NFSD_MASK &
-						  current->cap_permitted);
+		current->cap_effective =
+			cap_raise_nfsd_set(current->cap_effective,
+					   current->cap_permitted);
 	}
 	return ret;
 }
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index ad87cb01299b..00e9ccde8e42 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -87,13 +87,17 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 		/* Check for the current buffer head overflowing. */
 		if (unlikely(file_ofs + bh->b_size > init_size)) {
 			int ofs;
+			void *kaddr;
 
 			ofs = 0;
 			if (file_ofs < init_size)
 				ofs = init_size - file_ofs;
 			local_irq_save(flags);
-			zero_user_page(page, bh_offset(bh) + ofs,
-					bh->b_size - ofs, KM_BIO_SRC_IRQ);
+			kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+			memset(kaddr + bh_offset(bh) + ofs, 0,
+					bh->b_size - ofs);
+			flush_dcache_page(page);
+			kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
 			local_irq_restore(flags);
 		}
 	} else {
@@ -334,7 +338,7 @@ handle_hole:
 		bh->b_blocknr = -1UL;
 		clear_buffer_mapped(bh);
handle_zblock:
-		zero_user_page(page, i * blocksize, blocksize, KM_USER0);
+		zero_user(page, i * blocksize, blocksize);
 		if (likely(!err))
 			set_buffer_uptodate(bh);
 	} while (i++, iblock++, (bh = bh->b_this_page) != head);
@@ -410,7 +414,7 @@ retry_readpage:
 	/* Is the page fully outside i_size? (truncate in progress) */
 	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
 			PAGE_CACHE_SHIFT)) {
-		zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+		zero_user(page, 0, PAGE_CACHE_SIZE);
 		ntfs_debug("Read outside i_size - truncated?");
 		goto done;
 	}
@@ -459,7 +463,7 @@ retry_readpage:
 	 * ok to ignore the compressed flag here.
 	 */
 	if (unlikely(page->index > 0)) {
-		zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+		zero_user(page, 0, PAGE_CACHE_SIZE);
 		goto done;
 	}
 	if (!NInoAttr(ni))
@@ -788,8 +792,7 @@ lock_retry_remap:
 			if (err == -ENOENT || lcn == LCN_ENOENT) {
 				bh->b_blocknr = -1;
 				clear_buffer_dirty(bh);
-				zero_user_page(page, bh_offset(bh), blocksize,
-						KM_USER0);
+				zero_user(page, bh_offset(bh), blocksize);
 				set_buffer_uptodate(bh);
 				err = 0;
 				continue;
@@ -1414,8 +1417,7 @@ retry_writepage:
 	if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
 		/* The page straddles i_size. */
 		unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
-		zero_user_page(page, ofs, PAGE_CACHE_SIZE - ofs,
-				KM_USER0);
+		zero_user_segment(page, ofs, PAGE_CACHE_SIZE);
 	}
 	/* Handle mst protected attributes. */
 	if (NInoMstProtected(ni))
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index d1619d05eb23..33ff314cc507 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -565,7 +565,7 @@ int ntfs_read_compressed_block(struct page *page)
 	if (xpage >= max_page) {
 		kfree(bhs);
 		kfree(pages);
-		zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
+		zero_user(page, 0, PAGE_CACHE_SIZE);
 		ntfs_debug("Compressed read outside i_size - truncated?");
 		SetPageUptodate(page);
 		unlock_page(page);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 6cd08dfdc2ed..3c5550cd11d6 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -607,8 +607,8 @@ do_next_page:
 				ntfs_submit_bh_for_read(bh);
 				*wait_bh++ = bh;
 			} else {
-				zero_user_page(page, bh_offset(bh),
-						blocksize, KM_USER0);
+				zero_user(page, bh_offset(bh),
+						blocksize);
 				set_buffer_uptodate(bh);
 			}
 		}
@@ -683,9 +683,8 @@ map_buffer_cached:
 					ntfs_submit_bh_for_read(bh);
 					*wait_bh++ = bh;
 				} else {
-					zero_user_page(page,
-							bh_offset(bh),
-							blocksize, KM_USER0);
+					zero_user(page, bh_offset(bh),
+							blocksize);
 					set_buffer_uptodate(bh);
 				}
 			}
@@ -703,8 +702,8 @@ map_buffer_cached:
 			 */
 			if (bh_end <= pos || bh_pos >= end) {
 				if (!buffer_uptodate(bh)) {
-					zero_user_page(page, bh_offset(bh),
-							blocksize, KM_USER0);
+					zero_user(page, bh_offset(bh),
+							blocksize);
 					set_buffer_uptodate(bh);
 				}
 				mark_buffer_dirty(bh);
@@ -743,8 +742,7 @@ map_buffer_cached:
 				if (!buffer_uptodate(bh))
 					set_buffer_uptodate(bh);
 			} else if (!buffer_uptodate(bh)) {
-				zero_user_page(page, bh_offset(bh), blocksize,
-						KM_USER0);
+				zero_user(page, bh_offset(bh), blocksize);
 				set_buffer_uptodate(bh);
 			}
 			continue;
@@ -868,8 +866,8 @@ rl_not_mapped_enoent:
 				if (!buffer_uptodate(bh))
 					set_buffer_uptodate(bh);
 			} else if (!buffer_uptodate(bh)) {
-				zero_user_page(page, bh_offset(bh),
-						blocksize, KM_USER0);
+				zero_user(page, bh_offset(bh),
+						blocksize);
 				set_buffer_uptodate(bh);
 			}
 			continue;
@@ -1128,8 +1126,8 @@ rl_not_mapped_enoent:
 
 				if (likely(bh_pos < initialized_size))
 					ofs = initialized_size - bh_pos;
-				zero_user_page(page, bh_offset(bh) + ofs,
-						blocksize - ofs, KM_USER0);
+				zero_user_segment(page, bh_offset(bh) + ofs,
+						blocksize);
 			}
 		} else /* if (unlikely(!buffer_uptodate(bh))) */
 			err = -EIO;
@@ -1269,8 +1267,8 @@ rl_not_mapped_enoent:
 			if (PageUptodate(page))
 				set_buffer_uptodate(bh);
 			else {
-				zero_user_page(page, bh_offset(bh),
-						blocksize, KM_USER0);
+				zero_user(page, bh_offset(bh),
+						blocksize);
 				set_buffer_uptodate(bh);
 			}
 		}
@@ -1330,7 +1328,7 @@ err_out:
 			len = PAGE_CACHE_SIZE;
 			if (len > bytes)
 				len = bytes;
-			zero_user_page(*pages, 0, len, KM_USER0);
+			zero_user(*pages, 0, len);
 		}
 	goto out;
 }
@@ -1451,7 +1449,7 @@ err_out:
 			len = PAGE_CACHE_SIZE;
 			if (len > bytes)
 				len = bytes;
-			zero_user_page(*pages, 0, len, KM_USER0);
+			zero_user(*pages, 0, len);
 		}
 	goto out;
 }
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index e38e402e4103..cd0be3f5c3cd 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -85,8 +85,7 @@ static inline void *ntfs_malloc_nofs_nofail(unsigned long size)
 
 static inline void ntfs_free(void *addr)
 {
-	if (likely(((unsigned long)addr < VMALLOC_START) ||
-			((unsigned long)addr >= VMALLOC_END ))) {
+	if (!is_vmalloc_addr(addr)) {
 		kfree(addr);
 		/* free_page((unsigned long)addr); */
 		return;
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 64713e149e46..447206eb5c2e 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5670,7 +5670,7 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
 		mlog_errno(ret);
 
 	if (zero)
-		zero_user_page(page, from, to - from, KM_USER0);
+		zero_user_segment(page, from, to);
 
 	/*
 	 * Need to set the buffers we zero'd into uptodate
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index bc7b4cbbe8ec..82243127eebf 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -307,7 +307,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
 	 * XXX sys_readahead() seems to get that wrong?
 	 */
 	if (start >= i_size_read(inode)) {
-		zero_user_page(page, 0, PAGE_SIZE, KM_USER0);
+		zero_user(page, 0, PAGE_SIZE);
 		SetPageUptodate(page);
 		ret = 0;
 		goto out_alloc;
@@ -869,7 +869,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
 		if (block_start >= to)
 			break;
 
-		zero_user_page(page, block_start, bh->b_size, KM_USER0);
+		zero_user(page, block_start, bh->b_size);
 		set_buffer_uptodate(bh);
 		mark_buffer_dirty(bh);
 
@@ -1034,7 +1034,7 @@ static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to
 			start = max(from, block_start);
 			end = min(to, block_end);
 
-			zero_user_page(page, start, end - start, KM_USER0);
+			zero_user_segment(page, start, end);
 			set_buffer_uptodate(bh);
 		}
 
diff --git a/fs/proc/array.c b/fs/proc/array.c
index b380313092bd..6ba2746e4517 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -281,14 +281,23 @@ static inline char *task_sig(struct task_struct *p, char *buffer)
 	return buffer;
 }
 
+static char *render_cap_t(const char *header, kernel_cap_t *a, char *buffer)
+{
+	unsigned __capi;
+
+	buffer += sprintf(buffer, "%s", header);
+	CAP_FOR_EACH_U32(__capi) {
+		buffer += sprintf(buffer, "%08x",
+				  a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]);
+	}
+	return buffer + sprintf(buffer, "\n");
+}
+
 static inline char *task_cap(struct task_struct *p, char *buffer)
 {
-	return buffer + sprintf(buffer, "CapInh:\t%016x\n"
-				"CapPrm:\t%016x\n"
-				"CapEff:\t%016x\n",
-				cap_t(p->cap_inheritable),
-				cap_t(p->cap_permitted),
-				cap_t(p->cap_effective));
+	buffer = render_cap_t("CapInh:\t", &p->cap_inheritable, buffer);
+	buffer = render_cap_t("CapPrm:\t", &p->cap_permitted, buffer);
+	return render_cap_t("CapEff:\t", &p->cap_effective, buffer);
 }
 
 static inline char *task_context_switch_counts(struct task_struct *p,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 33537487f5ab..c59852b38787 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -88,10 +88,6 @@
  * in /proc for a task before it execs a suid executable.
  */
 
-
-/* Worst case buffer size needed for holding an integer. */
-#define PROC_NUMBUF 13
-
 struct pid_entry {
 	char *name;
 	int len;
@@ -787,7 +783,7 @@ out_no_task:
 }
 #endif
 
-static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
+loff_t mem_lseek(struct file *file, loff_t offset, int orig)
 {
 	switch (orig) {
 	case 0:
@@ -935,42 +931,6 @@ static const struct file_operations proc_oom_adjust_operations = {
 	.write		= oom_adjust_write,
 };
 
-#ifdef CONFIG_MMU
-static ssize_t clear_refs_write(struct file *file, const char __user *buf,
-				size_t count, loff_t *ppos)
-{
-	struct task_struct *task;
-	char buffer[PROC_NUMBUF], *end;
-	struct mm_struct *mm;
-
-	memset(buffer, 0, sizeof(buffer));
-	if (count > sizeof(buffer) - 1)
-		count = sizeof(buffer) - 1;
-	if (copy_from_user(buffer, buf, count))
-		return -EFAULT;
-	if (!simple_strtol(buffer, &end, 0))
-		return -EINVAL;
-	if (*end == '\n')
-		end++;
-	task = get_proc_task(file->f_path.dentry->d_inode);
-	if (!task)
-		return -ESRCH;
-	mm = get_task_mm(task);
-	if (mm) {
-		clear_refs_smap(mm);
-		mmput(mm);
-	}
-	put_task_struct(task);
-	if (end - buffer == 0)
-		return -EIO;
-	return end - buffer;
-}
-
-static struct file_operations proc_clear_refs_operations = {
-	.write		= clear_refs_write,
-};
-#endif
-
 #ifdef CONFIG_AUDITSYSCALL
 #define TMPBUFLEN 21
 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
@@ -2289,9 +2249,10 @@ static const struct pid_entry tgid_base_stuff[] = {
 	LNK("exe", exe),
 	REG("mounts", S_IRUGO, mounts),
 	REG("mountstats", S_IRUSR, mountstats),
-#ifdef CONFIG_MMU
+#ifdef CONFIG_PROC_PAGE_MONITOR
 	REG("clear_refs", S_IWUSR, clear_refs),
 	REG("smaps", S_IRUGO, smaps),
+	REG("pagemap", S_IRUSR, pagemap),
 #endif
 #ifdef CONFIG_SECURITY
 	DIR("attr", S_IRUGO|S_IXUGO, attr_dir),
@@ -2360,7 +2321,8 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 	name.len = snprintf(buf, sizeof(buf), "%d", pid);
 	dentry = d_hash_and_lookup(mnt->mnt_root, &name);
 	if (dentry) {
-		shrink_dcache_parent(dentry);
+		if (!(current->flags & PF_EXITING))
+			shrink_dcache_parent(dentry);
 		d_drop(dentry);
 		dput(dentry);
 	}
@@ -2617,9 +2579,10 @@ static const struct pid_entry tid_base_stuff[] = {
 	LNK("root", root),
 	LNK("exe", exe),
 	REG("mounts", S_IRUGO, mounts),
-#ifdef CONFIG_MMU
+#ifdef CONFIG_PROC_PAGE_MONITOR
 	REG("clear_refs", S_IWUSR, clear_refs),
 	REG("smaps", S_IRUGO, smaps),
+	REG("pagemap", S_IRUSR, pagemap),
 #endif
 #ifdef CONFIG_SECURITY
 	DIR("attr", S_IRUGO|S_IXUGO, attr_dir),
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 05b3e9006262..7d57e8069924 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -52,15 +52,13 @@ extern int proc_tid_stat(struct task_struct *, char *);
 extern int proc_tgid_stat(struct task_struct *, char *);
 extern int proc_pid_status(struct task_struct *, char *);
 extern int proc_pid_statm(struct task_struct *, char *);
+extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
 
 extern const struct file_operations proc_maps_operations;
 extern const struct file_operations proc_numa_maps_operations;
 extern const struct file_operations proc_smaps_operations;
-
-extern const struct file_operations proc_maps_operations;
-extern const struct file_operations proc_numa_maps_operations;
-extern const struct file_operations proc_smaps_operations;
-
+extern const struct file_operations proc_clear_refs_operations;
+extern const struct file_operations proc_pagemap_operations;
 
 void free_proc_entry(struct proc_dir_entry *de);
 
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 1be73082edd3..7dd26e18cbfd 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -325,7 +325,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 		if (m == NULL) {
 			if (clear_user(buffer, tsz))
 				return -EFAULT;
-		} else if ((start >= VMALLOC_START) && (start < VMALLOC_END)) {
+		} else if (is_vmalloc_addr((void *)start)) {
 			char * elf_buf;
 			struct vm_struct *m;
 			unsigned long curstart = start;
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 3462bfde89f6..51288db37a0c 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -46,6 +46,7 @@
 #include <linux/vmalloc.h>
 #include <linux/crash_dump.h>
 #include <linux/pid_namespace.h>
+#include <linux/bootmem.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
@@ -675,6 +676,137 @@ static const struct file_operations proc_sysrq_trigger_operations = {
675}; 676};
676#endif 677#endif
677 678
679#ifdef CONFIG_PROC_PAGE_MONITOR
680#define KPMSIZE sizeof(u64)
681#define KPMMASK (KPMSIZE - 1)
682/* /proc/kpagecount - an array exposing page counts
683 *
684 * Each entry is a u64 representing the corresponding
685 * physical page count.
686 */
687static ssize_t kpagecount_read(struct file *file, char __user *buf,
688 size_t count, loff_t *ppos)
689{
690 u64 __user *out = (u64 __user *)buf;
691 struct page *ppage;
692 unsigned long src = *ppos;
693 unsigned long pfn;
694 ssize_t ret = 0;
695 u64 pcount;
696
697 pfn = src / KPMSIZE;
698 count = min_t(size_t, count, (max_pfn * KPMSIZE) - src);
699 if (src & KPMMASK || count & KPMMASK)
700 return -EIO;
701
702 while (count > 0) {
703 ppage = NULL;
704 if (pfn_valid(pfn))
705 ppage = pfn_to_page(pfn);
706 pfn++;
707 if (!ppage)
708 pcount = 0;
709 else
710 pcount = atomic_read(&ppage->_count);
711
712 if (put_user(pcount, out++)) {
713 ret = -EFAULT;
714 break;
715 }
716
717 count -= KPMSIZE;
718 }
719
720 *ppos += (char __user *)out - buf;
721 if (!ret)
722 ret = (char __user *)out - buf;
723 return ret;
724}
725
726static struct file_operations proc_kpagecount_operations = {
727 .llseek = mem_lseek,
728 .read = kpagecount_read,
729};
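A hedged userspace sketch of how the interface above is meant to be consumed: each page frame owns one 8-byte slot, so a reader seeks to pfn * sizeof(u64) and pulls one entry. The program below is hypothetical (not part of this patch) and assumes a caller privileged enough to open the file:

/* Print the reference count (_count) of a single page frame as
 * exported by /proc/kpagecount. Hypothetical example, not kernel code. */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	uint64_t pfn, count;
	int fd;

	if (argc < 2)
		return 1;
	pfn = strtoull(argv[1], NULL, 0);
	fd = open("/proc/kpagecount", O_RDONLY);
	if (fd < 0 || pread(fd, &count, sizeof(count),
			    pfn * sizeof(count)) != sizeof(count)) {
		perror("kpagecount");
		return 1;
	}
	printf("pfn %llu: count %llu\n",
	       (unsigned long long)pfn, (unsigned long long)count);
	close(fd);
	return 0;
}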
730
731/* /proc/kpageflags - an array exposing page flags
732 *
733 * Each entry is a u64 representing the corresponding
734 * physical page flags.
735 */
736
737/* These macros are used to decouple internal flags from exported ones */
738
739#define KPF_LOCKED 0
740#define KPF_ERROR 1
741#define KPF_REFERENCED 2
742#define KPF_UPTODATE 3
743#define KPF_DIRTY 4
744#define KPF_LRU 5
745#define KPF_ACTIVE 6
746#define KPF_SLAB 7
747#define KPF_WRITEBACK 8
748#define KPF_RECLAIM 9
749#define KPF_BUDDY 10
750
 751#define kpf_copy_bit(flags, dstpos, srcpos) (((flags >> srcpos) & 1) << dstpos)
752
753static ssize_t kpageflags_read(struct file *file, char __user *buf,
754 size_t count, loff_t *ppos)
755{
756 u64 __user *out = (u64 __user *)buf;
757 struct page *ppage;
758 unsigned long src = *ppos;
759 unsigned long pfn;
760 ssize_t ret = 0;
761 u64 kflags, uflags;
762
763 pfn = src / KPMSIZE;
764 count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src);
765 if (src & KPMMASK || count & KPMMASK)
766 return -EIO;
767
768 while (count > 0) {
769 ppage = NULL;
770 if (pfn_valid(pfn))
771 ppage = pfn_to_page(pfn);
772 pfn++;
773 if (!ppage)
774 kflags = 0;
775 else
776 kflags = ppage->flags;
777
 778 uflags = kpf_copy_bit(kflags, KPF_LOCKED, PG_locked) |
779 kpf_copy_bit(kflags, KPF_ERROR, PG_error) |
780 kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) |
781 kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) |
782 kpf_copy_bit(kflags, KPF_DIRTY, PG_dirty) |
783 kpf_copy_bit(kflags, KPF_LRU, PG_lru) |
784 kpf_copy_bit(kflags, KPF_ACTIVE, PG_active) |
785 kpf_copy_bit(kflags, KPF_SLAB, PG_slab) |
786 kpf_copy_bit(kflags, KPF_WRITEBACK, PG_writeback) |
787 kpf_copy_bit(kflags, KPF_RECLAIM, PG_reclaim) |
788 kpf_copy_bit(kflags, KPF_BUDDY, PG_buddy);
789
790 if (put_user(uflags, out++)) {
791 ret = -EFAULT;
792 break;
793 }
794
795 count -= KPMSIZE;
796 }
797
798 *ppos += (char __user *)out - buf;
799 if (!ret)
800 ret = (char __user *)out - buf;
801 return ret;
802}
803
804static struct file_operations proc_kpageflags_operations = {
805 .llseek = mem_lseek,
806 .read = kpageflags_read,
807};
808#endif /* CONFIG_PROC_PAGE_MONITOR */
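Because the KPF_* positions above are a stable exported ABI (decoupled from the kernel-internal PG_* layout), a consumer can decode entries with plain bit tests. A small hypothetical decoder, with names mirroring the constants in this patch:

/* Decode one /proc/kpageflags entry (read the same way as a
 * kpagecount entry). Bit positions are the exported KPF_* values. */
#include <stdio.h>
#include <stdint.h>

static const char * const kpf_names[] = {
	"LOCKED", "ERROR", "REFERENCED", "UPTODATE", "DIRTY", "LRU",
	"ACTIVE", "SLAB", "WRITEBACK", "RECLAIM", "BUDDY",
};

static void print_kpageflags(uint64_t entry)
{
	unsigned int bit;

	for (bit = 0; bit < sizeof(kpf_names) / sizeof(kpf_names[0]); bit++)
		if (entry & (1ULL << bit))
			printf(" %s", kpf_names[bit]);
	printf("\n");
}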
809
678struct proc_dir_entry *proc_root_kcore; 810struct proc_dir_entry *proc_root_kcore;
679 811
680void create_seq_entry(char *name, mode_t mode, const struct file_operations *f) 812void create_seq_entry(char *name, mode_t mode, const struct file_operations *f)
@@ -755,6 +887,10 @@ void __init proc_misc_init(void)
755 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; 887 (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;
756 } 888 }
757#endif 889#endif
890#ifdef CONFIG_PROC_PAGE_MONITOR
891 create_seq_entry("kpagecount", S_IRUSR, &proc_kpagecount_operations);
892 create_seq_entry("kpageflags", S_IRUSR, &proc_kpageflags_operations);
893#endif
758#ifdef CONFIG_PROC_VMCORE 894#ifdef CONFIG_PROC_VMCORE
759 proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL); 895 proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL);
760 if (proc_vmcore) 896 if (proc_vmcore)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8043a3eab52c..38338ed98cc6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -5,7 +5,10 @@
5#include <linux/highmem.h> 5#include <linux/highmem.h>
6#include <linux/ptrace.h> 6#include <linux/ptrace.h>
7#include <linux/pagemap.h> 7#include <linux/pagemap.h>
8#include <linux/ptrace.h>
8#include <linux/mempolicy.h> 9#include <linux/mempolicy.h>
10#include <linux/swap.h>
11#include <linux/swapops.h>
9 12
10#include <asm/elf.h> 13#include <asm/elf.h>
11#include <asm/uaccess.h> 14#include <asm/uaccess.h>
@@ -114,24 +117,124 @@ static void pad_len_spaces(struct seq_file *m, int len)
114 seq_printf(m, "%*c", len, ' '); 117 seq_printf(m, "%*c", len, ' ');
115} 118}
116 119
117struct mem_size_stats 120static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
118{ 121{
119 unsigned long resident; 122 if (vma && vma != priv->tail_vma) {
120 unsigned long shared_clean; 123 struct mm_struct *mm = vma->vm_mm;
121 unsigned long shared_dirty; 124 up_read(&mm->mmap_sem);
122 unsigned long private_clean; 125 mmput(mm);
123 unsigned long private_dirty; 126 }
124 unsigned long referenced; 127}
125};
126 128
127struct pmd_walker { 129static void *m_start(struct seq_file *m, loff_t *pos)
128 struct vm_area_struct *vma; 130{
129 void *private; 131 struct proc_maps_private *priv = m->private;
130 void (*action)(struct vm_area_struct *, pmd_t *, unsigned long, 132 unsigned long last_addr = m->version;
131 unsigned long, void *); 133 struct mm_struct *mm;
132}; 134 struct vm_area_struct *vma, *tail_vma = NULL;
135 loff_t l = *pos;
136
137 /* Clear the per syscall fields in priv */
138 priv->task = NULL;
139 priv->tail_vma = NULL;
140
141 /*
142 * We remember last_addr rather than next_addr to hit with
143 * mmap_cache most of the time. We have zero last_addr at
144 * the beginning and also after lseek. We will have -1 last_addr
145 * after the end of the vmas.
146 */
147
148 if (last_addr == -1UL)
149 return NULL;
150
151 priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
152 if (!priv->task)
153 return NULL;
154
155 mm = mm_for_maps(priv->task);
156 if (!mm)
157 return NULL;
158
159 tail_vma = get_gate_vma(priv->task);
160 priv->tail_vma = tail_vma;
161
162 /* Start with last addr hint */
163 vma = find_vma(mm, last_addr);
164 if (last_addr && vma) {
165 vma = vma->vm_next;
166 goto out;
167 }
168
169 /*
170 * Check the vma index is within the range and do
171 * sequential scan until m_index.
172 */
173 vma = NULL;
174 if ((unsigned long)l < mm->map_count) {
175 vma = mm->mmap;
176 while (l-- && vma)
177 vma = vma->vm_next;
178 goto out;
179 }
180
181 if (l != mm->map_count)
182 tail_vma = NULL; /* After gate vma */
183
184out:
185 if (vma)
186 return vma;
187
188 /* End of vmas has been reached */
189 m->version = (tail_vma != NULL)? 0: -1UL;
190 up_read(&mm->mmap_sem);
191 mmput(mm);
192 return tail_vma;
193}
133 194
134static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) 195static void *m_next(struct seq_file *m, void *v, loff_t *pos)
196{
197 struct proc_maps_private *priv = m->private;
198 struct vm_area_struct *vma = v;
199 struct vm_area_struct *tail_vma = priv->tail_vma;
200
201 (*pos)++;
202 if (vma && (vma != tail_vma) && vma->vm_next)
203 return vma->vm_next;
204 vma_stop(priv, vma);
205 return (vma != tail_vma)? tail_vma: NULL;
206}
207
208static void m_stop(struct seq_file *m, void *v)
209{
210 struct proc_maps_private *priv = m->private;
211 struct vm_area_struct *vma = v;
212
213 vma_stop(priv, vma);
214 if (priv->task)
215 put_task_struct(priv->task);
216}
217
218static int do_maps_open(struct inode *inode, struct file *file,
219 struct seq_operations *ops)
220{
221 struct proc_maps_private *priv;
222 int ret = -ENOMEM;
223 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
224 if (priv) {
225 priv->pid = proc_pid(inode);
226 ret = seq_open(file, ops);
227 if (!ret) {
228 struct seq_file *m = file->private_data;
229 m->private = priv;
230 } else {
231 kfree(priv);
232 }
233 }
234 return ret;
235}
236
237static int show_map(struct seq_file *m, void *v)
135{ 238{
136 struct proc_maps_private *priv = m->private; 239 struct proc_maps_private *priv = m->private;
137 struct task_struct *task = priv->task; 240 struct task_struct *task = priv->task;
@@ -191,41 +294,71 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats
191 } 294 }
192 seq_putc(m, '\n'); 295 seq_putc(m, '\n');
193 296
194 if (mss)
195 seq_printf(m,
196 "Size: %8lu kB\n"
197 "Rss: %8lu kB\n"
198 "Shared_Clean: %8lu kB\n"
199 "Shared_Dirty: %8lu kB\n"
200 "Private_Clean: %8lu kB\n"
201 "Private_Dirty: %8lu kB\n"
202 "Referenced: %8lu kB\n",
203 (vma->vm_end - vma->vm_start) >> 10,
204 mss->resident >> 10,
205 mss->shared_clean >> 10,
206 mss->shared_dirty >> 10,
207 mss->private_clean >> 10,
208 mss->private_dirty >> 10,
209 mss->referenced >> 10);
210
211 if (m->count < m->size) /* vma is copied successfully */ 297 if (m->count < m->size) /* vma is copied successfully */
212 m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; 298 m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
213 return 0; 299 return 0;
214} 300}
215 301
216static int show_map(struct seq_file *m, void *v) 302static struct seq_operations proc_pid_maps_op = {
303 .start = m_start,
304 .next = m_next,
305 .stop = m_stop,
306 .show = show_map
307};
308
309static int maps_open(struct inode *inode, struct file *file)
217{ 310{
218 return show_map_internal(m, v, NULL); 311 return do_maps_open(inode, file, &proc_pid_maps_op);
219} 312}
220 313
221static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd, 314const struct file_operations proc_maps_operations = {
222 unsigned long addr, unsigned long end, 315 .open = maps_open,
223 void *private) 316 .read = seq_read,
317 .llseek = seq_lseek,
318 .release = seq_release_private,
319};
320
321/*
 322 * Proportional Set Size (PSS): my share of RSS.
323 *
324 * PSS of a process is the count of pages it has in memory, where each
325 * page is divided by the number of processes sharing it. So if a
326 * process has 1000 pages all to itself, and 1000 shared with one other
327 * process, its PSS will be 1500.
328 *
329 * To keep (accumulated) division errors low, we adopt a 64bit
330 * fixed-point pss counter to minimize division errors. So (pss >>
331 * PSS_SHIFT) would be the real byte count.
332 *
333 * A shift of 12 before division means (assuming 4K page size):
334 * - 1M 3-user-pages add up to 8KB errors;
335 * - supports mapcount up to 2^24, or 16M;
336 * - supports PSS up to 2^52 bytes, or 4PB.
337 */
338#define PSS_SHIFT 12
339
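A worked instance of the comment above, assuming PAGE_SIZE == 4096 and PSS_SHIFT == 12: one page shared three ways plus one private page accumulate to 5461 bytes after the final shift, with each division error confined below the fixed-point step:

/* Worked example of the PSS fixed-point accumulation (hypothetical
 * standalone program, not kernel code). */
#include <stdio.h>
#include <stdint.h>

#define PSS_SHIFT 12

int main(void)
{
	uint64_t pss = 0;

	pss += ((uint64_t)4096 << PSS_SHIFT) / 3; /* shared by 3 tasks */
	pss += ((uint64_t)4096 << PSS_SHIFT);     /* private: full share */

	/* Prints 5461, i.e. 4096/3 (truncated to 1365) + 4096. */
	printf("pss = %llu bytes\n", (unsigned long long)(pss >> PSS_SHIFT));
	return 0;
}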
340#ifdef CONFIG_PROC_PAGE_MONITOR
341struct mem_size_stats
342{
343 struct vm_area_struct *vma;
344 unsigned long resident;
345 unsigned long shared_clean;
346 unsigned long shared_dirty;
347 unsigned long private_clean;
348 unsigned long private_dirty;
349 unsigned long referenced;
350 u64 pss;
351};
352
353static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
354 void *private)
224{ 355{
225 struct mem_size_stats *mss = private; 356 struct mem_size_stats *mss = private;
357 struct vm_area_struct *vma = mss->vma;
226 pte_t *pte, ptent; 358 pte_t *pte, ptent;
227 spinlock_t *ptl; 359 spinlock_t *ptl;
228 struct page *page; 360 struct page *page;
361 int mapcount;
229 362
230 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 363 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
231 for (; addr != end; pte++, addr += PAGE_SIZE) { 364 for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -242,26 +375,88 @@ static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
242 /* Accumulate the size in pages that have been accessed. */ 375 /* Accumulate the size in pages that have been accessed. */
243 if (pte_young(ptent) || PageReferenced(page)) 376 if (pte_young(ptent) || PageReferenced(page))
244 mss->referenced += PAGE_SIZE; 377 mss->referenced += PAGE_SIZE;
245 if (page_mapcount(page) >= 2) { 378 mapcount = page_mapcount(page);
379 if (mapcount >= 2) {
246 if (pte_dirty(ptent)) 380 if (pte_dirty(ptent))
247 mss->shared_dirty += PAGE_SIZE; 381 mss->shared_dirty += PAGE_SIZE;
248 else 382 else
249 mss->shared_clean += PAGE_SIZE; 383 mss->shared_clean += PAGE_SIZE;
384 mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
250 } else { 385 } else {
251 if (pte_dirty(ptent)) 386 if (pte_dirty(ptent))
252 mss->private_dirty += PAGE_SIZE; 387 mss->private_dirty += PAGE_SIZE;
253 else 388 else
254 mss->private_clean += PAGE_SIZE; 389 mss->private_clean += PAGE_SIZE;
390 mss->pss += (PAGE_SIZE << PSS_SHIFT);
255 } 391 }
256 } 392 }
257 pte_unmap_unlock(pte - 1, ptl); 393 pte_unmap_unlock(pte - 1, ptl);
258 cond_resched(); 394 cond_resched();
395 return 0;
259} 396}
260 397
261static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd, 398static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
262 unsigned long addr, unsigned long end, 399
263 void *private) 400static int show_smap(struct seq_file *m, void *v)
264{ 401{
402 struct vm_area_struct *vma = v;
403 struct mem_size_stats mss;
404 int ret;
405
406 memset(&mss, 0, sizeof mss);
407 mss.vma = vma;
408 if (vma->vm_mm && !is_vm_hugetlb_page(vma))
409 walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end,
410 &smaps_walk, &mss);
411
412 ret = show_map(m, v);
413 if (ret)
414 return ret;
415
416 seq_printf(m,
417 "Size: %8lu kB\n"
418 "Rss: %8lu kB\n"
419 "Pss: %8lu kB\n"
420 "Shared_Clean: %8lu kB\n"
421 "Shared_Dirty: %8lu kB\n"
422 "Private_Clean: %8lu kB\n"
423 "Private_Dirty: %8lu kB\n"
424 "Referenced: %8lu kB\n",
425 (vma->vm_end - vma->vm_start) >> 10,
426 mss.resident >> 10,
427 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
428 mss.shared_clean >> 10,
429 mss.shared_dirty >> 10,
430 mss.private_clean >> 10,
431 mss.private_dirty >> 10,
432 mss.referenced >> 10);
433
434 return ret;
435}
436
437static struct seq_operations proc_pid_smaps_op = {
438 .start = m_start,
439 .next = m_next,
440 .stop = m_stop,
441 .show = show_smap
442};
443
444static int smaps_open(struct inode *inode, struct file *file)
445{
446 return do_maps_open(inode, file, &proc_pid_smaps_op);
447}
448
449const struct file_operations proc_smaps_operations = {
450 .open = smaps_open,
451 .read = seq_read,
452 .llseek = seq_lseek,
453 .release = seq_release_private,
454};
455
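With Pss now printed per VMA by show_smap(), a process's total proportional footprint is just the sum of those lines; a hypothetical userspace sketch relying only on the field names emitted above:

/* Sum the Pss: fields of /proc/<pid>/smaps. Hypothetical example. */
#include <stdio.h>

int main(int argc, char **argv)
{
	char path[64], line[256];
	unsigned long kb, total = 0;
	FILE *f;

	if (argc < 2)
		return 1;
	snprintf(path, sizeof(path), "/proc/%s/smaps", argv[1]);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		if (sscanf(line, "Pss: %lu kB", &kb) == 1)
			total += kb;
	fclose(f);
	printf("total Pss: %lu kB\n", total);
	return 0;
}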
456static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
457 unsigned long end, void *private)
458{
459 struct vm_area_struct *vma = private;
265 pte_t *pte, ptent; 460 pte_t *pte, ptent;
266 spinlock_t *ptl; 461 spinlock_t *ptl;
267 struct page *page; 462 struct page *page;
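Both of this file's hand-rolled walkers are being converted to the generic page-table walker: the pmd_walker/walk_pud_range machinery removed in the next hunk is subsumed by walk_page_range(), which recurses through pgd/pud/pmd itself and invokes whichever callbacks a struct mm_walk supplies. Roughly, per the call sites in this patch (the authoritative definitions live in include/linux/mm.h):

/* Approximate shape of the generic walker API this patch adopts.
 * Callbacks left NULL are simply skipped during the walk. */
struct mm_walk {
	int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, void *);
	int (*pud_entry)(pud_t *, unsigned long, unsigned long, void *);
	int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, void *);
	int (*pte_entry)(pte_t *, unsigned long, unsigned long, void *);
	int (*pte_hole)(unsigned long, unsigned long, void *);
};

int walk_page_range(const struct mm_struct *mm, unsigned long addr,
		    unsigned long end, const struct mm_walk *walk,
		    void *private);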
@@ -282,235 +477,248 @@ static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
282 } 477 }
283 pte_unmap_unlock(pte - 1, ptl); 478 pte_unmap_unlock(pte - 1, ptl);
284 cond_resched(); 479 cond_resched();
480 return 0;
285} 481}
286 482
287static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud, 483static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
288 unsigned long addr, unsigned long end) 484
485static ssize_t clear_refs_write(struct file *file, const char __user *buf,
486 size_t count, loff_t *ppos)
289{ 487{
290 pmd_t *pmd; 488 struct task_struct *task;
291 unsigned long next; 489 char buffer[PROC_NUMBUF], *end;
490 struct mm_struct *mm;
491 struct vm_area_struct *vma;
292 492
293 for (pmd = pmd_offset(pud, addr); addr != end; 493 memset(buffer, 0, sizeof(buffer));
294 pmd++, addr = next) { 494 if (count > sizeof(buffer) - 1)
295 next = pmd_addr_end(addr, end); 495 count = sizeof(buffer) - 1;
296 if (pmd_none_or_clear_bad(pmd)) 496 if (copy_from_user(buffer, buf, count))
297 continue; 497 return -EFAULT;
298 walker->action(walker->vma, pmd, addr, next, walker->private); 498 if (!simple_strtol(buffer, &end, 0))
499 return -EINVAL;
500 if (*end == '\n')
501 end++;
502 task = get_proc_task(file->f_path.dentry->d_inode);
503 if (!task)
504 return -ESRCH;
505 mm = get_task_mm(task);
506 if (mm) {
507 down_read(&mm->mmap_sem);
508 for (vma = mm->mmap; vma; vma = vma->vm_next)
509 if (!is_vm_hugetlb_page(vma))
510 walk_page_range(mm, vma->vm_start, vma->vm_end,
511 &clear_refs_walk, vma);
512 flush_tlb_mm(mm);
513 up_read(&mm->mmap_sem);
514 mmput(mm);
299 } 515 }
516 put_task_struct(task);
517 if (end - buffer == 0)
518 return -EIO;
519 return end - buffer;
300} 520}
301 521
302static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd, 522const struct file_operations proc_clear_refs_operations = {
303 unsigned long addr, unsigned long end) 523 .write = clear_refs_write,
304{ 524};
305 pud_t *pud;
306 unsigned long next;
307 525
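The clear_refs file itself takes a single integer write; afterwards the Referenced: accounting in smaps restarts from zero, which lets a sampler measure a task's working set over an interval. A hypothetical helper around the interface added above:

/* Reset a task's per-page referenced bits via /proc/<pid>/clear_refs,
 * as handled by clear_refs_write() above. Hypothetical example. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

static int clear_refs(const char *pid)
{
	char path[64];
	int fd, ok;

	snprintf(path, sizeof(path), "/proc/%s/clear_refs", pid);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	ok = write(fd, "1\n", 2) == 2;
	close(fd);
	return ok ? 0 : -1;
}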
308 for (pud = pud_offset(pgd, addr); addr != end; 526struct pagemapread {
309 pud++, addr = next) { 527 char __user *out, *end;
310 next = pud_addr_end(addr, end); 528};
311 if (pud_none_or_clear_bad(pud)) 529
312 continue; 530#define PM_ENTRY_BYTES sizeof(u64)
313 walk_pmd_range(walker, pud, addr, next); 531#define PM_RESERVED_BITS 3
532#define PM_RESERVED_OFFSET (64 - PM_RESERVED_BITS)
533#define PM_RESERVED_MASK (((1LL<<PM_RESERVED_BITS)-1) << PM_RESERVED_OFFSET)
534#define PM_SPECIAL(nr) (((nr) << PM_RESERVED_OFFSET) | PM_RESERVED_MASK)
535#define PM_NOT_PRESENT PM_SPECIAL(1LL)
536#define PM_SWAP PM_SPECIAL(2LL)
537#define PM_END_OF_BUFFER 1
538
539static int add_to_pagemap(unsigned long addr, u64 pfn,
540 struct pagemapread *pm)
541{
542 /*
543 * Make sure there's room in the buffer for an
544 * entire entry. Otherwise, only copy part of
545 * the pfn.
546 */
547 if (pm->out + PM_ENTRY_BYTES >= pm->end) {
548 if (copy_to_user(pm->out, &pfn, pm->end - pm->out))
549 return -EFAULT;
550 pm->out = pm->end;
551 return PM_END_OF_BUFFER;
314 } 552 }
553
554 if (put_user(pfn, pm->out))
555 return -EFAULT;
556 pm->out += PM_ENTRY_BYTES;
557 return 0;
315} 558}
316 559
317/* 560static int pagemap_pte_hole(unsigned long start, unsigned long end,
318 * walk_page_range - walk the page tables of a VMA with a callback 561 void *private)
319 * @vma - VMA to walk
320 * @action - callback invoked for every bottom-level (PTE) page table
321 * @private - private data passed to the callback function
322 *
323 * Recursively walk the page table for the memory area in a VMA, calling
324 * a callback for every bottom-level (PTE) page table.
325 */
326static inline void walk_page_range(struct vm_area_struct *vma,
327 void (*action)(struct vm_area_struct *,
328 pmd_t *, unsigned long,
329 unsigned long, void *),
330 void *private)
331{ 562{
332 unsigned long addr = vma->vm_start; 563 struct pagemapread *pm = private;
333 unsigned long end = vma->vm_end; 564 unsigned long addr;
334 struct pmd_walker walker = { 565 int err = 0;
335 .vma = vma, 566 for (addr = start; addr < end; addr += PAGE_SIZE) {
336 .private = private, 567 err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
337 .action = action, 568 if (err)
338 }; 569 break;
339 pgd_t *pgd;
340 unsigned long next;
341
342 for (pgd = pgd_offset(vma->vm_mm, addr); addr != end;
343 pgd++, addr = next) {
344 next = pgd_addr_end(addr, end);
345 if (pgd_none_or_clear_bad(pgd))
346 continue;
347 walk_pud_range(&walker, pgd, addr, next);
348 } 570 }
571 return err;
349} 572}
350 573
351static int show_smap(struct seq_file *m, void *v) 574u64 swap_pte_to_pagemap_entry(pte_t pte)
352{ 575{
353 struct vm_area_struct *vma = v; 576 swp_entry_t e = pte_to_swp_entry(pte);
354 struct mem_size_stats mss; 577 return PM_SWAP | swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
355
356 memset(&mss, 0, sizeof mss);
357 if (vma->vm_mm && !is_vm_hugetlb_page(vma))
358 walk_page_range(vma, smaps_pte_range, &mss);
359 return show_map_internal(m, v, &mss);
360} 578}
361 579
362void clear_refs_smap(struct mm_struct *mm) 580static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
581 void *private)
363{ 582{
364 struct vm_area_struct *vma; 583 struct pagemapread *pm = private;
584 pte_t *pte;
585 int err = 0;
586
587 for (; addr != end; addr += PAGE_SIZE) {
588 u64 pfn = PM_NOT_PRESENT;
589 pte = pte_offset_map(pmd, addr);
590 if (is_swap_pte(*pte))
591 pfn = swap_pte_to_pagemap_entry(*pte);
592 else if (pte_present(*pte))
593 pfn = pte_pfn(*pte);
594 /* unmap so we're not in atomic when we copy to userspace */
595 pte_unmap(pte);
596 err = add_to_pagemap(addr, pfn, pm);
597 if (err)
598 return err;
599 }
365 600
366 down_read(&mm->mmap_sem); 601 cond_resched();
367 for (vma = mm->mmap; vma; vma = vma->vm_next) 602
368 if (vma->vm_mm && !is_vm_hugetlb_page(vma)) 603 return err;
369 walk_page_range(vma, clear_refs_pte_range, NULL);
370 flush_tlb_mm(mm);
371 up_read(&mm->mmap_sem);
372} 604}
373 605
374static void *m_start(struct seq_file *m, loff_t *pos) 606static struct mm_walk pagemap_walk = {
607 .pmd_entry = pagemap_pte_range,
608 .pte_hole = pagemap_pte_hole
609};
610
611/*
612 * /proc/pid/pagemap - an array mapping virtual pages to pfns
613 *
614 * For each page in the address space, this file contains one 64-bit
615 * entry representing the corresponding physical page frame number
616 * (PFN) if the page is present. If there is a swap entry for the
617 * physical page, then an encoding of the swap file number and the
618 * page's offset into the swap file are returned. If no page is
619 * present at all, PM_NOT_PRESENT is returned. This allows determining
620 * precisely which pages are mapped (or in swap) and comparing mapped
621 * pages between processes.
622 *
623 * Efficient users of this interface will use /proc/pid/maps to
624 * determine which areas of memory are actually mapped and llseek to
625 * skip over unmapped regions.
626 */
627static ssize_t pagemap_read(struct file *file, char __user *buf,
628 size_t count, loff_t *ppos)
375{ 629{
376 struct proc_maps_private *priv = m->private; 630 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
377 unsigned long last_addr = m->version; 631 struct page **pages, *page;
632 unsigned long uaddr, uend;
378 struct mm_struct *mm; 633 struct mm_struct *mm;
379 struct vm_area_struct *vma, *tail_vma = NULL; 634 struct pagemapread pm;
380 loff_t l = *pos; 635 int pagecount;
381 636 int ret = -ESRCH;
382 /* Clear the per syscall fields in priv */
383 priv->task = NULL;
384 priv->tail_vma = NULL;
385 637
386 /* 638 if (!task)
387 * We remember last_addr rather than next_addr to hit with 639 goto out;
388 * mmap_cache most of the time. We have zero last_addr at
389 * the beginning and also after lseek. We will have -1 last_addr
390 * after the end of the vmas.
391 */
392 640
393 if (last_addr == -1UL) 641 ret = -EACCES;
394 return NULL; 642 if (!ptrace_may_attach(task))
643 goto out;
395 644
396 priv->task = get_pid_task(priv->pid, PIDTYPE_PID); 645 ret = -EINVAL;
397 if (!priv->task) 646 /* file position must be aligned */
398 return NULL; 647 if (*ppos % PM_ENTRY_BYTES)
648 goto out;
399 649
400 mm = mm_for_maps(priv->task); 650 ret = 0;
651 mm = get_task_mm(task);
401 if (!mm) 652 if (!mm)
402 return NULL;
403
404 priv->tail_vma = tail_vma = get_gate_vma(priv->task);
405
406 /* Start with last addr hint */
407 if (last_addr && (vma = find_vma(mm, last_addr))) {
408 vma = vma->vm_next;
409 goto out; 653 goto out;
410 }
411 654
412 /* 655 ret = -ENOMEM;
413 * Check the vma index is within the range and do 656 uaddr = (unsigned long)buf & PAGE_MASK;
414 * sequential scan until m_index. 657 uend = (unsigned long)(buf + count);
415 */ 658 pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
416 vma = NULL; 659 pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
417 if ((unsigned long)l < mm->map_count) { 660 if (!pages)
418 vma = mm->mmap; 661 goto out_task;
419 while (l-- && vma)
420 vma = vma->vm_next;
421 goto out;
422 }
423 662
424 if (l != mm->map_count) 663 down_read(&current->mm->mmap_sem);
425 tail_vma = NULL; /* After gate vma */ 664 ret = get_user_pages(current, current->mm, uaddr, pagecount,
665 1, 0, pages, NULL);
666 up_read(&current->mm->mmap_sem);
426 667
427out: 668 if (ret < 0)
428 if (vma) 669 goto out_free;
429 return vma;
430 670
431 /* End of vmas has been reached */ 671 pm.out = buf;
432 m->version = (tail_vma != NULL)? 0: -1UL; 672 pm.end = buf + count;
433 up_read(&mm->mmap_sem);
434 mmput(mm);
435 return tail_vma;
436}
437 673
438static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) 674 if (!ptrace_may_attach(task)) {
439{ 675 ret = -EIO;
440 if (vma && vma != priv->tail_vma) { 676 } else {
441 struct mm_struct *mm = vma->vm_mm; 677 unsigned long src = *ppos;
442 up_read(&mm->mmap_sem); 678 unsigned long svpfn = src / PM_ENTRY_BYTES;
443 mmput(mm); 679 unsigned long start_vaddr = svpfn << PAGE_SHIFT;
680 unsigned long end_vaddr = TASK_SIZE_OF(task);
681
682 /* watch out for wraparound */
683 if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
684 start_vaddr = end_vaddr;
685
686 /*
687 * The odds are that this will stop walking way
688 * before end_vaddr, because the length of the
689 * user buffer is tracked in "pm", and the walk
690 * will stop when we hit the end of the buffer.
691 */
692 ret = walk_page_range(mm, start_vaddr, end_vaddr,
693 &pagemap_walk, &pm);
694 if (ret == PM_END_OF_BUFFER)
695 ret = 0;
696 /* don't need mmap_sem for these, but this looks cleaner */
697 *ppos += pm.out - buf;
698 if (!ret)
699 ret = pm.out - buf;
444 } 700 }
445}
446
447static void *m_next(struct seq_file *m, void *v, loff_t *pos)
448{
449 struct proc_maps_private *priv = m->private;
450 struct vm_area_struct *vma = v;
451 struct vm_area_struct *tail_vma = priv->tail_vma;
452
453 (*pos)++;
454 if (vma && (vma != tail_vma) && vma->vm_next)
455 return vma->vm_next;
456 vma_stop(priv, vma);
457 return (vma != tail_vma)? tail_vma: NULL;
458}
459
460static void m_stop(struct seq_file *m, void *v)
461{
462 struct proc_maps_private *priv = m->private;
463 struct vm_area_struct *vma = v;
464 701
465 vma_stop(priv, vma); 702 for (; pagecount; pagecount--) {
466 if (priv->task) 703 page = pages[pagecount-1];
467 put_task_struct(priv->task); 704 if (!PageReserved(page))
468} 705 SetPageDirty(page);
469 706 page_cache_release(page);
470static struct seq_operations proc_pid_maps_op = {
471 .start = m_start,
472 .next = m_next,
473 .stop = m_stop,
474 .show = show_map
475};
476
477static struct seq_operations proc_pid_smaps_op = {
478 .start = m_start,
479 .next = m_next,
480 .stop = m_stop,
481 .show = show_smap
482};
483
484static int do_maps_open(struct inode *inode, struct file *file,
485 struct seq_operations *ops)
486{
487 struct proc_maps_private *priv;
488 int ret = -ENOMEM;
489 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
490 if (priv) {
491 priv->pid = proc_pid(inode);
492 ret = seq_open(file, ops);
493 if (!ret) {
494 struct seq_file *m = file->private_data;
495 m->private = priv;
496 } else {
497 kfree(priv);
498 }
499 } 707 }
708 mmput(mm);
709out_free:
710 kfree(pages);
711out_task:
712 put_task_struct(task);
713out:
500 return ret; 714 return ret;
501} 715}
502 716
503static int maps_open(struct inode *inode, struct file *file) 717const struct file_operations proc_pagemap_operations = {
504{ 718 .llseek = mem_lseek, /* borrow this */
505 return do_maps_open(inode, file, &proc_pid_maps_op); 719 .read = pagemap_read,
506}
507
508const struct file_operations proc_maps_operations = {
509 .open = maps_open,
510 .read = seq_read,
511 .llseek = seq_lseek,
512 .release = seq_release_private,
513}; 720};
721#endif /* CONFIG_PROC_PAGE_MONITOR */
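A hedged consumer sketch matching the comment block above: index the file by virtual page number, read one u64, and treat entries with the three reserved top bits set as the PM_NOT_PRESENT/PM_SWAP specials (this follows the PM_* macros in this version of the patch; the encoding was reworked in later kernels):

/* Look up what backs one virtual address of a target process via
 * /proc/<pid>/pagemap. Hypothetical example for this patch's format. */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>

#define PM_ENTRY_BYTES   sizeof(uint64_t)
#define PM_RESERVED_MASK (7ULL << 61)

int main(int argc, char **argv)
{
	char path[64];
	uint64_t vaddr, entry;
	long pagesize = sysconf(_SC_PAGESIZE);
	int fd;

	if (argc < 3)
		return 1;
	vaddr = strtoull(argv[2], NULL, 0);
	snprintf(path, sizeof(path), "/proc/%s/pagemap", argv[1]);
	fd = open(path, O_RDONLY);
	if (fd < 0 || pread(fd, &entry, sizeof(entry),
			    vaddr / pagesize * PM_ENTRY_BYTES) != sizeof(entry)) {
		perror(path);
		return 1;
	}
	if ((entry & PM_RESERVED_MASK) == PM_RESERVED_MASK)
		printf("not present (or in swap)\n");
	else
		printf("pfn 0x%llx\n", (unsigned long long)entry);
	close(fd);
	return 0;
}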
514 722
515#ifdef CONFIG_NUMA 723#ifdef CONFIG_NUMA
516extern int show_numa_map(struct seq_file *m, void *v); 724extern int show_numa_map(struct seq_file *m, void *v);
@@ -545,15 +753,3 @@ const struct file_operations proc_numa_maps_operations = {
545 .release = seq_release_private, 753 .release = seq_release_private,
546}; 754};
547#endif 755#endif
548
549static int smaps_open(struct inode *inode, struct file *file)
550{
551 return do_maps_open(inode, file, &proc_pid_smaps_op);
552}
553
554const struct file_operations proc_smaps_operations = {
555 .open = smaps_open,
556 .read = seq_read,
557 .llseek = seq_lseek,
558 .release = seq_release_private,
559};
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 231fd5ccadc5..195309857e63 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2143,7 +2143,7 @@ int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps)
2143 /* if we are not on a block boundary */ 2143 /* if we are not on a block boundary */
2144 if (length) { 2144 if (length) {
2145 length = blocksize - length; 2145 length = blocksize - length;
2146 zero_user_page(page, offset, length, KM_USER0); 2146 zero_user(page, offset, length);
2147 if (buffer_mapped(bh) && bh->b_blocknr != 0) { 2147 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
2148 mark_buffer_dirty(bh); 2148 mark_buffer_dirty(bh);
2149 } 2149 }
@@ -2367,7 +2367,7 @@ static int reiserfs_write_full_page(struct page *page,
2367 unlock_page(page); 2367 unlock_page(page);
2368 return 0; 2368 return 0;
2369 } 2369 }
2370 zero_user_page(page, last_offset, PAGE_CACHE_SIZE - last_offset, KM_USER0); 2370 zero_user_segment(page, last_offset, PAGE_CACHE_SIZE);
2371 } 2371 }
2372 bh = head; 2372 bh = head;
2373 block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); 2373 block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
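The two reiserfs conversions above (and the xfs_lrw.c one at the end of this diff) drop the KM_USER0 argument because the replacement helpers choose the kmap slot internally; their approximate shape in the include/linux/highmem.h of this era, both thin wrappers over zero_user_segments():

/* zero_user() zeroes `size` bytes from `start`; zero_user_segment()
 * zeroes the half-open range [start, end). Sketch of the headers. */
static inline void zero_user_segment(struct page *page,
				     unsigned start, unsigned end)
{
	zero_user_segments(page, start, end, 0, 0);
}

static inline void zero_user(struct page *page,
			     unsigned start, unsigned size)
{
	zero_user_segments(page, start, start + size, 0, 0);
}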
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 9416ead0c7aa..4e5c22ca802e 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -500,6 +500,13 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
500 struct smb_fattr root; 500 struct smb_fattr root;
501 int ver; 501 int ver;
502 void *mem; 502 void *mem;
503 static int warn_count;
504
505 if (warn_count < 5) {
506 warn_count++;
507 printk(KERN_EMERG "smbfs is deprecated and will be removed"
508 "from the 2.6.27 kernel. Please migrate to cifs\n");
509 }
503 510
504 if (!raw_data) 511 if (!raw_data)
505 goto out_no_data; 512 goto out_no_data;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 61983f3b107c..10c80b59ec4b 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -25,13 +25,15 @@ struct timerfd_ctx {
25 struct hrtimer tmr; 25 struct hrtimer tmr;
26 ktime_t tintv; 26 ktime_t tintv;
27 wait_queue_head_t wqh; 27 wait_queue_head_t wqh;
28 u64 ticks;
28 int expired; 29 int expired;
30 int clockid;
29}; 31};
30 32
31/* 33/*
32 * This gets called when the timer event triggers. We set the "expired" 34 * This gets called when the timer event triggers. We set the "expired"
33 * flag, but we do not re-arm the timer (in case it's necessary, 35 * flag, but we do not re-arm the timer (in case it's necessary,
34 * tintv.tv64 != 0) until the timer is read. 36 * tintv.tv64 != 0) until the timer is accessed.
35 */ 37 */
36static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) 38static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
37{ 39{
@@ -40,13 +42,24 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
40 42
41 spin_lock_irqsave(&ctx->wqh.lock, flags); 43 spin_lock_irqsave(&ctx->wqh.lock, flags);
42 ctx->expired = 1; 44 ctx->expired = 1;
45 ctx->ticks++;
43 wake_up_locked(&ctx->wqh); 46 wake_up_locked(&ctx->wqh);
44 spin_unlock_irqrestore(&ctx->wqh.lock, flags); 47 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
45 48
46 return HRTIMER_NORESTART; 49 return HRTIMER_NORESTART;
47} 50}
48 51
49static void timerfd_setup(struct timerfd_ctx *ctx, int clockid, int flags, 52static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
53{
54 ktime_t now, remaining;
55
56 now = ctx->tmr.base->get_time();
57 remaining = ktime_sub(ctx->tmr.expires, now);
58
59 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
60}
61
62static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
50 const struct itimerspec *ktmr) 63 const struct itimerspec *ktmr)
51{ 64{
52 enum hrtimer_mode htmode; 65 enum hrtimer_mode htmode;
@@ -57,8 +70,9 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int clockid, int flags,
57 70
58 texp = timespec_to_ktime(ktmr->it_value); 71 texp = timespec_to_ktime(ktmr->it_value);
59 ctx->expired = 0; 72 ctx->expired = 0;
73 ctx->ticks = 0;
60 ctx->tintv = timespec_to_ktime(ktmr->it_interval); 74 ctx->tintv = timespec_to_ktime(ktmr->it_interval);
61 hrtimer_init(&ctx->tmr, clockid, htmode); 75 hrtimer_init(&ctx->tmr, ctx->clockid, htmode);
62 ctx->tmr.expires = texp; 76 ctx->tmr.expires = texp;
63 ctx->tmr.function = timerfd_tmrproc; 77 ctx->tmr.function = timerfd_tmrproc;
64 if (texp.tv64 != 0) 78 if (texp.tv64 != 0)
@@ -83,7 +97,7 @@ static unsigned int timerfd_poll(struct file *file, poll_table *wait)
83 poll_wait(file, &ctx->wqh, wait); 97 poll_wait(file, &ctx->wqh, wait);
84 98
85 spin_lock_irqsave(&ctx->wqh.lock, flags); 99 spin_lock_irqsave(&ctx->wqh.lock, flags);
86 if (ctx->expired) 100 if (ctx->ticks)
87 events |= POLLIN; 101 events |= POLLIN;
88 spin_unlock_irqrestore(&ctx->wqh.lock, flags); 102 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
89 103
@@ -102,11 +116,11 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
102 return -EINVAL; 116 return -EINVAL;
103 spin_lock_irq(&ctx->wqh.lock); 117 spin_lock_irq(&ctx->wqh.lock);
104 res = -EAGAIN; 118 res = -EAGAIN;
105 if (!ctx->expired && !(file->f_flags & O_NONBLOCK)) { 119 if (!ctx->ticks && !(file->f_flags & O_NONBLOCK)) {
106 __add_wait_queue(&ctx->wqh, &wait); 120 __add_wait_queue(&ctx->wqh, &wait);
107 for (res = 0;;) { 121 for (res = 0;;) {
108 set_current_state(TASK_INTERRUPTIBLE); 122 set_current_state(TASK_INTERRUPTIBLE);
109 if (ctx->expired) { 123 if (ctx->ticks) {
110 res = 0; 124 res = 0;
111 break; 125 break;
112 } 126 }
@@ -121,22 +135,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
121 __remove_wait_queue(&ctx->wqh, &wait); 135 __remove_wait_queue(&ctx->wqh, &wait);
122 __set_current_state(TASK_RUNNING); 136 __set_current_state(TASK_RUNNING);
123 } 137 }
124 if (ctx->expired) { 138 if (ctx->ticks) {
125 ctx->expired = 0; 139 ticks = ctx->ticks;
126 if (ctx->tintv.tv64 != 0) { 140 if (ctx->expired && ctx->tintv.tv64) {
127 /* 141 /*
128 * If tintv.tv64 != 0, this is a periodic timer that 142 * If tintv.tv64 != 0, this is a periodic timer that
129 * needs to be re-armed. We avoid doing it in the timer 143 * needs to be re-armed. We avoid doing it in the timer
130 * callback to avoid DoS attacks specifying a very 144 * callback to avoid DoS attacks specifying a very
131 * short timer period. 145 * short timer period.
132 */ 146 */
133 ticks = (u64) 147 ticks += hrtimer_forward_now(&ctx->tmr,
134 hrtimer_forward(&ctx->tmr, 148 ctx->tintv) - 1;
135 hrtimer_cb_get_time(&ctx->tmr),
136 ctx->tintv);
137 hrtimer_restart(&ctx->tmr); 149 hrtimer_restart(&ctx->tmr);
138 } else 150 }
139 ticks = 1; 151 ctx->expired = 0;
152 ctx->ticks = 0;
140 } 153 }
141 spin_unlock_irq(&ctx->wqh.lock); 154 spin_unlock_irq(&ctx->wqh.lock);
142 if (ticks) 155 if (ticks)
@@ -150,76 +163,132 @@ static const struct file_operations timerfd_fops = {
150 .read = timerfd_read, 163 .read = timerfd_read,
151}; 164};
152 165
153asmlinkage long sys_timerfd(int ufd, int clockid, int flags, 166static struct file *timerfd_fget(int fd)
154 const struct itimerspec __user *utmr) 167{
168 struct file *file;
169
170 file = fget(fd);
171 if (!file)
172 return ERR_PTR(-EBADF);
173 if (file->f_op != &timerfd_fops) {
174 fput(file);
175 return ERR_PTR(-EINVAL);
176 }
177
178 return file;
179}
180
181asmlinkage long sys_timerfd_create(int clockid, int flags)
155{ 182{
156 int error; 183 int error, ufd;
157 struct timerfd_ctx *ctx; 184 struct timerfd_ctx *ctx;
158 struct file *file; 185 struct file *file;
159 struct inode *inode; 186 struct inode *inode;
160 struct itimerspec ktmr;
161
162 if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
163 return -EFAULT;
164 187
188 if (flags)
189 return -EINVAL;
165 if (clockid != CLOCK_MONOTONIC && 190 if (clockid != CLOCK_MONOTONIC &&
166 clockid != CLOCK_REALTIME) 191 clockid != CLOCK_REALTIME)
167 return -EINVAL; 192 return -EINVAL;
193
194 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
195 if (!ctx)
196 return -ENOMEM;
197
198 init_waitqueue_head(&ctx->wqh);
199 ctx->clockid = clockid;
200 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
201
202 error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]",
203 &timerfd_fops, ctx);
204 if (error) {
205 kfree(ctx);
206 return error;
207 }
208
209 return ufd;
210}
211
212asmlinkage long sys_timerfd_settime(int ufd, int flags,
213 const struct itimerspec __user *utmr,
214 struct itimerspec __user *otmr)
215{
216 struct file *file;
217 struct timerfd_ctx *ctx;
218 struct itimerspec ktmr, kotmr;
219
220 if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
221 return -EFAULT;
222
168 if (!timespec_valid(&ktmr.it_value) || 223 if (!timespec_valid(&ktmr.it_value) ||
169 !timespec_valid(&ktmr.it_interval)) 224 !timespec_valid(&ktmr.it_interval))
170 return -EINVAL; 225 return -EINVAL;
171 226
172 if (ufd == -1) { 227 file = timerfd_fget(ufd);
173 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 228 if (IS_ERR(file))
174 if (!ctx) 229 return PTR_ERR(file);
175 return -ENOMEM; 230 ctx = file->private_data;
176
177 init_waitqueue_head(&ctx->wqh);
178
179 timerfd_setup(ctx, clockid, flags, &ktmr);
180
181 /*
182 * When we call this, the initialization must be complete, since
183 * anon_inode_getfd() will install the fd.
184 */
185 error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]",
186 &timerfd_fops, ctx);
187 if (error)
188 goto err_tmrcancel;
189 } else {
190 file = fget(ufd);
191 if (!file)
192 return -EBADF;
193 ctx = file->private_data;
194 if (file->f_op != &timerfd_fops) {
195 fput(file);
196 return -EINVAL;
197 }
198 /*
199 * We need to stop the existing timer before reprogramming
200 * it to the new values.
201 */
202 for (;;) {
203 spin_lock_irq(&ctx->wqh.lock);
204 if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)
205 break;
206 spin_unlock_irq(&ctx->wqh.lock);
207 cpu_relax();
208 }
209 /*
210 * Re-program the timer to the new value ...
211 */
212 timerfd_setup(ctx, clockid, flags, &ktmr);
213 231
232 /*
233 * We need to stop the existing timer before reprogramming
234 * it to the new values.
235 */
236 for (;;) {
237 spin_lock_irq(&ctx->wqh.lock);
238 if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)
239 break;
214 spin_unlock_irq(&ctx->wqh.lock); 240 spin_unlock_irq(&ctx->wqh.lock);
215 fput(file); 241 cpu_relax();
216 } 242 }
217 243
218 return ufd; 244 /*
245 * If the timer is expired and it's periodic, we need to advance it
246 * because the caller may want to know the previous expiration time.
247 * We do not update "ticks" and "expired" since the timer will be
248 * re-programmed again in the following timerfd_setup() call.
249 */
250 if (ctx->expired && ctx->tintv.tv64)
251 hrtimer_forward_now(&ctx->tmr, ctx->tintv);
219 252
220err_tmrcancel: 253 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
221 hrtimer_cancel(&ctx->tmr); 254 kotmr.it_interval = ktime_to_timespec(ctx->tintv);
222 kfree(ctx); 255
223 return error; 256 /*
257 * Re-program the timer to the new value ...
258 */
259 timerfd_setup(ctx, flags, &ktmr);
260
261 spin_unlock_irq(&ctx->wqh.lock);
262 fput(file);
263 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
264 return -EFAULT;
265
266 return 0;
267}
268
269asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr)
270{
271 struct file *file;
272 struct timerfd_ctx *ctx;
273 struct itimerspec kotmr;
274
275 file = timerfd_fget(ufd);
276 if (IS_ERR(file))
277 return PTR_ERR(file);
278 ctx = file->private_data;
279
280 spin_lock_irq(&ctx->wqh.lock);
281 if (ctx->expired && ctx->tintv.tv64) {
282 ctx->expired = 0;
283 ctx->ticks +=
284 hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1;
285 hrtimer_restart(&ctx->tmr);
286 }
287 kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
288 kotmr.it_interval = ktime_to_timespec(ctx->tintv);
289 spin_unlock_irq(&ctx->wqh.lock);
290 fput(file);
291
292 return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
224} 293}
225 294
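The net effect of this rework is a three-syscall API shaped like the POSIX timer calls: create the descriptor once, arm or re-arm it with settime (optionally retrieving the previous setting), query it with gettime, and read() a u64 count of expirations. A hypothetical userspace sketch using the eventual <sys/timerfd.h> wrappers for these syscalls:

/* Periodic 1 Hz timer over the new timerfd API; each read() returns
 * the number of expirations since the previous read. Hypothetical. */
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/timerfd.h>

int main(void)
{
	struct itimerspec its = {
		.it_value    = { .tv_sec = 1 },	/* first expiry in 1s */
		.it_interval = { .tv_sec = 1 },	/* then every second */
	};
	uint64_t ticks;
	int fd = timerfd_create(CLOCK_MONOTONIC, 0);

	if (fd < 0 || timerfd_settime(fd, 0, &its, NULL) < 0)
		return 1;
	while (read(fd, &ticks, sizeof(ticks)) == sizeof(ticks))
		printf("expired %llu time(s)\n", (unsigned long long)ticks);
	close(fd);
	return 0;
}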
diff --git a/fs/xattr.c b/fs/xattr.c
index 6645b7313b33..f7c8f87bb390 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -105,6 +105,33 @@ out:
105EXPORT_SYMBOL_GPL(vfs_setxattr); 105EXPORT_SYMBOL_GPL(vfs_setxattr);
106 106
107ssize_t 107ssize_t
108xattr_getsecurity(struct inode *inode, const char *name, void *value,
109 size_t size)
110{
111 void *buffer = NULL;
112 ssize_t len;
113
114 if (!value || !size) {
115 len = security_inode_getsecurity(inode, name, &buffer, false);
116 goto out_noalloc;
117 }
118
119 len = security_inode_getsecurity(inode, name, &buffer, true);
120 if (len < 0)
121 return len;
122 if (size < len) {
123 len = -ERANGE;
124 goto out;
125 }
126 memcpy(value, buffer, len);
127out:
128 security_release_secctx(buffer, len);
129out_noalloc:
130 return len;
131}
132EXPORT_SYMBOL_GPL(xattr_getsecurity);
133
134ssize_t
108vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) 135vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size)
109{ 136{
110 struct inode *inode = dentry->d_inode; 137 struct inode *inode = dentry->d_inode;
@@ -118,23 +145,23 @@ vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size)
118 if (error) 145 if (error)
119 return error; 146 return error;
120 147
121 if (inode->i_op->getxattr)
122 error = inode->i_op->getxattr(dentry, name, value, size);
123 else
124 error = -EOPNOTSUPP;
125
126 if (!strncmp(name, XATTR_SECURITY_PREFIX, 148 if (!strncmp(name, XATTR_SECURITY_PREFIX,
127 XATTR_SECURITY_PREFIX_LEN)) { 149 XATTR_SECURITY_PREFIX_LEN)) {
128 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; 150 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
129 int ret = security_inode_getsecurity(inode, suffix, value, 151 int ret = xattr_getsecurity(inode, suffix, value, size);
130 size, error);
131 /* 152 /*
132 * Only overwrite the return value if a security module 153 * Only overwrite the return value if a security module
133 * is actually active. 154 * is actually active.
134 */ 155 */
135 if (ret != -EOPNOTSUPP) 156 if (ret == -EOPNOTSUPP)
136 error = ret; 157 goto nolsm;
158 return ret;
137 } 159 }
160nolsm:
161 if (inode->i_op->getxattr)
162 error = inode->i_op->getxattr(dentry, name, value, size);
163 else
164 error = -EOPNOTSUPP;
138 165
139 return error; 166 return error;
140} 167}
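From userspace the reordering is visible only for security.* names: an active LSM now answers first, and the filesystem handler is consulted only on -EOPNOTSUPP. A hypothetical check, assuming a system with SELinux (or another LSM implementing inode_getsecurity):

/* Read a file's security label through getxattr(2), which now goes
 * through xattr_getsecurity() above. Hypothetical example. */
#include <stdio.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	char label[256];
	ssize_t len;

	if (argc < 2)
		return 1;
	len = getxattr(argv[1], "security.selinux",
		       label, sizeof(label) - 1);
	if (len < 0) {
		perror("getxattr");
		return 1;
	}
	label[len] = '\0';
	printf("%s: %s\n", argv[1], label);
	return 0;
}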
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index ed2b16dff914..e040f1ce1b6a 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -92,8 +92,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
92void 92void
93kmem_free(void *ptr, size_t size) 93kmem_free(void *ptr, size_t size)
94{ 94{
95 if (((unsigned long)ptr < VMALLOC_START) || 95 if (!is_vmalloc_addr(ptr)) {
96 ((unsigned long)ptr >= VMALLOC_END)) {
97 kfree(ptr); 96 kfree(ptr);
98 } else { 97 } else {
99 vfree(ptr); 98 vfree(ptr);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index a49dd8d4b069..0382c19d6523 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -709,8 +709,7 @@ static inline struct page *
709mem_to_page( 709mem_to_page(
710 void *addr) 710 void *addr)
711{ 711{
 712 if (((unsigned long)addr < VMALLOC_START) || 712 if (!is_vmalloc_addr(addr)) {
713 ((unsigned long)addr >= VMALLOC_END)) {
714 return virt_to_page(addr); 713 return virt_to_page(addr);
715 } else { 714 } else {
716 return vmalloc_to_page(addr); 715 return vmalloc_to_page(addr);
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index d6a8dddb2268..6f614f35f650 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -155,7 +155,7 @@ xfs_iozero(
155 if (status) 155 if (status)
156 break; 156 break;
157 157
158 zero_user_page(page, offset, bytes, KM_USER0); 158 zero_user(page, offset, bytes);
159 159
160 status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, 160 status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
161 page, fsdata); 161 page, fsdata);