diff options
Diffstat (limited to 'fs')
37 files changed, 922 insertions, 519 deletions
diff --git a/fs/buffer.c b/fs/buffer.c index 456c9ab7705b..826baf4f04bc 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -1798,7 +1798,7 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to) | |||
1798 | start = max(from, block_start); | 1798 | start = max(from, block_start); |
1799 | size = min(to, block_end) - start; | 1799 | size = min(to, block_end) - start; |
1800 | 1800 | ||
1801 | zero_user_page(page, start, size, KM_USER0); | 1801 | zero_user(page, start, size); |
1802 | set_buffer_uptodate(bh); | 1802 | set_buffer_uptodate(bh); |
1803 | } | 1803 | } |
1804 | 1804 | ||
@@ -1861,19 +1861,10 @@ static int __block_prepare_write(struct inode *inode, struct page *page, | |||
1861 | mark_buffer_dirty(bh); | 1861 | mark_buffer_dirty(bh); |
1862 | continue; | 1862 | continue; |
1863 | } | 1863 | } |
1864 | if (block_end > to || block_start < from) { | 1864 | if (block_end > to || block_start < from) |
1865 | void *kaddr; | 1865 | zero_user_segments(page, |
1866 | 1866 | to, block_end, | |
1867 | kaddr = kmap_atomic(page, KM_USER0); | 1867 | block_start, from); |
1868 | if (block_end > to) | ||
1869 | memset(kaddr+to, 0, | ||
1870 | block_end-to); | ||
1871 | if (block_start < from) | ||
1872 | memset(kaddr+block_start, | ||
1873 | 0, from-block_start); | ||
1874 | flush_dcache_page(page); | ||
1875 | kunmap_atomic(kaddr, KM_USER0); | ||
1876 | } | ||
1877 | continue; | 1868 | continue; |
1878 | } | 1869 | } |
1879 | } | 1870 | } |
@@ -2104,8 +2095,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block) | |||
2104 | SetPageError(page); | 2095 | SetPageError(page); |
2105 | } | 2096 | } |
2106 | if (!buffer_mapped(bh)) { | 2097 | if (!buffer_mapped(bh)) { |
2107 | zero_user_page(page, i * blocksize, blocksize, | 2098 | zero_user(page, i * blocksize, blocksize); |
2108 | KM_USER0); | ||
2109 | if (!err) | 2099 | if (!err) |
2110 | set_buffer_uptodate(bh); | 2100 | set_buffer_uptodate(bh); |
2111 | continue; | 2101 | continue; |
@@ -2218,7 +2208,7 @@ int cont_expand_zero(struct file *file, struct address_space *mapping, | |||
2218 | &page, &fsdata); | 2208 | &page, &fsdata); |
2219 | if (err) | 2209 | if (err) |
2220 | goto out; | 2210 | goto out; |
2221 | zero_user_page(page, zerofrom, len, KM_USER0); | 2211 | zero_user(page, zerofrom, len); |
2222 | err = pagecache_write_end(file, mapping, curpos, len, len, | 2212 | err = pagecache_write_end(file, mapping, curpos, len, len, |
2223 | page, fsdata); | 2213 | page, fsdata); |
2224 | if (err < 0) | 2214 | if (err < 0) |
@@ -2245,7 +2235,7 @@ int cont_expand_zero(struct file *file, struct address_space *mapping, | |||
2245 | &page, &fsdata); | 2235 | &page, &fsdata); |
2246 | if (err) | 2236 | if (err) |
2247 | goto out; | 2237 | goto out; |
2248 | zero_user_page(page, zerofrom, len, KM_USER0); | 2238 | zero_user(page, zerofrom, len); |
2249 | err = pagecache_write_end(file, mapping, curpos, len, len, | 2239 | err = pagecache_write_end(file, mapping, curpos, len, len, |
2250 | page, fsdata); | 2240 | page, fsdata); |
2251 | if (err < 0) | 2241 | if (err < 0) |
@@ -2422,7 +2412,6 @@ int nobh_write_begin(struct file *file, struct address_space *mapping, | |||
2422 | unsigned block_in_page; | 2412 | unsigned block_in_page; |
2423 | unsigned block_start, block_end; | 2413 | unsigned block_start, block_end; |
2424 | sector_t block_in_file; | 2414 | sector_t block_in_file; |
2425 | char *kaddr; | ||
2426 | int nr_reads = 0; | 2415 | int nr_reads = 0; |
2427 | int ret = 0; | 2416 | int ret = 0; |
2428 | int is_mapped_to_disk = 1; | 2417 | int is_mapped_to_disk = 1; |
@@ -2493,13 +2482,8 @@ int nobh_write_begin(struct file *file, struct address_space *mapping, | |||
2493 | continue; | 2482 | continue; |
2494 | } | 2483 | } |
2495 | if (buffer_new(bh) || !buffer_mapped(bh)) { | 2484 | if (buffer_new(bh) || !buffer_mapped(bh)) { |
2496 | kaddr = kmap_atomic(page, KM_USER0); | 2485 | zero_user_segments(page, block_start, from, |
2497 | if (block_start < from) | 2486 | to, block_end); |
2498 | memset(kaddr+block_start, 0, from-block_start); | ||
2499 | if (block_end > to) | ||
2500 | memset(kaddr + to, 0, block_end - to); | ||
2501 | flush_dcache_page(page); | ||
2502 | kunmap_atomic(kaddr, KM_USER0); | ||
2503 | continue; | 2487 | continue; |
2504 | } | 2488 | } |
2505 | if (buffer_uptodate(bh)) | 2489 | if (buffer_uptodate(bh)) |
@@ -2636,7 +2620,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block, | |||
2636 | * the page size, the remaining memory is zeroed when mapped, and | 2620 | * the page size, the remaining memory is zeroed when mapped, and |
2637 | * writes to that region are not written out to the file." | 2621 | * writes to that region are not written out to the file." |
2638 | */ | 2622 | */ |
2639 | zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); | 2623 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); |
2640 | out: | 2624 | out: |
2641 | ret = mpage_writepage(page, get_block, wbc); | 2625 | ret = mpage_writepage(page, get_block, wbc); |
2642 | if (ret == -EAGAIN) | 2626 | if (ret == -EAGAIN) |
@@ -2709,7 +2693,7 @@ has_buffers: | |||
2709 | if (page_has_buffers(page)) | 2693 | if (page_has_buffers(page)) |
2710 | goto has_buffers; | 2694 | goto has_buffers; |
2711 | } | 2695 | } |
2712 | zero_user_page(page, offset, length, KM_USER0); | 2696 | zero_user(page, offset, length); |
2713 | set_page_dirty(page); | 2697 | set_page_dirty(page); |
2714 | err = 0; | 2698 | err = 0; |
2715 | 2699 | ||
@@ -2785,7 +2769,7 @@ int block_truncate_page(struct address_space *mapping, | |||
2785 | goto unlock; | 2769 | goto unlock; |
2786 | } | 2770 | } |
2787 | 2771 | ||
2788 | zero_user_page(page, offset, length, KM_USER0); | 2772 | zero_user(page, offset, length); |
2789 | mark_buffer_dirty(bh); | 2773 | mark_buffer_dirty(bh); |
2790 | err = 0; | 2774 | err = 0; |
2791 | 2775 | ||
@@ -2831,7 +2815,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block, | |||
2831 | * the page size, the remaining memory is zeroed when mapped, and | 2815 | * the page size, the remaining memory is zeroed when mapped, and |
2832 | * writes to that region are not written out to the file." | 2816 | * writes to that region are not written out to the file." |
2833 | */ | 2817 | */ |
2834 | zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); | 2818 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); |
2835 | return __block_write_full_page(inode, page, get_block, wbc); | 2819 | return __block_write_full_page(inode, page, get_block, wbc); |
2836 | } | 2820 | } |
2837 | 2821 | ||
@@ -3169,7 +3153,7 @@ static void recalc_bh_state(void) | |||
3169 | 3153 | ||
3170 | struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) | 3154 | struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) |
3171 | { | 3155 | { |
3172 | struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, | 3156 | struct buffer_head *ret = kmem_cache_alloc(bh_cachep, |
3173 | set_migrateflags(gfp_flags, __GFP_RECLAIMABLE)); | 3157 | set_migrateflags(gfp_flags, __GFP_RECLAIMABLE)); |
3174 | if (ret) { | 3158 | if (ret) { |
3175 | INIT_LIST_HEAD(&ret->b_assoc_buffers); | 3159 | INIT_LIST_HEAD(&ret->b_assoc_buffers); |
@@ -3257,12 +3241,24 @@ int bh_submit_read(struct buffer_head *bh) | |||
3257 | } | 3241 | } |
3258 | EXPORT_SYMBOL(bh_submit_read); | 3242 | EXPORT_SYMBOL(bh_submit_read); |
3259 | 3243 | ||
3244 | static void | ||
3245 | init_buffer_head(struct kmem_cache *cachep, void *data) | ||
3246 | { | ||
3247 | struct buffer_head *bh = data; | ||
3248 | |||
3249 | memset(bh, 0, sizeof(*bh)); | ||
3250 | INIT_LIST_HEAD(&bh->b_assoc_buffers); | ||
3251 | } | ||
3252 | |||
3260 | void __init buffer_init(void) | 3253 | void __init buffer_init(void) |
3261 | { | 3254 | { |
3262 | int nrpages; | 3255 | int nrpages; |
3263 | 3256 | ||
3264 | bh_cachep = KMEM_CACHE(buffer_head, | 3257 | bh_cachep = kmem_cache_create("buffer_head", |
3265 | SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); | 3258 | sizeof(struct buffer_head), 0, |
3259 | (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| | ||
3260 | SLAB_MEM_SPREAD), | ||
3261 | init_buffer_head); | ||
3266 | 3262 | ||
3267 | /* | 3263 | /* |
3268 | * Limit the bh occupancy to 10% of ZONE_NORMAL | 3264 | * Limit the bh occupancy to 10% of ZONE_NORMAL |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index d9567ba2960b..47f2621001e4 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -1386,7 +1386,7 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from) | |||
1386 | if (!page) | 1386 | if (!page) |
1387 | return -ENOMEM; | 1387 | return -ENOMEM; |
1388 | 1388 | ||
1389 | zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); | 1389 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); |
1390 | unlock_page(page); | 1390 | unlock_page(page); |
1391 | page_cache_release(page); | 1391 | page_cache_release(page); |
1392 | return rc; | 1392 | return rc; |
diff --git a/fs/compat.c b/fs/compat.c index 5216c3fd7517..69baca5ad608 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -2206,19 +2206,41 @@ asmlinkage long compat_sys_signalfd(int ufd, | |||
2206 | 2206 | ||
2207 | #ifdef CONFIG_TIMERFD | 2207 | #ifdef CONFIG_TIMERFD |
2208 | 2208 | ||
2209 | asmlinkage long compat_sys_timerfd(int ufd, int clockid, int flags, | 2209 | asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, |
2210 | const struct compat_itimerspec __user *utmr) | 2210 | const struct compat_itimerspec __user *utmr, |
2211 | struct compat_itimerspec __user *otmr) | ||
2211 | { | 2212 | { |
2213 | int error; | ||
2212 | struct itimerspec t; | 2214 | struct itimerspec t; |
2213 | struct itimerspec __user *ut; | 2215 | struct itimerspec __user *ut; |
2214 | 2216 | ||
2215 | if (get_compat_itimerspec(&t, utmr)) | 2217 | if (get_compat_itimerspec(&t, utmr)) |
2216 | return -EFAULT; | 2218 | return -EFAULT; |
2217 | ut = compat_alloc_user_space(sizeof(*ut)); | 2219 | ut = compat_alloc_user_space(2 * sizeof(struct itimerspec)); |
2218 | if (copy_to_user(ut, &t, sizeof(t))) | 2220 | if (copy_to_user(&ut[0], &t, sizeof(t))) |
2219 | return -EFAULT; | 2221 | return -EFAULT; |
2222 | error = sys_timerfd_settime(ufd, flags, &ut[0], &ut[1]); | ||
2223 | if (!error && otmr) | ||
2224 | error = (copy_from_user(&t, &ut[1], sizeof(struct itimerspec)) || | ||
2225 | put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0; | ||
2226 | |||
2227 | return error; | ||
2228 | } | ||
2229 | |||
2230 | asmlinkage long compat_sys_timerfd_gettime(int ufd, | ||
2231 | struct compat_itimerspec __user *otmr) | ||
2232 | { | ||
2233 | int error; | ||
2234 | struct itimerspec t; | ||
2235 | struct itimerspec __user *ut; | ||
2220 | 2236 | ||
2221 | return sys_timerfd(ufd, clockid, flags, ut); | 2237 | ut = compat_alloc_user_space(sizeof(struct itimerspec)); |
2238 | error = sys_timerfd_gettime(ufd, ut); | ||
2239 | if (!error) | ||
2240 | error = (copy_from_user(&t, ut, sizeof(struct itimerspec)) || | ||
2241 | put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0; | ||
2242 | |||
2243 | return error; | ||
2222 | } | 2244 | } |
2223 | 2245 | ||
2224 | #endif /* CONFIG_TIMERFD */ | 2246 | #endif /* CONFIG_TIMERFD */ |
diff --git a/fs/direct-io.c b/fs/direct-io.c index acf0da1bd257..9e81addbd6ea 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -878,8 +878,8 @@ do_holes: | |||
878 | page_cache_release(page); | 878 | page_cache_release(page); |
879 | goto out; | 879 | goto out; |
880 | } | 880 | } |
881 | zero_user_page(page, block_in_page << blkbits, | 881 | zero_user(page, block_in_page << blkbits, |
882 | 1 << blkbits, KM_USER0); | 882 | 1 << blkbits); |
883 | dio->block_in_file++; | 883 | dio->block_in_file++; |
884 | block_in_page++; | 884 | block_in_page++; |
885 | goto next_block; | 885 | goto next_block; |
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 32c5711d79a3..0535412d8c64 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c | |||
@@ -257,8 +257,7 @@ static int fill_zeros_to_end_of_page(struct page *page, unsigned int to) | |||
257 | end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE; | 257 | end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE; |
258 | if (to > end_byte_in_page) | 258 | if (to > end_byte_in_page) |
259 | end_byte_in_page = to; | 259 | end_byte_in_page = to; |
260 | zero_user_page(page, end_byte_in_page, | 260 | zero_user_segment(page, end_byte_in_page, PAGE_CACHE_SIZE); |
261 | PAGE_CACHE_SIZE - end_byte_in_page, KM_USER0); | ||
262 | out: | 261 | out: |
263 | return 0; | 262 | return 0; |
264 | } | 263 | } |
@@ -307,7 +306,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, | |||
307 | */ | 306 | */ |
308 | if ((i_size_read(page->mapping->host) == prev_page_end_size) && | 307 | if ((i_size_read(page->mapping->host) == prev_page_end_size) && |
309 | (from != 0)) { | 308 | (from != 0)) { |
310 | zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); | 309 | zero_user(page, 0, PAGE_CACHE_SIZE); |
311 | } | 310 | } |
312 | out: | 311 | out: |
313 | return rc; | 312 | return rc; |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 81c04abfb1aa..a415f42d32cf 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -353,7 +353,7 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq) | |||
353 | spin_unlock_irqrestore(&psw->lock, flags); | 353 | spin_unlock_irqrestore(&psw->lock, flags); |
354 | 354 | ||
355 | /* Do really wake up now */ | 355 | /* Do really wake up now */ |
356 | wake_up(wq); | 356 | wake_up_nested(wq, 1 + wake_nests); |
357 | 357 | ||
358 | /* Remove the current task from the list */ | 358 | /* Remove the current task from the list */ |
359 | spin_lock_irqsave(&psw->lock, flags); | 359 | spin_lock_irqsave(&psw->lock, flags); |
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
@@ -760,7 +760,7 @@ static int de_thread(struct task_struct *tsk) | |||
760 | */ | 760 | */ |
761 | read_lock(&tasklist_lock); | 761 | read_lock(&tasklist_lock); |
762 | spin_lock_irq(lock); | 762 | spin_lock_irq(lock); |
763 | if (sig->flags & SIGNAL_GROUP_EXIT) { | 763 | if (signal_group_exit(sig)) { |
764 | /* | 764 | /* |
765 | * Another group action in progress, just | 765 | * Another group action in progress, just |
766 | * return so that the signal is processed. | 766 | * return so that the signal is processed. |
@@ -778,6 +778,7 @@ static int de_thread(struct task_struct *tsk) | |||
778 | if (unlikely(tsk->group_leader == task_child_reaper(tsk))) | 778 | if (unlikely(tsk->group_leader == task_child_reaper(tsk))) |
779 | task_active_pid_ns(tsk)->child_reaper = tsk; | 779 | task_active_pid_ns(tsk)->child_reaper = tsk; |
780 | 780 | ||
781 | sig->group_exit_task = tsk; | ||
781 | zap_other_threads(tsk); | 782 | zap_other_threads(tsk); |
782 | read_unlock(&tasklist_lock); | 783 | read_unlock(&tasklist_lock); |
783 | 784 | ||
@@ -802,7 +803,6 @@ static int de_thread(struct task_struct *tsk) | |||
802 | } | 803 | } |
803 | 804 | ||
804 | sig->notify_count = count; | 805 | sig->notify_count = count; |
805 | sig->group_exit_task = tsk; | ||
806 | while (atomic_read(&sig->count) > count) { | 806 | while (atomic_read(&sig->count) > count) { |
807 | __set_current_state(TASK_UNINTERRUPTIBLE); | 807 | __set_current_state(TASK_UNINTERRUPTIBLE); |
808 | spin_unlock_irq(lock); | 808 | spin_unlock_irq(lock); |
@@ -871,15 +871,10 @@ static int de_thread(struct task_struct *tsk) | |||
871 | leader->exit_state = EXIT_DEAD; | 871 | leader->exit_state = EXIT_DEAD; |
872 | 872 | ||
873 | write_unlock_irq(&tasklist_lock); | 873 | write_unlock_irq(&tasklist_lock); |
874 | } | 874 | } |
875 | 875 | ||
876 | sig->group_exit_task = NULL; | 876 | sig->group_exit_task = NULL; |
877 | sig->notify_count = 0; | 877 | sig->notify_count = 0; |
878 | /* | ||
879 | * There may be one thread left which is just exiting, | ||
880 | * but it's safe to stop telling the group to kill themselves. | ||
881 | */ | ||
882 | sig->flags = 0; | ||
883 | 878 | ||
884 | no_thread_group: | 879 | no_thread_group: |
885 | exit_itimers(sig); | 880 | exit_itimers(sig); |
@@ -947,12 +942,13 @@ static void flush_old_files(struct files_struct * files) | |||
947 | spin_unlock(&files->file_lock); | 942 | spin_unlock(&files->file_lock); |
948 | } | 943 | } |
949 | 944 | ||
950 | void get_task_comm(char *buf, struct task_struct *tsk) | 945 | char *get_task_comm(char *buf, struct task_struct *tsk) |
951 | { | 946 | { |
952 | /* buf must be at least sizeof(tsk->comm) in size */ | 947 | /* buf must be at least sizeof(tsk->comm) in size */ |
953 | task_lock(tsk); | 948 | task_lock(tsk); |
954 | strncpy(buf, tsk->comm, sizeof(tsk->comm)); | 949 | strncpy(buf, tsk->comm, sizeof(tsk->comm)); |
955 | task_unlock(tsk); | 950 | task_unlock(tsk); |
951 | return buf; | ||
956 | } | 952 | } |
957 | 953 | ||
958 | void set_task_comm(struct task_struct *tsk, char *buf) | 954 | void set_task_comm(struct task_struct *tsk, char *buf) |
@@ -1548,7 +1544,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, | |||
1548 | int err = -EAGAIN; | 1544 | int err = -EAGAIN; |
1549 | 1545 | ||
1550 | spin_lock_irq(&tsk->sighand->siglock); | 1546 | spin_lock_irq(&tsk->sighand->siglock); |
1551 | if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) { | 1547 | if (!signal_group_exit(tsk->signal)) { |
1552 | tsk->signal->group_exit_code = exit_code; | 1548 | tsk->signal->group_exit_code = exit_code; |
1553 | zap_process(tsk); | 1549 | zap_process(tsk); |
1554 | err = 0; | 1550 | err = 0; |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 9b162cd6c16c..077535439288 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -1845,7 +1845,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, | |||
1845 | */ | 1845 | */ |
1846 | if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && | 1846 | if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && |
1847 | ext3_should_writeback_data(inode) && PageUptodate(page)) { | 1847 | ext3_should_writeback_data(inode) && PageUptodate(page)) { |
1848 | zero_user_page(page, offset, length, KM_USER0); | 1848 | zero_user(page, offset, length); |
1849 | set_page_dirty(page); | 1849 | set_page_dirty(page); |
1850 | goto unlock; | 1850 | goto unlock; |
1851 | } | 1851 | } |
@@ -1898,7 +1898,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, | |||
1898 | goto unlock; | 1898 | goto unlock; |
1899 | } | 1899 | } |
1900 | 1900 | ||
1901 | zero_user_page(page, offset, length, KM_USER0); | 1901 | zero_user(page, offset, length); |
1902 | BUFFER_TRACE(bh, "zeroed end of block"); | 1902 | BUFFER_TRACE(bh, "zeroed end of block"); |
1903 | 1903 | ||
1904 | err = 0; | 1904 | err = 0; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bb717cbb749c..05c4145dd27d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -1840,7 +1840,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, | |||
1840 | */ | 1840 | */ |
1841 | if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && | 1841 | if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && |
1842 | ext4_should_writeback_data(inode) && PageUptodate(page)) { | 1842 | ext4_should_writeback_data(inode) && PageUptodate(page)) { |
1843 | zero_user_page(page, offset, length, KM_USER0); | 1843 | zero_user(page, offset, length); |
1844 | set_page_dirty(page); | 1844 | set_page_dirty(page); |
1845 | goto unlock; | 1845 | goto unlock; |
1846 | } | 1846 | } |
@@ -1893,7 +1893,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, | |||
1893 | goto unlock; | 1893 | goto unlock; |
1894 | } | 1894 | } |
1895 | 1895 | ||
1896 | zero_user_page(page, offset, length, KM_USER0); | 1896 | zero_user(page, offset, length); |
1897 | 1897 | ||
1898 | BUFFER_TRACE(bh, "zeroed end of block"); | 1898 | BUFFER_TRACE(bh, "zeroed end of block"); |
1899 | 1899 | ||
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 300324bd563c..0b3064079fa5 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -284,7 +284,17 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
284 | * soon as the queue becomes uncongested. | 284 | * soon as the queue becomes uncongested. |
285 | */ | 285 | */ |
286 | inode->i_state |= I_DIRTY_PAGES; | 286 | inode->i_state |= I_DIRTY_PAGES; |
287 | requeue_io(inode); | 287 | if (wbc->nr_to_write <= 0) { |
288 | /* | ||
289 | * slice used up: queue for next turn | ||
290 | */ | ||
291 | requeue_io(inode); | ||
292 | } else { | ||
293 | /* | ||
294 | * somehow blocked: retry later | ||
295 | */ | ||
296 | redirty_tail(inode); | ||
297 | } | ||
288 | } else { | 298 | } else { |
289 | /* | 299 | /* |
290 | * Otherwise fully redirty the inode so that | 300 | * Otherwise fully redirty the inode so that |
@@ -334,9 +344,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
334 | WARN_ON(inode->i_state & I_WILL_FREE); | 344 | WARN_ON(inode->i_state & I_WILL_FREE); |
335 | 345 | ||
336 | if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) { | 346 | if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) { |
337 | struct address_space *mapping = inode->i_mapping; | ||
338 | int ret; | ||
339 | |||
340 | /* | 347 | /* |
341 | * We're skipping this inode because it's locked, and we're not | 348 | * We're skipping this inode because it's locked, and we're not |
342 | * doing writeback-for-data-integrity. Move it to s_more_io so | 349 | * doing writeback-for-data-integrity. Move it to s_more_io so |
@@ -345,15 +352,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
345 | * completed a full scan of s_io. | 352 | * completed a full scan of s_io. |
346 | */ | 353 | */ |
347 | requeue_io(inode); | 354 | requeue_io(inode); |
348 | 355 | return 0; | |
349 | /* | ||
350 | * Even if we don't actually write the inode itself here, | ||
351 | * we can at least start some of the data writeout.. | ||
352 | */ | ||
353 | spin_unlock(&inode_lock); | ||
354 | ret = do_writepages(mapping, wbc); | ||
355 | spin_lock(&inode_lock); | ||
356 | return ret; | ||
357 | } | 356 | } |
358 | 357 | ||
359 | /* | 358 | /* |
@@ -479,8 +478,12 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) | |||
479 | iput(inode); | 478 | iput(inode); |
480 | cond_resched(); | 479 | cond_resched(); |
481 | spin_lock(&inode_lock); | 480 | spin_lock(&inode_lock); |
482 | if (wbc->nr_to_write <= 0) | 481 | if (wbc->nr_to_write <= 0) { |
482 | wbc->more_io = 1; | ||
483 | break; | 483 | break; |
484 | } | ||
485 | if (!list_empty(&sb->s_more_io)) | ||
486 | wbc->more_io = 1; | ||
484 | } | 487 | } |
485 | return; /* Leave any unwritten inodes on s_io */ | 488 | return; /* Leave any unwritten inodes on s_io */ |
486 | } | 489 | } |
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index e4effc47abfc..e9456ebd3bb6 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -932,7 +932,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping) | |||
932 | if (!gfs2_is_writeback(ip)) | 932 | if (!gfs2_is_writeback(ip)) |
933 | gfs2_trans_add_bh(ip->i_gl, bh, 0); | 933 | gfs2_trans_add_bh(ip->i_gl, bh, 0); |
934 | 934 | ||
935 | zero_user_page(page, offset, length, KM_USER0); | 935 | zero_user(page, offset, length); |
936 | 936 | ||
937 | unlock: | 937 | unlock: |
938 | unlock_page(page); | 938 | unlock_page(page); |
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 38dbe99a30ed..ac772b6d9dbb 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
@@ -446,7 +446,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) | |||
446 | * so we need to supply one here. It doesn't happen often. | 446 | * so we need to supply one here. It doesn't happen often. |
447 | */ | 447 | */ |
448 | if (unlikely(page->index)) { | 448 | if (unlikely(page->index)) { |
449 | zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); | 449 | zero_user(page, 0, PAGE_CACHE_SIZE); |
450 | return 0; | 450 | return 0; |
451 | } | 451 | } |
452 | 452 | ||
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 35c1a9f33f47..53fd0a67c11a 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c | |||
@@ -285,17 +285,17 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) | |||
285 | return err; | 285 | return err; |
286 | 286 | ||
287 | times[0].tv_sec = atime_ts.tv_sec; | 287 | times[0].tv_sec = atime_ts.tv_sec; |
288 | times[0].tv_usec = atime_ts.tv_nsec * 1000; | 288 | times[0].tv_usec = atime_ts.tv_nsec / 1000; |
289 | times[1].tv_sec = mtime_ts.tv_sec; | 289 | times[1].tv_sec = mtime_ts.tv_sec; |
290 | times[1].tv_usec = mtime_ts.tv_nsec * 1000; | 290 | times[1].tv_usec = mtime_ts.tv_nsec / 1000; |
291 | 291 | ||
292 | if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) { | 292 | if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) { |
293 | times[0].tv_sec = attrs->ia_atime.tv_sec; | 293 | times[0].tv_sec = attrs->ia_atime.tv_sec; |
294 | times[0].tv_usec = attrs->ia_atime.tv_nsec * 1000; | 294 | times[0].tv_usec = attrs->ia_atime.tv_nsec / 1000; |
295 | } | 295 | } |
296 | if (attrs->ia_valid & HOSTFS_ATTR_MTIME_SET) { | 296 | if (attrs->ia_valid & HOSTFS_ATTR_MTIME_SET) { |
297 | times[1].tv_sec = attrs->ia_mtime.tv_sec; | 297 | times[1].tv_sec = attrs->ia_mtime.tv_sec; |
298 | times[1].tv_usec = attrs->ia_mtime.tv_nsec * 1000; | 298 | times[1].tv_usec = attrs->ia_mtime.tv_nsec / 1000; |
299 | } | 299 | } |
300 | 300 | ||
301 | if (fd >= 0) { | 301 | if (fd >= 0) { |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 09ee07f02663..3b3cc28cdefc 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -768,7 +768,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) | |||
768 | case Opt_mode: | 768 | case Opt_mode: |
769 | if (match_octal(&args[0], &option)) | 769 | if (match_octal(&args[0], &option)) |
770 | goto bad_val; | 770 | goto bad_val; |
771 | pconfig->mode = option & 0777U; | 771 | pconfig->mode = option & 01777U; |
772 | break; | 772 | break; |
773 | 773 | ||
774 | case Opt_size: { | 774 | case Opt_size: { |
diff --git a/fs/libfs.c b/fs/libfs.c index 6e68b700958d..5523bde96387 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -341,13 +341,10 @@ int simple_prepare_write(struct file *file, struct page *page, | |||
341 | unsigned from, unsigned to) | 341 | unsigned from, unsigned to) |
342 | { | 342 | { |
343 | if (!PageUptodate(page)) { | 343 | if (!PageUptodate(page)) { |
344 | if (to - from != PAGE_CACHE_SIZE) { | 344 | if (to - from != PAGE_CACHE_SIZE) |
345 | void *kaddr = kmap_atomic(page, KM_USER0); | 345 | zero_user_segments(page, |
346 | memset(kaddr, 0, from); | 346 | 0, from, |
347 | memset(kaddr + to, 0, PAGE_CACHE_SIZE - to); | 347 | to, PAGE_CACHE_SIZE); |
348 | flush_dcache_page(page); | ||
349 | kunmap_atomic(kaddr, KM_USER0); | ||
350 | } | ||
351 | } | 348 | } |
352 | return 0; | 349 | return 0; |
353 | } | 350 | } |
diff --git a/fs/mpage.c b/fs/mpage.c index d54f8f897224..5df564366f36 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -276,9 +276,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, | |||
276 | } | 276 | } |
277 | 277 | ||
278 | if (first_hole != blocks_per_page) { | 278 | if (first_hole != blocks_per_page) { |
279 | zero_user_page(page, first_hole << blkbits, | 279 | zero_user_segment(page, first_hole << blkbits, PAGE_CACHE_SIZE); |
280 | PAGE_CACHE_SIZE - (first_hole << blkbits), | ||
281 | KM_USER0); | ||
282 | if (first_hole == 0) { | 280 | if (first_hole == 0) { |
283 | SetPageUptodate(page); | 281 | SetPageUptodate(page); |
284 | unlock_page(page); | 282 | unlock_page(page); |
@@ -571,8 +569,7 @@ page_is_mapped: | |||
571 | 569 | ||
572 | if (page->index > end_index || !offset) | 570 | if (page->index > end_index || !offset) |
573 | goto confused; | 571 | goto confused; |
574 | zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, | 572 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); |
575 | KM_USER0); | ||
576 | } | 573 | } |
577 | 574 | ||
578 | /* | 575 | /* |
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 8fd6dfbe1bc3..3d7d9631e125 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -79,7 +79,7 @@ void nfs_readdata_release(void *data) | |||
79 | static | 79 | static |
80 | int nfs_return_empty_page(struct page *page) | 80 | int nfs_return_empty_page(struct page *page) |
81 | { | 81 | { |
82 | zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); | 82 | zero_user(page, 0, PAGE_CACHE_SIZE); |
83 | SetPageUptodate(page); | 83 | SetPageUptodate(page); |
84 | unlock_page(page); | 84 | unlock_page(page); |
85 | return 0; | 85 | return 0; |
@@ -103,10 +103,10 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) | |||
103 | pglen = PAGE_CACHE_SIZE - base; | 103 | pglen = PAGE_CACHE_SIZE - base; |
104 | for (;;) { | 104 | for (;;) { |
105 | if (remainder <= pglen) { | 105 | if (remainder <= pglen) { |
106 | zero_user_page(*pages, base, remainder, KM_USER0); | 106 | zero_user(*pages, base, remainder); |
107 | break; | 107 | break; |
108 | } | 108 | } |
109 | zero_user_page(*pages, base, pglen, KM_USER0); | 109 | zero_user(*pages, base, pglen); |
110 | pages++; | 110 | pages++; |
111 | remainder -= pglen; | 111 | remainder -= pglen; |
112 | pglen = PAGE_CACHE_SIZE; | 112 | pglen = PAGE_CACHE_SIZE; |
@@ -130,7 +130,7 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||
130 | return PTR_ERR(new); | 130 | return PTR_ERR(new); |
131 | } | 131 | } |
132 | if (len < PAGE_CACHE_SIZE) | 132 | if (len < PAGE_CACHE_SIZE) |
133 | zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); | 133 | zero_user_segment(page, len, PAGE_CACHE_SIZE); |
134 | 134 | ||
135 | nfs_list_add_request(new, &one_request); | 135 | nfs_list_add_request(new, &one_request); |
136 | if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) | 136 | if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) |
@@ -532,7 +532,7 @@ readpage_async_filler(void *data, struct page *page) | |||
532 | goto out_error; | 532 | goto out_error; |
533 | 533 | ||
534 | if (len < PAGE_CACHE_SIZE) | 534 | if (len < PAGE_CACHE_SIZE) |
535 | zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); | 535 | zero_user_segment(page, len, PAGE_CACHE_SIZE); |
536 | nfs_pageio_add_request(desc->pgio, new); | 536 | nfs_pageio_add_request(desc->pgio, new); |
537 | return 0; | 537 | return 0; |
538 | out_error: | 538 | out_error: |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 522efff3e2c5..b144b1957dd9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -665,9 +665,7 @@ zero_page: | |||
665 | * then we need to zero any uninitalised data. */ | 665 | * then we need to zero any uninitalised data. */ |
666 | if (req->wb_pgbase == 0 && req->wb_bytes != PAGE_CACHE_SIZE | 666 | if (req->wb_pgbase == 0 && req->wb_bytes != PAGE_CACHE_SIZE |
667 | && !PageUptodate(req->wb_page)) | 667 | && !PageUptodate(req->wb_page)) |
668 | zero_user_page(req->wb_page, req->wb_bytes, | 668 | zero_user_segment(req->wb_page, req->wb_bytes, PAGE_CACHE_SIZE); |
669 | PAGE_CACHE_SIZE - req->wb_bytes, | ||
670 | KM_USER0); | ||
671 | return req; | 669 | return req; |
672 | } | 670 | } |
673 | 671 | ||
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 21928056e35e..d13403e33622 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c | |||
@@ -11,8 +11,6 @@ | |||
11 | #include <linux/nfsd/nfsd.h> | 11 | #include <linux/nfsd/nfsd.h> |
12 | #include <linux/nfsd/export.h> | 12 | #include <linux/nfsd/export.h> |
13 | 13 | ||
14 | #define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE)) | ||
15 | |||
16 | int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) | 14 | int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) |
17 | { | 15 | { |
18 | struct exp_flavor_info *f; | 16 | struct exp_flavor_info *f; |
@@ -69,10 +67,12 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) | |||
69 | ret = set_current_groups(cred.cr_group_info); | 67 | ret = set_current_groups(cred.cr_group_info); |
70 | put_group_info(cred.cr_group_info); | 68 | put_group_info(cred.cr_group_info); |
71 | if ((cred.cr_uid)) { | 69 | if ((cred.cr_uid)) { |
72 | cap_t(current->cap_effective) &= ~CAP_NFSD_MASK; | 70 | current->cap_effective = |
71 | cap_drop_nfsd_set(current->cap_effective); | ||
73 | } else { | 72 | } else { |
74 | cap_t(current->cap_effective) |= (CAP_NFSD_MASK & | 73 | current->cap_effective = |
75 | current->cap_permitted); | 74 | cap_raise_nfsd_set(current->cap_effective, |
75 | current->cap_permitted); | ||
76 | } | 76 | } |
77 | return ret; | 77 | return ret; |
78 | } | 78 | } |
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index ad87cb01299b..00e9ccde8e42 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c | |||
@@ -87,13 +87,17 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |||
87 | /* Check for the current buffer head overflowing. */ | 87 | /* Check for the current buffer head overflowing. */ |
88 | if (unlikely(file_ofs + bh->b_size > init_size)) { | 88 | if (unlikely(file_ofs + bh->b_size > init_size)) { |
89 | int ofs; | 89 | int ofs; |
90 | void *kaddr; | ||
90 | 91 | ||
91 | ofs = 0; | 92 | ofs = 0; |
92 | if (file_ofs < init_size) | 93 | if (file_ofs < init_size) |
93 | ofs = init_size - file_ofs; | 94 | ofs = init_size - file_ofs; |
94 | local_irq_save(flags); | 95 | local_irq_save(flags); |
95 | zero_user_page(page, bh_offset(bh) + ofs, | 96 | kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); |
96 | bh->b_size - ofs, KM_BIO_SRC_IRQ); | 97 | memset(kaddr + bh_offset(bh) + ofs, 0, |
98 | bh->b_size - ofs); | ||
99 | flush_dcache_page(page); | ||
100 | kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); | ||
97 | local_irq_restore(flags); | 101 | local_irq_restore(flags); |
98 | } | 102 | } |
99 | } else { | 103 | } else { |
@@ -334,7 +338,7 @@ handle_hole: | |||
334 | bh->b_blocknr = -1UL; | 338 | bh->b_blocknr = -1UL; |
335 | clear_buffer_mapped(bh); | 339 | clear_buffer_mapped(bh); |
336 | handle_zblock: | 340 | handle_zblock: |
337 | zero_user_page(page, i * blocksize, blocksize, KM_USER0); | 341 | zero_user(page, i * blocksize, blocksize); |
338 | if (likely(!err)) | 342 | if (likely(!err)) |
339 | set_buffer_uptodate(bh); | 343 | set_buffer_uptodate(bh); |
340 | } while (i++, iblock++, (bh = bh->b_this_page) != head); | 344 | } while (i++, iblock++, (bh = bh->b_this_page) != head); |
@@ -410,7 +414,7 @@ retry_readpage: | |||
410 | /* Is the page fully outside i_size? (truncate in progress) */ | 414 | /* Is the page fully outside i_size? (truncate in progress) */ |
411 | if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >> | 415 | if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >> |
412 | PAGE_CACHE_SHIFT)) { | 416 | PAGE_CACHE_SHIFT)) { |
413 | zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); | 417 | zero_user(page, 0, PAGE_CACHE_SIZE); |
414 | ntfs_debug("Read outside i_size - truncated?"); | 418 | ntfs_debug("Read outside i_size - truncated?"); |
415 | goto done; | 419 | goto done; |
416 | } | 420 | } |
@@ -459,7 +463,7 @@ retry_readpage: | |||
459 | * ok to ignore the compressed flag here. | 463 | * ok to ignore the compressed flag here. |
460 | */ | 464 | */ |
461 | if (unlikely(page->index > 0)) { | 465 | if (unlikely(page->index > 0)) { |
462 | zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); | 466 | zero_user(page, 0, PAGE_CACHE_SIZE); |
463 | goto done; | 467 | goto done; |
464 | } | 468 | } |
465 | if (!NInoAttr(ni)) | 469 | if (!NInoAttr(ni)) |
@@ -788,8 +792,7 @@ lock_retry_remap: | |||
788 | if (err == -ENOENT || lcn == LCN_ENOENT) { | 792 | if (err == -ENOENT || lcn == LCN_ENOENT) { |
789 | bh->b_blocknr = -1; | 793 | bh->b_blocknr = -1; |
790 | clear_buffer_dirty(bh); | 794 | clear_buffer_dirty(bh); |
791 | zero_user_page(page, bh_offset(bh), blocksize, | 795 | zero_user(page, bh_offset(bh), blocksize); |
792 | KM_USER0); | ||
793 | set_buffer_uptodate(bh); | 796 | set_buffer_uptodate(bh); |
794 | err = 0; | 797 | err = 0; |
795 | continue; | 798 | continue; |
@@ -1414,8 +1417,7 @@ retry_writepage: | |||
1414 | if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) { | 1417 | if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) { |
1415 | /* The page straddles i_size. */ | 1418 | /* The page straddles i_size. */ |
1416 | unsigned int ofs = i_size & ~PAGE_CACHE_MASK; | 1419 | unsigned int ofs = i_size & ~PAGE_CACHE_MASK; |
1417 | zero_user_page(page, ofs, PAGE_CACHE_SIZE - ofs, | 1420 | zero_user_segment(page, ofs, PAGE_CACHE_SIZE); |
1418 | KM_USER0); | ||
1419 | } | 1421 | } |
1420 | /* Handle mst protected attributes. */ | 1422 | /* Handle mst protected attributes. */ |
1421 | if (NInoMstProtected(ni)) | 1423 | if (NInoMstProtected(ni)) |
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index d1619d05eb23..33ff314cc507 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c | |||
@@ -565,7 +565,7 @@ int ntfs_read_compressed_block(struct page *page) | |||
565 | if (xpage >= max_page) { | 565 | if (xpage >= max_page) { |
566 | kfree(bhs); | 566 | kfree(bhs); |
567 | kfree(pages); | 567 | kfree(pages); |
568 | zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); | 568 | zero_user(page, 0, PAGE_CACHE_SIZE); |
569 | ntfs_debug("Compressed read outside i_size - truncated?"); | 569 | ntfs_debug("Compressed read outside i_size - truncated?"); |
570 | SetPageUptodate(page); | 570 | SetPageUptodate(page); |
571 | unlock_page(page); | 571 | unlock_page(page); |
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 6cd08dfdc2ed..3c5550cd11d6 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
@@ -607,8 +607,8 @@ do_next_page: | |||
607 | ntfs_submit_bh_for_read(bh); | 607 | ntfs_submit_bh_for_read(bh); |
608 | *wait_bh++ = bh; | 608 | *wait_bh++ = bh; |
609 | } else { | 609 | } else { |
610 | zero_user_page(page, bh_offset(bh), | 610 | zero_user(page, bh_offset(bh), |
611 | blocksize, KM_USER0); | 611 | blocksize); |
612 | set_buffer_uptodate(bh); | 612 | set_buffer_uptodate(bh); |
613 | } | 613 | } |
614 | } | 614 | } |
@@ -683,9 +683,8 @@ map_buffer_cached: | |||
683 | ntfs_submit_bh_for_read(bh); | 683 | ntfs_submit_bh_for_read(bh); |
684 | *wait_bh++ = bh; | 684 | *wait_bh++ = bh; |
685 | } else { | 685 | } else { |
686 | zero_user_page(page, | 686 | zero_user(page, bh_offset(bh), |
687 | bh_offset(bh), | 687 | blocksize); |
688 | blocksize, KM_USER0); | ||
689 | set_buffer_uptodate(bh); | 688 | set_buffer_uptodate(bh); |
690 | } | 689 | } |
691 | } | 690 | } |
@@ -703,8 +702,8 @@ map_buffer_cached: | |||
703 | */ | 702 | */ |
704 | if (bh_end <= pos || bh_pos >= end) { | 703 | if (bh_end <= pos || bh_pos >= end) { |
705 | if (!buffer_uptodate(bh)) { | 704 | if (!buffer_uptodate(bh)) { |
706 | zero_user_page(page, bh_offset(bh), | 705 | zero_user(page, bh_offset(bh), |
707 | blocksize, KM_USER0); | 706 | blocksize); |
708 | set_buffer_uptodate(bh); | 707 | set_buffer_uptodate(bh); |
709 | } | 708 | } |
710 | mark_buffer_dirty(bh); | 709 | mark_buffer_dirty(bh); |
@@ -743,8 +742,7 @@ map_buffer_cached: | |||
743 | if (!buffer_uptodate(bh)) | 742 | if (!buffer_uptodate(bh)) |
744 | set_buffer_uptodate(bh); | 743 | set_buffer_uptodate(bh); |
745 | } else if (!buffer_uptodate(bh)) { | 744 | } else if (!buffer_uptodate(bh)) { |
746 | zero_user_page(page, bh_offset(bh), blocksize, | 745 | zero_user(page, bh_offset(bh), blocksize); |
747 | KM_USER0); | ||
748 | set_buffer_uptodate(bh); | 746 | set_buffer_uptodate(bh); |
749 | } | 747 | } |
750 | continue; | 748 | continue; |
@@ -868,8 +866,8 @@ rl_not_mapped_enoent: | |||
868 | if (!buffer_uptodate(bh)) | 866 | if (!buffer_uptodate(bh)) |
869 | set_buffer_uptodate(bh); | 867 | set_buffer_uptodate(bh); |
870 | } else if (!buffer_uptodate(bh)) { | 868 | } else if (!buffer_uptodate(bh)) { |
871 | zero_user_page(page, bh_offset(bh), | 869 | zero_user(page, bh_offset(bh), |
872 | blocksize, KM_USER0); | 870 | blocksize); |
873 | set_buffer_uptodate(bh); | 871 | set_buffer_uptodate(bh); |
874 | } | 872 | } |
875 | continue; | 873 | continue; |
@@ -1128,8 +1126,8 @@ rl_not_mapped_enoent: | |||
1128 | 1126 | ||
1129 | if (likely(bh_pos < initialized_size)) | 1127 | if (likely(bh_pos < initialized_size)) |
1130 | ofs = initialized_size - bh_pos; | 1128 | ofs = initialized_size - bh_pos; |
1131 | zero_user_page(page, bh_offset(bh) + ofs, | 1129 | zero_user_segment(page, bh_offset(bh) + ofs, |
1132 | blocksize - ofs, KM_USER0); | 1130 | blocksize); |
1133 | } | 1131 | } |
1134 | } else /* if (unlikely(!buffer_uptodate(bh))) */ | 1132 | } else /* if (unlikely(!buffer_uptodate(bh))) */ |
1135 | err = -EIO; | 1133 | err = -EIO; |
@@ -1269,8 +1267,8 @@ rl_not_mapped_enoent: | |||
1269 | if (PageUptodate(page)) | 1267 | if (PageUptodate(page)) |
1270 | set_buffer_uptodate(bh); | 1268 | set_buffer_uptodate(bh); |
1271 | else { | 1269 | else { |
1272 | zero_user_page(page, bh_offset(bh), | 1270 | zero_user(page, bh_offset(bh), |
1273 | blocksize, KM_USER0); | 1271 | blocksize); |
1274 | set_buffer_uptodate(bh); | 1272 | set_buffer_uptodate(bh); |
1275 | } | 1273 | } |
1276 | } | 1274 | } |
@@ -1330,7 +1328,7 @@ err_out: | |||
1330 | len = PAGE_CACHE_SIZE; | 1328 | len = PAGE_CACHE_SIZE; |
1331 | if (len > bytes) | 1329 | if (len > bytes) |
1332 | len = bytes; | 1330 | len = bytes; |
1333 | zero_user_page(*pages, 0, len, KM_USER0); | 1331 | zero_user(*pages, 0, len); |
1334 | } | 1332 | } |
1335 | goto out; | 1333 | goto out; |
1336 | } | 1334 | } |
@@ -1451,7 +1449,7 @@ err_out: | |||
1451 | len = PAGE_CACHE_SIZE; | 1449 | len = PAGE_CACHE_SIZE; |
1452 | if (len > bytes) | 1450 | if (len > bytes) |
1453 | len = bytes; | 1451 | len = bytes; |
1454 | zero_user_page(*pages, 0, len, KM_USER0); | 1452 | zero_user(*pages, 0, len); |
1455 | } | 1453 | } |
1456 | goto out; | 1454 | goto out; |
1457 | } | 1455 | } |
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index e38e402e4103..cd0be3f5c3cd 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h | |||
@@ -85,8 +85,7 @@ static inline void *ntfs_malloc_nofs_nofail(unsigned long size) | |||
85 | 85 | ||
86 | static inline void ntfs_free(void *addr) | 86 | static inline void ntfs_free(void *addr) |
87 | { | 87 | { |
88 | if (likely(((unsigned long)addr < VMALLOC_START) || | 88 | if (!is_vmalloc_addr(addr)) { |
89 | ((unsigned long)addr >= VMALLOC_END ))) { | ||
90 | kfree(addr); | 89 | kfree(addr); |
91 | /* free_page((unsigned long)addr); */ | 90 | /* free_page((unsigned long)addr); */ |
92 | return; | 91 | return; |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 64713e149e46..447206eb5c2e 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -5670,7 +5670,7 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, | |||
5670 | mlog_errno(ret); | 5670 | mlog_errno(ret); |
5671 | 5671 | ||
5672 | if (zero) | 5672 | if (zero) |
5673 | zero_user_page(page, from, to - from, KM_USER0); | 5673 | zero_user_segment(page, from, to); |
5674 | 5674 | ||
5675 | /* | 5675 | /* |
5676 | * Need to set the buffers we zero'd into uptodate | 5676 | * Need to set the buffers we zero'd into uptodate |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index bc7b4cbbe8ec..82243127eebf 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -307,7 +307,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) | |||
307 | * XXX sys_readahead() seems to get that wrong? | 307 | * XXX sys_readahead() seems to get that wrong? |
308 | */ | 308 | */ |
309 | if (start >= i_size_read(inode)) { | 309 | if (start >= i_size_read(inode)) { |
310 | zero_user_page(page, 0, PAGE_SIZE, KM_USER0); | 310 | zero_user(page, 0, PAGE_SIZE); |
311 | SetPageUptodate(page); | 311 | SetPageUptodate(page); |
312 | ret = 0; | 312 | ret = 0; |
313 | goto out_alloc; | 313 | goto out_alloc; |
@@ -869,7 +869,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, | |||
869 | if (block_start >= to) | 869 | if (block_start >= to) |
870 | break; | 870 | break; |
871 | 871 | ||
872 | zero_user_page(page, block_start, bh->b_size, KM_USER0); | 872 | zero_user(page, block_start, bh->b_size); |
873 | set_buffer_uptodate(bh); | 873 | set_buffer_uptodate(bh); |
874 | mark_buffer_dirty(bh); | 874 | mark_buffer_dirty(bh); |
875 | 875 | ||
@@ -1034,7 +1034,7 @@ static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to | |||
1034 | start = max(from, block_start); | 1034 | start = max(from, block_start); |
1035 | end = min(to, block_end); | 1035 | end = min(to, block_end); |
1036 | 1036 | ||
1037 | zero_user_page(page, start, end - start, KM_USER0); | 1037 | zero_user_segment(page, start, end); |
1038 | set_buffer_uptodate(bh); | 1038 | set_buffer_uptodate(bh); |
1039 | } | 1039 | } |
1040 | 1040 | ||
diff --git a/fs/proc/array.c b/fs/proc/array.c index b380313092bd..6ba2746e4517 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -281,14 +281,23 @@ static inline char *task_sig(struct task_struct *p, char *buffer) | |||
281 | return buffer; | 281 | return buffer; |
282 | } | 282 | } |
283 | 283 | ||
284 | static char *render_cap_t(const char *header, kernel_cap_t *a, char *buffer) | ||
285 | { | ||
286 | unsigned __capi; | ||
287 | |||
288 | buffer += sprintf(buffer, "%s", header); | ||
289 | CAP_FOR_EACH_U32(__capi) { | ||
290 | buffer += sprintf(buffer, "%08x", | ||
291 | a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]); | ||
292 | } | ||
293 | return buffer + sprintf(buffer, "\n"); | ||
294 | } | ||
295 | |||
284 | static inline char *task_cap(struct task_struct *p, char *buffer) | 296 | static inline char *task_cap(struct task_struct *p, char *buffer) |
285 | { | 297 | { |
286 | return buffer + sprintf(buffer, "CapInh:\t%016x\n" | 298 | buffer = render_cap_t("CapInh:\t", &p->cap_inheritable, buffer); |
287 | "CapPrm:\t%016x\n" | 299 | buffer = render_cap_t("CapPrm:\t", &p->cap_permitted, buffer); |
288 | "CapEff:\t%016x\n", | 300 | return render_cap_t("CapEff:\t", &p->cap_effective, buffer); |
289 | cap_t(p->cap_inheritable), | ||
290 | cap_t(p->cap_permitted), | ||
291 | cap_t(p->cap_effective)); | ||
292 | } | 301 | } |
293 | 302 | ||
294 | static inline char *task_context_switch_counts(struct task_struct *p, | 303 | static inline char *task_context_switch_counts(struct task_struct *p, |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 33537487f5ab..c59852b38787 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -88,10 +88,6 @@ | |||
88 | * in /proc for a task before it execs a suid executable. | 88 | * in /proc for a task before it execs a suid executable. |
89 | */ | 89 | */ |
90 | 90 | ||
91 | |||
92 | /* Worst case buffer size needed for holding an integer. */ | ||
93 | #define PROC_NUMBUF 13 | ||
94 | |||
95 | struct pid_entry { | 91 | struct pid_entry { |
96 | char *name; | 92 | char *name; |
97 | int len; | 93 | int len; |
@@ -787,7 +783,7 @@ out_no_task: | |||
787 | } | 783 | } |
788 | #endif | 784 | #endif |
789 | 785 | ||
790 | static loff_t mem_lseek(struct file * file, loff_t offset, int orig) | 786 | loff_t mem_lseek(struct file *file, loff_t offset, int orig) |
791 | { | 787 | { |
792 | switch (orig) { | 788 | switch (orig) { |
793 | case 0: | 789 | case 0: |
@@ -935,42 +931,6 @@ static const struct file_operations proc_oom_adjust_operations = { | |||
935 | .write = oom_adjust_write, | 931 | .write = oom_adjust_write, |
936 | }; | 932 | }; |
937 | 933 | ||
938 | #ifdef CONFIG_MMU | ||
939 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, | ||
940 | size_t count, loff_t *ppos) | ||
941 | { | ||
942 | struct task_struct *task; | ||
943 | char buffer[PROC_NUMBUF], *end; | ||
944 | struct mm_struct *mm; | ||
945 | |||
946 | memset(buffer, 0, sizeof(buffer)); | ||
947 | if (count > sizeof(buffer) - 1) | ||
948 | count = sizeof(buffer) - 1; | ||
949 | if (copy_from_user(buffer, buf, count)) | ||
950 | return -EFAULT; | ||
951 | if (!simple_strtol(buffer, &end, 0)) | ||
952 | return -EINVAL; | ||
953 | if (*end == '\n') | ||
954 | end++; | ||
955 | task = get_proc_task(file->f_path.dentry->d_inode); | ||
956 | if (!task) | ||
957 | return -ESRCH; | ||
958 | mm = get_task_mm(task); | ||
959 | if (mm) { | ||
960 | clear_refs_smap(mm); | ||
961 | mmput(mm); | ||
962 | } | ||
963 | put_task_struct(task); | ||
964 | if (end - buffer == 0) | ||
965 | return -EIO; | ||
966 | return end - buffer; | ||
967 | } | ||
968 | |||
969 | static struct file_operations proc_clear_refs_operations = { | ||
970 | .write = clear_refs_write, | ||
971 | }; | ||
972 | #endif | ||
973 | |||
974 | #ifdef CONFIG_AUDITSYSCALL | 934 | #ifdef CONFIG_AUDITSYSCALL |
975 | #define TMPBUFLEN 21 | 935 | #define TMPBUFLEN 21 |
976 | static ssize_t proc_loginuid_read(struct file * file, char __user * buf, | 936 | static ssize_t proc_loginuid_read(struct file * file, char __user * buf, |
@@ -2289,9 +2249,10 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2289 | LNK("exe", exe), | 2249 | LNK("exe", exe), |
2290 | REG("mounts", S_IRUGO, mounts), | 2250 | REG("mounts", S_IRUGO, mounts), |
2291 | REG("mountstats", S_IRUSR, mountstats), | 2251 | REG("mountstats", S_IRUSR, mountstats), |
2292 | #ifdef CONFIG_MMU | 2252 | #ifdef CONFIG_PROC_PAGE_MONITOR |
2293 | REG("clear_refs", S_IWUSR, clear_refs), | 2253 | REG("clear_refs", S_IWUSR, clear_refs), |
2294 | REG("smaps", S_IRUGO, smaps), | 2254 | REG("smaps", S_IRUGO, smaps), |
2255 | REG("pagemap", S_IRUSR, pagemap), | ||
2295 | #endif | 2256 | #endif |
2296 | #ifdef CONFIG_SECURITY | 2257 | #ifdef CONFIG_SECURITY |
2297 | DIR("attr", S_IRUGO|S_IXUGO, attr_dir), | 2258 | DIR("attr", S_IRUGO|S_IXUGO, attr_dir), |
@@ -2360,7 +2321,8 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) | |||
2360 | name.len = snprintf(buf, sizeof(buf), "%d", pid); | 2321 | name.len = snprintf(buf, sizeof(buf), "%d", pid); |
2361 | dentry = d_hash_and_lookup(mnt->mnt_root, &name); | 2322 | dentry = d_hash_and_lookup(mnt->mnt_root, &name); |
2362 | if (dentry) { | 2323 | if (dentry) { |
2363 | shrink_dcache_parent(dentry); | 2324 | if (!(current->flags & PF_EXITING)) |
2325 | shrink_dcache_parent(dentry); | ||
2364 | d_drop(dentry); | 2326 | d_drop(dentry); |
2365 | dput(dentry); | 2327 | dput(dentry); |
2366 | } | 2328 | } |
@@ -2617,9 +2579,10 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2617 | LNK("root", root), | 2579 | LNK("root", root), |
2618 | LNK("exe", exe), | 2580 | LNK("exe", exe), |
2619 | REG("mounts", S_IRUGO, mounts), | 2581 | REG("mounts", S_IRUGO, mounts), |
2620 | #ifdef CONFIG_MMU | 2582 | #ifdef CONFIG_PROC_PAGE_MONITOR |
2621 | REG("clear_refs", S_IWUSR, clear_refs), | 2583 | REG("clear_refs", S_IWUSR, clear_refs), |
2622 | REG("smaps", S_IRUGO, smaps), | 2584 | REG("smaps", S_IRUGO, smaps), |
2585 | REG("pagemap", S_IRUSR, pagemap), | ||
2623 | #endif | 2586 | #endif |
2624 | #ifdef CONFIG_SECURITY | 2587 | #ifdef CONFIG_SECURITY |
2625 | DIR("attr", S_IRUGO|S_IXUGO, attr_dir), | 2588 | DIR("attr", S_IRUGO|S_IXUGO, attr_dir), |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 05b3e9006262..7d57e8069924 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -52,15 +52,13 @@ extern int proc_tid_stat(struct task_struct *, char *); | |||
52 | extern int proc_tgid_stat(struct task_struct *, char *); | 52 | extern int proc_tgid_stat(struct task_struct *, char *); |
53 | extern int proc_pid_status(struct task_struct *, char *); | 53 | extern int proc_pid_status(struct task_struct *, char *); |
54 | extern int proc_pid_statm(struct task_struct *, char *); | 54 | extern int proc_pid_statm(struct task_struct *, char *); |
55 | extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); | ||
55 | 56 | ||
56 | extern const struct file_operations proc_maps_operations; | 57 | extern const struct file_operations proc_maps_operations; |
57 | extern const struct file_operations proc_numa_maps_operations; | 58 | extern const struct file_operations proc_numa_maps_operations; |
58 | extern const struct file_operations proc_smaps_operations; | 59 | extern const struct file_operations proc_smaps_operations; |
59 | 60 | extern const struct file_operations proc_clear_refs_operations; | |
60 | extern const struct file_operations proc_maps_operations; | 61 | extern const struct file_operations proc_pagemap_operations; |
61 | extern const struct file_operations proc_numa_maps_operations; | ||
62 | extern const struct file_operations proc_smaps_operations; | ||
63 | |||
64 | 62 | ||
65 | void free_proc_entry(struct proc_dir_entry *de); | 63 | void free_proc_entry(struct proc_dir_entry *de); |
66 | 64 | ||
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 1be73082edd3..7dd26e18cbfd 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
@@ -325,7 +325,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) | |||
325 | if (m == NULL) { | 325 | if (m == NULL) { |
326 | if (clear_user(buffer, tsz)) | 326 | if (clear_user(buffer, tsz)) |
327 | return -EFAULT; | 327 | return -EFAULT; |
328 | } else if ((start >= VMALLOC_START) && (start < VMALLOC_END)) { | 328 | } else if (is_vmalloc_addr((void *)start)) { |
329 | char * elf_buf; | 329 | char * elf_buf; |
330 | struct vm_struct *m; | 330 | struct vm_struct *m; |
331 | unsigned long curstart = start; | 331 | unsigned long curstart = start; |
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 3462bfde89f6..51288db37a0c 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include <linux/vmalloc.h> | 46 | #include <linux/vmalloc.h> |
47 | #include <linux/crash_dump.h> | 47 | #include <linux/crash_dump.h> |
48 | #include <linux/pid_namespace.h> | 48 | #include <linux/pid_namespace.h> |
49 | #include <linux/bootmem.h> | ||
49 | #include <asm/uaccess.h> | 50 | #include <asm/uaccess.h> |
50 | #include <asm/pgtable.h> | 51 | #include <asm/pgtable.h> |
51 | #include <asm/io.h> | 52 | #include <asm/io.h> |
@@ -675,6 +676,137 @@ static const struct file_operations proc_sysrq_trigger_operations = { | |||
675 | }; | 676 | }; |
676 | #endif | 677 | #endif |
677 | 678 | ||
679 | #ifdef CONFIG_PROC_PAGE_MONITOR | ||
680 | #define KPMSIZE sizeof(u64) | ||
681 | #define KPMMASK (KPMSIZE - 1) | ||
682 | /* /proc/kpagecount - an array exposing page counts | ||
683 | * | ||
684 | * Each entry is a u64 representing the corresponding | ||
685 | * physical page count. | ||
686 | */ | ||
687 | static ssize_t kpagecount_read(struct file *file, char __user *buf, | ||
688 | size_t count, loff_t *ppos) | ||
689 | { | ||
690 | u64 __user *out = (u64 __user *)buf; | ||
691 | struct page *ppage; | ||
692 | unsigned long src = *ppos; | ||
693 | unsigned long pfn; | ||
694 | ssize_t ret = 0; | ||
695 | u64 pcount; | ||
696 | |||
697 | pfn = src / KPMSIZE; | ||
698 | count = src < max_pfn * KPMSIZE ? min_t(size_t, count, (max_pfn * KPMSIZE) - src) : 0; /* at or past the end: read nothing rather than let the subtraction wrap */ | ||
699 | if (src & KPMMASK || count & KPMMASK) | ||
700 | return -EIO; | ||
701 | /* One u64 is emitted per pfn; a pfn that fails pfn_valid() is reported as 0. */ | ||
702 | while (count > 0) { | ||
703 | ppage = NULL; | ||
704 | if (pfn_valid(pfn)) | ||
705 | ppage = pfn_to_page(pfn); | ||
706 | pfn++; | ||
707 | if (!ppage) | ||
708 | pcount = 0; | ||
709 | else | ||
710 | pcount = atomic_read(&ppage->_count); | ||
711 | |||
712 | if (put_user(pcount, out++)) { | ||
713 | ret = -EFAULT; | ||
714 | break; | ||
715 | } | ||
716 | |||
717 | count -= KPMSIZE; | ||
718 | } | ||
719 | |||
720 | *ppos += (char __user *)out - buf; | ||
721 | if (!ret) | ||
722 | ret = (char __user *)out - buf; | ||
723 | return ret; | ||
724 | } | ||
725 | |||
726 | static const struct file_operations proc_kpagecount_operations = { | ||
727 | .llseek = mem_lseek, | ||
728 | .read = kpagecount_read, | ||
729 | }; | ||
730 | |||
731 | /* /proc/kpageflags - an array exposing page flags | ||
732 | * | ||
733 | * Each entry is a u64 representing the corresponding | ||
734 | * physical page flags. | ||
735 | */ | ||
736 | |||
737 | /* These macros are used to decouple internal flags from exported ones */ | ||
738 | |||
739 | #define KPF_LOCKED 0 | ||
740 | #define KPF_ERROR 1 | ||
741 | #define KPF_REFERENCED 2 | ||
742 | #define KPF_UPTODATE 3 | ||
743 | #define KPF_DIRTY 4 | ||
744 | #define KPF_LRU 5 | ||
745 | #define KPF_ACTIVE 6 | ||
746 | #define KPF_SLAB 7 | ||
747 | #define KPF_WRITEBACK 8 | ||
748 | #define KPF_RECLAIM 9 | ||
749 | #define KPF_BUDDY 10 | ||
750 | |||
751 | #define kpf_copy_bit(flags, srcpos, dstpos) (((flags >> srcpos) & 1) << dstpos) | ||
752 | |||
753 | static ssize_t kpageflags_read(struct file *file, char __user *buf, | ||
754 | size_t count, loff_t *ppos) | ||
755 | { | ||
756 | u64 __user *out = (u64 __user *)buf; | ||
757 | struct page *ppage; | ||
758 | unsigned long src = *ppos; | ||
759 | unsigned long pfn; | ||
760 | ssize_t ret = 0; | ||
761 | u64 kflags, uflags; | ||
762 | |||
763 | pfn = src / KPMSIZE; | ||
764 | count = src < max_pfn * KPMSIZE ? min_t(unsigned long, count, (max_pfn * KPMSIZE) - src) : 0; /* at or past the end: read nothing rather than let the subtraction wrap */ | ||
765 | if (src & KPMMASK || count & KPMMASK) | ||
766 | return -EIO; | ||
767 | |||
768 | while (count > 0) { | ||
769 | ppage = NULL; | ||
770 | if (pfn_valid(pfn)) | ||
771 | ppage = pfn_to_page(pfn); | ||
772 | pfn++; | ||
773 | if (!ppage) | ||
774 | kflags = 0; | ||
775 | else | ||
776 | kflags = ppage->flags; | ||
777 | /* kpf_copy_bit(flags, srcpos, dstpos): source is the kernel PG_* bit, destination the exported KPF_* bit. */ | ||
778 | uflags = kpf_copy_bit(kflags, PG_locked, KPF_LOCKED) | | ||
779 | kpf_copy_bit(kflags, PG_error, KPF_ERROR) | | ||
780 | kpf_copy_bit(kflags, PG_referenced, KPF_REFERENCED) | | ||
781 | kpf_copy_bit(kflags, PG_uptodate, KPF_UPTODATE) | | ||
782 | kpf_copy_bit(kflags, PG_dirty, KPF_DIRTY) | | ||
783 | kpf_copy_bit(kflags, PG_lru, KPF_LRU) | | ||
784 | kpf_copy_bit(kflags, PG_active, KPF_ACTIVE) | | ||
785 | kpf_copy_bit(kflags, PG_slab, KPF_SLAB) | | ||
786 | kpf_copy_bit(kflags, PG_writeback, KPF_WRITEBACK) | | ||
787 | kpf_copy_bit(kflags, PG_reclaim, KPF_RECLAIM) | | ||
788 | kpf_copy_bit(kflags, PG_buddy, KPF_BUDDY); | ||
789 | |||
790 | if (put_user(uflags, out++)) { | ||
791 | ret = -EFAULT; | ||
792 | break; | ||
793 | } | ||
794 | |||
795 | count -= KPMSIZE; | ||
796 | } | ||
797 | |||
798 | *ppos += (char __user *)out - buf; | ||
799 | if (!ret) | ||
800 | ret = (char __user *)out - buf; | ||
801 | return ret; | ||
802 | } | ||
803 | |||
804 | static const struct file_operations proc_kpageflags_operations = { | ||
805 | .llseek = mem_lseek, | ||
806 | .read = kpageflags_read, | ||
807 | }; | ||
808 | #endif /* CONFIG_PROC_PAGE_MONITOR */ | ||
809 | |||
678 | struct proc_dir_entry *proc_root_kcore; | 810 | struct proc_dir_entry *proc_root_kcore; |
679 | 811 | ||
680 | void create_seq_entry(char *name, mode_t mode, const struct file_operations *f) | 812 | void create_seq_entry(char *name, mode_t mode, const struct file_operations *f) |
@@ -755,6 +887,10 @@ void __init proc_misc_init(void) | |||
755 | (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; | 887 | (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; |
756 | } | 888 | } |
757 | #endif | 889 | #endif |
890 | #ifdef CONFIG_PROC_PAGE_MONITOR | ||
891 | create_seq_entry("kpagecount", S_IRUSR, &proc_kpagecount_operations); | ||
892 | create_seq_entry("kpageflags", S_IRUSR, &proc_kpageflags_operations); | ||
893 | #endif | ||
758 | #ifdef CONFIG_PROC_VMCORE | 894 | #ifdef CONFIG_PROC_VMCORE |
759 | proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL); | 895 | proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL); |
760 | if (proc_vmcore) | 896 | if (proc_vmcore) |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8043a3eab52c..38338ed98cc6 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -5,7 +5,10 @@ | |||
5 | #include <linux/highmem.h> | 5 | #include <linux/highmem.h> |
6 | #include <linux/ptrace.h> | 6 | #include <linux/ptrace.h> |
7 | #include <linux/pagemap.h> | 7 | #include <linux/pagemap.h> |
8 | #include <linux/ptrace.h> | ||
8 | #include <linux/mempolicy.h> | 9 | #include <linux/mempolicy.h> |
10 | #include <linux/swap.h> | ||
11 | #include <linux/swapops.h> | ||
9 | 12 | ||
10 | #include <asm/elf.h> | 13 | #include <asm/elf.h> |
11 | #include <asm/uaccess.h> | 14 | #include <asm/uaccess.h> |
@@ -114,24 +117,124 @@ static void pad_len_spaces(struct seq_file *m, int len) | |||
114 | seq_printf(m, "%*c", len, ' '); | 117 | seq_printf(m, "%*c", len, ' '); |
115 | } | 118 | } |
116 | 119 | ||
117 | struct mem_size_stats | 120 | static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) |
118 | { | 121 | { |
119 | unsigned long resident; | 122 | if (vma && vma != priv->tail_vma) { |
120 | unsigned long shared_clean; | 123 | struct mm_struct *mm = vma->vm_mm; |
121 | unsigned long shared_dirty; | 124 | up_read(&mm->mmap_sem); |
122 | unsigned long private_clean; | 125 | mmput(mm); |
123 | unsigned long private_dirty; | 126 | } |
124 | unsigned long referenced; | 127 | } |
125 | }; | ||
126 | 128 | ||
127 | struct pmd_walker { | 129 | static void *m_start(struct seq_file *m, loff_t *pos) |
128 | struct vm_area_struct *vma; | 130 | { |
129 | void *private; | 131 | struct proc_maps_private *priv = m->private; |
130 | void (*action)(struct vm_area_struct *, pmd_t *, unsigned long, | 132 | unsigned long last_addr = m->version; |
131 | unsigned long, void *); | 133 | struct mm_struct *mm; |
132 | }; | 134 | struct vm_area_struct *vma, *tail_vma = NULL; |
135 | loff_t l = *pos; | ||
136 | |||
137 | /* Clear the per syscall fields in priv */ | ||
138 | priv->task = NULL; | ||
139 | priv->tail_vma = NULL; | ||
140 | |||
141 | /* | ||
142 | * We remember last_addr rather than next_addr to hit with | ||
143 | * mmap_cache most of the time. We have zero last_addr at | ||
144 | * the beginning and also after lseek. We will have -1 last_addr | ||
145 | * after the end of the vmas. | ||
146 | */ | ||
147 | |||
148 | if (last_addr == -1UL) | ||
149 | return NULL; | ||
150 | |||
151 | priv->task = get_pid_task(priv->pid, PIDTYPE_PID); | ||
152 | if (!priv->task) | ||
153 | return NULL; | ||
154 | |||
155 | mm = mm_for_maps(priv->task); | ||
156 | if (!mm) | ||
157 | return NULL; | ||
158 | |||
159 | tail_vma = get_gate_vma(priv->task); | ||
160 | priv->tail_vma = tail_vma; | ||
161 | |||
162 | /* Start with last addr hint */ | ||
163 | vma = find_vma(mm, last_addr); | ||
164 | if (last_addr && vma) { | ||
165 | vma = vma->vm_next; | ||
166 | goto out; | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | * Check the vma index is within the range and do | ||
171 | * sequential scan until m_index. | ||
172 | */ | ||
173 | vma = NULL; | ||
174 | if ((unsigned long)l < mm->map_count) { | ||
175 | vma = mm->mmap; | ||
176 | while (l-- && vma) | ||
177 | vma = vma->vm_next; | ||
178 | goto out; | ||
179 | } | ||
180 | |||
181 | if (l != mm->map_count) | ||
182 | tail_vma = NULL; /* After gate vma */ | ||
183 | |||
184 | out: | ||
185 | if (vma) | ||
186 | return vma; | ||
187 | |||
188 | /* End of vmas has been reached */ | ||
189 | m->version = (tail_vma != NULL)? 0: -1UL; | ||
190 | up_read(&mm->mmap_sem); | ||
191 | mmput(mm); | ||
192 | return tail_vma; | ||
193 | } | ||
133 | 194 | ||
134 | static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) | 195 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) |
196 | { | ||
197 | struct proc_maps_private *priv = m->private; | ||
198 | struct vm_area_struct *vma = v; | ||
199 | struct vm_area_struct *tail_vma = priv->tail_vma; | ||
200 | |||
201 | (*pos)++; | ||
202 | if (vma && (vma != tail_vma) && vma->vm_next) | ||
203 | return vma->vm_next; | ||
204 | vma_stop(priv, vma); | ||
205 | return (vma != tail_vma)? tail_vma: NULL; | ||
206 | } | ||
207 | |||
208 | static void m_stop(struct seq_file *m, void *v) | ||
209 | { | ||
210 | struct proc_maps_private *priv = m->private; | ||
211 | struct vm_area_struct *vma = v; | ||
212 | |||
213 | vma_stop(priv, vma); | ||
214 | if (priv->task) | ||
215 | put_task_struct(priv->task); | ||
216 | } | ||
217 | |||
218 | static int do_maps_open(struct inode *inode, struct file *file, | ||
219 | struct seq_operations *ops) | ||
220 | { | ||
221 | struct proc_maps_private *priv; | ||
222 | int ret = -ENOMEM; | ||
223 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
224 | if (priv) { | ||
225 | priv->pid = proc_pid(inode); | ||
226 | ret = seq_open(file, ops); | ||
227 | if (!ret) { | ||
228 | struct seq_file *m = file->private_data; | ||
229 | m->private = priv; | ||
230 | } else { | ||
231 | kfree(priv); | ||
232 | } | ||
233 | } | ||
234 | return ret; | ||
235 | } | ||
236 | |||
237 | static int show_map(struct seq_file *m, void *v) | ||
135 | { | 238 | { |
136 | struct proc_maps_private *priv = m->private; | 239 | struct proc_maps_private *priv = m->private; |
137 | struct task_struct *task = priv->task; | 240 | struct task_struct *task = priv->task; |
@@ -191,41 +294,71 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats | |||
191 | } | 294 | } |
192 | seq_putc(m, '\n'); | 295 | seq_putc(m, '\n'); |
193 | 296 | ||
194 | if (mss) | ||
195 | seq_printf(m, | ||
196 | "Size: %8lu kB\n" | ||
197 | "Rss: %8lu kB\n" | ||
198 | "Shared_Clean: %8lu kB\n" | ||
199 | "Shared_Dirty: %8lu kB\n" | ||
200 | "Private_Clean: %8lu kB\n" | ||
201 | "Private_Dirty: %8lu kB\n" | ||
202 | "Referenced: %8lu kB\n", | ||
203 | (vma->vm_end - vma->vm_start) >> 10, | ||
204 | mss->resident >> 10, | ||
205 | mss->shared_clean >> 10, | ||
206 | mss->shared_dirty >> 10, | ||
207 | mss->private_clean >> 10, | ||
208 | mss->private_dirty >> 10, | ||
209 | mss->referenced >> 10); | ||
210 | |||
211 | if (m->count < m->size) /* vma is copied successfully */ | 297 | if (m->count < m->size) /* vma is copied successfully */ |
212 | m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; | 298 | m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; |
213 | return 0; | 299 | return 0; |
214 | } | 300 | } |
215 | 301 | ||
216 | static int show_map(struct seq_file *m, void *v) | 302 | static struct seq_operations proc_pid_maps_op = { |
303 | .start = m_start, | ||
304 | .next = m_next, | ||
305 | .stop = m_stop, | ||
306 | .show = show_map | ||
307 | }; | ||
308 | |||
309 | static int maps_open(struct inode *inode, struct file *file) | ||
217 | { | 310 | { |
218 | return show_map_internal(m, v, NULL); | 311 | return do_maps_open(inode, file, &proc_pid_maps_op); |
219 | } | 312 | } |
220 | 313 | ||
221 | static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | 314 | const struct file_operations proc_maps_operations = { |
222 | unsigned long addr, unsigned long end, | 315 | .open = maps_open, |
223 | void *private) | 316 | .read = seq_read, |
317 | .llseek = seq_lseek, | ||
318 | .release = seq_release_private, | ||
319 | }; | ||
320 | |||
321 | /* | ||
322 | * Proportional Set Size(PSS): my share of RSS. | ||
323 | * | ||
324 | * PSS of a process is the count of pages it has in memory, where each | ||
325 | * page is divided by the number of processes sharing it. So if a | ||
326 | * process has 1000 pages all to itself, and 1000 shared with one other | ||
327 | * process, its PSS will be 1500. | ||
328 | * | ||
329 | * To keep (accumulated) division errors low, we adopt a 64bit | ||
330 | * fixed-point pss counter to minimize division errors. So (pss >> | ||
331 | * PSS_SHIFT) would be the real byte count. | ||
332 | * | ||
333 | * A shift of 12 before division means (assuming 4K page size): | ||
334 | * - 1M 3-user-pages add up to 8KB errors; | ||
335 | * - supports mapcount up to 2^24, or 16M; | ||
336 | * - supports PSS up to 2^52 bytes, or 4PB. | ||
337 | */ | ||
338 | #define PSS_SHIFT 12 | ||
339 | |||
340 | #ifdef CONFIG_PROC_PAGE_MONITOR | ||
341 | struct mem_size_stats | ||
342 | { | ||
343 | struct vm_area_struct *vma; | ||
344 | unsigned long resident; | ||
345 | unsigned long shared_clean; | ||
346 | unsigned long shared_dirty; | ||
347 | unsigned long private_clean; | ||
348 | unsigned long private_dirty; | ||
349 | unsigned long referenced; | ||
350 | u64 pss; | ||
351 | }; | ||
352 | |||
353 | static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | ||
354 | void *private) | ||
224 | { | 355 | { |
225 | struct mem_size_stats *mss = private; | 356 | struct mem_size_stats *mss = private; |
357 | struct vm_area_struct *vma = mss->vma; | ||
226 | pte_t *pte, ptent; | 358 | pte_t *pte, ptent; |
227 | spinlock_t *ptl; | 359 | spinlock_t *ptl; |
228 | struct page *page; | 360 | struct page *page; |
361 | int mapcount; | ||
229 | 362 | ||
230 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 363 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
231 | for (; addr != end; pte++, addr += PAGE_SIZE) { | 364 | for (; addr != end; pte++, addr += PAGE_SIZE) { |
@@ -242,26 +375,88 @@ static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
242 | /* Accumulate the size in pages that have been accessed. */ | 375 | /* Accumulate the size in pages that have been accessed. */ |
243 | if (pte_young(ptent) || PageReferenced(page)) | 376 | if (pte_young(ptent) || PageReferenced(page)) |
244 | mss->referenced += PAGE_SIZE; | 377 | mss->referenced += PAGE_SIZE; |
245 | if (page_mapcount(page) >= 2) { | 378 | mapcount = page_mapcount(page); |
379 | if (mapcount >= 2) { | ||
246 | if (pte_dirty(ptent)) | 380 | if (pte_dirty(ptent)) |
247 | mss->shared_dirty += PAGE_SIZE; | 381 | mss->shared_dirty += PAGE_SIZE; |
248 | else | 382 | else |
249 | mss->shared_clean += PAGE_SIZE; | 383 | mss->shared_clean += PAGE_SIZE; |
384 | mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; | ||
250 | } else { | 385 | } else { |
251 | if (pte_dirty(ptent)) | 386 | if (pte_dirty(ptent)) |
252 | mss->private_dirty += PAGE_SIZE; | 387 | mss->private_dirty += PAGE_SIZE; |
253 | else | 388 | else |
254 | mss->private_clean += PAGE_SIZE; | 389 | mss->private_clean += PAGE_SIZE; |
390 | mss->pss += (PAGE_SIZE << PSS_SHIFT); | ||
255 | } | 391 | } |
256 | } | 392 | } |
257 | pte_unmap_unlock(pte - 1, ptl); | 393 | pte_unmap_unlock(pte - 1, ptl); |
258 | cond_resched(); | 394 | cond_resched(); |
395 | return 0; | ||
259 | } | 396 | } |
260 | 397 | ||
261 | static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | 398 | static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range }; |
262 | unsigned long addr, unsigned long end, | 399 | |
263 | void *private) | 400 | static int show_smap(struct seq_file *m, void *v) |
264 | { | 401 | { |
402 | struct vm_area_struct *vma = v; | ||
403 | struct mem_size_stats mss; | ||
404 | int ret; | ||
405 | |||
406 | memset(&mss, 0, sizeof mss); | ||
407 | mss.vma = vma; | ||
408 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) | ||
409 | walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end, | ||
410 | &smaps_walk, &mss); | ||
411 | |||
412 | ret = show_map(m, v); | ||
413 | if (ret) | ||
414 | return ret; | ||
415 | |||
416 | seq_printf(m, | ||
417 | "Size: %8lu kB\n" | ||
418 | "Rss: %8lu kB\n" | ||
419 | "Pss: %8lu kB\n" | ||
420 | "Shared_Clean: %8lu kB\n" | ||
421 | "Shared_Dirty: %8lu kB\n" | ||
422 | "Private_Clean: %8lu kB\n" | ||
423 | "Private_Dirty: %8lu kB\n" | ||
424 | "Referenced: %8lu kB\n", | ||
425 | (vma->vm_end - vma->vm_start) >> 10, | ||
426 | mss.resident >> 10, | ||
427 | (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), | ||
428 | mss.shared_clean >> 10, | ||
429 | mss.shared_dirty >> 10, | ||
430 | mss.private_clean >> 10, | ||
431 | mss.private_dirty >> 10, | ||
432 | mss.referenced >> 10); | ||
433 | |||
434 | return ret; | ||
435 | } | ||
436 | |||
437 | static struct seq_operations proc_pid_smaps_op = { | ||
438 | .start = m_start, | ||
439 | .next = m_next, | ||
440 | .stop = m_stop, | ||
441 | .show = show_smap | ||
442 | }; | ||
443 | |||
444 | static int smaps_open(struct inode *inode, struct file *file) | ||
445 | { | ||
446 | return do_maps_open(inode, file, &proc_pid_smaps_op); | ||
447 | } | ||
448 | |||
449 | const struct file_operations proc_smaps_operations = { | ||
450 | .open = smaps_open, | ||
451 | .read = seq_read, | ||
452 | .llseek = seq_lseek, | ||
453 | .release = seq_release_private, | ||
454 | }; | ||
455 | |||
456 | static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, | ||
457 | unsigned long end, void *private) | ||
458 | { | ||
459 | struct vm_area_struct *vma = private; | ||
265 | pte_t *pte, ptent; | 460 | pte_t *pte, ptent; |
266 | spinlock_t *ptl; | 461 | spinlock_t *ptl; |
267 | struct page *page; | 462 | struct page *page; |
@@ -282,235 +477,248 @@ static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd, | |||
282 | } | 477 | } |
283 | pte_unmap_unlock(pte - 1, ptl); | 478 | pte_unmap_unlock(pte - 1, ptl); |
284 | cond_resched(); | 479 | cond_resched(); |
480 | return 0; | ||
285 | } | 481 | } |
286 | 482 | ||
287 | static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud, | 483 | static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range }; |
288 | unsigned long addr, unsigned long end) | 484 | |
485 | static ssize_t clear_refs_write(struct file *file, const char __user *buf, | ||
486 | size_t count, loff_t *ppos) | ||
289 | { | 487 | { |
290 | pmd_t *pmd; | 488 | struct task_struct *task; |
291 | unsigned long next; | 489 | char buffer[PROC_NUMBUF], *end; |
490 | struct mm_struct *mm; | ||
491 | struct vm_area_struct *vma; | ||
292 | 492 | ||
293 | for (pmd = pmd_offset(pud, addr); addr != end; | 493 | memset(buffer, 0, sizeof(buffer)); |
294 | pmd++, addr = next) { | 494 | if (count > sizeof(buffer) - 1) |
295 | next = pmd_addr_end(addr, end); | 495 | count = sizeof(buffer) - 1; |
296 | if (pmd_none_or_clear_bad(pmd)) | 496 | if (copy_from_user(buffer, buf, count)) |
297 | continue; | 497 | return -EFAULT; |
298 | walker->action(walker->vma, pmd, addr, next, walker->private); | 498 | if (!simple_strtol(buffer, &end, 0)) |
499 | return -EINVAL; | ||
500 | if (*end == '\n') | ||
501 | end++; | ||
502 | task = get_proc_task(file->f_path.dentry->d_inode); | ||
503 | if (!task) | ||
504 | return -ESRCH; | ||
505 | mm = get_task_mm(task); | ||
506 | if (mm) { | ||
507 | down_read(&mm->mmap_sem); | ||
508 | for (vma = mm->mmap; vma; vma = vma->vm_next) | ||
509 | if (!is_vm_hugetlb_page(vma)) | ||
510 | walk_page_range(mm, vma->vm_start, vma->vm_end, | ||
511 | &clear_refs_walk, vma); | ||
512 | flush_tlb_mm(mm); | ||
513 | up_read(&mm->mmap_sem); | ||
514 | mmput(mm); | ||
299 | } | 515 | } |
516 | put_task_struct(task); | ||
517 | if (end - buffer == 0) | ||
518 | return -EIO; | ||
519 | return end - buffer; | ||
300 | } | 520 | } |
301 | 521 | ||
302 | static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd, | 522 | const struct file_operations proc_clear_refs_operations = { |
303 | unsigned long addr, unsigned long end) | 523 | .write = clear_refs_write, |
304 | { | 524 | }; |
305 | pud_t *pud; | ||
306 | unsigned long next; | ||
307 | 525 | ||
308 | for (pud = pud_offset(pgd, addr); addr != end; | 526 | struct pagemapread { |
309 | pud++, addr = next) { | 527 | char __user *out, *end; |
310 | next = pud_addr_end(addr, end); | 528 | }; |
311 | if (pud_none_or_clear_bad(pud)) | 529 | |
312 | continue; | 530 | #define PM_ENTRY_BYTES sizeof(u64) |
313 | walk_pmd_range(walker, pud, addr, next); | 531 | #define PM_RESERVED_BITS 3 |
532 | #define PM_RESERVED_OFFSET (64 - PM_RESERVED_BITS) | ||
533 | #define PM_RESERVED_MASK (((1LL<<PM_RESERVED_BITS)-1) << PM_RESERVED_OFFSET) | ||
534 | #define PM_SPECIAL(nr) (((nr) << PM_RESERVED_OFFSET) | PM_RESERVED_MASK) | ||
535 | #define PM_NOT_PRESENT PM_SPECIAL(1LL) | ||
536 | #define PM_SWAP PM_SPECIAL(2LL) | ||
537 | #define PM_END_OF_BUFFER 1 | ||
538 | |||
539 | static int add_to_pagemap(unsigned long addr, u64 pfn, | ||
540 | struct pagemapread *pm) | ||
541 | { | ||
542 | /* | ||
543 | * Make sure there's room in the buffer for an | ||
544 | * entire entry. Otherwise, only copy part of | ||
545 | * the pfn. | ||
546 | */ | ||
547 | if (pm->out + PM_ENTRY_BYTES >= pm->end) { | ||
548 | if (copy_to_user(pm->out, &pfn, pm->end - pm->out)) | ||
549 | return -EFAULT; | ||
550 | pm->out = pm->end; | ||
551 | return PM_END_OF_BUFFER; | ||
314 | } | 552 | } |
553 | |||
554 | if (put_user(pfn, pm->out)) | ||
555 | return -EFAULT; | ||
556 | pm->out += PM_ENTRY_BYTES; | ||
557 | return 0; | ||
315 | } | 558 | } |
316 | 559 | ||
317 | /* | 560 | static int pagemap_pte_hole(unsigned long start, unsigned long end, |
318 | * walk_page_range - walk the page tables of a VMA with a callback | 561 | void *private) |
319 | * @vma - VMA to walk | ||
320 | * @action - callback invoked for every bottom-level (PTE) page table | ||
321 | * @private - private data passed to the callback function | ||
322 | * | ||
323 | * Recursively walk the page table for the memory area in a VMA, calling | ||
324 | * a callback for every bottom-level (PTE) page table. | ||
325 | */ | ||
326 | static inline void walk_page_range(struct vm_area_struct *vma, | ||
327 | void (*action)(struct vm_area_struct *, | ||
328 | pmd_t *, unsigned long, | ||
329 | unsigned long, void *), | ||
330 | void *private) | ||
331 | { | 562 | { |
332 | unsigned long addr = vma->vm_start; | 563 | struct pagemapread *pm = private; |
333 | unsigned long end = vma->vm_end; | 564 | unsigned long addr; |
334 | struct pmd_walker walker = { | 565 | int err = 0; |
335 | .vma = vma, | 566 | for (addr = start; addr < end; addr += PAGE_SIZE) { |
336 | .private = private, | 567 | err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); |
337 | .action = action, | 568 | if (err) |
338 | }; | 569 | break; |
339 | pgd_t *pgd; | ||
340 | unsigned long next; | ||
341 | |||
342 | for (pgd = pgd_offset(vma->vm_mm, addr); addr != end; | ||
343 | pgd++, addr = next) { | ||
344 | next = pgd_addr_end(addr, end); | ||
345 | if (pgd_none_or_clear_bad(pgd)) | ||
346 | continue; | ||
347 | walk_pud_range(&walker, pgd, addr, next); | ||
348 | } | 570 | } |
571 | return err; | ||
349 | } | 572 | } |
350 | 573 | ||
351 | static int show_smap(struct seq_file *m, void *v) | 574 | u64 swap_pte_to_pagemap_entry(pte_t pte) |
352 | { | 575 | { |
353 | struct vm_area_struct *vma = v; | 576 | swp_entry_t e = pte_to_swp_entry(pte); |
354 | struct mem_size_stats mss; | 577 | return PM_SWAP | swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); |
355 | |||
356 | memset(&mss, 0, sizeof mss); | ||
357 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) | ||
358 | walk_page_range(vma, smaps_pte_range, &mss); | ||
359 | return show_map_internal(m, v, &mss); | ||
360 | } | 578 | } |
361 | 579 | ||
362 | void clear_refs_smap(struct mm_struct *mm) | 580 | static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, |
581 | void *private) | ||
363 | { | 582 | { |
364 | struct vm_area_struct *vma; | 583 | struct pagemapread *pm = private; |
584 | pte_t *pte; | ||
585 | int err = 0; | ||
586 | |||
587 | for (; addr != end; addr += PAGE_SIZE) { | ||
588 | u64 pfn = PM_NOT_PRESENT; | ||
589 | pte = pte_offset_map(pmd, addr); | ||
590 | if (is_swap_pte(*pte)) | ||
591 | pfn = swap_pte_to_pagemap_entry(*pte); | ||
592 | else if (pte_present(*pte)) | ||
593 | pfn = pte_pfn(*pte); | ||
594 | /* unmap so we're not in atomic when we copy to userspace */ | ||
595 | pte_unmap(pte); | ||
596 | err = add_to_pagemap(addr, pfn, pm); | ||
597 | if (err) | ||
598 | return err; | ||
599 | } | ||
365 | 600 | ||
366 | down_read(&mm->mmap_sem); | 601 | cond_resched(); |
367 | for (vma = mm->mmap; vma; vma = vma->vm_next) | 602 | |
368 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) | 603 | return err; |
369 | walk_page_range(vma, clear_refs_pte_range, NULL); | ||
370 | flush_tlb_mm(mm); | ||
371 | up_read(&mm->mmap_sem); | ||
372 | } | 604 | } |
373 | 605 | ||
374 | static void *m_start(struct seq_file *m, loff_t *pos) | 606 | static struct mm_walk pagemap_walk = { |
607 | .pmd_entry = pagemap_pte_range, | ||
608 | .pte_hole = pagemap_pte_hole | ||
609 | }; | ||
610 | |||
611 | /* | ||
612 | * /proc/pid/pagemap - an array mapping virtual pages to pfns | ||
613 | * | ||
614 | * For each page in the address space, this file contains one 64-bit | ||
615 | * entry representing the corresponding physical page frame number | ||
616 | * (PFN) if the page is present. If there is a swap entry for the | ||
617 | * physical page, then an encoding of the swap file number and the | ||
618 | * page's offset into the swap file are returned. If no page is | ||
619 | * present at all, PM_NOT_PRESENT is returned. This allows determining | ||
620 | * precisely which pages are mapped (or in swap) and comparing mapped | ||
621 | * pages between processes. | ||
622 | * | ||
623 | * Efficient users of this interface will use /proc/pid/maps to | ||
624 | * determine which areas of memory are actually mapped and llseek to | ||
625 | * skip over unmapped regions. | ||
626 | */ | ||
627 | static ssize_t pagemap_read(struct file *file, char __user *buf, | ||
628 | size_t count, loff_t *ppos) | ||
375 | { | 629 | { |
376 | struct proc_maps_private *priv = m->private; | 630 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); |
377 | unsigned long last_addr = m->version; | 631 | struct page **pages, *page; |
632 | unsigned long uaddr, uend; | ||
378 | struct mm_struct *mm; | 633 | struct mm_struct *mm; |
379 | struct vm_area_struct *vma, *tail_vma = NULL; | 634 | struct pagemapread pm; |
380 | loff_t l = *pos; | 635 | int pagecount; |
381 | 636 | int ret = -ESRCH; | |
382 | /* Clear the per syscall fields in priv */ | ||
383 | priv->task = NULL; | ||
384 | priv->tail_vma = NULL; | ||
385 | 637 | ||
386 | /* | 638 | if (!task) |
387 | * We remember last_addr rather than next_addr to hit with | 639 | goto out; |
388 | * mmap_cache most of the time. We have zero last_addr at | ||
389 | * the beginning and also after lseek. We will have -1 last_addr | ||
390 | * after the end of the vmas. | ||
391 | */ | ||
392 | 640 | ||
393 | if (last_addr == -1UL) | 641 | ret = -EACCES; |
394 | return NULL; | 642 | if (!ptrace_may_attach(task)) |
643 | goto out; | ||
395 | 644 | ||
396 | priv->task = get_pid_task(priv->pid, PIDTYPE_PID); | 645 | ret = -EINVAL; |
397 | if (!priv->task) | 646 | /* file position must be aligned */ |
398 | return NULL; | 647 | if (*ppos % PM_ENTRY_BYTES) |
648 | goto out; | ||
399 | 649 | ||
400 | mm = mm_for_maps(priv->task); | 650 | ret = 0; |
651 | mm = get_task_mm(task); | ||
401 | if (!mm) | 652 | if (!mm) |
402 | return NULL; | ||
403 | |||
404 | priv->tail_vma = tail_vma = get_gate_vma(priv->task); | ||
405 | |||
406 | /* Start with last addr hint */ | ||
407 | if (last_addr && (vma = find_vma(mm, last_addr))) { | ||
408 | vma = vma->vm_next; | ||
409 | goto out; | 653 | goto out; |
410 | } | ||
411 | 654 | ||
412 | /* | 655 | ret = -ENOMEM; |
413 | * Check the vma index is within the range and do | 656 | uaddr = (unsigned long)buf & PAGE_MASK; |
414 | * sequential scan until m_index. | 657 | uend = (unsigned long)(buf + count); |
415 | */ | 658 | pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; |
416 | vma = NULL; | 659 | pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL); |
417 | if ((unsigned long)l < mm->map_count) { | 660 | if (!pages) |
418 | vma = mm->mmap; | 661 | goto out_task; |
419 | while (l-- && vma) | ||
420 | vma = vma->vm_next; | ||
421 | goto out; | ||
422 | } | ||
423 | 662 | ||
424 | if (l != mm->map_count) | 663 | down_read(&current->mm->mmap_sem); |
425 | tail_vma = NULL; /* After gate vma */ | 664 | ret = get_user_pages(current, current->mm, uaddr, pagecount, |
665 | 1, 0, pages, NULL); | ||
666 | up_read(&current->mm->mmap_sem); | ||
426 | 667 | ||
427 | out: | 668 | if (ret < 0) |
428 | if (vma) | 669 | goto out_free; |
429 | return vma; | ||
430 | 670 | ||
431 | /* End of vmas has been reached */ | 671 | pm.out = buf; |
432 | m->version = (tail_vma != NULL)? 0: -1UL; | 672 | pm.end = buf + count; |
433 | up_read(&mm->mmap_sem); | ||
434 | mmput(mm); | ||
435 | return tail_vma; | ||
436 | } | ||
437 | 673 | ||
438 | static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) | 674 | if (!ptrace_may_attach(task)) { |
439 | { | 675 | ret = -EIO; |
440 | if (vma && vma != priv->tail_vma) { | 676 | } else { |
441 | struct mm_struct *mm = vma->vm_mm; | 677 | unsigned long src = *ppos; |
442 | up_read(&mm->mmap_sem); | 678 | unsigned long svpfn = src / PM_ENTRY_BYTES; |
443 | mmput(mm); | 679 | unsigned long start_vaddr = svpfn << PAGE_SHIFT; |
680 | unsigned long end_vaddr = TASK_SIZE_OF(task); | ||
681 | |||
682 | /* watch out for wraparound */ | ||
683 | if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) | ||
684 | start_vaddr = end_vaddr; | ||
685 | |||
686 | /* | ||
687 | * The odds are that this will stop walking way | ||
688 | * before end_vaddr, because the length of the | ||
689 | * user buffer is tracked in "pm", and the walk | ||
690 | * will stop when we hit the end of the buffer. | ||
691 | */ | ||
692 | ret = walk_page_range(mm, start_vaddr, end_vaddr, | ||
693 | &pagemap_walk, &pm); | ||
694 | if (ret == PM_END_OF_BUFFER) | ||
695 | ret = 0; | ||
696 | /* don't need mmap_sem for these, but this looks cleaner */ | ||
697 | *ppos += pm.out - buf; | ||
698 | if (!ret) | ||
699 | ret = pm.out - buf; | ||
444 | } | 700 | } |
445 | } | ||
446 | |||
447 | static void *m_next(struct seq_file *m, void *v, loff_t *pos) | ||
448 | { | ||
449 | struct proc_maps_private *priv = m->private; | ||
450 | struct vm_area_struct *vma = v; | ||
451 | struct vm_area_struct *tail_vma = priv->tail_vma; | ||
452 | |||
453 | (*pos)++; | ||
454 | if (vma && (vma != tail_vma) && vma->vm_next) | ||
455 | return vma->vm_next; | ||
456 | vma_stop(priv, vma); | ||
457 | return (vma != tail_vma)? tail_vma: NULL; | ||
458 | } | ||
459 | |||
460 | static void m_stop(struct seq_file *m, void *v) | ||
461 | { | ||
462 | struct proc_maps_private *priv = m->private; | ||
463 | struct vm_area_struct *vma = v; | ||
464 | 701 | ||
465 | vma_stop(priv, vma); | 702 | for (; pagecount; pagecount--) { |
466 | if (priv->task) | 703 | page = pages[pagecount-1]; |
467 | put_task_struct(priv->task); | 704 | if (!PageReserved(page)) |
468 | } | 705 | SetPageDirty(page); |
469 | 706 | page_cache_release(page); | |
470 | static struct seq_operations proc_pid_maps_op = { | ||
471 | .start = m_start, | ||
472 | .next = m_next, | ||
473 | .stop = m_stop, | ||
474 | .show = show_map | ||
475 | }; | ||
476 | |||
477 | static struct seq_operations proc_pid_smaps_op = { | ||
478 | .start = m_start, | ||
479 | .next = m_next, | ||
480 | .stop = m_stop, | ||
481 | .show = show_smap | ||
482 | }; | ||
483 | |||
484 | static int do_maps_open(struct inode *inode, struct file *file, | ||
485 | struct seq_operations *ops) | ||
486 | { | ||
487 | struct proc_maps_private *priv; | ||
488 | int ret = -ENOMEM; | ||
489 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
490 | if (priv) { | ||
491 | priv->pid = proc_pid(inode); | ||
492 | ret = seq_open(file, ops); | ||
493 | if (!ret) { | ||
494 | struct seq_file *m = file->private_data; | ||
495 | m->private = priv; | ||
496 | } else { | ||
497 | kfree(priv); | ||
498 | } | ||
499 | } | 707 | } |
708 | mmput(mm); | ||
709 | out_free: | ||
710 | kfree(pages); | ||
711 | out_task: | ||
712 | put_task_struct(task); | ||
713 | out: | ||
500 | return ret; | 714 | return ret; |
501 | } | 715 | } |
502 | 716 | ||
503 | static int maps_open(struct inode *inode, struct file *file) | 717 | const struct file_operations proc_pagemap_operations = { |
504 | { | 718 | .llseek = mem_lseek, /* borrow this */ |
505 | return do_maps_open(inode, file, &proc_pid_maps_op); | 719 | .read = pagemap_read, |
506 | } | ||
507 | |||
508 | const struct file_operations proc_maps_operations = { | ||
509 | .open = maps_open, | ||
510 | .read = seq_read, | ||
511 | .llseek = seq_lseek, | ||
512 | .release = seq_release_private, | ||
513 | }; | 720 | }; |
721 | #endif /* CONFIG_PROC_PAGE_MONITOR */ | ||
514 | 722 | ||
515 | #ifdef CONFIG_NUMA | 723 | #ifdef CONFIG_NUMA |
516 | extern int show_numa_map(struct seq_file *m, void *v); | 724 | extern int show_numa_map(struct seq_file *m, void *v); |
@@ -545,15 +753,3 @@ const struct file_operations proc_numa_maps_operations = { | |||
545 | .release = seq_release_private, | 753 | .release = seq_release_private, |
546 | }; | 754 | }; |
547 | #endif | 755 | #endif |
548 | |||
549 | static int smaps_open(struct inode *inode, struct file *file) | ||
550 | { | ||
551 | return do_maps_open(inode, file, &proc_pid_smaps_op); | ||
552 | } | ||
553 | |||
554 | const struct file_operations proc_smaps_operations = { | ||
555 | .open = smaps_open, | ||
556 | .read = seq_read, | ||
557 | .llseek = seq_lseek, | ||
558 | .release = seq_release_private, | ||
559 | }; | ||
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 231fd5ccadc5..195309857e63 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -2143,7 +2143,7 @@ int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) | |||
2143 | /* if we are not on a block boundary */ | 2143 | /* if we are not on a block boundary */ |
2144 | if (length) { | 2144 | if (length) { |
2145 | length = blocksize - length; | 2145 | length = blocksize - length; |
2146 | zero_user_page(page, offset, length, KM_USER0); | 2146 | zero_user(page, offset, length); |
2147 | if (buffer_mapped(bh) && bh->b_blocknr != 0) { | 2147 | if (buffer_mapped(bh) && bh->b_blocknr != 0) { |
2148 | mark_buffer_dirty(bh); | 2148 | mark_buffer_dirty(bh); |
2149 | } | 2149 | } |
@@ -2367,7 +2367,7 @@ static int reiserfs_write_full_page(struct page *page, | |||
2367 | unlock_page(page); | 2367 | unlock_page(page); |
2368 | return 0; | 2368 | return 0; |
2369 | } | 2369 | } |
2370 | zero_user_page(page, last_offset, PAGE_CACHE_SIZE - last_offset, KM_USER0); | 2370 | zero_user_segment(page, last_offset, PAGE_CACHE_SIZE); |
2371 | } | 2371 | } |
2372 | bh = head; | 2372 | bh = head; |
2373 | block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); | 2373 | block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); |
diff --git a/fs/timerfd.c b/fs/timerfd.c index 61983f3b107c..10c80b59ec4b 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c | |||
@@ -25,13 +25,15 @@ struct timerfd_ctx { | |||
25 | struct hrtimer tmr; | 25 | struct hrtimer tmr; |
26 | ktime_t tintv; | 26 | ktime_t tintv; |
27 | wait_queue_head_t wqh; | 27 | wait_queue_head_t wqh; |
28 | u64 ticks; | ||
28 | int expired; | 29 | int expired; |
30 | int clockid; | ||
29 | }; | 31 | }; |
30 | 32 | ||
31 | /* | 33 | /* |
32 | * This gets called when the timer event triggers. We set the "expired" | 34 | * This gets called when the timer event triggers. We set the "expired" |
33 | * flag, but we do not re-arm the timer (in case it's necessary, | 35 | * flag, but we do not re-arm the timer (in case it's necessary, |
34 | * tintv.tv64 != 0) until the timer is read. | 36 | * tintv.tv64 != 0) until the timer is accessed. |
35 | */ | 37 | */ |
36 | static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) | 38 | static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) |
37 | { | 39 | { |
@@ -40,13 +42,24 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) | |||
40 | 42 | ||
41 | spin_lock_irqsave(&ctx->wqh.lock, flags); | 43 | spin_lock_irqsave(&ctx->wqh.lock, flags); |
42 | ctx->expired = 1; | 44 | ctx->expired = 1; |
45 | ctx->ticks++; | ||
43 | wake_up_locked(&ctx->wqh); | 46 | wake_up_locked(&ctx->wqh); |
44 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); | 47 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); |
45 | 48 | ||
46 | return HRTIMER_NORESTART; | 49 | return HRTIMER_NORESTART; |
47 | } | 50 | } |
48 | 51 | ||
49 | static void timerfd_setup(struct timerfd_ctx *ctx, int clockid, int flags, | 52 | static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) |
53 | { | ||
54 | ktime_t now, remaining; | ||
55 | |||
56 | now = ctx->tmr.base->get_time(); | ||
57 | remaining = ktime_sub(ctx->tmr.expires, now); | ||
58 | |||
59 | return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; | ||
60 | } | ||
61 | |||
62 | static void timerfd_setup(struct timerfd_ctx *ctx, int flags, | ||
50 | const struct itimerspec *ktmr) | 63 | const struct itimerspec *ktmr) |
51 | { | 64 | { |
52 | enum hrtimer_mode htmode; | 65 | enum hrtimer_mode htmode; |
@@ -57,8 +70,9 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int clockid, int flags, | |||
57 | 70 | ||
58 | texp = timespec_to_ktime(ktmr->it_value); | 71 | texp = timespec_to_ktime(ktmr->it_value); |
59 | ctx->expired = 0; | 72 | ctx->expired = 0; |
73 | ctx->ticks = 0; | ||
60 | ctx->tintv = timespec_to_ktime(ktmr->it_interval); | 74 | ctx->tintv = timespec_to_ktime(ktmr->it_interval); |
61 | hrtimer_init(&ctx->tmr, clockid, htmode); | 75 | hrtimer_init(&ctx->tmr, ctx->clockid, htmode); |
62 | ctx->tmr.expires = texp; | 76 | ctx->tmr.expires = texp; |
63 | ctx->tmr.function = timerfd_tmrproc; | 77 | ctx->tmr.function = timerfd_tmrproc; |
64 | if (texp.tv64 != 0) | 78 | if (texp.tv64 != 0) |
@@ -83,7 +97,7 @@ static unsigned int timerfd_poll(struct file *file, poll_table *wait) | |||
83 | poll_wait(file, &ctx->wqh, wait); | 97 | poll_wait(file, &ctx->wqh, wait); |
84 | 98 | ||
85 | spin_lock_irqsave(&ctx->wqh.lock, flags); | 99 | spin_lock_irqsave(&ctx->wqh.lock, flags); |
86 | if (ctx->expired) | 100 | if (ctx->ticks) |
87 | events |= POLLIN; | 101 | events |= POLLIN; |
88 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); | 102 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); |
89 | 103 | ||
@@ -102,11 +116,11 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, | |||
102 | return -EINVAL; | 116 | return -EINVAL; |
103 | spin_lock_irq(&ctx->wqh.lock); | 117 | spin_lock_irq(&ctx->wqh.lock); |
104 | res = -EAGAIN; | 118 | res = -EAGAIN; |
105 | if (!ctx->expired && !(file->f_flags & O_NONBLOCK)) { | 119 | if (!ctx->ticks && !(file->f_flags & O_NONBLOCK)) { |
106 | __add_wait_queue(&ctx->wqh, &wait); | 120 | __add_wait_queue(&ctx->wqh, &wait); |
107 | for (res = 0;;) { | 121 | for (res = 0;;) { |
108 | set_current_state(TASK_INTERRUPTIBLE); | 122 | set_current_state(TASK_INTERRUPTIBLE); |
109 | if (ctx->expired) { | 123 | if (ctx->ticks) { |
110 | res = 0; | 124 | res = 0; |
111 | break; | 125 | break; |
112 | } | 126 | } |
@@ -121,22 +135,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, | |||
121 | __remove_wait_queue(&ctx->wqh, &wait); | 135 | __remove_wait_queue(&ctx->wqh, &wait); |
122 | __set_current_state(TASK_RUNNING); | 136 | __set_current_state(TASK_RUNNING); |
123 | } | 137 | } |
124 | if (ctx->expired) { | 138 | if (ctx->ticks) { |
125 | ctx->expired = 0; | 139 | ticks = ctx->ticks; |
126 | if (ctx->tintv.tv64 != 0) { | 140 | if (ctx->expired && ctx->tintv.tv64) { |
127 | /* | 141 | /* |
128 | * If tintv.tv64 != 0, this is a periodic timer that | 142 | * If tintv.tv64 != 0, this is a periodic timer that |
129 | * needs to be re-armed. We avoid doing it in the timer | 143 | * needs to be re-armed. We avoid doing it in the timer |
130 | * callback to avoid DoS attacks specifying a very | 144 | * callback to avoid DoS attacks specifying a very |
131 | * short timer period. | 145 | * short timer period. |
132 | */ | 146 | */ |
133 | ticks = (u64) | 147 | ticks += hrtimer_forward_now(&ctx->tmr, |
134 | hrtimer_forward(&ctx->tmr, | 148 | ctx->tintv) - 1; |
135 | hrtimer_cb_get_time(&ctx->tmr), | ||
136 | ctx->tintv); | ||
137 | hrtimer_restart(&ctx->tmr); | 149 | hrtimer_restart(&ctx->tmr); |
138 | } else | 150 | } |
139 | ticks = 1; | 151 | ctx->expired = 0; |
152 | ctx->ticks = 0; | ||
140 | } | 153 | } |
141 | spin_unlock_irq(&ctx->wqh.lock); | 154 | spin_unlock_irq(&ctx->wqh.lock); |
142 | if (ticks) | 155 | if (ticks) |
@@ -150,76 +163,132 @@ static const struct file_operations timerfd_fops = { | |||
150 | .read = timerfd_read, | 163 | .read = timerfd_read, |
151 | }; | 164 | }; |
152 | 165 | ||
153 | asmlinkage long sys_timerfd(int ufd, int clockid, int flags, | 166 | static struct file *timerfd_fget(int fd) |
154 | const struct itimerspec __user *utmr) | 167 | { |
168 | struct file *file; | ||
169 | |||
170 | file = fget(fd); | ||
171 | if (!file) | ||
172 | return ERR_PTR(-EBADF); | ||
173 | if (file->f_op != &timerfd_fops) { | ||
174 | fput(file); | ||
175 | return ERR_PTR(-EINVAL); | ||
176 | } | ||
177 | |||
178 | return file; | ||
179 | } | ||
180 | |||
181 | asmlinkage long sys_timerfd_create(int clockid, int flags) | ||
155 | { | 182 | { |
156 | int error; | 183 | int error, ufd; |
157 | struct timerfd_ctx *ctx; | 184 | struct timerfd_ctx *ctx; |
158 | struct file *file; | 185 | struct file *file; |
159 | struct inode *inode; | 186 | struct inode *inode; |
160 | struct itimerspec ktmr; | ||
161 | |||
162 | if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) | ||
163 | return -EFAULT; | ||
164 | 187 | ||
188 | if (flags) | ||
189 | return -EINVAL; | ||
165 | if (clockid != CLOCK_MONOTONIC && | 190 | if (clockid != CLOCK_MONOTONIC && |
166 | clockid != CLOCK_REALTIME) | 191 | clockid != CLOCK_REALTIME) |
167 | return -EINVAL; | 192 | return -EINVAL; |
193 | |||
194 | ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); | ||
195 | if (!ctx) | ||
196 | return -ENOMEM; | ||
197 | |||
198 | init_waitqueue_head(&ctx->wqh); | ||
199 | ctx->clockid = clockid; | ||
200 | hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); | ||
201 | |||
202 | error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]", | ||
203 | &timerfd_fops, ctx); | ||
204 | if (error) { | ||
205 | kfree(ctx); | ||
206 | return error; | ||
207 | } | ||
208 | |||
209 | return ufd; | ||
210 | } | ||
211 | |||
212 | asmlinkage long sys_timerfd_settime(int ufd, int flags, | ||
213 | const struct itimerspec __user *utmr, | ||
214 | struct itimerspec __user *otmr) | ||
215 | { | ||
216 | struct file *file; | ||
217 | struct timerfd_ctx *ctx; | ||
218 | struct itimerspec ktmr, kotmr; | ||
219 | |||
220 | if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) | ||
221 | return -EFAULT; | ||
222 | |||
168 | if (!timespec_valid(&ktmr.it_value) || | 223 | if (!timespec_valid(&ktmr.it_value) || |
169 | !timespec_valid(&ktmr.it_interval)) | 224 | !timespec_valid(&ktmr.it_interval)) |
170 | return -EINVAL; | 225 | return -EINVAL; |
171 | 226 | ||
172 | if (ufd == -1) { | 227 | file = timerfd_fget(ufd); |
173 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | 228 | if (IS_ERR(file)) |
174 | if (!ctx) | 229 | return PTR_ERR(file); |
175 | return -ENOMEM; | 230 | ctx = file->private_data; |
176 | |||
177 | init_waitqueue_head(&ctx->wqh); | ||
178 | |||
179 | timerfd_setup(ctx, clockid, flags, &ktmr); | ||
180 | |||
181 | /* | ||
182 | * When we call this, the initialization must be complete, since | ||
183 | * anon_inode_getfd() will install the fd. | ||
184 | */ | ||
185 | error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]", | ||
186 | &timerfd_fops, ctx); | ||
187 | if (error) | ||
188 | goto err_tmrcancel; | ||
189 | } else { | ||
190 | file = fget(ufd); | ||
191 | if (!file) | ||
192 | return -EBADF; | ||
193 | ctx = file->private_data; | ||
194 | if (file->f_op != &timerfd_fops) { | ||
195 | fput(file); | ||
196 | return -EINVAL; | ||
197 | } | ||
198 | /* | ||
199 | * We need to stop the existing timer before reprogramming | ||
200 | * it to the new values. | ||
201 | */ | ||
202 | for (;;) { | ||
203 | spin_lock_irq(&ctx->wqh.lock); | ||
204 | if (hrtimer_try_to_cancel(&ctx->tmr) >= 0) | ||
205 | break; | ||
206 | spin_unlock_irq(&ctx->wqh.lock); | ||
207 | cpu_relax(); | ||
208 | } | ||
209 | /* | ||
210 | * Re-program the timer to the new value ... | ||
211 | */ | ||
212 | timerfd_setup(ctx, clockid, flags, &ktmr); | ||
213 | 231 | ||
232 | /* | ||
233 | * We need to stop the existing timer before reprogramming | ||
234 | * it to the new values. | ||
235 | */ | ||
236 | for (;;) { | ||
237 | spin_lock_irq(&ctx->wqh.lock); | ||
238 | if (hrtimer_try_to_cancel(&ctx->tmr) >= 0) | ||
239 | break; | ||
214 | spin_unlock_irq(&ctx->wqh.lock); | 240 | spin_unlock_irq(&ctx->wqh.lock); |
215 | fput(file); | 241 | cpu_relax(); |
216 | } | 242 | } |
217 | 243 | ||
218 | return ufd; | 244 | /* |
245 | * If the timer is expired and it's periodic, we need to advance it | ||
246 | * because the caller may want to know the previous expiration time. | ||
247 | * We do not update "ticks" and "expired" since the timer will be | ||
248 | * re-programmed again in the following timerfd_setup() call. | ||
249 | */ | ||
250 | if (ctx->expired && ctx->tintv.tv64) | ||
251 | hrtimer_forward_now(&ctx->tmr, ctx->tintv); | ||
219 | 252 | ||
220 | err_tmrcancel: | 253 | kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); |
221 | hrtimer_cancel(&ctx->tmr); | 254 | kotmr.it_interval = ktime_to_timespec(ctx->tintv); |
222 | kfree(ctx); | 255 | |
223 | return error; | 256 | /* |
257 | * Re-program the timer to the new value ... | ||
258 | */ | ||
259 | timerfd_setup(ctx, flags, &ktmr); | ||
260 | |||
261 | spin_unlock_irq(&ctx->wqh.lock); | ||
262 | fput(file); | ||
263 | if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) | ||
264 | return -EFAULT; | ||
265 | |||
266 | return 0; | ||
267 | } | ||
268 | |||
269 | asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr) | ||
270 | { | ||
271 | struct file *file; | ||
272 | struct timerfd_ctx *ctx; | ||
273 | struct itimerspec kotmr; | ||
274 | |||
275 | file = timerfd_fget(ufd); | ||
276 | if (IS_ERR(file)) | ||
277 | return PTR_ERR(file); | ||
278 | ctx = file->private_data; | ||
279 | |||
280 | spin_lock_irq(&ctx->wqh.lock); | ||
281 | if (ctx->expired && ctx->tintv.tv64) { | ||
282 | ctx->expired = 0; | ||
283 | ctx->ticks += | ||
284 | hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1; | ||
285 | hrtimer_restart(&ctx->tmr); | ||
286 | } | ||
287 | kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); | ||
288 | kotmr.it_interval = ktime_to_timespec(ctx->tintv); | ||
289 | spin_unlock_irq(&ctx->wqh.lock); | ||
290 | fput(file); | ||
291 | |||
292 | return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; | ||
224 | } | 293 | } |
225 | 294 | ||
diff --git a/fs/xattr.c b/fs/xattr.c index 6645b7313b33..f7c8f87bb390 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -105,6 +105,33 @@ out: | |||
105 | EXPORT_SYMBOL_GPL(vfs_setxattr); | 105 | EXPORT_SYMBOL_GPL(vfs_setxattr); |
106 | 106 | ||
107 | ssize_t | 107 | ssize_t |
108 | xattr_getsecurity(struct inode *inode, const char *name, void *value, | ||
109 | size_t size) | ||
110 | { | ||
111 | void *buffer = NULL; | ||
112 | ssize_t len; | ||
113 | |||
114 | if (!value || !size) { | ||
115 | len = security_inode_getsecurity(inode, name, &buffer, false); | ||
116 | goto out_noalloc; | ||
117 | } | ||
118 | |||
119 | len = security_inode_getsecurity(inode, name, &buffer, true); | ||
120 | if (len < 0) | ||
121 | return len; | ||
122 | if (size < len) { | ||
123 | len = -ERANGE; | ||
124 | goto out; | ||
125 | } | ||
126 | memcpy(value, buffer, len); | ||
127 | out: | ||
128 | security_release_secctx(buffer, len); | ||
129 | out_noalloc: | ||
130 | return len; | ||
131 | } | ||
132 | EXPORT_SYMBOL_GPL(xattr_getsecurity); | ||
133 | |||
134 | ssize_t | ||
108 | vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) | 135 | vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) |
109 | { | 136 | { |
110 | struct inode *inode = dentry->d_inode; | 137 | struct inode *inode = dentry->d_inode; |
@@ -118,23 +145,23 @@ vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) | |||
118 | if (error) | 145 | if (error) |
119 | return error; | 146 | return error; |
120 | 147 | ||
121 | if (inode->i_op->getxattr) | ||
122 | error = inode->i_op->getxattr(dentry, name, value, size); | ||
123 | else | ||
124 | error = -EOPNOTSUPP; | ||
125 | |||
126 | if (!strncmp(name, XATTR_SECURITY_PREFIX, | 148 | if (!strncmp(name, XATTR_SECURITY_PREFIX, |
127 | XATTR_SECURITY_PREFIX_LEN)) { | 149 | XATTR_SECURITY_PREFIX_LEN)) { |
128 | const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; | 150 | const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; |
129 | int ret = security_inode_getsecurity(inode, suffix, value, | 151 | int ret = xattr_getsecurity(inode, suffix, value, size); |
130 | size, error); | ||
131 | /* | 152 | /* |
132 | * Only overwrite the return value if a security module | 153 | * Only overwrite the return value if a security module |
133 | * is actually active. | 154 | * is actually active. |
134 | */ | 155 | */ |
135 | if (ret != -EOPNOTSUPP) | 156 | if (ret == -EOPNOTSUPP) |
136 | error = ret; | 157 | goto nolsm; |
158 | return ret; | ||
137 | } | 159 | } |
160 | nolsm: | ||
161 | if (inode->i_op->getxattr) | ||
162 | error = inode->i_op->getxattr(dentry, name, value, size); | ||
163 | else | ||
164 | error = -EOPNOTSUPP; | ||
138 | 165 | ||
139 | return error; | 166 | return error; |
140 | } | 167 | } |
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index ed2b16dff914..e040f1ce1b6a 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c | |||
@@ -92,8 +92,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize, | |||
92 | void | 92 | void |
93 | kmem_free(void *ptr, size_t size) | 93 | kmem_free(void *ptr, size_t size) |
94 | { | 94 | { |
95 | if (((unsigned long)ptr < VMALLOC_START) || | 95 | if (!is_vmalloc_addr(ptr)) { |
96 | ((unsigned long)ptr >= VMALLOC_END)) { | ||
97 | kfree(ptr); | 96 | kfree(ptr); |
98 | } else { | 97 | } else { |
99 | vfree(ptr); | 98 | vfree(ptr); |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index a49dd8d4b069..0382c19d6523 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -709,8 +709,7 @@ static inline struct page * | |||
709 | mem_to_page( | 709 | mem_to_page( |
710 | void *addr) | 710 | void *addr) |
711 | { | 711 | { |
712 | if (((unsigned long)addr < VMALLOC_START) || | 712 | if ((!is_vmalloc_addr(addr))) { |
713 | ((unsigned long)addr >= VMALLOC_END)) { | ||
714 | return virt_to_page(addr); | 713 | return virt_to_page(addr); |
715 | } else { | 714 | } else { |
716 | return vmalloc_to_page(addr); | 715 | return vmalloc_to_page(addr); |
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index d6a8dddb2268..6f614f35f650 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c | |||
@@ -155,7 +155,7 @@ xfs_iozero( | |||
155 | if (status) | 155 | if (status) |
156 | break; | 156 | break; |
157 | 157 | ||
158 | zero_user_page(page, offset, bytes, KM_USER0); | 158 | zero_user(page, offset, bytes); |
159 | 159 | ||
160 | status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, | 160 | status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, |
161 | page, fsdata); | 161 | page, fsdata); |