author     Linus Torvalds <torvalds@linux-foundation.org>   2018-04-11 13:51:26 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2018-04-11 13:51:26 -0400
commit     8837c70d531a1788f975c366c254a5cb973a5291 (patch)
tree       f7a719d01090efb3bc534f5b0d7f13ec87eecadb /fs
parent     b284d4d5a6785f8cd07eda2646a95782373cd01e (diff)
parent     b93b016313b3ba8003c3b8bb71f569af91f19fc7 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
- almost all of the rest of MM
- kasan updates
- lots of procfs work
- misc things
- lib/ updates
- checkpatch
- rapidio
- ipc/shm updates
- the start of willy's XArray conversion
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (140 commits)
page cache: use xa_lock
xarray: add the xa_lock to the radix_tree_root
fscache: use appropriate radix tree accessors
export __set_page_dirty
unicore32: turn flush_dcache_mmap_lock into a no-op
arm64: turn flush_dcache_mmap_lock into a no-op
mac80211_hwsim: use DEFINE_IDA
radix tree: use GFP_ZONEMASK bits of gfp_t for flags
linux/const.h: refactor _BITUL and _BITULL a bit
linux/const.h: move UL() macro to include/linux/const.h
linux/const.h: prefix include guard of uapi/linux/const.h with _UAPI
xen, mm: allow deferred page initialization for xen pv domains
elf: enforce MAP_FIXED on overlaying elf segments
fs, elf: drop MAP_FIXED usage from elf_map
mm: introduce MAP_FIXED_NOREPLACE
MAINTAINERS: update bouncing aacraid@adaptec.com addresses
fs/dcache.c: add cond_resched() in shrink_dentry_list()
include/linux/kfifo.h: fix comment
ipc/shm.c: shm_split(): remove unneeded test for NULL shm_file_data.vm_ops
kernel/sysctl.c: add kdoc comments to do_proc_do{u}intvec_minmax_conv_param
...
Diffstat (limited to 'fs')
38 files changed, 547 insertions, 453 deletions
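
Most of the fs/ churn below is the mechanical conversion from the top two patches of the series: the per-mapping tree_lock spinlock moves into the radix tree root itself, and mapping->page_tree is renamed mapping->i_pages. A minimal before/after sketch of the idiom, assuming a mapping and index in scope (illustrative only, not copied from any single hunk):

    /* Before: a separate spinlock guards mapping->page_tree. */
    spin_lock_irq(&mapping->tree_lock);
    radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY);
    spin_unlock_irq(&mapping->tree_lock);

    /* After: the lock is embedded in the root, now named i_pages, and is
     * taken through the new xa_lock_*() wrappers. */
    xa_lock_irq(&mapping->i_pages);
    radix_tree_tag_set(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY);
    xa_unlock_irq(&mapping->i_pages);
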
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9370e2feb999..dbc3c0b0142d 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -570,10 +570,11 @@ static int afs_writepages_region(struct address_space *mapping,
 
                 _debug("wback %lx", page->index);
 
-                /* at this point we hold neither mapping->tree_lock nor lock on
-                 * the page itself: the page may be truncated or invalidated
-                 * (changing page->mapping to NULL), or even swizzled back from
-                 * swapper_space to tmpfs file mapping
+                /*
+                 * at this point we hold neither the i_pages lock nor the
+                 * page lock: the page may be truncated or invalidated
+                 * (changing page->mapping to NULL), or even swizzled
+                 * back from swapper_space to tmpfs file mapping
                  */
                 ret = lock_page_killable(page);
                 if (ret < 0) {
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index a0c57c37fa21..be9c3dc048ab 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -19,9 +19,6 @@
  */
 static autofs_wqt_t autofs4_next_wait_queue = 1;
 
-/* These are the signals we allow interrupting a pending mount */
-#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | sigmask(SIGQUIT))
-
 void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
 {
         struct autofs_wait_queue *wq, *nwq;
@@ -486,29 +483,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
          * wq->name.name is NULL iff the lock is already released
          * or the mount has been made catatonic.
          */
-        if (wq->name.name) {
-                /* Block all but "shutdown" signals while waiting */
-                unsigned long shutdown_sigs_mask;
-                unsigned long irqflags;
-                sigset_t oldset;
-
-                spin_lock_irqsave(&current->sighand->siglock, irqflags);
-                oldset = current->blocked;
-                shutdown_sigs_mask = SHUTDOWN_SIGS & ~oldset.sig[0];
-                siginitsetinv(&current->blocked, shutdown_sigs_mask);
-                recalc_sigpending();
-                spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
-
-                wait_event_interruptible(wq->queue, wq->name.name == NULL);
-
-                spin_lock_irqsave(&current->sighand->siglock, irqflags);
-                current->blocked = oldset;
-                recalc_sigpending();
-                spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
-        } else {
-                pr_debug("skipped sleeping\n");
-        }
-
+        wait_event_killable(wq->queue, wq->name.name == NULL);
         status = wq->status;
 
         /*
@@ -574,7 +549,7 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
         kfree(wq->name.name);
         wq->name.name = NULL;   /* Do not wait on this queue */
         wq->status = status;
-        wake_up_interruptible(&wq->queue);
+        wake_up(&wq->queue);
         if (!--wq->wait_ctr)
                 kfree(wq);
         mutex_unlock(&sbi->wq_mutex);
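
The waitq.c change above replaces the hand-rolled sigmask save/restore with wait_event_killable(), which sleeps in TASK_KILLABLE so that only fatal signals interrupt the wait. The switch from wake_up_interruptible() to wake_up() is required, not cosmetic: a killable sleep is an uninterruptible sleep plus wakekill, and an interruptible-only wakeup would never rouse it. A hedged sketch of the calling pattern (the hunk above deliberately ignores the return value; the error path here is illustrative):

    /* Returns 0 once the condition is true, or -ERESTARTSYS if a fatal
     * signal such as SIGKILL arrived first. */
    ret = wait_event_killable(wq->queue, wq->name.name == NULL);
    if (ret)
            return ret;     /* hypothetical bail-out, for illustration */
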
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index ce1824f47ba6..c3deb2e35f20 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -330,6 +330,7 @@ beyond_if:
 #ifdef __alpha__
         regs->gp = ex.a_gpvalue;
 #endif
+        finalize_exec(bprm);
         start_thread(regs, ex.a_entry, current->mm->start_stack);
         return 0;
 }
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index bdb201230bae..41e04183e4ce 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -377,6 +377,11 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
         } else
                 map_addr = vm_mmap(filep, addr, size, prot, type, off);
 
+        if ((type & MAP_FIXED_NOREPLACE) && BAD_ADDR(map_addr))
+                pr_info("%d (%s): Uhuuh, elf segment at %p requested but the memory is mapped already\n",
+                                task_pid_nr(current), current->comm,
+                                (void *)addr);
+
         return(map_addr);
 }
 
@@ -575,7 +580,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                         elf_prot |= PROT_EXEC;
                 vaddr = eppnt->p_vaddr;
                 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
-                        elf_type |= MAP_FIXED;
+                        elf_type |= MAP_FIXED_NOREPLACE;
                 else if (no_base && interp_elf_ex->e_type == ET_DYN)
                         load_addr = -vaddr;
 
@@ -890,7 +895,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
            the correct location in memory. */
         for(i = 0, elf_ppnt = elf_phdata;
             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
-                int elf_prot = 0, elf_flags;
+                int elf_prot = 0, elf_flags, elf_fixed = MAP_FIXED_NOREPLACE;
                 unsigned long k, vaddr;
                 unsigned long total_size = 0;
 
@@ -922,6 +927,13 @@ static int load_elf_binary(struct linux_binprm *bprm)
                                  */
                         }
                 }
+
+                /*
+                 * Some binaries have overlapping elf segments and then
+                 * we have to forcefully map over an existing mapping
+                 * e.g. over this newly established brk mapping.
+                 */
+                elf_fixed = MAP_FIXED;
         }
 
         if (elf_ppnt->p_flags & PF_R)
@@ -939,7 +951,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
          * the ET_DYN load_addr calculations, proceed normally.
          */
         if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
-                elf_flags |= MAP_FIXED;
+                elf_flags |= elf_fixed;
         } else if (loc->elf_ex.e_type == ET_DYN) {
                 /*
                  * This logic is run once for the first LOAD Program
@@ -975,7 +987,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
                         load_bias = ELF_ET_DYN_BASE;
                         if (current->flags & PF_RANDOMIZE)
                                 load_bias += arch_mmap_rnd();
-                        elf_flags |= MAP_FIXED;
+                        elf_flags |= elf_fixed;
                 } else
                         load_bias = 0;
 
@@ -1155,6 +1167,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
         ELF_PLAT_INIT(regs, reloc_func_desc);
 #endif
 
+        finalize_exec(bprm);
         start_thread(regs, elf_entry, bprm->p);
         retval = 0;
 out:
@@ -1234,7 +1247,7 @@ static int load_elf_library(struct file *file)
                         (eppnt->p_filesz +
                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
                         PROT_READ | PROT_WRITE | PROT_EXEC,
-                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
+                        MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
                         (eppnt->p_offset -
                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
         if (error != ELF_PAGESTART(eppnt->p_vaddr))
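
MAP_FIXED_NOREPLACE, added earlier in this series, behaves like MAP_FIXED except that it refuses to unmap whatever already occupies the requested address, failing with EEXIST instead. A self-contained userspace sketch (assumes a kernel that honors the flag; the fallback #define uses the common uapi value, since older libc headers may lack it):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    #ifndef MAP_FIXED_NOREPLACE
    #define MAP_FIXED_NOREPLACE 0x100000
    #endif

    int main(void)
    {
            size_t len = 4096;
            void *a = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            /* Ask for the exact range already occupied by 'a'. */
            void *b = mmap(a, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
                           -1, 0);
            if (b == MAP_FAILED)
                    printf("refused: %s\n", strerror(errno)); /* EEXIST */
            return 0;
    }

This is also why load_elf_binary() keeps a MAP_FIXED fallback in elf_fixed for binaries with genuinely overlapping segments, while probing everything else with MAP_FIXED_NOREPLACE.
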
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 429326b6e2e7..d90993adeffa 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -463,6 +463,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
                 dynaddr);
 #endif
 
+        finalize_exec(bprm);
         /* everything is now ready... get the userspace context ready to roll */
         entryaddr = interp_params.entry_addr ?: exec_params.entry_addr;
         start_thread(regs, entryaddr, current->mm->start_stack);
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 5d6b94475f27..82a48e830018 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -994,6 +994,7 @@ static int load_flat_binary(struct linux_binprm *bprm)
         FLAT_PLAT_INIT(regs);
 #endif
 
+        finalize_exec(bprm);
         pr_debug("start_thread(regs=0x%p, entry=0x%lx, start_stack=0x%lx)\n",
                  regs, start_addr, current->mm->start_stack);
         start_thread(regs, start_addr, current->mm->start_stack);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 562c3e633403..578181cd96b5 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -458,7 +458,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
                         break;
 
                 rcu_read_lock();
-                page = radix_tree_lookup(&mapping->page_tree, pg_index);
+                page = radix_tree_lookup(&mapping->i_pages, pg_index);
                 rcu_read_unlock();
                 if (page && !radix_tree_exceptional_entry(page)) {
                         misses++;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 47a8fe9d22e8..cf87976e389d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3963,11 +3963,11 @@ retry:
 
                         done_index = page->index;
                         /*
-                         * At this point we hold neither mapping->tree_lock nor
-                         * lock on the page itself: the page may be truncated or
-                         * invalidated (changing page->mapping to NULL), or even
-                         * swizzled back from swapper_space to tmpfs file
-                         * mapping
+                         * At this point we hold neither the i_pages lock nor
+                         * the page lock: the page may be truncated or
+                         * invalidated (changing page->mapping to NULL),
+                         * or even swizzled back from swapper_space to
+                         * tmpfs file mapping
                          */
                         if (!trylock_page(page)) {
                                 flush_write_bio(epd);
@@ -5174,13 +5174,13 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb)
                 WARN_ON(!PagePrivate(page));
 
                 clear_page_dirty_for_io(page);
-                spin_lock_irq(&page->mapping->tree_lock);
+                xa_lock_irq(&page->mapping->i_pages);
                 if (!PageDirty(page)) {
-                        radix_tree_tag_clear(&page->mapping->page_tree,
+                        radix_tree_tag_clear(&page->mapping->i_pages,
                                                 page_index(page),
                                                 PAGECACHE_TAG_DIRTY);
                 }
-                spin_unlock_irq(&page->mapping->tree_lock);
+                xa_unlock_irq(&page->mapping->i_pages);
                 ClearPageError(page);
                 unlock_page(page);
         }
diff --git a/fs/buffer.c b/fs/buffer.c
index ec5dd39071e6..f3491074b035 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -185,10 +185,9 @@ EXPORT_SYMBOL(end_buffer_write_sync);
  * we get exclusion from try_to_free_buffers with the blockdev mapping's
  * private_lock.
  *
- * Hack idea: for the blockdev mapping, i_bufferlist_lock contention
+ * Hack idea: for the blockdev mapping, private_lock contention
  * may be quite high.  This code could TryLock the page, and if that
- * succeeds, there is no need to take private_lock. (But if
- * private_lock is contended then so is mapping->tree_lock).
+ * succeeds, there is no need to take private_lock.
  */
 static struct buffer_head *
 __find_get_block_slow(struct block_device *bdev, sector_t block)
@@ -594,20 +593,21 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
  *
  * The caller must hold lock_page_memcg().
  */
-static void __set_page_dirty(struct page *page, struct address_space *mapping,
+void __set_page_dirty(struct page *page, struct address_space *mapping,
                              int warn)
 {
         unsigned long flags;
 
-        spin_lock_irqsave(&mapping->tree_lock, flags);
+        xa_lock_irqsave(&mapping->i_pages, flags);
         if (page->mapping) {   /* Race with truncate? */
                 WARN_ON_ONCE(warn && !PageUptodate(page));
                 account_page_dirtied(page, mapping);
-                radix_tree_tag_set(&mapping->page_tree,
+                radix_tree_tag_set(&mapping->i_pages,
                                 page_index(page), PAGECACHE_TAG_DIRTY);
         }
-        spin_unlock_irqrestore(&mapping->tree_lock, flags);
+        xa_unlock_irqrestore(&mapping->i_pages, flags);
 }
+EXPORT_SYMBOL_GPL(__set_page_dirty);
 
 /*
  * Add a page to the dirty page list.
@@ -1095,7 +1095,7 @@ __getblk_slow(struct block_device *bdev, sector_t block,
  * inode list.
  *
  * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
- * mapping->tree_lock and mapping->host->i_lock.
+ * i_pages lock and mapping->host->i_lock.
  */
 void mark_buffer_dirty(struct buffer_head *bh)
 {
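
With __set_page_dirty() exported, a filesystem that implements its own ->set_page_dirty() can reuse the canonical tag-and-account sequence instead of open-coding it (compare f2fs_set_page_dirty_nobuffers() later in this diff, which still spells it out). A hedged sketch of a caller, honoring the documented requirement that lock_page_memcg() is held:

    lock_page_memcg(page);
    __set_page_dirty(page, mapping, 1);     /* warn if !PageUptodate */
    unlock_page_memcg(page);
    __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
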
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 7cee97b93a61..4bcd4e838b47 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1987,11 +1987,10 @@ wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
         for (i = 0; i < found_pages; i++) {
                 page = wdata->pages[i];
                 /*
-                 * At this point we hold neither mapping->tree_lock nor
-                 * lock on the page itself: the page may be truncated or
-                 * invalidated (changing page->mapping to NULL), or even
-                 * swizzled back from swapper_space to tmpfs file
-                 * mapping
+                 * At this point we hold neither the i_pages lock nor the
+                 * page lock: the page may be truncated or invalidated
+                 * (changing page->mapping to NULL), or even swizzled
+                 * back from swapper_space to tmpfs file mapping
                  */
 
                 if (nr_pages == 0)
diff --git a/fs/dax.c b/fs/dax.c
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -158,11 +158,9 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mo
 }
 
 /*
- * We do not necessarily hold the mapping->tree_lock when we call this
- * function so it is possible that 'entry' is no longer a valid item in the
- * radix tree.  This is okay because all we really need to do is to find the
- * correct waitqueue where tasks might be waiting for that old 'entry' and
- * wake them.
+ * @entry may no longer be the entry at the index in the mapping.
+ * The important information it's conveying is whether the entry at
+ * this index used to be a PMD entry.
  */
 static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
                 pgoff_t index, void *entry, bool wake_all)
@@ -174,7 +172,7 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 
         /*
          * Checking for locked entry and prepare_to_wait_exclusive() happens
-         * under mapping->tree_lock, ditto for entry handling in our callers.
+         * under the i_pages lock, ditto for entry handling in our callers.
          * So at this point all tasks that could have seen our entry locked
          * must be in the waitqueue and the following check will see them.
          */
@@ -183,41 +181,39 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 }
 
 /*
- * Check whether the given slot is locked. The function must be called with
- * mapping->tree_lock held
+ * Check whether the given slot is locked.  Must be called with the i_pages
+ * lock held.
  */
 static inline int slot_locked(struct address_space *mapping, void **slot)
 {
         unsigned long entry = (unsigned long)
-                radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+                radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
         return entry & RADIX_DAX_ENTRY_LOCK;
 }
 
 /*
- * Mark the given slot is locked. The function must be called with
- * mapping->tree_lock held
+ * Mark the given slot as locked.  Must be called with the i_pages lock held.
  */
 static inline void *lock_slot(struct address_space *mapping, void **slot)
 {
         unsigned long entry = (unsigned long)
-                radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+                radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
 
         entry |= RADIX_DAX_ENTRY_LOCK;
-        radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
+        radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
         return (void *)entry;
 }
 
 /*
- * Mark the given slot is unlocked. The function must be called with
- * mapping->tree_lock held
+ * Mark the given slot as unlocked.  Must be called with the i_pages lock held.
  */
 static inline void *unlock_slot(struct address_space *mapping, void **slot)
 {
         unsigned long entry = (unsigned long)
-                radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+                radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
 
         entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
-        radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
+        radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
         return (void *)entry;
 }
 
@@ -228,7 +224,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
  * put_locked_mapping_entry() when he locked the entry and now wants to
  * unlock it.
  *
- * The function must be called with mapping->tree_lock held.
+ * Must be called with the i_pages lock held.
  */
 static void *get_unlocked_mapping_entry(struct address_space *mapping,
                 pgoff_t index, void ***slotp)
@@ -241,7 +237,7 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
         ewait.wait.func = wake_exceptional_entry_func;
 
         for (;;) {
-                entry = __radix_tree_lookup(&mapping->page_tree, index, NULL,
+                entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
                                           &slot);
                 if (!entry ||
                     WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) ||
@@ -254,10 +250,10 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
                 wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key);
                 prepare_to_wait_exclusive(wq, &ewait.wait,
                                           TASK_UNINTERRUPTIBLE);
-                spin_unlock_irq(&mapping->tree_lock);
+                xa_unlock_irq(&mapping->i_pages);
                 schedule();
                 finish_wait(wq, &ewait.wait);
-                spin_lock_irq(&mapping->tree_lock);
+                xa_lock_irq(&mapping->i_pages);
         }
 }
 
@@ -266,15 +262,15 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
 {
         void *entry, **slot;
 
-        spin_lock_irq(&mapping->tree_lock);
-        entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot);
+        xa_lock_irq(&mapping->i_pages);
+        entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot);
         if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
                          !slot_locked(mapping, slot))) {
-                spin_unlock_irq(&mapping->tree_lock);
+                xa_unlock_irq(&mapping->i_pages);
                 return;
         }
         unlock_slot(mapping, slot);
-        spin_unlock_irq(&mapping->tree_lock);
+        xa_unlock_irq(&mapping->i_pages);
         dax_wake_mapping_entry_waiter(mapping, index, entry, false);
 }
 
@@ -388,7 +384,7 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
         void *entry, **slot;
 
 restart:
-        spin_lock_irq(&mapping->tree_lock);
+        xa_lock_irq(&mapping->i_pages);
         entry = get_unlocked_mapping_entry(mapping, index, &slot);
 
         if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) {
@@ -420,12 +416,12 @@ restart:
                 if (pmd_downgrade) {
                         /*
                          * Make sure 'entry' remains valid while we drop
-                         * mapping->tree_lock.
+                         * the i_pages lock.
                          */
                         entry = lock_slot(mapping, slot);
                 }
 
-                spin_unlock_irq(&mapping->tree_lock);
+                xa_unlock_irq(&mapping->i_pages);
                 /*
                  * Besides huge zero pages the only other thing that gets
                  * downgraded are empty entries which don't need to be
@@ -442,27 +438,27 @@ restart:
                         put_locked_mapping_entry(mapping, index);
                         return ERR_PTR(err);
                 }
-                spin_lock_irq(&mapping->tree_lock);
+                xa_lock_irq(&mapping->i_pages);
 
                 if (!entry) {
                         /*
-                         * We needed to drop the page_tree lock while calling
+                         * We needed to drop the i_pages lock while calling
                          * radix_tree_preload() and we didn't have an entry to
                          * lock.  See if another thread inserted an entry at
                          * our index during this time.
                          */
-                        entry = __radix_tree_lookup(&mapping->page_tree, index,
+                        entry = __radix_tree_lookup(&mapping->i_pages, index,
                                         NULL, &slot);
                         if (entry) {
                                 radix_tree_preload_end();
-                                spin_unlock_irq(&mapping->tree_lock);
+                                xa_unlock_irq(&mapping->i_pages);
                                 goto restart;
                         }
                 }
 
                 if (pmd_downgrade) {
                         dax_disassociate_entry(entry, mapping, false);
-                        radix_tree_delete(&mapping->page_tree, index);
+                        radix_tree_delete(&mapping->i_pages, index);
                         mapping->nrexceptional--;
                         dax_wake_mapping_entry_waiter(mapping, index, entry,
                                         true);
@@ -470,11 +466,11 @@ restart:
 
                 entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY);
 
-                err = __radix_tree_insert(&mapping->page_tree, index,
+                err = __radix_tree_insert(&mapping->i_pages, index,
                                 dax_radix_order(entry), entry);
                 radix_tree_preload_end();
                 if (err) {
-                        spin_unlock_irq(&mapping->tree_lock);
+                        xa_unlock_irq(&mapping->i_pages);
                         /*
                          * Our insertion of a DAX entry failed, most likely
                          * because we were inserting a PMD entry and it
@@ -487,12 +483,12 @@ restart:
                 }
                 /* Good, we have inserted empty locked entry into the tree. */
                 mapping->nrexceptional++;
-                spin_unlock_irq(&mapping->tree_lock);
+                xa_unlock_irq(&mapping->i_pages);
                 return entry;
         }
         entry = lock_slot(mapping, slot);
 out_unlock:
-        spin_unlock_irq(&mapping->tree_lock);
+        xa_unlock_irq(&mapping->i_pages);
         return entry;
 }
 
@@ -501,23 +497,23 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping,
 {
         int ret = 0;
         void *entry;
-        struct radix_tree_root *page_tree = &mapping->page_tree;
+        struct radix_tree_root *pages = &mapping->i_pages;
 
-        spin_lock_irq(&mapping->tree_lock);
+        xa_lock_irq(pages);
         entry = get_unlocked_mapping_entry(mapping, index, NULL);
         if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)))
                 goto out;
         if (!trunc &&
-            (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
-             radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)))
+            (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) ||
+             radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE)))
                 goto out;
         dax_disassociate_entry(entry, mapping, trunc);
-        radix_tree_delete(page_tree, index);
+        radix_tree_delete(pages, index);
         mapping->nrexceptional--;
         ret = 1;
 out:
         put_unlocked_mapping_entry(mapping, index, entry);
-        spin_unlock_irq(&mapping->tree_lock);
+        xa_unlock_irq(pages);
         return ret;
 }
 /*
@@ -587,7 +583,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
                 void *entry, pfn_t pfn_t,
                 unsigned long flags, bool dirty)
 {
-        struct radix_tree_root *page_tree = &mapping->page_tree;
+        struct radix_tree_root *pages = &mapping->i_pages;
         unsigned long pfn = pfn_t_to_pfn(pfn_t);
         pgoff_t index = vmf->pgoff;
         void *new_entry;
@@ -604,7 +600,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
                 unmap_mapping_pages(mapping, vmf->pgoff, 1, false);
         }
 
-        spin_lock_irq(&mapping->tree_lock);
+        xa_lock_irq(pages);
         new_entry = dax_radix_locked_entry(pfn, flags);
         if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
                 dax_disassociate_entry(entry, mapping, false);
@@ -624,17 +620,17 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
                 void **slot;
                 void *ret;
 
-                ret = __radix_tree_lookup(page_tree, index, &node, &slot);
+                ret = __radix_tree_lookup(pages, index, &node, &slot);
                 WARN_ON_ONCE(ret != entry);
-                __radix_tree_replace(page_tree, node, slot,
+                __radix_tree_replace(pages, node, slot,
                                      new_entry, NULL);
                 entry = new_entry;
         }
 
         if (dirty)
-                radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
+                radix_tree_tag_set(pages, index, PAGECACHE_TAG_DIRTY);
 
-        spin_unlock_irq(&mapping->tree_lock);
+        xa_unlock_irq(pages);
         return entry;
 }
 
@@ -723,7 +719,7 @@ unlock_pte:
 static int dax_writeback_one(struct dax_device *dax_dev,
                 struct address_space *mapping, pgoff_t index, void *entry)
 {
-        struct radix_tree_root *page_tree = &mapping->page_tree;
+        struct radix_tree_root *pages = &mapping->i_pages;
         void *entry2, **slot;
         unsigned long pfn;
         long ret = 0;
@@ -736,7 +732,7 @@ static int dax_writeback_one(struct dax_device *dax_dev,
         if (WARN_ON(!radix_tree_exceptional_entry(entry)))
                 return -EIO;
 
-        spin_lock_irq(&mapping->tree_lock);
+        xa_lock_irq(pages);
         entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
         /* Entry got punched out / reallocated? */
         if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2)))
@@ -755,7 +751,7 @@ static int dax_writeback_one(struct dax_device *dax_dev,
         }
 
         /* Another fsync thread may have already written back this entry */
-        if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+        if (!radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE))
                 goto put_unlocked;
         /* Lock the entry to serialize with page faults */
         entry = lock_slot(mapping, slot);
@@ -763,11 +759,11 @@ static int dax_writeback_one(struct dax_device *dax_dev,
          * We can clear the tag now but we have to be careful so that concurrent
          * dax_writeback_one() calls for the same index cannot finish before we
          * actually flush the caches. This is achieved as the calls will look
-         * at the entry only under tree_lock and once they do that they will
-         * see the entry locked and wait for it to unlock.
+         * at the entry only under the i_pages lock and once they do that
+         * they will see the entry locked and wait for it to unlock.
          */
-        radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
-        spin_unlock_irq(&mapping->tree_lock);
+        radix_tree_tag_clear(pages, index, PAGECACHE_TAG_TOWRITE);
+        xa_unlock_irq(pages);
 
         /*
          * Even if dax_writeback_mapping_range() was given a wbc->range_start
@@ -787,16 +783,16 @@ static int dax_writeback_one(struct dax_device *dax_dev,
          * the pfn mappings are writeprotected and fault waits for mapping
          * entry lock.
          */
-        spin_lock_irq(&mapping->tree_lock);
-        radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY);
-        spin_unlock_irq(&mapping->tree_lock);
+        xa_lock_irq(pages);
+        radix_tree_tag_clear(pages, index, PAGECACHE_TAG_DIRTY);
+        xa_unlock_irq(pages);
         trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT);
         put_locked_mapping_entry(mapping, index);
         return ret;
 
 put_unlocked:
         put_unlocked_mapping_entry(mapping, index, entry2);
-        spin_unlock_irq(&mapping->tree_lock);
+        xa_unlock_irq(pages);
         return ret;
 }
 
802 | 798 | ||
@@ -1566,21 +1562,21 @@ static int dax_insert_pfn_mkwrite(struct vm_fault *vmf, | |||
1566 | pgoff_t index = vmf->pgoff; | 1562 | pgoff_t index = vmf->pgoff; |
1567 | int vmf_ret, error; | 1563 | int vmf_ret, error; |
1568 | 1564 | ||
1569 | spin_lock_irq(&mapping->tree_lock); | 1565 | xa_lock_irq(&mapping->i_pages); |
1570 | entry = get_unlocked_mapping_entry(mapping, index, &slot); | 1566 | entry = get_unlocked_mapping_entry(mapping, index, &slot); |
1571 | /* Did we race with someone splitting entry or so? */ | 1567 | /* Did we race with someone splitting entry or so? */ |
1572 | if (!entry || | 1568 | if (!entry || |
1573 | (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) || | 1569 | (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) || |
1574 | (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) { | 1570 | (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) { |
1575 | put_unlocked_mapping_entry(mapping, index, entry); | 1571 | put_unlocked_mapping_entry(mapping, index, entry); |
1576 | spin_unlock_irq(&mapping->tree_lock); | 1572 | xa_unlock_irq(&mapping->i_pages); |
1577 | trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf, | 1573 | trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf, |
1578 | VM_FAULT_NOPAGE); | 1574 | VM_FAULT_NOPAGE); |
1579 | return VM_FAULT_NOPAGE; | 1575 | return VM_FAULT_NOPAGE; |
1580 | } | 1576 | } |
1581 | radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY); | 1577 | radix_tree_tag_set(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY); |
1582 | entry = lock_slot(mapping, slot); | 1578 | entry = lock_slot(mapping, slot); |
1583 | spin_unlock_irq(&mapping->tree_lock); | 1579 | xa_unlock_irq(&mapping->i_pages); |
1584 | switch (pe_size) { | 1580 | switch (pe_size) { |
1585 | case PE_SIZE_PTE: | 1581 | case PE_SIZE_PTE: |
1586 | error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn); | 1582 | error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn); |
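
Two dax-specific details of the conversion above are worth calling out: helpers that hand a lock to radix_tree_deref_slot_protected() for lockdep checking now name the spinlock embedded in the root directly, mapping->i_pages.xa_lock, and functions that cached a struct radix_tree_root *page_tree now call the local variable pages. A hedged sketch of why both spellings refer to the same lock (wrapper definitions paraphrased from the xa_lock patch, not quoted):

    /* Roughly: the xa_lock_*() wrappers are thin macros over the
     * spinlock member that now lives inside the tree root. */
    #define xa_lock_irq(xa)         spin_lock_irq(&(xa)->xa_lock)
    #define xa_unlock_irq(xa)       spin_unlock_irq(&(xa)->xa_lock)
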
diff --git a/fs/dcache.c b/fs/dcache.c
index 593079176123..86d2de63461e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -257,11 +257,25 @@ static void __d_free(struct rcu_head *head)
         kmem_cache_free(dentry_cache, dentry);
 }
 
+static void __d_free_external_name(struct rcu_head *head)
+{
+        struct external_name *name = container_of(head, struct external_name,
+                                                  u.head);
+
+        mod_node_page_state(page_pgdat(virt_to_page(name)),
+                            NR_INDIRECTLY_RECLAIMABLE_BYTES,
+                            -ksize(name));
+
+        kfree(name);
+}
+
 static void __d_free_external(struct rcu_head *head)
 {
         struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
-        kfree(external_name(dentry));
-        kmem_cache_free(dentry_cache, dentry);
+
+        __d_free_external_name(&external_name(dentry)->u.head);
+
+        kmem_cache_free(dentry_cache, dentry);
 }
 
 static inline int dname_external(const struct dentry *dentry)
@@ -291,7 +305,7 @@ void release_dentry_name_snapshot(struct name_snapshot *name)
                 struct external_name *p;
                 p = container_of(name->name, struct external_name, name[0]);
                 if (unlikely(atomic_dec_and_test(&p->u.count)))
-                        kfree_rcu(p, u.head);
+                        call_rcu(&p->u.head, __d_free_external_name);
         }
 }
 EXPORT_SYMBOL(release_dentry_name_snapshot);
@@ -1038,6 +1052,8 @@ static void shrink_dentry_list(struct list_head *list)
         while (!list_empty(list)) {
                 struct dentry *dentry, *parent;
 
+                cond_resched();
+
                 dentry = list_entry(list->prev, struct dentry, d_lru);
                 spin_lock(&dentry->d_lock);
                 rcu_read_lock();
@@ -1191,7 +1207,6 @@ void shrink_dcache_sb(struct super_block *sb)
 
                 this_cpu_sub(nr_dentry_unused, freed);
                 shrink_dentry_list(&dispose);
-                cond_resched();
         } while (list_lru_count(&sb->s_dentry_lru) > 0);
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
@@ -1473,7 +1488,6 @@ void shrink_dcache_parent(struct dentry *parent)
                         break;
 
                 shrink_dentry_list(&data.dispose);
-                cond_resched();
         }
 }
 EXPORT_SYMBOL(shrink_dcache_parent);
@@ -1600,7 +1614,6 @@ void d_invalidate(struct dentry *dentry)
                         detach_mounts(data.mountpoint);
                         dput(data.mountpoint);
                 }
-                cond_resched();
         }
 }
 EXPORT_SYMBOL(d_invalidate);
@@ -1617,6 +1630,7 @@ EXPORT_SYMBOL(d_invalidate);
 
 struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 {
+        struct external_name *ext = NULL;
         struct dentry *dentry;
         char *dname;
         int err;
@@ -1637,14 +1651,14 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
                 dname = dentry->d_iname;
         } else if (name->len > DNAME_INLINE_LEN-1) {
                 size_t size = offsetof(struct external_name, name[1]);
-                struct external_name *p = kmalloc(size + name->len,
-                                                  GFP_KERNEL_ACCOUNT);
-                if (!p) {
+
+                ext = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT);
+                if (!ext) {
                         kmem_cache_free(dentry_cache, dentry);
                         return NULL;
                 }
-                atomic_set(&p->u.count, 1);
-                dname = p->name;
+                atomic_set(&ext->u.count, 1);
+                dname = ext->name;
         } else {
                 dname = dentry->d_iname;
         }
@@ -1683,6 +1697,12 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
                 }
         }
 
+        if (unlikely(ext)) {
+                pg_data_t *pgdat = page_pgdat(virt_to_page(ext));
+                mod_node_page_state(pgdat, NR_INDIRECTLY_RECLAIMABLE_BYTES,
+                                    ksize(ext));
+        }
+
         this_cpu_inc(nr_dentry);
 
         return dentry;
@@ -2770,7 +2790,7 @@ static void copy_name(struct dentry *dentry, struct dentry *target)
                 dentry->d_name.hash_len = target->d_name.hash_len;
         }
         if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
-                kfree_rcu(old_name, u.head);
+                call_rcu(&old_name->u.head, __d_free_external_name);
 }
 
 /*
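
The dcache hunks above make two independent changes: cond_resched() moves into shrink_dentry_list() itself, so every caller gets a reschedule point per dentry instead of one per batch, and externally allocated dentry names are now accounted as indirectly reclaimable memory. The accounting pattern in isolation (identifiers taken from the hunks above; the pairing is the invariant that matters):

    /* On allocation: charge the slab-reported size of the object. */
    ext = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT);
    if (ext)
            mod_node_page_state(page_pgdat(virt_to_page(ext)),
                                NR_INDIRECTLY_RECLAIMABLE_BYTES, ksize(ext));

    /* On (RCU-deferred) free: subtract exactly the same amount. */
    mod_node_page_state(page_pgdat(virt_to_page(ext)),
                        NR_INDIRECTLY_RECLAIMABLE_BYTES, -ksize(ext));
    kfree(ext);

Charging ksize() rather than the requested length keeps the counter balanced even when kmalloc rounds the allocation up to a larger slab size.
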
diff --git a/fs/exec.c b/fs/exec.c
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -257,7 +257,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
                  * to work from.
                  */
                 limit = _STK_LIM / 4 * 3;
-                limit = min(limit, rlimit(RLIMIT_STACK) / 4);
+                limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
                 if (size > limit)
                         goto fail;
         }
@@ -411,6 +411,11 @@ static int bprm_mm_init(struct linux_binprm *bprm)
         if (!mm)
                 goto err;
 
+        /* Save current stack limit for all calculations made during exec. */
+        task_lock(current->group_leader);
+        bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
+        task_unlock(current->group_leader);
+
         err = __bprm_mm_init(bprm);
         if (err)
                 goto err;
@@ -697,7 +702,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
 
 #ifdef CONFIG_STACK_GROWSUP
         /* Limit stack size */
-        stack_base = rlimit_max(RLIMIT_STACK);
+        stack_base = bprm->rlim_stack.rlim_max;
         if (stack_base > STACK_SIZE_MAX)
                 stack_base = STACK_SIZE_MAX;
 
@@ -770,7 +775,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
          * Align this down to a page boundary as expand_stack
          * will align it up.
          */
-        rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK;
+        rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
 #ifdef CONFIG_STACK_GROWSUP
         if (stack_size + stack_expand > rlim_stack)
                 stack_base = vma->vm_start + rlim_stack;
@@ -1341,11 +1346,11 @@ void setup_new_exec(struct linux_binprm * bprm)
                  * RLIMIT_STACK, but after the point of no return to avoid
                  * needing to clean up the change on failure.
                  */
-                if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
-                        current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
+                if (bprm->rlim_stack.rlim_cur > _STK_LIM)
+                        bprm->rlim_stack.rlim_cur = _STK_LIM;
         }
 
-        arch_pick_mmap_layout(current->mm);
+        arch_pick_mmap_layout(current->mm, &bprm->rlim_stack);
 
         current->sas_ss_sp = current->sas_ss_size = 0;
 
@@ -1378,6 +1383,16 @@ void setup_new_exec(struct linux_binprm * bprm)
 }
 EXPORT_SYMBOL(setup_new_exec);
 
+/* Runs immediately before start_thread() takes over. */
+void finalize_exec(struct linux_binprm *bprm)
+{
+        /* Store any stack rlimit changes before starting thread. */
+        task_lock(current->group_leader);
+        current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
+        task_unlock(current->group_leader);
+}
+EXPORT_SYMBOL(finalize_exec);
+
 /*
  * Prepare credentials and lock ->cred_guard_mutex.
  * install_exec_creds() commits the new creds and drops the lock.
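
Taken together, the exec.c hunks give RLIMIT_STACK a snapshot-and-publish lifecycle, which is why every binfmt loader earlier in this diff gained a finalize_exec(bprm) call just before start_thread(). A condensed, hedged sketch of the flow (function boundaries collapsed; not compilable as-is):

    /* bprm_mm_init(): snapshot once, under task_lock(). */
    bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];

    /* get_arg_page(), setup_arg_pages(), setup_new_exec(): every
     * exec-time calculation reads only the snapshot, including the
     * secureexec clamp. */
    if (bprm->rlim_stack.rlim_cur > _STK_LIM)
            bprm->rlim_stack.rlim_cur = _STK_LIM;

    /* finalize_exec(): publish the final value right before the new
     * program starts. */
    current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;

A concurrent setrlimit() from another thread can no longer skew layout decisions made partway through an exec.
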
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index db50686f5096..02237d4d91f5 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2424,12 +2424,12 @@ void f2fs_set_page_dirty_nobuffers(struct page *page)
         SetPageDirty(page);
         spin_unlock(&mapping->private_lock);
 
-        spin_lock_irqsave(&mapping->tree_lock, flags);
+        xa_lock_irqsave(&mapping->i_pages, flags);
         WARN_ON_ONCE(!PageUptodate(page));
         account_page_dirtied(page, mapping);
-        radix_tree_tag_set(&mapping->page_tree,
+        radix_tree_tag_set(&mapping->i_pages,
                         page_index(page), PAGECACHE_TAG_DIRTY);
-        spin_unlock_irqrestore(&mapping->tree_lock, flags);
+        xa_unlock_irqrestore(&mapping->i_pages, flags);
         unlock_page_memcg(page);
 
         __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index fe661274ff10..8c9c2f31b253 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -732,10 +732,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 
         if (bit_pos == NR_DENTRY_IN_BLOCK &&
             !truncate_hole(dir, page->index, page->index + 1)) {
-                spin_lock_irqsave(&mapping->tree_lock, flags);
-                radix_tree_tag_clear(&mapping->page_tree, page_index(page),
+                xa_lock_irqsave(&mapping->i_pages, flags);
+                radix_tree_tag_clear(&mapping->i_pages, page_index(page),
                                      PAGECACHE_TAG_DIRTY);
-                spin_unlock_irqrestore(&mapping->tree_lock, flags);
+                xa_unlock_irqrestore(&mapping->i_pages, flags);
 
                 clear_page_dirty_for_io(page);
                 ClearPagePrivate(page);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index bfb7a4a3a929..9327411fd93b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1015,7 +1015,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
         unsigned int init_segno = segno;
         struct gc_inode_list gc_list = {
                 .ilist = LIST_HEAD_INIT(gc_list.ilist),
-                .iroot = RADIX_TREE_INIT(GFP_NOFS),
+                .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
         };
 
         trace_f2fs_gc_begin(sbi->sb, sync, background,
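
The one-line gc.c change tracks a signature change: once the spinlock moved into struct radix_tree_root, RADIX_TREE_INIT() needs the variable's name so the embedded lock can be statically initialized (for lockdep, much like DEFINE_SPINLOCK). This is an inference from the hunk, not a quote of the macro:

    /* Before: .iroot = RADIX_TREE_INIT(GFP_NOFS), */
    /* After: the initializer must be told which object it is building. */
    struct gc_inode_list gc_list = {
            .ilist = LIST_HEAD_INIT(gc_list.ilist),
            .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
    };
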
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 3b77d6421218..265da200daa8 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -226,10 +226,10 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
         kunmap_atomic(src_addr);
         set_page_dirty(dn.inode_page);
 
-        spin_lock_irqsave(&mapping->tree_lock, flags);
-        radix_tree_tag_clear(&mapping->page_tree, page_index(page),
+        xa_lock_irqsave(&mapping->i_pages, flags);
+        radix_tree_tag_clear(&mapping->i_pages, page_index(page),
                              PAGECACHE_TAG_DIRTY);
-        spin_unlock_irqrestore(&mapping->tree_lock, flags);
+        xa_unlock_irqrestore(&mapping->i_pages, flags);
 
         set_inode_flag(inode, FI_APPEND_WRITE);
         set_inode_flag(inode, FI_DATA_EXIST);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9a99243054ba..f202398e20ea 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c | |||
@@ -91,11 +91,11 @@ static void clear_node_page_dirty(struct page *page) | |||
91 | unsigned int long flags; | 91 | unsigned int long flags; |
92 | 92 | ||
93 | if (PageDirty(page)) { | 93 | if (PageDirty(page)) { |
94 | spin_lock_irqsave(&mapping->tree_lock, flags); | 94 | xa_lock_irqsave(&mapping->i_pages, flags); |
95 | radix_tree_tag_clear(&mapping->page_tree, | 95 | radix_tree_tag_clear(&mapping->i_pages, |
96 | page_index(page), | 96 | page_index(page), |
97 | PAGECACHE_TAG_DIRTY); | 97 | PAGECACHE_TAG_DIRTY); |
98 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | 98 | xa_unlock_irqrestore(&mapping->i_pages, flags); |
99 | 99 | ||
100 | clear_page_dirty_for_io(page); | 100 | clear_page_dirty_for_io(page); |
101 | dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); | 101 | dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES); |
@@ -1161,7 +1161,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) | |||
1161 | f2fs_bug_on(sbi, check_nid_range(sbi, nid)); | 1161 | f2fs_bug_on(sbi, check_nid_range(sbi, nid)); |
1162 | 1162 | ||
1163 | rcu_read_lock(); | 1163 | rcu_read_lock(); |
1164 | apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid); | 1164 | apage = radix_tree_lookup(&NODE_MAPPING(sbi)->i_pages, nid); |
1165 | rcu_read_unlock(); | 1165 | rcu_read_unlock(); |
1166 | if (apage) | 1166 | if (apage) |
1167 | return; | 1167 | return; |
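Editor's note: the ra_node_page() hunk is a reminder that read-only peeks at the tree need no lock at all; the RCU protection is unchanged, only the root field was renamed. A sketch of the lockless lookup, assuming kernel context (helper name invented):

	#include <linux/pagemap.h>
	#include <linux/radix-tree.h>
	#include <linux/rcupdate.h>

	/* Sketch: a read-side lookup takes only rcu_read_lock(); the
	 * i_pages lock is needed for modifications and tag updates,
	 * not for peeking. */
	static struct page *example_peek(struct address_space *mapping,
					 unsigned long index)
	{
		struct page *page;

		rcu_read_lock();
		page = radix_tree_lookup(&mapping->i_pages, index);
		rcu_read_unlock();
		/* No reference taken: usable only as a hint, as in
		 * ra_node_page() above. */
		return page;
	}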
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 1280f915079b..4b12ba70a895 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -347,9 +347,9 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) | |||
347 | * By the time control reaches here, RCU grace period has passed | 347 | * By the time control reaches here, RCU grace period has passed |
348 | * since I_WB_SWITCH assertion and all wb stat update transactions | 348 | * since I_WB_SWITCH assertion and all wb stat update transactions |
349 | * between unlocked_inode_to_wb_begin/end() are guaranteed to be | 349 | * between unlocked_inode_to_wb_begin/end() are guaranteed to be |
350 | * synchronizing against mapping->tree_lock. | 350 | * synchronizing against the i_pages lock. |
351 | * | 351 | * |
352 | * Grabbing old_wb->list_lock, inode->i_lock and mapping->tree_lock | 352 | * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock |
353 | * gives us exclusion against all wb related operations on @inode | 353 | * gives us exclusion against all wb related operations on @inode |
354 | * including IO list manipulations and stat updates. | 354 | * including IO list manipulations and stat updates. |
355 | */ | 355 | */ |
@@ -361,7 +361,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) | |||
361 | spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING); | 361 | spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING); |
362 | } | 362 | } |
363 | spin_lock(&inode->i_lock); | 363 | spin_lock(&inode->i_lock); |
364 | spin_lock_irq(&mapping->tree_lock); | 364 | xa_lock_irq(&mapping->i_pages); |
365 | 365 | ||
366 | /* | 366 | /* |
367 | * Once I_FREEING is visible under i_lock, the eviction path owns | 367 | * Once I_FREEING is visible under i_lock, the eviction path owns |
@@ -373,22 +373,22 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) | |||
373 | /* | 373 | /* |
374 | * Count and transfer stats. Note that PAGECACHE_TAG_DIRTY points | 374 | * Count and transfer stats. Note that PAGECACHE_TAG_DIRTY points |
375 | * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to | 375 | * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to |
376 | * pages actually under underwriteback. | 376 | * pages actually under writeback. |
377 | */ | 377 | */ |
378 | radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0, | 378 | radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0, |
379 | PAGECACHE_TAG_DIRTY) { | 379 | PAGECACHE_TAG_DIRTY) { |
380 | struct page *page = radix_tree_deref_slot_protected(slot, | 380 | struct page *page = radix_tree_deref_slot_protected(slot, |
381 | &mapping->tree_lock); | 381 | &mapping->i_pages.xa_lock); |
382 | if (likely(page) && PageDirty(page)) { | 382 | if (likely(page) && PageDirty(page)) { |
383 | dec_wb_stat(old_wb, WB_RECLAIMABLE); | 383 | dec_wb_stat(old_wb, WB_RECLAIMABLE); |
384 | inc_wb_stat(new_wb, WB_RECLAIMABLE); | 384 | inc_wb_stat(new_wb, WB_RECLAIMABLE); |
385 | } | 385 | } |
386 | } | 386 | } |
387 | 387 | ||
388 | radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0, | 388 | radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0, |
389 | PAGECACHE_TAG_WRITEBACK) { | 389 | PAGECACHE_TAG_WRITEBACK) { |
390 | struct page *page = radix_tree_deref_slot_protected(slot, | 390 | struct page *page = radix_tree_deref_slot_protected(slot, |
391 | &mapping->tree_lock); | 391 | &mapping->i_pages.xa_lock); |
392 | if (likely(page)) { | 392 | if (likely(page)) { |
393 | WARN_ON_ONCE(!PageWriteback(page)); | 393 | WARN_ON_ONCE(!PageWriteback(page)); |
394 | dec_wb_stat(old_wb, WB_WRITEBACK); | 394 | dec_wb_stat(old_wb, WB_WRITEBACK); |
@@ -430,7 +430,7 @@ skip_switch: | |||
430 | */ | 430 | */ |
431 | smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH); | 431 | smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH); |
432 | 432 | ||
433 | spin_unlock_irq(&mapping->tree_lock); | 433 | xa_unlock_irq(&mapping->i_pages); |
434 | spin_unlock(&inode->i_lock); | 434 | spin_unlock(&inode->i_lock); |
435 | spin_unlock(&new_wb->list_lock); | 435 | spin_unlock(&new_wb->list_lock); |
436 | spin_unlock(&old_wb->list_lock); | 436 | spin_unlock(&old_wb->list_lock); |
@@ -506,8 +506,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) | |||
506 | 506 | ||
507 | /* | 507 | /* |
508 | * In addition to synchronizing among switchers, I_WB_SWITCH tells | 508 | * In addition to synchronizing among switchers, I_WB_SWITCH tells |
509 | * the RCU protected stat update paths to grab the mapping's | 509 | * the RCU protected stat update paths to grab the i_page |
510 | * tree_lock so that stat transfer can synchronize against them. | 510 | * lock so that stat transfer can synchronize against them. |
511 | * Let's continue after I_WB_SWITCH is guaranteed to be visible. | 511 | * Let's continue after I_WB_SWITCH is guaranteed to be visible. |
512 | */ | 512 | */ |
513 | call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); | 513 | call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); |
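Editor's note: the writeback-switch hunk is the one place the conversion is not purely cosmetic; radix_tree_deref_slot_protected() wants the protecting spinlock by address, so callers now name the embedded i_pages.xa_lock directly. A sketch of a tagged walk under that lock (the counting function is invented):

	#include <linux/pagemap.h>
	#include <linux/radix-tree.h>

	static unsigned long example_count_dirty(struct address_space *mapping)
	{
		struct radix_tree_iter iter;
		void **slot;
		unsigned long nr = 0;

		xa_lock_irq(&mapping->i_pages);
		radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
					   PAGECACHE_TAG_DIRTY) {
			struct page *page = radix_tree_deref_slot_protected(slot,
							&mapping->i_pages.xa_lock);
			if (page)
				nr++;
		}
		xa_unlock_irq(&mapping->i_pages);
		return nr;
	}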
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 7dc55b93a830..97137d7ec5ee 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c | |||
@@ -832,7 +832,7 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, | |||
832 | /* Clear pointers back to the netfs */ | 832 | /* Clear pointers back to the netfs */ |
833 | cookie->netfs_data = NULL; | 833 | cookie->netfs_data = NULL; |
834 | cookie->def = NULL; | 834 | cookie->def = NULL; |
835 | BUG_ON(cookie->stores.rnode); | 835 | BUG_ON(!radix_tree_empty(&cookie->stores)); |
836 | 836 | ||
837 | if (cookie->parent) { | 837 | if (cookie->parent) { |
838 | ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0); | 838 | ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0); |
diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 1085ca12e25c..20e0d0a4dc8c 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c | |||
@@ -973,7 +973,7 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj | |||
973 | * retire the object instead. | 973 | * retire the object instead. |
974 | */ | 974 | */ |
975 | if (!fscache_use_cookie(object)) { | 975 | if (!fscache_use_cookie(object)) { |
976 | ASSERT(object->cookie->stores.rnode == NULL); | 976 | ASSERT(radix_tree_empty(&object->cookie->stores)); |
977 | set_bit(FSCACHE_OBJECT_RETIRED, &object->flags); | 977 | set_bit(FSCACHE_OBJECT_RETIRED, &object->flags); |
978 | _leave(" [no cookie]"); | 978 | _leave(" [no cookie]"); |
979 | return transit_to(KILL_OBJECT); | 979 | return transit_to(KILL_OBJECT); |
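Editor's note: the fscache hunks swap direct peeks at the root's internal rnode pointer for the radix_tree_empty() accessor, which keeps the representation private ahead of the coming XArray rework. A minimal sketch (helper name invented):

	#include <linux/bug.h>
	#include <linux/radix-tree.h>

	/* Sketch: assert a tree has been drained without reaching into
	 * its internal representation. */
	static void example_assert_drained(struct radix_tree_root *root)
	{
		WARN_ON(!radix_tree_empty(root));
	}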
diff --git a/fs/inode.c b/fs/inode.c index b153aeaa61ea..13ceb98c3bd3 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -348,8 +348,7 @@ EXPORT_SYMBOL(inc_nlink); | |||
348 | 348 | ||
349 | static void __address_space_init_once(struct address_space *mapping) | 349 | static void __address_space_init_once(struct address_space *mapping) |
350 | { | 350 | { |
351 | INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT); | 351 | INIT_RADIX_TREE(&mapping->i_pages, GFP_ATOMIC | __GFP_ACCOUNT); |
352 | spin_lock_init(&mapping->tree_lock); | ||
353 | init_rwsem(&mapping->i_mmap_rwsem); | 352 | init_rwsem(&mapping->i_mmap_rwsem); |
354 | INIT_LIST_HEAD(&mapping->private_list); | 353 | INIT_LIST_HEAD(&mapping->private_list); |
355 | spin_lock_init(&mapping->private_lock); | 354 | spin_lock_init(&mapping->private_lock); |
@@ -504,14 +503,14 @@ EXPORT_SYMBOL(__remove_inode_hash); | |||
504 | void clear_inode(struct inode *inode) | 503 | void clear_inode(struct inode *inode) |
505 | { | 504 | { |
506 | /* | 505 | /* |
507 | * We have to cycle tree_lock here because reclaim can be still in the | 506 | * We have to cycle the i_pages lock here because reclaim can be in the |
508 | * process of removing the last page (in __delete_from_page_cache()) | 507 | * process of removing the last page (in __delete_from_page_cache()) |
509 | * and we must not free mapping under it. | 508 | * and we must not free the mapping under it. |
510 | */ | 509 | */ |
511 | spin_lock_irq(&inode->i_data.tree_lock); | 510 | xa_lock_irq(&inode->i_data.i_pages); |
512 | BUG_ON(inode->i_data.nrpages); | 511 | BUG_ON(inode->i_data.nrpages); |
513 | BUG_ON(inode->i_data.nrexceptional); | 512 | BUG_ON(inode->i_data.nrexceptional); |
514 | spin_unlock_irq(&inode->i_data.tree_lock); | 513 | xa_unlock_irq(&inode->i_data.i_pages); |
515 | BUG_ON(!list_empty(&inode->i_data.private_list)); | 514 | BUG_ON(!list_empty(&inode->i_data.private_list)); |
516 | BUG_ON(!(inode->i_state & I_FREEING)); | 515 | BUG_ON(!(inode->i_state & I_FREEING)); |
517 | BUG_ON(inode->i_state & I_CLEAR); | 516 | BUG_ON(inode->i_state & I_CLEAR); |
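Editor's note: in fs/inode.c the separate spin_lock_init() call simply disappears, which suggests INIT_RADIX_TREE() now also initializes the lock embedded in the root; that is an inference from the deleted line, hedged accordingly in the sketch below (helper name invented):

	#include <linux/fs.h>
	#include <linux/radix-tree.h>

	static void example_mapping_init(struct address_space *mapping)
	{
		/* Presumably initializes mapping->i_pages.xa_lock as
		 * well; no separate spin_lock_init() remains in the
		 * diff above. */
		INIT_RADIX_TREE(&mapping->i_pages, GFP_ATOMIC | __GFP_ACCOUNT);
	}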
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index c21e0b4454a6..dec98cab729d 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c | |||
@@ -193,9 +193,9 @@ retry: | |||
193 | (unsigned long long)oldkey, | 193 | (unsigned long long)oldkey, |
194 | (unsigned long long)newkey); | 194 | (unsigned long long)newkey); |
195 | 195 | ||
196 | spin_lock_irq(&btnc->tree_lock); | 196 | xa_lock_irq(&btnc->i_pages); |
197 | err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); | 197 | err = radix_tree_insert(&btnc->i_pages, newkey, obh->b_page); |
198 | spin_unlock_irq(&btnc->tree_lock); | 198 | xa_unlock_irq(&btnc->i_pages); |
199 | /* | 199 | /* |
200 | * Note: page->index will not change to newkey until | 200 | * Note: page->index will not change to newkey until |
201 | * nilfs_btnode_commit_change_key() will be called. | 201 | * nilfs_btnode_commit_change_key() will be called. |
@@ -251,11 +251,11 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, | |||
251 | (unsigned long long)newkey); | 251 | (unsigned long long)newkey); |
252 | mark_buffer_dirty(obh); | 252 | mark_buffer_dirty(obh); |
253 | 253 | ||
254 | spin_lock_irq(&btnc->tree_lock); | 254 | xa_lock_irq(&btnc->i_pages); |
255 | radix_tree_delete(&btnc->page_tree, oldkey); | 255 | radix_tree_delete(&btnc->i_pages, oldkey); |
256 | radix_tree_tag_set(&btnc->page_tree, newkey, | 256 | radix_tree_tag_set(&btnc->i_pages, newkey, |
257 | PAGECACHE_TAG_DIRTY); | 257 | PAGECACHE_TAG_DIRTY); |
258 | spin_unlock_irq(&btnc->tree_lock); | 258 | xa_unlock_irq(&btnc->i_pages); |
259 | 259 | ||
260 | opage->index = obh->b_blocknr = newkey; | 260 | opage->index = obh->b_blocknr = newkey; |
261 | unlock_page(opage); | 261 | unlock_page(opage); |
@@ -283,9 +283,9 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc, | |||
283 | return; | 283 | return; |
284 | 284 | ||
285 | if (nbh == NULL) { /* blocksize == pagesize */ | 285 | if (nbh == NULL) { /* blocksize == pagesize */ |
286 | spin_lock_irq(&btnc->tree_lock); | 286 | xa_lock_irq(&btnc->i_pages); |
287 | radix_tree_delete(&btnc->page_tree, newkey); | 287 | radix_tree_delete(&btnc->i_pages, newkey); |
288 | spin_unlock_irq(&btnc->tree_lock); | 288 | xa_unlock_irq(&btnc->i_pages); |
289 | unlock_page(ctxt->bh->b_page); | 289 | unlock_page(ctxt->bh->b_page); |
290 | } else | 290 | } else |
291 | brelse(nbh); | 291 | brelse(nbh); |
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 68241512d7c1..4cb850a6f1c2 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c | |||
@@ -331,15 +331,15 @@ repeat: | |||
331 | struct page *page2; | 331 | struct page *page2; |
332 | 332 | ||
333 | /* move the page to the destination cache */ | 333 | /* move the page to the destination cache */ |
334 | spin_lock_irq(&smap->tree_lock); | 334 | xa_lock_irq(&smap->i_pages); |
335 | page2 = radix_tree_delete(&smap->page_tree, offset); | 335 | page2 = radix_tree_delete(&smap->i_pages, offset); |
336 | WARN_ON(page2 != page); | 336 | WARN_ON(page2 != page); |
337 | 337 | ||
338 | smap->nrpages--; | 338 | smap->nrpages--; |
339 | spin_unlock_irq(&smap->tree_lock); | 339 | xa_unlock_irq(&smap->i_pages); |
340 | 340 | ||
341 | spin_lock_irq(&dmap->tree_lock); | 341 | xa_lock_irq(&dmap->i_pages); |
342 | err = radix_tree_insert(&dmap->page_tree, offset, page); | 342 | err = radix_tree_insert(&dmap->i_pages, offset, page); |
343 | if (unlikely(err < 0)) { | 343 | if (unlikely(err < 0)) { |
344 | WARN_ON(err == -EEXIST); | 344 | WARN_ON(err == -EEXIST); |
345 | page->mapping = NULL; | 345 | page->mapping = NULL; |
@@ -348,11 +348,11 @@ repeat: | |||
348 | page->mapping = dmap; | 348 | page->mapping = dmap; |
349 | dmap->nrpages++; | 349 | dmap->nrpages++; |
350 | if (PageDirty(page)) | 350 | if (PageDirty(page)) |
351 | radix_tree_tag_set(&dmap->page_tree, | 351 | radix_tree_tag_set(&dmap->i_pages, |
352 | offset, | 352 | offset, |
353 | PAGECACHE_TAG_DIRTY); | 353 | PAGECACHE_TAG_DIRTY); |
354 | } | 354 | } |
355 | spin_unlock_irq(&dmap->tree_lock); | 355 | xa_unlock_irq(&dmap->i_pages); |
356 | } | 356 | } |
357 | unlock_page(page); | 357 | unlock_page(page); |
358 | } | 358 | } |
@@ -474,15 +474,15 @@ int __nilfs_clear_page_dirty(struct page *page) | |||
474 | struct address_space *mapping = page->mapping; | 474 | struct address_space *mapping = page->mapping; |
475 | 475 | ||
476 | if (mapping) { | 476 | if (mapping) { |
477 | spin_lock_irq(&mapping->tree_lock); | 477 | xa_lock_irq(&mapping->i_pages); |
478 | if (test_bit(PG_dirty, &page->flags)) { | 478 | if (test_bit(PG_dirty, &page->flags)) { |
479 | radix_tree_tag_clear(&mapping->page_tree, | 479 | radix_tree_tag_clear(&mapping->i_pages, |
480 | page_index(page), | 480 | page_index(page), |
481 | PAGECACHE_TAG_DIRTY); | 481 | PAGECACHE_TAG_DIRTY); |
482 | spin_unlock_irq(&mapping->tree_lock); | 482 | xa_unlock_irq(&mapping->i_pages); |
483 | return clear_page_dirty_for_io(page); | 483 | return clear_page_dirty_for_io(page); |
484 | } | 484 | } |
485 | spin_unlock_irq(&mapping->tree_lock); | 485 | xa_unlock_irq(&mapping->i_pages); |
486 | return 0; | 486 | return 0; |
487 | } | 487 | } |
488 | return TestClearPageDirty(page); | 488 | return TestClearPageDirty(page); |
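Editor's note: the nilfs2 hunks keep their existing shape; a page migrating between two caches is deleted from the source under that mapping's lock, then inserted into the destination under its own lock, and the two i_pages locks are never held simultaneously. A condensed sketch of that move, with error handling reduced to the essentials (function name invented):

	#include <linux/fs.h>
	#include <linux/pagemap.h>
	#include <linux/radix-tree.h>

	static int example_move(struct address_space *smap,
				struct address_space *dmap,
				struct page *page, pgoff_t offset)
	{
		int err;

		xa_lock_irq(&smap->i_pages);
		radix_tree_delete(&smap->i_pages, offset);
		smap->nrpages--;
		xa_unlock_irq(&smap->i_pages);

		xa_lock_irq(&dmap->i_pages);
		err = radix_tree_insert(&dmap->i_pages, offset, page);
		if (!err) {
			page->mapping = dmap;
			dmap->nrpages++;
		}
		xa_unlock_irq(&dmap->i_pages);
		return err;
	}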
diff --git a/fs/proc/array.c b/fs/proc/array.c index 598803576e4c..ae2c807fd719 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -141,25 +141,12 @@ static inline const char *get_task_state(struct task_struct *tsk) | |||
141 | return task_state_array[task_state_index(tsk)]; | 141 | return task_state_array[task_state_index(tsk)]; |
142 | } | 142 | } |
143 | 143 | ||
144 | static inline int get_task_umask(struct task_struct *tsk) | ||
145 | { | ||
146 | struct fs_struct *fs; | ||
147 | int umask = -ENOENT; | ||
148 | |||
149 | task_lock(tsk); | ||
150 | fs = tsk->fs; | ||
151 | if (fs) | ||
152 | umask = fs->umask; | ||
153 | task_unlock(tsk); | ||
154 | return umask; | ||
155 | } | ||
156 | |||
157 | static inline void task_state(struct seq_file *m, struct pid_namespace *ns, | 144 | static inline void task_state(struct seq_file *m, struct pid_namespace *ns, |
158 | struct pid *pid, struct task_struct *p) | 145 | struct pid *pid, struct task_struct *p) |
159 | { | 146 | { |
160 | struct user_namespace *user_ns = seq_user_ns(m); | 147 | struct user_namespace *user_ns = seq_user_ns(m); |
161 | struct group_info *group_info; | 148 | struct group_info *group_info; |
162 | int g, umask; | 149 | int g, umask = -1; |
163 | struct task_struct *tracer; | 150 | struct task_struct *tracer; |
164 | const struct cred *cred; | 151 | const struct cred *cred; |
165 | pid_t ppid, tpid = 0, tgid, ngid; | 152 | pid_t ppid, tpid = 0, tgid, ngid; |
@@ -177,17 +164,18 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, | |||
177 | ngid = task_numa_group_id(p); | 164 | ngid = task_numa_group_id(p); |
178 | cred = get_task_cred(p); | 165 | cred = get_task_cred(p); |
179 | 166 | ||
180 | umask = get_task_umask(p); | ||
181 | if (umask >= 0) | ||
182 | seq_printf(m, "Umask:\t%#04o\n", umask); | ||
183 | |||
184 | task_lock(p); | 167 | task_lock(p); |
168 | if (p->fs) | ||
169 | umask = p->fs->umask; | ||
185 | if (p->files) | 170 | if (p->files) |
186 | max_fds = files_fdtable(p->files)->max_fds; | 171 | max_fds = files_fdtable(p->files)->max_fds; |
187 | task_unlock(p); | 172 | task_unlock(p); |
188 | rcu_read_unlock(); | 173 | rcu_read_unlock(); |
189 | 174 | ||
190 | seq_printf(m, "State:\t%s", get_task_state(p)); | 175 | if (umask >= 0) |
176 | seq_printf(m, "Umask:\t%#04o\n", umask); | ||
177 | seq_puts(m, "State:\t"); | ||
178 | seq_puts(m, get_task_state(p)); | ||
191 | 179 | ||
192 | seq_put_decimal_ull(m, "\nTgid:\t", tgid); | 180 | seq_put_decimal_ull(m, "\nTgid:\t", tgid); |
193 | seq_put_decimal_ull(m, "\nNgid:\t", ngid); | 181 | seq_put_decimal_ull(m, "\nNgid:\t", ngid); |
@@ -313,8 +301,8 @@ static void render_cap_t(struct seq_file *m, const char *header, | |||
313 | 301 | ||
314 | seq_puts(m, header); | 302 | seq_puts(m, header); |
315 | CAP_FOR_EACH_U32(__capi) { | 303 | CAP_FOR_EACH_U32(__capi) { |
316 | seq_printf(m, "%08x", | 304 | seq_put_hex_ll(m, NULL, |
317 | a->cap[CAP_LAST_U32 - __capi]); | 305 | a->cap[CAP_LAST_U32 - __capi], 8); |
318 | } | 306 | } |
319 | seq_putc(m, '\n'); | 307 | seq_putc(m, '\n'); |
320 | } | 308 | } |
@@ -368,7 +356,8 @@ static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) | |||
368 | 356 | ||
369 | static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) | 357 | static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) |
370 | { | 358 | { |
371 | seq_printf(m, "CoreDumping:\t%d\n", !!mm->core_state); | 359 | seq_put_decimal_ull(m, "CoreDumping:\t", !!mm->core_state); |
360 | seq_putc(m, '\n'); | ||
372 | } | 361 | } |
373 | 362 | ||
374 | int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | 363 | int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, |
@@ -504,7 +493,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
504 | /* convert nsec -> ticks */ | 493 | /* convert nsec -> ticks */ |
505 | start_time = nsec_to_clock_t(task->real_start_time); | 494 | start_time = nsec_to_clock_t(task->real_start_time); |
506 | 495 | ||
507 | seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state); | 496 | seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns)); |
497 | seq_puts(m, " ("); | ||
498 | seq_puts(m, tcomm); | ||
499 | seq_puts(m, ") "); | ||
500 | seq_putc(m, state); | ||
508 | seq_put_decimal_ll(m, " ", ppid); | 501 | seq_put_decimal_ll(m, " ", ppid); |
509 | seq_put_decimal_ll(m, " ", pgid); | 502 | seq_put_decimal_ll(m, " ", pgid); |
510 | seq_put_decimal_ll(m, " ", sid); | 503 | seq_put_decimal_ll(m, " ", sid); |
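Editor's note: from here the series switches topic; the /proc changes replace seq_printf() format parsing with direct emitters. seq_put_decimal_ull() and seq_put_hex_ll() take an optional literal prefix and write the number straight into the buffer, with seq_put_hex_ll() zero-padding to the given width (matching the old %08x). A sketch of the idiom with invented field names:

	#include <linux/seq_file.h>

	static void example_show(struct seq_file *m, unsigned long long tgid,
				 unsigned long long caps)
	{
		seq_put_decimal_ull(m, "Tgid:\t", tgid);
		seq_putc(m, '\n');
		seq_put_hex_ll(m, "CapEff:\t", caps, 8);	/* like "%08llx" */
		seq_putc(m, '\n');
	}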
diff --git a/fs/proc/base.c b/fs/proc/base.c index d53246863cfb..eafa39a3a88c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -388,14 +388,17 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, | |||
388 | unsigned long wchan; | 388 | unsigned long wchan; |
389 | char symname[KSYM_NAME_LEN]; | 389 | char symname[KSYM_NAME_LEN]; |
390 | 390 | ||
391 | wchan = get_wchan(task); | 391 | if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) |
392 | goto print0; | ||
392 | 393 | ||
393 | if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS) | 394 | wchan = get_wchan(task); |
394 | && !lookup_symbol_name(wchan, symname)) | 395 | if (wchan && !lookup_symbol_name(wchan, symname)) { |
395 | seq_printf(m, "%s", symname); | 396 | seq_puts(m, symname); |
396 | else | 397 | return 0; |
397 | seq_putc(m, '0'); | 398 | } |
398 | 399 | ||
400 | print0: | ||
401 | seq_putc(m, '0'); | ||
399 | return 0; | 402 | return 0; |
400 | } | 403 | } |
401 | #endif /* CONFIG_KALLSYMS */ | 404 | #endif /* CONFIG_KALLSYMS */ |
@@ -1910,6 +1913,8 @@ static int dname_to_vma_addr(struct dentry *dentry, | |||
1910 | unsigned long long sval, eval; | 1913 | unsigned long long sval, eval; |
1911 | unsigned int len; | 1914 | unsigned int len; |
1912 | 1915 | ||
1916 | if (str[0] == '0' && str[1] != '-') | ||
1917 | return -EINVAL; | ||
1913 | len = _parse_integer(str, 16, &sval); | 1918 | len = _parse_integer(str, 16, &sval); |
1914 | if (len & KSTRTOX_OVERFLOW) | 1919 | if (len & KSTRTOX_OVERFLOW) |
1915 | return -EINVAL; | 1920 | return -EINVAL; |
@@ -1921,6 +1926,8 @@ static int dname_to_vma_addr(struct dentry *dentry, | |||
1921 | return -EINVAL; | 1926 | return -EINVAL; |
1922 | str++; | 1927 | str++; |
1923 | 1928 | ||
1929 | if (str[0] == '0' && str[1]) | ||
1930 | return -EINVAL; | ||
1924 | len = _parse_integer(str, 16, &eval); | 1931 | len = _parse_integer(str, 16, &eval); |
1925 | if (len & KSTRTOX_OVERFLOW) | 1932 | if (len & KSTRTOX_OVERFLOW) |
1926 | return -EINVAL; | 1933 | return -EINVAL; |
@@ -2204,6 +2211,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) | |||
2204 | } | 2211 | } |
2205 | } | 2212 | } |
2206 | up_read(&mm->mmap_sem); | 2213 | up_read(&mm->mmap_sem); |
2214 | mmput(mm); | ||
2207 | 2215 | ||
2208 | for (i = 0; i < nr_files; i++) { | 2216 | for (i = 0; i < nr_files; i++) { |
2209 | char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */ | 2217 | char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */ |
@@ -2221,7 +2229,6 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) | |||
2221 | } | 2229 | } |
2222 | if (fa) | 2230 | if (fa) |
2223 | flex_array_free(fa); | 2231 | flex_array_free(fa); |
2224 | mmput(mm); | ||
2225 | 2232 | ||
2226 | out_put_task: | 2233 | out_put_task: |
2227 | put_task_struct(task); | 2234 | put_task_struct(task); |
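Editor's note: the two dname_to_vma_addr() checks make the parse strict, so only the canonical %lx-%lx spelling of a map_files name is accepted; a leading zero is allowed only when the component is exactly "0". Restated as predicates (names invented):

	#include <linux/types.h>

	/* Start component: "0" is valid only as the whole component,
	 * i.e. immediately followed by '-'. */
	static bool example_start_ok(const char *str)
	{
		return !(str[0] == '0' && str[1] != '-');
	}

	/* End component: "0" is valid only if nothing follows it. */
	static bool example_end_ok(const char *str)
	{
		return !(str[0] == '0' && str[1]);
	}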
diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c index 403cbb12a6e9..8233e7af9389 100644 --- a/fs/proc/cmdline.c +++ b/fs/proc/cmdline.c | |||
@@ -6,7 +6,8 @@ | |||
6 | 6 | ||
7 | static int cmdline_proc_show(struct seq_file *m, void *v) | 7 | static int cmdline_proc_show(struct seq_file *m, void *v) |
8 | { | 8 | { |
9 | seq_printf(m, "%s\n", saved_command_line); | 9 | seq_puts(m, saved_command_line); |
10 | seq_putc(m, '\n'); | ||
10 | return 0; | 11 | return 0; |
11 | } | 12 | } |
12 | 13 | ||
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 5d709fa8f3a2..04c4804cbdef 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -8,6 +8,7 @@ | |||
8 | * Copyright (C) 1997 Theodore Ts'o | 8 | * Copyright (C) 1997 Theodore Ts'o |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/cache.h> | ||
11 | #include <linux/errno.h> | 12 | #include <linux/errno.h> |
12 | #include <linux/time.h> | 13 | #include <linux/time.h> |
13 | #include <linux/proc_fs.h> | 14 | #include <linux/proc_fs.h> |
@@ -28,6 +29,17 @@ | |||
28 | 29 | ||
29 | static DEFINE_RWLOCK(proc_subdir_lock); | 30 | static DEFINE_RWLOCK(proc_subdir_lock); |
30 | 31 | ||
32 | struct kmem_cache *proc_dir_entry_cache __ro_after_init; | ||
33 | |||
34 | void pde_free(struct proc_dir_entry *pde) | ||
35 | { | ||
36 | if (S_ISLNK(pde->mode)) | ||
37 | kfree(pde->data); | ||
38 | if (pde->name != pde->inline_name) | ||
39 | kfree(pde->name); | ||
40 | kmem_cache_free(proc_dir_entry_cache, pde); | ||
41 | } | ||
42 | |||
31 | static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len) | 43 | static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len) |
32 | { | 44 | { |
33 | if (len < de->namelen) | 45 | if (len < de->namelen) |
@@ -40,8 +52,8 @@ static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int | |||
40 | 52 | ||
41 | static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir) | 53 | static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir) |
42 | { | 54 | { |
43 | return rb_entry_safe(rb_first_cached(&dir->subdir), | 55 | return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry, |
44 | struct proc_dir_entry, subdir_node); | 56 | subdir_node); |
45 | } | 57 | } |
46 | 58 | ||
47 | static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir) | 59 | static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir) |
@@ -54,7 +66,7 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, | |||
54 | const char *name, | 66 | const char *name, |
55 | unsigned int len) | 67 | unsigned int len) |
56 | { | 68 | { |
57 | struct rb_node *node = dir->subdir.rb_root.rb_node; | 69 | struct rb_node *node = dir->subdir.rb_node; |
58 | 70 | ||
59 | while (node) { | 71 | while (node) { |
60 | struct proc_dir_entry *de = rb_entry(node, | 72 | struct proc_dir_entry *de = rb_entry(node, |
@@ -75,9 +87,8 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, | |||
75 | static bool pde_subdir_insert(struct proc_dir_entry *dir, | 87 | static bool pde_subdir_insert(struct proc_dir_entry *dir, |
76 | struct proc_dir_entry *de) | 88 | struct proc_dir_entry *de) |
77 | { | 89 | { |
78 | struct rb_root_cached *root = &dir->subdir; | 90 | struct rb_root *root = &dir->subdir; |
79 | struct rb_node **new = &root->rb_root.rb_node, *parent = NULL; | 91 | struct rb_node **new = &root->rb_node, *parent = NULL; |
80 | bool leftmost = true; | ||
81 | 92 | ||
82 | /* Figure out where to put new node */ | 93 | /* Figure out where to put new node */ |
83 | while (*new) { | 94 | while (*new) { |
@@ -89,16 +100,15 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir, | |||
89 | parent = *new; | 100 | parent = *new; |
90 | if (result < 0) | 101 | if (result < 0) |
91 | new = &(*new)->rb_left; | 102 | new = &(*new)->rb_left; |
92 | else if (result > 0) { | 103 | else if (result > 0) |
93 | new = &(*new)->rb_right; | 104 | new = &(*new)->rb_right; |
94 | leftmost = false; | 105 | else |
95 | } else | ||
96 | return false; | 106 | return false; |
97 | } | 107 | } |
98 | 108 | ||
99 | /* Add new node and rebalance tree. */ | 109 | /* Add new node and rebalance tree. */ |
100 | rb_link_node(&de->subdir_node, parent, new); | 110 | rb_link_node(&de->subdir_node, parent, new); |
101 | rb_insert_color_cached(&de->subdir_node, root, leftmost); | 111 | rb_insert_color(&de->subdir_node, root); |
102 | return true; | 112 | return true; |
103 | } | 113 | } |
104 | 114 | ||
@@ -354,6 +364,14 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, | |||
354 | WARN(1, "name len %u\n", qstr.len); | 364 | WARN(1, "name len %u\n", qstr.len); |
355 | return NULL; | 365 | return NULL; |
356 | } | 366 | } |
367 | if (qstr.len == 1 && fn[0] == '.') { | ||
368 | WARN(1, "name '.'\n"); | ||
369 | return NULL; | ||
370 | } | ||
371 | if (qstr.len == 2 && fn[0] == '.' && fn[1] == '.') { | ||
372 | WARN(1, "name '..'\n"); | ||
373 | return NULL; | ||
374 | } | ||
357 | if (*parent == &proc_root && name_to_int(&qstr) != ~0U) { | 375 | if (*parent == &proc_root && name_to_int(&qstr) != ~0U) { |
358 | WARN(1, "create '/proc/%s' by hand\n", qstr.name); | 376 | WARN(1, "create '/proc/%s' by hand\n", qstr.name); |
359 | return NULL; | 377 | return NULL; |
@@ -363,16 +381,26 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, | |||
363 | return NULL; | 381 | return NULL; |
364 | } | 382 | } |
365 | 383 | ||
366 | ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL); | 384 | ent = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL); |
367 | if (!ent) | 385 | if (!ent) |
368 | goto out; | 386 | goto out; |
369 | 387 | ||
388 | if (qstr.len + 1 <= sizeof(ent->inline_name)) { | ||
389 | ent->name = ent->inline_name; | ||
390 | } else { | ||
391 | ent->name = kmalloc(qstr.len + 1, GFP_KERNEL); | ||
392 | if (!ent->name) { | ||
393 | pde_free(ent); | ||
394 | return NULL; | ||
395 | } | ||
396 | } | ||
397 | |||
370 | memcpy(ent->name, fn, qstr.len + 1); | 398 | memcpy(ent->name, fn, qstr.len + 1); |
371 | ent->namelen = qstr.len; | 399 | ent->namelen = qstr.len; |
372 | ent->mode = mode; | 400 | ent->mode = mode; |
373 | ent->nlink = nlink; | 401 | ent->nlink = nlink; |
374 | ent->subdir = RB_ROOT_CACHED; | 402 | ent->subdir = RB_ROOT; |
375 | atomic_set(&ent->count, 1); | 403 | refcount_set(&ent->refcnt, 1); |
376 | spin_lock_init(&ent->pde_unload_lock); | 404 | spin_lock_init(&ent->pde_unload_lock); |
377 | INIT_LIST_HEAD(&ent->pde_openers); | 405 | INIT_LIST_HEAD(&ent->pde_openers); |
378 | proc_set_user(ent, (*parent)->uid, (*parent)->gid); | 406 | proc_set_user(ent, (*parent)->uid, (*parent)->gid); |
@@ -395,12 +423,11 @@ struct proc_dir_entry *proc_symlink(const char *name, | |||
395 | strcpy((char*)ent->data,dest); | 423 | strcpy((char*)ent->data,dest); |
396 | ent->proc_iops = &proc_link_inode_operations; | 424 | ent->proc_iops = &proc_link_inode_operations; |
397 | if (proc_register(parent, ent) < 0) { | 425 | if (proc_register(parent, ent) < 0) { |
398 | kfree(ent->data); | 426 | pde_free(ent); |
399 | kfree(ent); | ||
400 | ent = NULL; | 427 | ent = NULL; |
401 | } | 428 | } |
402 | } else { | 429 | } else { |
403 | kfree(ent); | 430 | pde_free(ent); |
404 | ent = NULL; | 431 | ent = NULL; |
405 | } | 432 | } |
406 | } | 433 | } |
@@ -423,7 +450,7 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, | |||
423 | ent->proc_iops = &proc_dir_inode_operations; | 450 | ent->proc_iops = &proc_dir_inode_operations; |
424 | parent->nlink++; | 451 | parent->nlink++; |
425 | if (proc_register(parent, ent) < 0) { | 452 | if (proc_register(parent, ent) < 0) { |
426 | kfree(ent); | 453 | pde_free(ent); |
427 | parent->nlink--; | 454 | parent->nlink--; |
428 | ent = NULL; | 455 | ent = NULL; |
429 | } | 456 | } |
@@ -458,7 +485,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name) | |||
458 | ent->proc_iops = NULL; | 485 | ent->proc_iops = NULL; |
459 | parent->nlink++; | 486 | parent->nlink++; |
460 | if (proc_register(parent, ent) < 0) { | 487 | if (proc_register(parent, ent) < 0) { |
461 | kfree(ent); | 488 | pde_free(ent); |
462 | parent->nlink--; | 489 | parent->nlink--; |
463 | ent = NULL; | 490 | ent = NULL; |
464 | } | 491 | } |
@@ -495,7 +522,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, | |||
495 | goto out_free; | 522 | goto out_free; |
496 | return pde; | 523 | return pde; |
497 | out_free: | 524 | out_free: |
498 | kfree(pde); | 525 | pde_free(pde); |
499 | out: | 526 | out: |
500 | return NULL; | 527 | return NULL; |
501 | } | 528 | } |
@@ -522,19 +549,12 @@ void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) | |||
522 | } | 549 | } |
523 | EXPORT_SYMBOL(proc_set_user); | 550 | EXPORT_SYMBOL(proc_set_user); |
524 | 551 | ||
525 | static void free_proc_entry(struct proc_dir_entry *de) | ||
526 | { | ||
527 | proc_free_inum(de->low_ino); | ||
528 | |||
529 | if (S_ISLNK(de->mode)) | ||
530 | kfree(de->data); | ||
531 | kfree(de); | ||
532 | } | ||
533 | |||
534 | void pde_put(struct proc_dir_entry *pde) | 552 | void pde_put(struct proc_dir_entry *pde) |
535 | { | 553 | { |
536 | if (atomic_dec_and_test(&pde->count)) | 554 | if (refcount_dec_and_test(&pde->refcnt)) { |
537 | free_proc_entry(pde); | 555 | proc_free_inum(pde->low_ino); |
556 | pde_free(pde); | ||
557 | } | ||
538 | } | 558 | } |
539 | 559 | ||
540 | /* | 560 | /* |
@@ -555,7 +575,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) | |||
555 | 575 | ||
556 | de = pde_subdir_find(parent, fn, len); | 576 | de = pde_subdir_find(parent, fn, len); |
557 | if (de) | 577 | if (de) |
558 | rb_erase_cached(&de->subdir_node, &parent->subdir); | 578 | rb_erase(&de->subdir_node, &parent->subdir); |
559 | write_unlock(&proc_subdir_lock); | 579 | write_unlock(&proc_subdir_lock); |
560 | if (!de) { | 580 | if (!de) { |
561 | WARN(1, "name '%s'\n", name); | 581 | WARN(1, "name '%s'\n", name); |
@@ -592,13 +612,13 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) | |||
592 | write_unlock(&proc_subdir_lock); | 612 | write_unlock(&proc_subdir_lock); |
593 | return -ENOENT; | 613 | return -ENOENT; |
594 | } | 614 | } |
595 | rb_erase_cached(&root->subdir_node, &parent->subdir); | 615 | rb_erase(&root->subdir_node, &parent->subdir); |
596 | 616 | ||
597 | de = root; | 617 | de = root; |
598 | while (1) { | 618 | while (1) { |
599 | next = pde_subdir_first(de); | 619 | next = pde_subdir_first(de); |
600 | if (next) { | 620 | if (next) { |
601 | rb_erase_cached(&next->subdir_node, &de->subdir); | 621 | rb_erase(&next->subdir_node, &de->subdir); |
602 | de = next; | 622 | de = next; |
603 | continue; | 623 | continue; |
604 | } | 624 | } |
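Editor's note: proc_dir_entry allocation moves from kzalloc() with a flexible name array to a fixed-size slab; short names land in the entry's inline_name[] buffer and long ones fall back to a separate kmalloc(), with pde_free() undoing whichever choice was made. A sketch of the naming step, assuming the structures defined later in this series (helper name invented):

	#include <linux/slab.h>
	#include <linux/string.h>

	static char *example_store_name(struct proc_dir_entry *ent,
					const char *fn, unsigned int len)
	{
		if (len + 1 <= sizeof(ent->inline_name))
			ent->name = ent->inline_name;	/* common case: no alloc */
		else
			ent->name = kmalloc(len + 1, GFP_KERNEL);
		if (ent->name)
			memcpy(ent->name, fn, len + 1);
		return ent->name;
	}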
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 6e8724958116..2cf3b74391ca 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -54,6 +54,7 @@ static void proc_evict_inode(struct inode *inode) | |||
54 | } | 54 | } |
55 | 55 | ||
56 | static struct kmem_cache *proc_inode_cachep __ro_after_init; | 56 | static struct kmem_cache *proc_inode_cachep __ro_after_init; |
57 | static struct kmem_cache *pde_opener_cache __ro_after_init; | ||
57 | 58 | ||
58 | static struct inode *proc_alloc_inode(struct super_block *sb) | 59 | static struct inode *proc_alloc_inode(struct super_block *sb) |
59 | { | 60 | { |
@@ -92,7 +93,7 @@ static void init_once(void *foo) | |||
92 | inode_init_once(&ei->vfs_inode); | 93 | inode_init_once(&ei->vfs_inode); |
93 | } | 94 | } |
94 | 95 | ||
95 | void __init proc_init_inodecache(void) | 96 | void __init proc_init_kmemcache(void) |
96 | { | 97 | { |
97 | proc_inode_cachep = kmem_cache_create("proc_inode_cache", | 98 | proc_inode_cachep = kmem_cache_create("proc_inode_cache", |
98 | sizeof(struct proc_inode), | 99 | sizeof(struct proc_inode), |
@@ -100,6 +101,13 @@ void __init proc_init_inodecache(void) | |||
100 | SLAB_MEM_SPREAD|SLAB_ACCOUNT| | 101 | SLAB_MEM_SPREAD|SLAB_ACCOUNT| |
101 | SLAB_PANIC), | 102 | SLAB_PANIC), |
102 | init_once); | 103 | init_once); |
104 | pde_opener_cache = | ||
105 | kmem_cache_create("pde_opener", sizeof(struct pde_opener), 0, | ||
106 | SLAB_ACCOUNT|SLAB_PANIC, NULL); | ||
107 | proc_dir_entry_cache = kmem_cache_create_usercopy( | ||
108 | "proc_dir_entry", sizeof(struct proc_dir_entry), 0, SLAB_PANIC, | ||
109 | offsetof(struct proc_dir_entry, inline_name), | ||
110 | sizeof_field(struct proc_dir_entry, inline_name), NULL); | ||
103 | } | 111 | } |
104 | 112 | ||
105 | static int proc_show_options(struct seq_file *seq, struct dentry *root) | 113 | static int proc_show_options(struct seq_file *seq, struct dentry *root) |
@@ -138,7 +146,7 @@ static void unuse_pde(struct proc_dir_entry *pde) | |||
138 | complete(pde->pde_unload_completion); | 146 | complete(pde->pde_unload_completion); |
139 | } | 147 | } |
140 | 148 | ||
141 | /* pde is locked */ | 149 | /* pde is locked on entry, unlocked on exit */ |
142 | static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) | 150 | static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) |
143 | { | 151 | { |
144 | /* | 152 | /* |
@@ -157,9 +165,10 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) | |||
157 | pdeo->c = &c; | 165 | pdeo->c = &c; |
158 | spin_unlock(&pde->pde_unload_lock); | 166 | spin_unlock(&pde->pde_unload_lock); |
159 | wait_for_completion(&c); | 167 | wait_for_completion(&c); |
160 | spin_lock(&pde->pde_unload_lock); | ||
161 | } else { | 168 | } else { |
162 | struct file *file; | 169 | struct file *file; |
170 | struct completion *c; | ||
171 | |||
163 | pdeo->closing = true; | 172 | pdeo->closing = true; |
164 | spin_unlock(&pde->pde_unload_lock); | 173 | spin_unlock(&pde->pde_unload_lock); |
165 | file = pdeo->file; | 174 | file = pdeo->file; |
@@ -167,9 +176,11 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) | |||
167 | spin_lock(&pde->pde_unload_lock); | 176 | spin_lock(&pde->pde_unload_lock); |
168 | /* After ->release. */ | 177 | /* After ->release. */ |
169 | list_del(&pdeo->lh); | 178 | list_del(&pdeo->lh); |
170 | if (unlikely(pdeo->c)) | 179 | c = pdeo->c; |
171 | complete(pdeo->c); | 180 | spin_unlock(&pde->pde_unload_lock); |
172 | kfree(pdeo); | 181 | if (unlikely(c)) |
182 | complete(c); | ||
183 | kmem_cache_free(pde_opener_cache, pdeo); | ||
173 | } | 184 | } |
174 | } | 185 | } |
175 | 186 | ||
@@ -188,6 +199,7 @@ void proc_entry_rundown(struct proc_dir_entry *de) | |||
188 | struct pde_opener *pdeo; | 199 | struct pde_opener *pdeo; |
189 | pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); | 200 | pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); |
190 | close_pdeo(de, pdeo); | 201 | close_pdeo(de, pdeo); |
202 | spin_lock(&de->pde_unload_lock); | ||
191 | } | 203 | } |
192 | spin_unlock(&de->pde_unload_lock); | 204 | spin_unlock(&de->pde_unload_lock); |
193 | } | 205 | } |
@@ -338,31 +350,36 @@ static int proc_reg_open(struct inode *inode, struct file *file) | |||
338 | * | 350 | * |
339 | * Save every "struct file" with custom ->release hook. | 351 | * Save every "struct file" with custom ->release hook. |
340 | */ | 352 | */ |
341 | pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); | 353 | if (!use_pde(pde)) |
342 | if (!pdeo) | ||
343 | return -ENOMEM; | ||
344 | |||
345 | if (!use_pde(pde)) { | ||
346 | kfree(pdeo); | ||
347 | return -ENOENT; | 354 | return -ENOENT; |
348 | } | 355 | |
349 | open = pde->proc_fops->open; | ||
350 | release = pde->proc_fops->release; | 356 | release = pde->proc_fops->release; |
357 | if (release) { | ||
358 | pdeo = kmem_cache_alloc(pde_opener_cache, GFP_KERNEL); | ||
359 | if (!pdeo) { | ||
360 | rv = -ENOMEM; | ||
361 | goto out_unuse; | ||
362 | } | ||
363 | } | ||
351 | 364 | ||
365 | open = pde->proc_fops->open; | ||
352 | if (open) | 366 | if (open) |
353 | rv = open(inode, file); | 367 | rv = open(inode, file); |
354 | 368 | ||
355 | if (rv == 0 && release) { | 369 | if (release) { |
356 | /* To know what to release. */ | 370 | if (rv == 0) { |
357 | pdeo->file = file; | 371 | /* To know what to release. */ |
358 | pdeo->closing = false; | 372 | pdeo->file = file; |
359 | pdeo->c = NULL; | 373 | pdeo->closing = false; |
360 | spin_lock(&pde->pde_unload_lock); | 374 | pdeo->c = NULL; |
361 | list_add(&pdeo->lh, &pde->pde_openers); | 375 | spin_lock(&pde->pde_unload_lock); |
362 | spin_unlock(&pde->pde_unload_lock); | 376 | list_add(&pdeo->lh, &pde->pde_openers); |
363 | } else | 377 | spin_unlock(&pde->pde_unload_lock); |
364 | kfree(pdeo); | 378 | } else |
379 | kmem_cache_free(pde_opener_cache, pdeo); | ||
380 | } | ||
365 | 381 | ||
382 | out_unuse: | ||
366 | unuse_pde(pde); | 383 | unuse_pde(pde); |
367 | return rv; | 384 | return rv; |
368 | } | 385 | } |
@@ -375,7 +392,7 @@ static int proc_reg_release(struct inode *inode, struct file *file) | |||
375 | list_for_each_entry(pdeo, &pde->pde_openers, lh) { | 392 | list_for_each_entry(pdeo, &pde->pde_openers, lh) { |
376 | if (pdeo->file == file) { | 393 | if (pdeo->file == file) { |
377 | close_pdeo(pde, pdeo); | 394 | close_pdeo(pde, pdeo); |
378 | break; | 395 | return 0; |
379 | } | 396 | } |
380 | } | 397 | } |
381 | spin_unlock(&pde->pde_unload_lock); | 398 | spin_unlock(&pde->pde_unload_lock); |
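Editor's note: the locking contract of close_pdeo() changes as the updated comment says; it is entered with pde_unload_lock held and always returns with it dropped, which is why proc_entry_rundown() now retakes the lock after each call. A sketch of the caller-side pattern, using the functions from the diff above (wrapper name invented):

	#include <linux/list.h>
	#include <linux/spinlock.h>

	static void example_rundown(struct proc_dir_entry *de)
	{
		spin_lock(&de->pde_unload_lock);
		while (!list_empty(&de->pde_openers)) {
			struct pde_opener *pdeo =
				list_first_entry(&de->pde_openers,
						 struct pde_opener, lh);
			close_pdeo(de, pdeo);		 /* returns unlocked */
			spin_lock(&de->pde_unload_lock); /* retake before retest */
		}
		spin_unlock(&de->pde_unload_lock);
	}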
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index d697c8ab0a14..0f1692e63cb6 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | #include <linux/proc_fs.h> | 12 | #include <linux/proc_fs.h> |
13 | #include <linux/proc_ns.h> | 13 | #include <linux/proc_ns.h> |
14 | #include <linux/refcount.h> | ||
14 | #include <linux/spinlock.h> | 15 | #include <linux/spinlock.h> |
15 | #include <linux/atomic.h> | 16 | #include <linux/atomic.h> |
16 | #include <linux/binfmts.h> | 17 | #include <linux/binfmts.h> |
@@ -36,7 +37,7 @@ struct proc_dir_entry { | |||
36 | * negative -> it's going away RSN | 37 | * negative -> it's going away RSN |
37 | */ | 38 | */ |
38 | atomic_t in_use; | 39 | atomic_t in_use; |
39 | atomic_t count; /* use count */ | 40 | refcount_t refcnt; |
40 | struct list_head pde_openers; /* who did ->open, but not ->release */ | 41 | struct list_head pde_openers; /* who did ->open, but not ->release */ |
41 | /* protects ->pde_openers and all struct pde_opener instances */ | 42 | /* protects ->pde_openers and all struct pde_opener instances */ |
42 | spinlock_t pde_unload_lock; | 43 | spinlock_t pde_unload_lock; |
@@ -50,13 +51,22 @@ struct proc_dir_entry { | |||
50 | kgid_t gid; | 51 | kgid_t gid; |
51 | loff_t size; | 52 | loff_t size; |
52 | struct proc_dir_entry *parent; | 53 | struct proc_dir_entry *parent; |
53 | struct rb_root_cached subdir; | 54 | struct rb_root subdir; |
54 | struct rb_node subdir_node; | 55 | struct rb_node subdir_node; |
56 | char *name; | ||
55 | umode_t mode; | 57 | umode_t mode; |
56 | u8 namelen; | 58 | u8 namelen; |
57 | char name[]; | 59 | #ifdef CONFIG_64BIT |
60 | #define SIZEOF_PDE_INLINE_NAME (192-139) | ||
61 | #else | ||
62 | #define SIZEOF_PDE_INLINE_NAME (128-87) | ||
63 | #endif | ||
64 | char inline_name[SIZEOF_PDE_INLINE_NAME]; | ||
58 | } __randomize_layout; | 65 | } __randomize_layout; |
59 | 66 | ||
67 | extern struct kmem_cache *proc_dir_entry_cache; | ||
68 | void pde_free(struct proc_dir_entry *pde); | ||
69 | |||
60 | union proc_op { | 70 | union proc_op { |
61 | int (*proc_get_link)(struct dentry *, struct path *); | 71 | int (*proc_get_link)(struct dentry *, struct path *); |
62 | int (*proc_show)(struct seq_file *m, | 72 | int (*proc_show)(struct seq_file *m, |
@@ -159,7 +169,7 @@ int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry * | |||
159 | 169 | ||
160 | static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) | 170 | static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) |
161 | { | 171 | { |
162 | atomic_inc(&pde->count); | 172 | refcount_inc(&pde->refcnt); |
163 | return pde; | 173 | return pde; |
164 | } | 174 | } |
165 | extern void pde_put(struct proc_dir_entry *); | 175 | extern void pde_put(struct proc_dir_entry *); |
@@ -177,12 +187,12 @@ struct pde_opener { | |||
177 | struct list_head lh; | 187 | struct list_head lh; |
178 | bool closing; | 188 | bool closing; |
179 | struct completion *c; | 189 | struct completion *c; |
180 | }; | 190 | } __randomize_layout; |
181 | extern const struct inode_operations proc_link_inode_operations; | 191 | extern const struct inode_operations proc_link_inode_operations; |
182 | 192 | ||
183 | extern const struct inode_operations proc_pid_link_inode_operations; | 193 | extern const struct inode_operations proc_pid_link_inode_operations; |
184 | 194 | ||
185 | extern void proc_init_inodecache(void); | 195 | void proc_init_kmemcache(void); |
186 | void set_proc_pid_nlink(void); | 196 | void set_proc_pid_nlink(void); |
187 | extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); | 197 | extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); |
188 | extern int proc_fill_super(struct super_block *, void *data, int flags); | 198 | extern int proc_fill_super(struct super_block *, void *data, int flags); |
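Editor's note: the odd-looking SIZEOF_PDE_INLINE_NAME arithmetic (192-139, 128-87) back-computes the inline name length from an estimate of the other members' sizes, so the whole entry packs into a 192-byte (64-bit) or 128-byte (32-bit) slab object. A build-time guard for that assumption could look like the sketch below; this check is not in the patch itself and only illustrates the sizing intent:

	#include <linux/build_bug.h>

	static inline void example_pde_size_check(void)
	{
	#ifdef CONFIG_64BIT
		BUILD_BUG_ON(sizeof(struct proc_dir_entry) > 192);
	#else
		BUILD_BUG_ON(sizeof(struct proc_dir_entry) > 128);
	#endif
	}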
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 6bb20f864259..65a72ab57471 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c | |||
@@ -26,20 +26,7 @@ void __attribute__((weak)) arch_report_meminfo(struct seq_file *m) | |||
26 | 26 | ||
27 | static void show_val_kb(struct seq_file *m, const char *s, unsigned long num) | 27 | static void show_val_kb(struct seq_file *m, const char *s, unsigned long num) |
28 | { | 28 | { |
29 | char v[32]; | 29 | seq_put_decimal_ull_width(m, s, num << (PAGE_SHIFT - 10), 8); |
30 | static const char blanks[7] = {' ', ' ', ' ', ' ',' ', ' ', ' '}; | ||
31 | int len; | ||
32 | |||
33 | len = num_to_str(v, sizeof(v), num << (PAGE_SHIFT - 10)); | ||
34 | |||
35 | seq_write(m, s, 16); | ||
36 | |||
37 | if (len > 0) { | ||
38 | if (len < 8) | ||
39 | seq_write(m, blanks, 8 - len); | ||
40 | |||
41 | seq_write(m, v, len); | ||
42 | } | ||
43 | seq_write(m, " kB\n", 4); | 30 | seq_write(m, " kB\n", 4); |
44 | } | 31 | } |
45 | 32 | ||
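Editor's note: show_val_kb() collapses to a single call because seq_put_decimal_ull_width() right-aligns the value in the given number of columns, doing the padding the removed num_to_str()/blank-buffer code did by hand. The converted function, restated as a standalone sketch (name invented):

	#include <linux/mm.h>
	#include <linux/seq_file.h>

	static void example_show_val_kb(struct seq_file *m, const char *s,
					unsigned long pages)
	{
		/* Width 8 reproduces the old right-aligned column layout. */
		seq_put_decimal_ull_width(m, s, pages << (PAGE_SHIFT - 10), 8);
		seq_write(m, " kB\n", 4);
	}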
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 68c06ae7888c..1763f370489d 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c | |||
@@ -192,15 +192,16 @@ static __net_init int proc_net_ns_init(struct net *net) | |||
192 | int err; | 192 | int err; |
193 | 193 | ||
194 | err = -ENOMEM; | 194 | err = -ENOMEM; |
195 | netd = kzalloc(sizeof(*netd) + 4, GFP_KERNEL); | 195 | netd = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL); |
196 | if (!netd) | 196 | if (!netd) |
197 | goto out; | 197 | goto out; |
198 | 198 | ||
199 | netd->subdir = RB_ROOT_CACHED; | 199 | netd->subdir = RB_ROOT; |
200 | netd->data = net; | 200 | netd->data = net; |
201 | netd->nlink = 2; | 201 | netd->nlink = 2; |
202 | netd->namelen = 3; | 202 | netd->namelen = 3; |
203 | netd->parent = &proc_root; | 203 | netd->parent = &proc_root; |
204 | netd->name = netd->inline_name; | ||
204 | memcpy(netd->name, "net", 4); | 205 | memcpy(netd->name, "net", 4); |
205 | 206 | ||
206 | uid = make_kuid(net->user_ns, 0); | 207 | uid = make_kuid(net->user_ns, 0); |
@@ -223,7 +224,7 @@ static __net_init int proc_net_ns_init(struct net *net) | |||
223 | return 0; | 224 | return 0; |
224 | 225 | ||
225 | free_net: | 226 | free_net: |
226 | kfree(netd); | 227 | pde_free(netd); |
227 | out: | 228 | out: |
228 | return err; | 229 | return err; |
229 | } | 230 | } |
@@ -231,7 +232,7 @@ out: | |||
231 | static __net_exit void proc_net_ns_exit(struct net *net) | 232 | static __net_exit void proc_net_ns_exit(struct net *net) |
232 | { | 233 | { |
233 | remove_proc_entry("stat", net->proc_net); | 234 | remove_proc_entry("stat", net->proc_net); |
234 | kfree(net->proc_net); | 235 | pde_free(net->proc_net); |
235 | } | 236 | } |
236 | 237 | ||
237 | static struct pernet_operations __net_initdata proc_net_ns_ops = { | 238 | static struct pernet_operations __net_initdata proc_net_ns_ops = { |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index c41ab261397d..8989936f2995 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -707,14 +707,14 @@ static bool proc_sys_link_fill_cache(struct file *file, | |||
707 | struct ctl_table *table) | 707 | struct ctl_table *table) |
708 | { | 708 | { |
709 | bool ret = true; | 709 | bool ret = true; |
710 | |||
710 | head = sysctl_head_grab(head); | 711 | head = sysctl_head_grab(head); |
712 | if (IS_ERR(head)) | ||
713 | return false; | ||
711 | 714 | ||
712 | if (S_ISLNK(table->mode)) { | 715 | /* It is not an error if we can not follow the link ignore it */ |
713 | /* It is not an error if we can not follow the link ignore it */ | 716 | if (sysctl_follow_link(&head, &table)) |
714 | int err = sysctl_follow_link(&head, &table); | 717 | goto out; |
715 | if (err) | ||
716 | goto out; | ||
717 | } | ||
718 | 718 | ||
719 | ret = proc_sys_fill_cache(file, ctx, head, table); | 719 | ret = proc_sys_fill_cache(file, ctx, head, table); |
720 | out: | 720 | out: |
@@ -1086,7 +1086,7 @@ static int sysctl_check_table_array(const char *path, struct ctl_table *table) | |||
1086 | if ((table->proc_handler == proc_douintvec) || | 1086 | if ((table->proc_handler == proc_douintvec) || |
1087 | (table->proc_handler == proc_douintvec_minmax)) { | 1087 | (table->proc_handler == proc_douintvec_minmax)) { |
1088 | if (table->maxlen != sizeof(unsigned int)) | 1088 | if (table->maxlen != sizeof(unsigned int)) |
1089 | err |= sysctl_err(path, table, "array now allowed"); | 1089 | err |= sysctl_err(path, table, "array not allowed"); |
1090 | } | 1090 | } |
1091 | 1091 | ||
1092 | return err; | 1092 | return err; |
diff --git a/fs/proc/root.c b/fs/proc/root.c index ede8e64974be..61b7340b357a 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -123,23 +123,13 @@ static struct file_system_type proc_fs_type = { | |||
123 | 123 | ||
124 | void __init proc_root_init(void) | 124 | void __init proc_root_init(void) |
125 | { | 125 | { |
126 | int err; | 126 | proc_init_kmemcache(); |
127 | |||
128 | proc_init_inodecache(); | ||
129 | set_proc_pid_nlink(); | 127 | set_proc_pid_nlink(); |
130 | err = register_filesystem(&proc_fs_type); | ||
131 | if (err) | ||
132 | return; | ||
133 | |||
134 | proc_self_init(); | 128 | proc_self_init(); |
135 | proc_thread_self_init(); | 129 | proc_thread_self_init(); |
136 | proc_symlink("mounts", NULL, "self/mounts"); | 130 | proc_symlink("mounts", NULL, "self/mounts"); |
137 | 131 | ||
138 | proc_net_init(); | 132 | proc_net_init(); |
139 | |||
140 | #ifdef CONFIG_SYSVIPC | ||
141 | proc_mkdir("sysvipc", NULL); | ||
142 | #endif | ||
143 | proc_mkdir("fs", NULL); | 133 | proc_mkdir("fs", NULL); |
144 | proc_mkdir("driver", NULL); | 134 | proc_mkdir("driver", NULL); |
145 | proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */ | 135 | proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */ |
@@ -150,6 +140,8 @@ void __init proc_root_init(void) | |||
150 | proc_tty_init(); | 140 | proc_tty_init(); |
151 | proc_mkdir("bus", NULL); | 141 | proc_mkdir("bus", NULL); |
152 | proc_sys_init(); | 142 | proc_sys_init(); |
143 | |||
144 | register_filesystem(&proc_fs_type); | ||
153 | } | 145 | } |
154 | 146 | ||
155 | static int proc_root_getattr(const struct path *path, struct kstat *stat, | 147 | static int proc_root_getattr(const struct path *path, struct kstat *stat, |
@@ -207,12 +199,13 @@ struct proc_dir_entry proc_root = { | |||
207 | .namelen = 5, | 199 | .namelen = 5, |
208 | .mode = S_IFDIR | S_IRUGO | S_IXUGO, | 200 | .mode = S_IFDIR | S_IRUGO | S_IXUGO, |
209 | .nlink = 2, | 201 | .nlink = 2, |
210 | .count = ATOMIC_INIT(1), | 202 | .refcnt = REFCOUNT_INIT(1), |
211 | .proc_iops = &proc_root_inode_operations, | 203 | .proc_iops = &proc_root_inode_operations, |
212 | .proc_fops = &proc_root_operations, | 204 | .proc_fops = &proc_root_operations, |
213 | .parent = &proc_root, | 205 | .parent = &proc_root, |
214 | .subdir = RB_ROOT_CACHED, | 206 | .subdir = RB_ROOT, |
215 | .name = "/proc", | 207 | .name = proc_root.inline_name, |
208 | .inline_name = "/proc", | ||
216 | }; | 209 | }; |
217 | 210 | ||
218 | int pid_ns_prepare_proc(struct pid_namespace *ns) | 211 | int pid_ns_prepare_proc(struct pid_namespace *ns) |
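Editor's note: proc_root_init() now registers the filesystem type last, after the kmem caches and the standard directories exist, so a mount racing in right after registration finds a fully initialized tree; with everything set up first, the return value no longer needs to gate the rest of init. A sketch of the ordering rule with wholly hypothetical helper names:

	#include <linux/fs.h>
	#include <linux/init.h>

	static struct file_system_type example_fs_type;		/* hypothetical */

	static void __init example_caches_init(void) { }	/* hypothetical */
	static void __init example_subtrees_init(void) { }	/* hypothetical */

	static void __init example_fs_init(void)
	{
		example_caches_init();		/* slabs ready first */
		example_subtrees_init();	/* directory tree ready next */
		register_filesystem(&example_fs_type);	/* publish last */
	}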
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ec6d2983a5cb..65ae54659833 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -24,6 +24,8 @@ | |||
24 | #include <asm/tlbflush.h> | 24 | #include <asm/tlbflush.h> |
25 | #include "internal.h" | 25 | #include "internal.h" |
26 | 26 | ||
27 | #define SEQ_PUT_DEC(str, val) \ | ||
28 | seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8) | ||
27 | void task_mem(struct seq_file *m, struct mm_struct *mm) | 29 | void task_mem(struct seq_file *m, struct mm_struct *mm) |
28 | { | 30 | { |
29 | unsigned long text, lib, swap, anon, file, shmem; | 31 | unsigned long text, lib, swap, anon, file, shmem; |
@@ -53,39 +55,28 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) | |||
53 | lib = (mm->exec_vm << PAGE_SHIFT) - text; | 55 | lib = (mm->exec_vm << PAGE_SHIFT) - text; |
54 | 56 | ||
55 | swap = get_mm_counter(mm, MM_SWAPENTS); | 57 | swap = get_mm_counter(mm, MM_SWAPENTS); |
56 | seq_printf(m, | 58 | SEQ_PUT_DEC("VmPeak:\t", hiwater_vm); |
57 | "VmPeak:\t%8lu kB\n" | 59 | SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm); |
58 | "VmSize:\t%8lu kB\n" | 60 | SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm); |
59 | "VmLck:\t%8lu kB\n" | 61 | SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm); |
60 | "VmPin:\t%8lu kB\n" | 62 | SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss); |
61 | "VmHWM:\t%8lu kB\n" | 63 | SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss); |
62 | "VmRSS:\t%8lu kB\n" | 64 | SEQ_PUT_DEC(" kB\nRssAnon:\t", anon); |
63 | "RssAnon:\t%8lu kB\n" | 65 | SEQ_PUT_DEC(" kB\nRssFile:\t", file); |
64 | "RssFile:\t%8lu kB\n" | 66 | SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem); |
65 | "RssShmem:\t%8lu kB\n" | 67 | SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm); |
66 | "VmData:\t%8lu kB\n" | 68 | SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm); |
67 | "VmStk:\t%8lu kB\n" | 69 | seq_put_decimal_ull_width(m, |
68 | "VmExe:\t%8lu kB\n" | 70 | " kB\nVmExe:\t", text >> 10, 8); |
69 | "VmLib:\t%8lu kB\n" | 71 | seq_put_decimal_ull_width(m, |
70 | "VmPTE:\t%8lu kB\n" | 72 | " kB\nVmLib:\t", lib >> 10, 8); |
71 | "VmSwap:\t%8lu kB\n", | 73 | seq_put_decimal_ull_width(m, |
72 | hiwater_vm << (PAGE_SHIFT-10), | 74 | " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8); |
73 | total_vm << (PAGE_SHIFT-10), | 75 | SEQ_PUT_DEC(" kB\nVmSwap:\t", swap); |
74 | mm->locked_vm << (PAGE_SHIFT-10), | 76 | seq_puts(m, " kB\n"); |
75 | mm->pinned_vm << (PAGE_SHIFT-10), | ||
76 | hiwater_rss << (PAGE_SHIFT-10), | ||
77 | total_rss << (PAGE_SHIFT-10), | ||
78 | anon << (PAGE_SHIFT-10), | ||
79 | file << (PAGE_SHIFT-10), | ||
80 | shmem << (PAGE_SHIFT-10), | ||
81 | mm->data_vm << (PAGE_SHIFT-10), | ||
82 | mm->stack_vm << (PAGE_SHIFT-10), | ||
83 | text >> 10, | ||
84 | lib >> 10, | ||
85 | mm_pgtables_bytes(mm) >> 10, | ||
86 | swap << (PAGE_SHIFT-10)); | ||
87 | hugetlb_report_usage(m, mm); | 77 | hugetlb_report_usage(m, mm); |
88 | } | 78 | } |
79 | #undef SEQ_PUT_DEC | ||
89 | 80 | ||
90 | unsigned long task_vsize(struct mm_struct *mm) | 81 | unsigned long task_vsize(struct mm_struct *mm) |
91 | { | 82 | { |
@@ -287,15 +278,18 @@ static void show_vma_header_prefix(struct seq_file *m, | |||
287 | dev_t dev, unsigned long ino) | 278 | dev_t dev, unsigned long ino) |
288 | { | 279 | { |
289 | seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); | 280 | seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); |
290 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", | 281 | seq_put_hex_ll(m, NULL, start, 8); |
291 | start, | 282 | seq_put_hex_ll(m, "-", end, 8); |
292 | end, | 283 | seq_putc(m, ' '); |
293 | flags & VM_READ ? 'r' : '-', | 284 | seq_putc(m, flags & VM_READ ? 'r' : '-'); |
294 | flags & VM_WRITE ? 'w' : '-', | 285 | seq_putc(m, flags & VM_WRITE ? 'w' : '-'); |
295 | flags & VM_EXEC ? 'x' : '-', | 286 | seq_putc(m, flags & VM_EXEC ? 'x' : '-'); |
296 | flags & VM_MAYSHARE ? 's' : 'p', | 287 | seq_putc(m, flags & VM_MAYSHARE ? 's' : 'p'); |
297 | pgoff, | 288 | seq_put_hex_ll(m, " ", pgoff, 8); |
298 | MAJOR(dev), MINOR(dev), ino); | 289 | seq_put_hex_ll(m, " ", MAJOR(dev), 2); |
290 | seq_put_hex_ll(m, ":", MINOR(dev), 2); | ||
291 | seq_put_decimal_ull(m, " ", ino); | ||
292 | seq_putc(m, ' '); | ||
299 | } | 293 | } |
300 | 294 | ||
301 | static void | 295 | static void |
@@ -694,8 +688,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) | |||
694 | if (!mnemonics[i][0]) | 688 | if (!mnemonics[i][0]) |
695 | continue; | 689 | continue; |
696 | if (vma->vm_flags & (1UL << i)) { | 690 | if (vma->vm_flags & (1UL << i)) { |
697 | seq_printf(m, "%c%c ", | 691 | seq_putc(m, mnemonics[i][0]); |
698 | mnemonics[i][0], mnemonics[i][1]); | 692 | seq_putc(m, mnemonics[i][1]); |
693 | seq_putc(m, ' '); | ||
699 | } | 694 | } |
700 | } | 695 | } |
701 | seq_putc(m, '\n'); | 696 | seq_putc(m, '\n'); |
@@ -736,6 +731,8 @@ void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma) | |||
736 | { | 731 | { |
737 | } | 732 | } |
738 | 733 | ||
734 | #define SEQ_PUT_DEC(str, val) \ | ||
735 | seq_put_decimal_ull_width(m, str, (val) >> 10, 8) | ||
739 | static int show_smap(struct seq_file *m, void *v, int is_pid) | 736 | static int show_smap(struct seq_file *m, void *v, int is_pid) |
740 | { | 737 | { |
741 | struct proc_maps_private *priv = m->private; | 738 | struct proc_maps_private *priv = m->private; |
@@ -809,51 +806,34 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) | |||
809 | ret = SEQ_SKIP; | 806 | ret = SEQ_SKIP; |
810 | } | 807 | } |
811 | 808 | ||
812 | if (!rollup_mode) | 809 | if (!rollup_mode) { |
813 | seq_printf(m, | 810 | SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start); |
814 | "Size: %8lu kB\n" | 811 | SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma)); |
815 | "KernelPageSize: %8lu kB\n" | 812 | SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma)); |
816 | "MMUPageSize: %8lu kB\n", | 813 | seq_puts(m, " kB\n"); |
817 | (vma->vm_end - vma->vm_start) >> 10, | 814 | } |
818 | vma_kernel_pagesize(vma) >> 10, | ||
819 | vma_mmu_pagesize(vma) >> 10); | ||
820 | |||
821 | |||
822 | if (!rollup_mode || last_vma) | ||
823 | seq_printf(m, | ||
824 | "Rss: %8lu kB\n" | ||
825 | "Pss: %8lu kB\n" | ||
826 | "Shared_Clean: %8lu kB\n" | ||
827 | "Shared_Dirty: %8lu kB\n" | ||
828 | "Private_Clean: %8lu kB\n" | ||
829 | "Private_Dirty: %8lu kB\n" | ||
830 | "Referenced: %8lu kB\n" | ||
831 | "Anonymous: %8lu kB\n" | ||
832 | "LazyFree: %8lu kB\n" | ||
833 | "AnonHugePages: %8lu kB\n" | ||
834 | "ShmemPmdMapped: %8lu kB\n" | ||
835 | "Shared_Hugetlb: %8lu kB\n" | ||
836 | "Private_Hugetlb: %7lu kB\n" | ||
837 | "Swap: %8lu kB\n" | ||
838 | "SwapPss: %8lu kB\n" | ||
839 | "Locked: %8lu kB\n", | ||
840 | mss->resident >> 10, | ||
841 | (unsigned long)(mss->pss >> (10 + PSS_SHIFT)), | ||
842 | mss->shared_clean >> 10, | ||
843 | mss->shared_dirty >> 10, | ||
844 | mss->private_clean >> 10, | ||
845 | mss->private_dirty >> 10, | ||
846 | mss->referenced >> 10, | ||
847 | mss->anonymous >> 10, | ||
848 | mss->lazyfree >> 10, | ||
849 | mss->anonymous_thp >> 10, | ||
850 | mss->shmem_thp >> 10, | ||
851 | mss->shared_hugetlb >> 10, | ||
852 | mss->private_hugetlb >> 10, | ||
853 | mss->swap >> 10, | ||
854 | (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)), | ||
855 | (unsigned long)(mss->pss >> (10 + PSS_SHIFT))); | ||
856 | 815 | ||
816 | if (!rollup_mode || last_vma) { | ||
817 | SEQ_PUT_DEC("Rss: ", mss->resident); | ||
818 | SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT); | ||
819 | SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean); | ||
820 | SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty); | ||
821 | SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean); | ||
822 | SEQ_PUT_DEC(" kB\nPrivate_Dirty: ", mss->private_dirty); | ||
823 | SEQ_PUT_DEC(" kB\nReferenced: ", mss->referenced); | ||
824 | SEQ_PUT_DEC(" kB\nAnonymous: ", mss->anonymous); | ||
825 | SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree); | ||
826 | SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp); | ||
827 | SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp); | ||
828 | SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb); | ||
829 | seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ", | ||
830 | mss->private_hugetlb >> 10, 7); | ||
831 | SEQ_PUT_DEC(" kB\nSwap: ", mss->swap); | ||
832 | SEQ_PUT_DEC(" kB\nSwapPss: ", | ||
833 | mss->swap_pss >> PSS_SHIFT); | ||
834 | SEQ_PUT_DEC(" kB\nLocked: ", mss->pss >> PSS_SHIFT); | ||
835 | seq_puts(m, " kB\n"); | ||
836 | } | ||
857 | if (!rollup_mode) { | 837 | if (!rollup_mode) { |
858 | arch_show_smap(m, vma); | 838 | arch_show_smap(m, vma); |
859 | show_smap_vma_flags(m, vma); | 839 | show_smap_vma_flags(m, vma); |
@@ -861,6 +841,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) | |||
861 | m_cache_vma(m, vma); | 841 | m_cache_vma(m, vma); |
862 | return ret; | 842 | return ret; |
863 | } | 843 | } |
844 | #undef SEQ_PUT_DEC | ||
864 | 845 | ||
865 | static int show_pid_smap(struct seq_file *m, void *v) | 846 | static int show_pid_smap(struct seq_file *m, void *v) |
866 | { | 847 | { |
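
The smaps conversion above hides the repetition behind a local SEQ_PUT_DEC macro: each call emits the previous field's " kB\n" terminator fused into the next label, then the value scaled from bytes to kB in an 8-column field. A user-space model of the pattern, with printf standing in for seq_put_decimal_ull_width() and made-up values:

	#include <stdio.h>

	/* Model of the SEQ_PUT_DEC pattern: the delimiter string carries the
	 * previous line's " kB\n" suffix plus the next label, and the value
	 * is converted from bytes to kB with a right shift by 10. */
	#define SEQ_PUT_DEC(str, val) \
		printf("%s%8llu", str, (unsigned long long)(val) >> 10)

	int main(void)
	{
		unsigned long long rss = 123 * 4096ULL;	/* bytes; invented value */
		unsigned long long swap = 0;

		SEQ_PUT_DEC("Rss:", rss);
		SEQ_PUT_DEC(" kB\nSwap:", swap);
		printf(" kB\n");
		return 0;
	}
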
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 70057359fbaf..23148c3ed675 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -2643,7 +2643,7 @@ static int journal_init_dev(struct super_block *super, | |||
2643 | if (IS_ERR(journal->j_dev_bd)) { | 2643 | if (IS_ERR(journal->j_dev_bd)) { |
2644 | result = PTR_ERR(journal->j_dev_bd); | 2644 | result = PTR_ERR(journal->j_dev_bd); |
2645 | journal->j_dev_bd = NULL; | 2645 | journal->j_dev_bd = NULL; |
2646 | reiserfs_warning(super, | 2646 | reiserfs_warning(super, "sh-457", |
2647 | "journal_init_dev: Cannot open '%s': %i", | 2647 | "journal_init_dev: Cannot open '%s': %i", |
2648 | jdev_name, result); | 2648 | jdev_name, result); |
2649 | return result; | 2649 | return result; |
diff --git a/fs/seq_file.c b/fs/seq_file.c index eea09f6d8830..c6c27f1f9c98 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -6,6 +6,7 @@ | |||
6 | * initial implementation -- AV, Oct 2001. | 6 | * initial implementation -- AV, Oct 2001. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/cache.h> | ||
9 | #include <linux/fs.h> | 10 | #include <linux/fs.h> |
10 | #include <linux/export.h> | 11 | #include <linux/export.h> |
11 | #include <linux/seq_file.h> | 12 | #include <linux/seq_file.h> |
@@ -19,6 +20,8 @@ | |||
19 | #include <linux/uaccess.h> | 20 | #include <linux/uaccess.h> |
20 | #include <asm/page.h> | 21 | #include <asm/page.h> |
21 | 22 | ||
23 | static struct kmem_cache *seq_file_cache __ro_after_init; | ||
24 | |||
22 | static void seq_set_overflow(struct seq_file *m) | 25 | static void seq_set_overflow(struct seq_file *m) |
23 | { | 26 | { |
24 | m->count = m->size; | 27 | m->count = m->size; |
@@ -26,7 +29,7 @@ static void seq_set_overflow(struct seq_file *m) | |||
26 | 29 | ||
27 | static void *seq_buf_alloc(unsigned long size) | 30 | static void *seq_buf_alloc(unsigned long size) |
28 | { | 31 | { |
29 | return kvmalloc(size, GFP_KERNEL); | 32 | return kvmalloc(size, GFP_KERNEL_ACCOUNT); |
30 | } | 33 | } |
31 | 34 | ||
32 | /** | 35 | /** |
@@ -51,7 +54,7 @@ int seq_open(struct file *file, const struct seq_operations *op) | |||
51 | 54 | ||
52 | WARN_ON(file->private_data); | 55 | WARN_ON(file->private_data); |
53 | 56 | ||
54 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 57 | p = kmem_cache_zalloc(seq_file_cache, GFP_KERNEL); |
55 | if (!p) | 58 | if (!p) |
56 | return -ENOMEM; | 59 | return -ENOMEM; |
57 | 60 | ||
@@ -366,7 +369,7 @@ int seq_release(struct inode *inode, struct file *file) | |||
366 | { | 369 | { |
367 | struct seq_file *m = file->private_data; | 370 | struct seq_file *m = file->private_data; |
368 | kvfree(m->buf); | 371 | kvfree(m->buf); |
369 | kfree(m); | 372 | kmem_cache_free(seq_file_cache, m); |
370 | return 0; | 373 | return 0; |
371 | } | 374 | } |
372 | EXPORT_SYMBOL(seq_release); | 375 | EXPORT_SYMBOL(seq_release); |
@@ -563,7 +566,7 @@ static void single_stop(struct seq_file *p, void *v) | |||
563 | int single_open(struct file *file, int (*show)(struct seq_file *, void *), | 566 | int single_open(struct file *file, int (*show)(struct seq_file *, void *), |
564 | void *data) | 567 | void *data) |
565 | { | 568 | { |
566 | struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL); | 569 | struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL_ACCOUNT); |
567 | int res = -ENOMEM; | 570 | int res = -ENOMEM; |
568 | 571 | ||
569 | if (op) { | 572 | if (op) { |
@@ -625,7 +628,7 @@ void *__seq_open_private(struct file *f, const struct seq_operations *ops, | |||
625 | void *private; | 628 | void *private; |
626 | struct seq_file *seq; | 629 | struct seq_file *seq; |
627 | 630 | ||
628 | private = kzalloc(psize, GFP_KERNEL); | 631 | private = kzalloc(psize, GFP_KERNEL_ACCOUNT); |
629 | if (private == NULL) | 632 | if (private == NULL) |
630 | goto out; | 633 | goto out; |
631 | 634 | ||
@@ -673,29 +676,37 @@ void seq_puts(struct seq_file *m, const char *s) | |||
673 | } | 676 | } |
674 | EXPORT_SYMBOL(seq_puts); | 677 | EXPORT_SYMBOL(seq_puts); |
675 | 678 | ||
676 | /* | 679 | /** |
677 | * A helper routine for putting decimal numbers without rich format of printf(). | 680 | * A helper routine for putting decimal numbers without rich format of printf(). |
678 | * only 'unsigned long long' is supported. | 681 | * only 'unsigned long long' is supported. |
679 | * This routine will put strlen(delimiter) + number into seq_file. | 682 | * @m: seq_file identifying the buffer to which data should be written |
683 | * @delimiter: a string which is printed before the number | ||
684 | * @num: the number | ||
685 | * @width: a minimum field width | ||
686 | * | ||
687 | * This routine will put strlen(delimiter) + number into seq_file. | ||
680 | * This routine is very quick when you show lots of numbers. | 688 | * This routine is very quick when you show lots of numbers. |
681 | * In usual cases, it will be better to use seq_printf(). It's easier to read. | 689 | * In usual cases, it will be better to use seq_printf(). It's easier to read. |
682 | */ | 690 | */ |
683 | void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, | 691 | void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter, |
684 | unsigned long long num) | 692 | unsigned long long num, unsigned int width) |
685 | { | 693 | { |
686 | int len; | 694 | int len; |
687 | 695 | ||
688 | if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */ | 696 | if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */ |
689 | goto overflow; | 697 | goto overflow; |
690 | 698 | ||
691 | len = strlen(delimiter); | 699 | if (delimiter && delimiter[0]) { |
692 | if (m->count + len >= m->size) | 700 | if (delimiter[1] == 0) |
693 | goto overflow; | 701 | seq_putc(m, delimiter[0]); |
702 | else | ||
703 | seq_puts(m, delimiter); | ||
704 | } | ||
694 | 705 | ||
695 | memcpy(m->buf + m->count, delimiter, len); | 706 | if (!width) |
696 | m->count += len; | 707 | width = 1; |
697 | 708 | ||
698 | if (m->count + 1 >= m->size) | 709 | if (m->count + width >= m->size) |
699 | goto overflow; | 710 | goto overflow; |
700 | 711 | ||
701 | if (num < 10) { | 712 | if (num < 10) { |
@@ -703,7 +714,7 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, | |||
703 | return; | 714 | return; |
704 | } | 715 | } |
705 | 716 | ||
706 | len = num_to_str(m->buf + m->count, m->size - m->count, num); | 717 | len = num_to_str(m->buf + m->count, m->size - m->count, num, width); |
707 | if (!len) | 718 | if (!len) |
708 | goto overflow; | 719 | goto overflow; |
709 | 720 | ||
@@ -713,8 +724,60 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, | |||
713 | overflow: | 724 | overflow: |
714 | seq_set_overflow(m); | 725 | seq_set_overflow(m); |
715 | } | 726 | } |
727 | |||
728 | void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, | ||
729 | unsigned long long num) | ||
730 | { | ||
731 | return seq_put_decimal_ull_width(m, delimiter, num, 0); | ||
732 | } | ||
716 | EXPORT_SYMBOL(seq_put_decimal_ull); | 733 | EXPORT_SYMBOL(seq_put_decimal_ull); |
717 | 734 | ||
735 | /** | ||
736 | * seq_put_hex_ll - put a number in hexadecimal notation | ||
737 | * @m: seq_file identifying the buffer to which data should be written | ||
738 | * @delimiter: a string which is printed before the number | ||
739 | * @v: the number | ||
740 | * @width: a minimum field width | ||
741 | * | ||
742 | * seq_put_hex_ll(m, "", v, 8) is equivalent to seq_printf(m, "%08llx", v) | ||
743 | * | ||
744 | * This routine is very quick when you show lots of numbers. | ||
745 | * In usual cases, it will be better to use seq_printf(). It's easier to read. | ||
746 | */ | ||
747 | void seq_put_hex_ll(struct seq_file *m, const char *delimiter, | ||
748 | unsigned long long v, unsigned int width) | ||
749 | { | ||
750 | unsigned int len; | ||
751 | int i; | ||
752 | |||
753 | if (delimiter && delimiter[0]) { | ||
754 | if (delimiter[1] == 0) | ||
755 | seq_putc(m, delimiter[0]); | ||
756 | else | ||
757 | seq_puts(m, delimiter); | ||
758 | } | ||
759 | |||
760 | /* If v is 0, the result of __builtin_clzll is undefined */ | ||
761 | if (v == 0) | ||
762 | len = 1; | ||
763 | else | ||
764 | len = (sizeof(v) * 8 - __builtin_clzll(v) + 3) / 4; | ||
765 | |||
766 | if (len < width) | ||
767 | len = width; | ||
768 | |||
769 | if (m->count + len > m->size) { | ||
770 | seq_set_overflow(m); | ||
771 | return; | ||
772 | } | ||
773 | |||
774 | for (i = len - 1; i >= 0; i--) { | ||
775 | m->buf[m->count + i] = hex_asc[0xf & v]; | ||
776 | v = v >> 4; | ||
777 | } | ||
778 | m->count += len; | ||
779 | } | ||
780 | |||
718 | void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num) | 781 | void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num) |
719 | { | 782 | { |
720 | int len; | 783 | int len; |
@@ -722,12 +785,12 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num | |||
722 | if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */ | 785 | if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */ |
723 | goto overflow; | 786 | goto overflow; |
724 | 787 | ||
725 | len = strlen(delimiter); | 788 | if (delimiter && delimiter[0]) { |
726 | if (m->count + len >= m->size) | 789 | if (delimiter[1] == 0) |
727 | goto overflow; | 790 | seq_putc(m, delimiter[0]); |
728 | 791 | else | |
729 | memcpy(m->buf + m->count, delimiter, len); | 792 | seq_puts(m, delimiter); |
730 | m->count += len; | 793 | } |
731 | 794 | ||
732 | if (m->count + 2 >= m->size) | 795 | if (m->count + 2 >= m->size) |
733 | goto overflow; | 796 | goto overflow; |
@@ -742,7 +805,7 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num | |||
742 | return; | 805 | return; |
743 | } | 806 | } |
744 | 807 | ||
745 | len = num_to_str(m->buf + m->count, m->size - m->count, num); | 808 | len = num_to_str(m->buf + m->count, m->size - m->count, num, 0); |
746 | if (!len) | 809 | if (!len) |
747 | goto overflow; | 810 | goto overflow; |
748 | 811 | ||
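
Both decimal helpers above now special-case the delimiter instead of paying strlen() plus memcpy() on every call: an empty string is skipped entirely and a single character goes through the putc path. A small runnable sketch of that dispatch, with hypothetical stand-ins for the seq_file emitters:

	#include <stdio.h>

	/* Stand-ins for seq_putc()/seq_puts() (user-space shims) */
	static void emit_char(char c)		{ putchar(c); }
	static void emit_str(const char *s)	{ fputs(s, stdout); }

	/* The delimiter fast path: nothing for "", one putc for a single
	 * character, full string output only for longer delimiters. */
	static void put_delimiter(const char *delimiter)
	{
		if (delimiter && delimiter[0]) {
			if (delimiter[1] == 0)
				emit_char(delimiter[0]);
			else
				emit_str(delimiter);
		}
	}

	int main(void)
	{
		put_delimiter("");	/* emits nothing */
		put_delimiter(":");	/* single-char path */
		put_delimiter(" kB\n");	/* multi-char path */
		return 0;
	}
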
@@ -782,8 +845,14 @@ EXPORT_SYMBOL(seq_write); | |||
782 | void seq_pad(struct seq_file *m, char c) | 845 | void seq_pad(struct seq_file *m, char c) |
783 | { | 846 | { |
784 | int size = m->pad_until - m->count; | 847 | int size = m->pad_until - m->count; |
785 | if (size > 0) | 848 | if (size > 0) { |
786 | seq_printf(m, "%*s", size, ""); | 849 | if (size + m->count > m->size) { |
850 | seq_set_overflow(m); | ||
851 | return; | ||
852 | } | ||
853 | memset(m->buf + m->count, ' ', size); | ||
854 | m->count += size; | ||
855 | } | ||
787 | if (c) | 856 | if (c) |
788 | seq_putc(m, c); | 857 | seq_putc(m, c); |
789 | } | 858 | } |
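
seq_pad() above stops round-tripping through seq_printf("%*s", size, "") and writes the padding bytes directly, with an explicit overflow check before touching the buffer. A user-space model of the same move; the struct and function names are invented:

	#include <stdio.h>
	#include <string.h>

	/* Minimal stand-in for struct seq_file's buffer state */
	struct buf {
		char *data;
		size_t size;
		size_t count;
		int overflow;
	};

	/* Pad with spaces up to pad_until, failing cleanly when the buffer
	 * is too small -- mirrors the memset-based seq_pad() rewrite. */
	static void pad_to(struct buf *b, size_t pad_until)
	{
		if (pad_until > b->count) {
			size_t n = pad_until - b->count;

			if (b->count + n > b->size) {
				b->overflow = 1;	/* caller grows the buffer and retries */
				return;
			}
			memset(b->data + b->count, ' ', n);
			b->count += n;
		}
	}

	int main(void)
	{
		char storage[32];
		struct buf b = { storage, sizeof(storage), 0, 0 };

		b.count = snprintf(storage, sizeof(storage), "abc");
		pad_to(&b, 10);
		printf("count=%zu overflow=%d\n", b.count, b.overflow);
		return 0;
	}
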
@@ -1040,3 +1109,8 @@ seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head, | |||
1040 | return NULL; | 1109 | return NULL; |
1041 | } | 1110 | } |
1042 | EXPORT_SYMBOL(seq_hlist_next_percpu); | 1111 | EXPORT_SYMBOL(seq_hlist_next_percpu); |
1112 | |||
1113 | void __init seq_file_init(void) | ||
1114 | { | ||
1115 | seq_file_cache = KMEM_CACHE(seq_file, SLAB_ACCOUNT|SLAB_PANIC); | ||
1116 | } | ||
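
Taken together, the fs/seq_file.c changes give /proc show routines a printf-free fast path built from seq_putc(), seq_put_hex_ll() and the decimal helpers. A hedged kernel-style sketch of how a show function might combine them; struct foo, its fields, and foo_show() are invented, only the seq_file calls come from this series:

	/* Sketch only: emits "%08lx-%08lx %lu\n" without format parsing */
	static int foo_show(struct seq_file *m, void *v)
	{
		struct foo *f = m->private;

		seq_put_hex_ll(m, NULL, f->start, 8);	/* 8-digit hex, no delimiter */
		seq_put_hex_ll(m, "-", f->end, 8);
		seq_put_decimal_ull(m, " ", f->ino);
		seq_putc(m, '\n');
		return 0;
	}
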
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 436a1de3fcdf..0ab824f574ed 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -1467,19 +1467,8 @@ xfs_vm_set_page_dirty( | |||
1467 | newly_dirty = !TestSetPageDirty(page); | 1467 | newly_dirty = !TestSetPageDirty(page); |
1468 | spin_unlock(&mapping->private_lock); | 1468 | spin_unlock(&mapping->private_lock); |
1469 | 1469 | ||
1470 | if (newly_dirty) { | 1470 | if (newly_dirty) |
1471 | /* sigh - __set_page_dirty() is static, so copy it here, too */ | 1471 | __set_page_dirty(page, mapping, 1); |
1472 | unsigned long flags; | ||
1473 | |||
1474 | spin_lock_irqsave(&mapping->tree_lock, flags); | ||
1475 | if (page->mapping) { /* Race with truncate? */ | ||
1476 | WARN_ON_ONCE(!PageUptodate(page)); | ||
1477 | account_page_dirtied(page, mapping); | ||
1478 | radix_tree_tag_set(&mapping->page_tree, | ||
1479 | page_index(page), PAGECACHE_TAG_DIRTY); | ||
1480 | } | ||
1481 | spin_unlock_irqrestore(&mapping->tree_lock, flags); | ||
1482 | } | ||
1483 | unlock_page_memcg(page); | 1472 | unlock_page_memcg(page); |
1484 | if (newly_dirty) | 1473 | if (newly_dirty) |
1485 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | 1474 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); |
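
With __set_page_dirty() exported (the "export __set_page_dirty" patch in this series), filesystems that had open-coded the tree-locked dirty accounting can call the helper instead, as xfs does above. A hedged sketch of the resulting shape of a set_page_dirty method; the surrounding function is illustrative, not a real implementation, and omits any filesystem-private bookkeeping:

	/* Sketch: __set_page_dirty() takes the i_pages lock internally,
	 * handles the race with truncate, and (with warn == 1) checks
	 * that the page is uptodate before accounting it dirty. */
	static int example_set_page_dirty(struct page *page)
	{
		struct address_space *mapping = page->mapping;
		int newly_dirty;

		lock_page_memcg(page);
		newly_dirty = !TestSetPageDirty(page);
		if (newly_dirty)
			__set_page_dirty(page, mapping, 1);
		unlock_page_memcg(page);

		if (newly_dirty)
			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
		return newly_dirty;
	}
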