diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/huge_memory.c | 12 | ||||
-rw-r--r-- | mm/memcontrol.c | 41 | ||||
-rw-r--r-- | mm/shmem.c | 36 |
3 files changed, 80 insertions, 9 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index bccd5a628ea6..33a5dc492810 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -1481,8 +1481,18 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, | |||
1481 | pmd = pmdp_get_and_clear(mm, old_addr, old_pmd); | 1481 | pmd = pmdp_get_and_clear(mm, old_addr, old_pmd); |
1482 | VM_BUG_ON(!pmd_none(*new_pmd)); | 1482 | VM_BUG_ON(!pmd_none(*new_pmd)); |
1483 | set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); | 1483 | set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); |
1484 | if (new_ptl != old_ptl) | 1484 | if (new_ptl != old_ptl) { |
1485 | pgtable_t pgtable; | ||
1486 | |||
1487 | /* | ||
1488 | * Move preallocated PTE page table if new_pmd is on | ||
1489 | * different PMD page table. | ||
1490 | */ | ||
1491 | pgtable = pgtable_trans_huge_withdraw(mm, old_pmd); | ||
1492 | pgtable_trans_huge_deposit(mm, new_pmd, pgtable); | ||
1493 | |||
1485 | spin_unlock(new_ptl); | 1494 | spin_unlock(new_ptl); |
1495 | } | ||
1486 | spin_unlock(old_ptl); | 1496 | spin_unlock(old_ptl); |
1487 | } | 1497 | } |
1488 | out: | 1498 | out: |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f1a0ae6e11b8..bf5e89457149 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -2694,7 +2694,10 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
2694 | goto bypass; | 2694 | goto bypass; |
2695 | 2695 | ||
2696 | if (unlikely(task_in_memcg_oom(current))) | 2696 | if (unlikely(task_in_memcg_oom(current))) |
2697 | goto bypass; | 2697 | goto nomem; |
2698 | |||
2699 | if (gfp_mask & __GFP_NOFAIL) | ||
2700 | oom = false; | ||
2698 | 2701 | ||
2699 | /* | 2702 | /* |
2700 | * We always charge the cgroup the mm_struct belongs to. | 2703 | * We always charge the cgroup the mm_struct belongs to. |
@@ -6352,6 +6355,42 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) | |||
6352 | static void mem_cgroup_css_free(struct cgroup_subsys_state *css) | 6355 | static void mem_cgroup_css_free(struct cgroup_subsys_state *css) |
6353 | { | 6356 | { |
6354 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 6357 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
6358 | /* | ||
6359 | * XXX: css_offline() would be where we should reparent all | ||
6360 | * memory to prepare the cgroup for destruction. However, | ||
6361 | * memcg does not do css_tryget() and res_counter charging | ||
6362 | * under the same RCU lock region, which means that charging | ||
6363 | * could race with offlining. Offlining only happens to | ||
6364 | * cgroups with no tasks in them but charges can show up | ||
6365 | * without any tasks from the swapin path when the target | ||
6366 | * memcg is looked up from the swapout record and not from the | ||
6367 | * current task as it usually is. A race like this can leak | ||
6368 | * charges and put pages with stale cgroup pointers into | ||
6369 | * circulation: | ||
6370 | * | ||
6371 | * #0 #1 | ||
6372 | * lookup_swap_cgroup_id() | ||
6373 | * rcu_read_lock() | ||
6374 | * mem_cgroup_lookup() | ||
6375 | * css_tryget() | ||
6376 | * rcu_read_unlock() | ||
6377 | * disable css_tryget() | ||
6378 | * call_rcu() | ||
6379 | * offline_css() | ||
6380 | * reparent_charges() | ||
6381 | * res_counter_charge() | ||
6382 | * css_put() | ||
6383 | * css_free() | ||
6384 | * pc->mem_cgroup = dead memcg | ||
6385 | * add page to lru | ||
6386 | * | ||
6387 | * The bulk of the charges are still moved in offline_css() to | ||
6388 | * avoid pinning a lot of pages in case a long-term reference | ||
6389 | * like a swapout record is deferring the css_free() to long | ||
6390 | * after offlining. But this makes sure we catch any charges | ||
6391 | * made after offlining: | ||
6392 | */ | ||
6393 | mem_cgroup_reparent_charges(memcg); | ||
6355 | 6394 | ||
6356 | memcg_destroy_kmem(memcg); | 6395 | memcg_destroy_kmem(memcg); |
6357 | __mem_cgroup_free(memcg); | 6396 | __mem_cgroup_free(memcg); |
diff --git a/mm/shmem.c b/mm/shmem.c index 8297623fcaed..902a14842b74 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -2918,13 +2918,8 @@ static struct dentry_operations anon_ops = { | |||
2918 | .d_dname = simple_dname | 2918 | .d_dname = simple_dname |
2919 | }; | 2919 | }; |
2920 | 2920 | ||
2921 | /** | 2921 | static struct file *__shmem_file_setup(const char *name, loff_t size, |
2922 | * shmem_file_setup - get an unlinked file living in tmpfs | 2922 | unsigned long flags, unsigned int i_flags) |
2923 | * @name: name for dentry (to be seen in /proc/<pid>/maps) | ||
2924 | * @size: size to be set for the file | ||
2925 | * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size | ||
2926 | */ | ||
2927 | struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags) | ||
2928 | { | 2923 | { |
2929 | struct file *res; | 2924 | struct file *res; |
2930 | struct inode *inode; | 2925 | struct inode *inode; |
@@ -2957,6 +2952,7 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags | |||
2957 | if (!inode) | 2952 | if (!inode) |
2958 | goto put_dentry; | 2953 | goto put_dentry; |
2959 | 2954 | ||
2955 | inode->i_flags |= i_flags; | ||
2960 | d_instantiate(path.dentry, inode); | 2956 | d_instantiate(path.dentry, inode); |
2961 | inode->i_size = size; | 2957 | inode->i_size = size; |
2962 | clear_nlink(inode); /* It is unlinked */ | 2958 | clear_nlink(inode); /* It is unlinked */ |
@@ -2977,6 +2973,32 @@ put_memory: | |||
2977 | shmem_unacct_size(flags, size); | 2973 | shmem_unacct_size(flags, size); |
2978 | return res; | 2974 | return res; |
2979 | } | 2975 | } |
2976 | |||
2977 | /** | ||
2978 | * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be | ||
2979 | * kernel internal. There will be NO LSM permission checks against the | ||
2980 | * underlying inode. So users of this interface must do LSM checks at a | ||
2981 | * higher layer. The one user is the big_key implementation. LSM checks | ||
2982 | * are provided at the key level rather than the inode level. | ||
2983 | * @name: name for dentry (to be seen in /proc/<pid>/maps) | ||
2984 | * @size: size to be set for the file | ||
2985 | * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size | ||
2986 | */ | ||
2987 | struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags) | ||
2988 | { | ||
2989 | return __shmem_file_setup(name, size, flags, S_PRIVATE); | ||
2990 | } | ||
2991 | |||
2992 | /** | ||
2993 | * shmem_file_setup - get an unlinked file living in tmpfs | ||
2994 | * @name: name for dentry (to be seen in /proc/<pid>/maps) | ||
2995 | * @size: size to be set for the file | ||
2996 | * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size | ||
2997 | */ | ||
2998 | struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags) | ||
2999 | { | ||
3000 | return __shmem_file_setup(name, size, flags, 0); | ||
3001 | } | ||
2980 | EXPORT_SYMBOL_GPL(shmem_file_setup); | 3002 | EXPORT_SYMBOL_GPL(shmem_file_setup); |
2981 | 3003 | ||
2982 | /** | 3004 | /** |