author     Andres Lagar-Cavilla <andreslc@google.com>    2016-05-19 20:12:47 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2016-05-19 22:12:14 -0400
commit     9e18eb29356b7dfd55183bd42cf73919d1590835
tree       7d40c6809e6755f18e6e00041511c536c03dc731 /mm
parent     75edd345e8ede51bc8f00672feff5d622f2b3af6
tmpfs: mem_cgroup charge fault to vm_mm not current mm
Although shmem_fault() has been careful to count a major fault to vm_mm,
shmem_getpage_gfp() has been careless in charging a remote access fault
to current->mm owner's memcg instead of to vma->vm_mm owner's memcg:
that is inconsistent with all the mem_cgroup charging on remote access
faults in mm/memory.c.
Fix it by passing fault_mm along with fault_type to
shmem_getpage_gfp(); but in that case, now knowing the right mm, it's
better for it to handle the PGMAJFAULT updates itself.
And let's keep this clutter out of most callers' way: change the common
shmem_getpage() wrapper to hide fault_mm and fault_type as well as gfp.
Signed-off-by: Andres Lagar-Cavilla <andreslc@google.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andres Lagar-Cavilla <andreslc@google.com>
Cc: Yang Shi <yang.shi@linaro.org>
Cc: Ning Qu <quning@gmail.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
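
The heart of the patch is the one-line charge_mm selection inside shmem_getpage_gfp(). Below is a small standalone C model of that decision; every type and variable in it is a stand-in invented for illustration (the body of struct mm_struct, current_mm, the owner strings), not kernel code — only the fault_mm-or-current->mm fallback mirrors the diff.

#include <stdio.h>

/* Stand-in for the kernel's struct mm_struct: illustration only. */
struct mm_struct { const char *owner; };

static struct mm_struct kworker_mm = { "kworker (current->mm)" };
static struct mm_struct mapped_mm  = { "task that mapped the file (vma->vm_mm)" };

/* Stand-in for current->mm: whichever task happens to be running. */
static struct mm_struct *current_mm = &kworker_mm;

/*
 * Models "charge_mm = fault_mm ? : current->mm;" from the patch
 * (spelled with a portable ternary here; the kernel line uses the
 * GNU "?:" shorthand).  shmem_fault() supplies vma->vm_mm as
 * fault_mm; every other caller passes NULL, so the charge falls
 * back to current->mm.
 */
static struct mm_struct *pick_charge_mm(struct mm_struct *fault_mm)
{
        return fault_mm ? fault_mm : current_mm;
}

int main(void)
{
        printf("fault path charges:     %s\n", pick_charge_mm(&mapped_mm)->owner);
        printf("non-fault path charges: %s\n", pick_charge_mm(NULL)->owner);
        return 0;
}

In the diff itself, both mem_cgroup_try_charge() call sites then take charge_mm, and the PGMAJFAULT accounting that shmem_fault() used to do after the fact moves into shmem_getpage_gfp(), next to the swapin that makes the fault major.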
Diffstat (limited to 'mm')
-rw-r--r--    mm/shmem.c    61
1 file changed, 34 insertions(+), 27 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 6d2de2c1bf11..e418a995427d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -121,13 +121,14 @@ static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
 static int shmem_replace_page(struct page **pagep, gfp_t gfp,
                                 struct shmem_inode_info *info, pgoff_t index);
 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
-                struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type);
+                struct page **pagep, enum sgp_type sgp,
+                gfp_t gfp, struct mm_struct *fault_mm, int *fault_type);
 
 static inline int shmem_getpage(struct inode *inode, pgoff_t index,
-                struct page **pagep, enum sgp_type sgp, int *fault_type)
+                struct page **pagep, enum sgp_type sgp)
 {
         return shmem_getpage_gfp(inode, index, pagep, sgp,
-                mapping_gfp_mask(inode->i_mapping), fault_type);
+                mapping_gfp_mask(inode->i_mapping), NULL, NULL);
 }
 
 static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
@@ -527,7 +528,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 
         if (partial_start) {
                 struct page *page = NULL;
-                shmem_getpage(inode, start - 1, &page, SGP_READ, NULL);
+                shmem_getpage(inode, start - 1, &page, SGP_READ);
                 if (page) {
                         unsigned int top = PAGE_SIZE;
                         if (start > end) {
@@ -542,7 +543,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
         }
         if (partial_end) {
                 struct page *page = NULL;
-                shmem_getpage(inode, end, &page, SGP_READ, NULL);
+                shmem_getpage(inode, end, &page, SGP_READ);
                 if (page) {
                         zero_user_segment(page, 0, partial_end);
                         set_page_dirty(page);
@@ -1115,14 +1116,19 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
  *
  * If we allocate a new one we do not mark it dirty. That's up to the
  * vm. If we swap it in we mark it dirty since we also free the swap
- * entry since a page cannot live in both the swap and page cache
+ * entry since a page cannot live in both the swap and page cache.
+ *
+ * fault_mm and fault_type are only supplied by shmem_fault:
+ * otherwise they are NULL.
  */
 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
-        struct page **pagep, enum sgp_type sgp, gfp_t gfp, int *fault_type)
+        struct page **pagep, enum sgp_type sgp, gfp_t gfp,
+        struct mm_struct *fault_mm, int *fault_type)
 {
         struct address_space *mapping = inode->i_mapping;
         struct shmem_inode_info *info;
         struct shmem_sb_info *sbinfo;
+        struct mm_struct *charge_mm;
         struct mem_cgroup *memcg;
         struct page *page;
         swp_entry_t swap;
@@ -1168,14 +1174,19 @@ repeat:
          */
         info = SHMEM_I(inode);
         sbinfo = SHMEM_SB(inode->i_sb);
+        charge_mm = fault_mm ? : current->mm;
 
         if (swap.val) {
                 /* Look it up and read it in.. */
                 page = lookup_swap_cache(swap);
                 if (!page) {
-                        /* here we actually do the io */
-                        if (fault_type)
+                        /* Or update major stats only when swapin succeeds?? */
+                        if (fault_type) {
                                 *fault_type |= VM_FAULT_MAJOR;
+                                count_vm_event(PGMAJFAULT);
+                                mem_cgroup_count_vm_event(fault_mm, PGMAJFAULT);
+                        }
+                        /* Here we actually start the io */
                         page = shmem_swapin(swap, gfp, info, index);
                         if (!page) {
                                 error = -ENOMEM;
@@ -1202,7 +1213,7 @@ repeat:
                         goto failed;
                 }
 
-                error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg,
+                error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
                                 false);
                 if (!error) {
                         error = shmem_add_to_page_cache(page, mapping, index,
@@ -1263,7 +1274,7 @@ repeat:
                 if (sgp == SGP_WRITE)
                         __SetPageReferenced(page);
 
-                error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg,
+                error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
                                 false);
                 if (error)
                         goto decused;
@@ -1352,6 +1363,7 @@ unlock:
 static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
         struct inode *inode = file_inode(vma->vm_file);
+        gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
         int error;
         int ret = VM_FAULT_LOCKED;
 
@@ -1413,14 +1425,10 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
                 spin_unlock(&inode->i_lock);
         }
 
-        error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret);
+        error = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, SGP_CACHE,
+                                  gfp, vma->vm_mm, &ret);
         if (error)
                 return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS);
-
-        if (ret & VM_FAULT_MAJOR) {
-                count_vm_event(PGMAJFAULT);
-                mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
-        }
         return ret;
 }
 
@@ -1567,7 +1575,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
                 return -EPERM;
         }
 
-        return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
+        return shmem_getpage(inode, index, pagep, SGP_WRITE);
 }
 
 static int
@@ -1633,7 +1641,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
                         break;
                 }
 
-                error = shmem_getpage(inode, index, &page, sgp, NULL);
+                error = shmem_getpage(inode, index, &page, sgp);
                 if (error) {
                         if (error == -EINVAL)
                                 error = 0;
@@ -1749,7 +1757,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
         error = 0;
 
         while (spd.nr_pages < nr_pages) {
-                error = shmem_getpage(inode, index, &page, SGP_CACHE, NULL);
+                error = shmem_getpage(inode, index, &page, SGP_CACHE);
                 if (error)
                         break;
                 unlock_page(page);
@@ -1771,8 +1779,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
                 page = spd.pages[page_nr];
 
                 if (!PageUptodate(page) || page->mapping != mapping) {
-                        error = shmem_getpage(inode, index, &page,
-                                                        SGP_CACHE, NULL);
+                        error = shmem_getpage(inode, index, &page, SGP_CACHE);
                         if (error)
                                 break;
                         unlock_page(page);
@@ -2215,8 +2222,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
                 else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
                         error = -ENOMEM;
                 else
-                        error = shmem_getpage(inode, index, &page, SGP_FALLOC,
-                                                                        NULL);
+                        error = shmem_getpage(inode, index, &page, SGP_FALLOC);
                 if (error) {
                         /* Remove the !PageUptodate pages we added */
                         shmem_undo_range(inode,
@@ -2534,7 +2540,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
                 inode->i_op = &shmem_short_symlink_operations;
         } else {
                 inode_nohighmem(inode);
-                error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
+                error = shmem_getpage(inode, 0, &page, SGP_WRITE);
                 if (error) {
                         iput(inode);
                         return error;
@@ -2575,7 +2581,7 @@ static const char *shmem_get_link(struct dentry *dentry,
                         return ERR_PTR(-ECHILD);
                 }
         } else {
-                error = shmem_getpage(inode, 0, &page, SGP_READ, NULL);
+                error = shmem_getpage(inode, 0, &page, SGP_READ);
                 if (error)
                         return ERR_PTR(error);
                 unlock_page(page);
@@ -3479,7 +3485,8 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
         int error;
 
         BUG_ON(mapping->a_ops != &shmem_aops);
-        error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE, gfp, NULL);
+        error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE,
+                                  gfp, NULL, NULL);
         if (error)
                 page = ERR_PTR(error);
         else