diff options
author | Michal Hocko <mhocko@suse.cz> | 2014-05-22 14:54:19 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-05-23 12:37:29 -0400 |
commit | 6f6acb00514c10be35529402f36ad7a288f08c2e (patch) | |
tree | 9b024724687b638e16ae111a8801eb47a34a18fe /mm | |
parent | 55231e5c898c5c03c14194001e349f40f59bd300 (diff) |
memcg: fix swapcache charge from kernel thread context
Commit 284f39afeaa4 ("mm: memcg: push !mm handling out to page cache
charge function") explicitly checks for page cache charges without any
mm context (from kernel thread context[1]).
This seemed to be the only possible case where memory could be charged
without mm context so commit 03583f1a631c ("memcg: remove unnecessary
!mm check from try_get_mem_cgroup_from_mm()") removed the mm check from
get_mem_cgroup_from_mm(). This however caused another NULL ptr
dereference during early boot when loopback kernel thread splices to
tmpfs as reported by Stephan Kulow:
BUG: unable to handle kernel NULL pointer dereference at 0000000000000360
IP: get_mem_cgroup_from_mm.isra.42+0x2b/0x60
Oops: 0000 [#1] SMP
Modules linked in: btrfs dm_multipath dm_mod scsi_dh multipath raid10 raid456 async_raid6_recov async_memcpy async_pq raid6_pq async_xor xor async_tx raid1 raid0 md_mod parport_pc parport nls_utf8 isofs usb_storage iscsi_ibft iscsi_boot_sysfs arc4 ecb fan thermal nfs lockd fscache nls_iso8859_1 nls_cp437 sg st hid_generic usbhid af_packet sunrpc sr_mod cdrom ata_generic uhci_hcd virtio_net virtio_blk ehci_hcd usbcore ata_piix floppy processor button usb_common virtio_pci virtio_ring virtio edd squashfs loop ppa]
CPU: 0 PID: 97 Comm: loop1 Not tainted 3.15.0-rc5-5-default #1
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
Call Trace:
__mem_cgroup_try_charge_swapin+0x40/0xe0
mem_cgroup_charge_file+0x8b/0xd0
shmem_getpage_gfp+0x66b/0x7b0
shmem_file_splice_read+0x18f/0x430
splice_direct_to_actor+0xa2/0x1c0
do_lo_receive+0x5a/0x60 [loop]
loop_thread+0x298/0x720 [loop]
kthread+0xc6/0xe0
ret_from_fork+0x7c/0xb0
Also Branimir Maksimovic reported the following oops which is triggered
for the swapcache charge path from the accounting code for kernel threads:
CPU: 1 PID: 160 Comm: kworker/u8:5 Tainted: P OE 3.15.0-rc5-core2-custom #159
Hardware name: System manufacturer System Product Name/MAXIMUSV GENE, BIOS 1903 08/19/2013
task: ffff880404e349b0 ti: ffff88040486a000 task.ti: ffff88040486a000
RIP: get_mem_cgroup_from_mm.isra.42+0x2b/0x60
Call Trace:
__mem_cgroup_try_charge_swapin+0x45/0xf0
mem_cgroup_charge_file+0x9c/0xe0
shmem_getpage_gfp+0x62c/0x770
shmem_write_begin+0x38/0x40
generic_perform_write+0xc5/0x1c0
__generic_file_aio_write+0x1d1/0x3f0
generic_file_aio_write+0x4f/0xc0
do_sync_write+0x5a/0x90
do_acct_process+0x4b1/0x550
acct_process+0x6d/0xa0
do_exit+0x827/0xa70
kthread+0xc3/0xf0
This patch fixes the issue by reintroducing mm check into
get_mem_cgroup_from_mm. We could do the same trick in
__mem_cgroup_try_charge_swapin as we do for the regular page cache path
but it is not worth the trouble. The check is not that expensive and it is
better to have get_mem_cgroup_from_mm more robust.
[1] - http://marc.info/?l=linux-mm&m=139463617808941&w=2
Fixes: 03583f1a631c ("memcg: remove unnecessary !mm check from try_get_mem_cgroup_from_mm()")
Reported-and-tested-by: Stephan Kulow <coolo@suse.com>
Reported-by: Branimir Maksimovic <branimir.maksimovic@gmail.com>
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 27 |
1 files changed, 14 insertions, 13 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c47dffdcb246..5177c6d4a2dd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -1077,9 +1077,18 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) | |||
1077 | 1077 | ||
1078 | rcu_read_lock(); | 1078 | rcu_read_lock(); |
1079 | do { | 1079 | do { |
1080 | memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); | 1080 | /* |
1081 | if (unlikely(!memcg)) | 1081 | * Page cache insertions can happen without an |
1082 | * actual mm context, e.g. during disk probing | ||
1083 | * on boot, loopback IO, acct() writes etc. | ||
1084 | */ | ||
1085 | if (unlikely(!mm)) | ||
1082 | memcg = root_mem_cgroup; | 1086 | memcg = root_mem_cgroup; |
1087 | else { | ||
1088 | memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); | ||
1089 | if (unlikely(!memcg)) | ||
1090 | memcg = root_mem_cgroup; | ||
1091 | } | ||
1083 | } while (!css_tryget(&memcg->css)); | 1092 | } while (!css_tryget(&memcg->css)); |
1084 | rcu_read_unlock(); | 1093 | rcu_read_unlock(); |
1085 | return memcg; | 1094 | return memcg; |
@@ -3958,17 +3967,9 @@ int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm, | |||
3958 | return 0; | 3967 | return 0; |
3959 | } | 3968 | } |
3960 | 3969 | ||
3961 | /* | 3970 | memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true); |
3962 | * Page cache insertions can happen without an actual mm | 3971 | if (!memcg) |
3963 | * context, e.g. during disk probing on boot. | 3972 | return -ENOMEM; |
3964 | */ | ||
3965 | if (unlikely(!mm)) | ||
3966 | memcg = root_mem_cgroup; | ||
3967 | else { | ||
3968 | memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true); | ||
3969 | if (!memcg) | ||
3970 | return -ENOMEM; | ||
3971 | } | ||
3972 | __mem_cgroup_commit_charge(memcg, page, 1, type, false); | 3973 | __mem_cgroup_commit_charge(memcg, page, 1, type, false); |
3973 | return 0; | 3974 | return 0; |
3974 | } | 3975 | } |