diff options
-rw-r--r-- | Documentation/cgroups/memory.txt | 4 | ||||
-rw-r--r-- | include/linux/memcontrol.h | 22 | ||||
-rw-r--r-- | include/net/sock.h | 156 | ||||
-rw-r--r-- | mm/memcontrol.c | 46 | ||||
-rw-r--r-- | net/core/sock.c | 24 |
5 files changed, 235 insertions, 17 deletions
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index f2453241142b..23a8dc5319a3 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt | |||
@@ -289,7 +289,9 @@ to trigger slab reclaim when those limits are reached. | |||
289 | 289 | ||
290 | 2.7.1 Current Kernel Memory resources accounted | 290 | 2.7.1 Current Kernel Memory resources accounted |
291 | 291 | ||
292 | None | 292 | * sockets memory pressure: some socket protocols have memory pressure |
293 | thresholds. The Memory Controller allows them to be controlled individually | ||
294 | per cgroup, instead of globally. | ||
293 | 295 | ||
294 | 3. User Interface | 296 | 3. User Interface |
295 | 297 | ||
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b87068a1a09e..f15021b9f734 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -85,6 +85,8 @@ extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); | |||
85 | extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); | 85 | extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); |
86 | extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); | 86 | extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); |
87 | 87 | ||
88 | extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); | ||
89 | |||
88 | static inline | 90 | static inline |
89 | int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) | 91 | int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) |
90 | { | 92 | { |
@@ -381,5 +383,25 @@ mem_cgroup_print_bad_page(struct page *page) | |||
381 | } | 383 | } |
382 | #endif | 384 | #endif |
383 | 385 | ||
386 | #ifdef CONFIG_INET | ||
387 | enum { | ||
388 | UNDER_LIMIT, | ||
389 | SOFT_LIMIT, | ||
390 | OVER_LIMIT, | ||
391 | }; | ||
392 | |||
393 | struct sock; | ||
394 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | ||
395 | void sock_update_memcg(struct sock *sk); | ||
396 | void sock_release_memcg(struct sock *sk); | ||
397 | #else | ||
398 | static inline void sock_update_memcg(struct sock *sk) | ||
399 | { | ||
400 | } | ||
401 | static inline void sock_release_memcg(struct sock *sk) | ||
402 | { | ||
403 | } | ||
404 | #endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */ | ||
405 | #endif /* CONFIG_INET */ | ||
384 | #endif /* _LINUX_MEMCONTROL_H */ | 406 | #endif /* _LINUX_MEMCONTROL_H */ |
385 | 407 | ||
diff --git a/include/net/sock.h b/include/net/sock.h index ed0dbf034539..d5eab256167c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -54,6 +54,7 @@ | |||
54 | #include <linux/slab.h> | 54 | #include <linux/slab.h> |
55 | #include <linux/uaccess.h> | 55 | #include <linux/uaccess.h> |
56 | #include <linux/memcontrol.h> | 56 | #include <linux/memcontrol.h> |
57 | #include <linux/res_counter.h> | ||
57 | 58 | ||
58 | #include <linux/filter.h> | 59 | #include <linux/filter.h> |
59 | #include <linux/rculist_nulls.h> | 60 | #include <linux/rculist_nulls.h> |
@@ -168,6 +169,7 @@ struct sock_common { | |||
168 | /* public: */ | 169 | /* public: */ |
169 | }; | 170 | }; |
170 | 171 | ||
172 | struct cg_proto; | ||
171 | /** | 173 | /** |
172 | * struct sock - network layer representation of sockets | 174 | * struct sock - network layer representation of sockets |
173 | * @__sk_common: shared layout with inet_timewait_sock | 175 | * @__sk_common: shared layout with inet_timewait_sock |
@@ -228,6 +230,7 @@ struct sock_common { | |||
228 | * @sk_security: used by security modules | 230 | * @sk_security: used by security modules |
229 | * @sk_mark: generic packet mark | 231 | * @sk_mark: generic packet mark |
230 | * @sk_classid: this socket's cgroup classid | 232 | * @sk_classid: this socket's cgroup classid |
233 | * @sk_cgrp: this socket's cgroup-specific proto data | ||
231 | * @sk_write_pending: a write to stream socket waits to start | 234 | * @sk_write_pending: a write to stream socket waits to start |
232 | * @sk_state_change: callback to indicate change in the state of the sock | 235 | * @sk_state_change: callback to indicate change in the state of the sock |
233 | * @sk_data_ready: callback to indicate there is data to be processed | 236 | * @sk_data_ready: callback to indicate there is data to be processed |
@@ -342,6 +345,7 @@ struct sock { | |||
342 | #endif | 345 | #endif |
343 | __u32 sk_mark; | 346 | __u32 sk_mark; |
344 | u32 sk_classid; | 347 | u32 sk_classid; |
348 | struct cg_proto *sk_cgrp; | ||
345 | void (*sk_state_change)(struct sock *sk); | 349 | void (*sk_state_change)(struct sock *sk); |
346 | void (*sk_data_ready)(struct sock *sk, int bytes); | 350 | void (*sk_data_ready)(struct sock *sk, int bytes); |
347 | void (*sk_write_space)(struct sock *sk); | 351 | void (*sk_write_space)(struct sock *sk); |
@@ -838,6 +842,37 @@ struct proto { | |||
838 | #ifdef SOCK_REFCNT_DEBUG | 842 | #ifdef SOCK_REFCNT_DEBUG |
839 | atomic_t socks; | 843 | atomic_t socks; |
840 | #endif | 844 | #endif |
845 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | ||
846 | /* | ||
847 | * cgroup-specific init/deinit functions. Called once for all | ||
848 | * protocols that implement them, from the cgroups populate function. | ||
849 | * This function has to set up any files the protocol wants to | ||
850 | * appear in the kmem cgroup filesystem. | ||
851 | */ | ||
852 | int (*init_cgroup)(struct cgroup *cgrp, | ||
853 | struct cgroup_subsys *ss); | ||
854 | void (*destroy_cgroup)(struct cgroup *cgrp, | ||
855 | struct cgroup_subsys *ss); | ||
856 | struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg); | ||
857 | #endif | ||
858 | }; | ||
859 | |||
860 | struct cg_proto { | ||
861 | void (*enter_memory_pressure)(struct sock *sk); | ||
862 | struct res_counter *memory_allocated; /* Current allocated memory. */ | ||
863 | struct percpu_counter *sockets_allocated; /* Current number of sockets. */ | ||
864 | int *memory_pressure; | ||
865 | long *sysctl_mem; | ||
866 | /* | ||
867 | * memcg field is used to find which memcg we belong to directly. | ||
868 | * Each memcg struct can hold more than one cg_proto, so container_of | ||
869 | * won't really cut it. | ||
870 | * | ||
871 | * The elegant solution would be having an inverse function to | ||
872 | * proto_cgroup in struct proto, but that means polluting the structure | ||
873 | * for everybody, instead of just for memcg users. | ||
874 | */ | ||
875 | struct mem_cgroup *memcg; | ||
841 | }; | 876 | }; |
842 | 877 | ||
843 | extern int proto_register(struct proto *prot, int alloc_slab); | 878 | extern int proto_register(struct proto *prot, int alloc_slab); |
@@ -856,7 +891,7 @@ static inline void sk_refcnt_debug_dec(struct sock *sk) | |||
856 | sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks)); | 891 | sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks)); |
857 | } | 892 | } |
858 | 893 | ||
859 | static inline void sk_refcnt_debug_release(const struct sock *sk) | 894 | inline void sk_refcnt_debug_release(const struct sock *sk) |
860 | { | 895 | { |
861 | if (atomic_read(&sk->sk_refcnt) != 1) | 896 | if (atomic_read(&sk->sk_refcnt) != 1) |
862 | printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", | 897 | printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", |
@@ -868,6 +903,24 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) | |||
868 | #define sk_refcnt_debug_release(sk) do { } while (0) | 903 | #define sk_refcnt_debug_release(sk) do { } while (0) |
869 | #endif /* SOCK_REFCNT_DEBUG */ | 904 | #endif /* SOCK_REFCNT_DEBUG */ |
870 | 905 | ||
906 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | ||
907 | extern struct jump_label_key memcg_socket_limit_enabled; | ||
908 | static inline struct cg_proto *parent_cg_proto(struct proto *proto, | ||
909 | struct cg_proto *cg_proto) | ||
910 | { | ||
911 | return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg)); | ||
912 | } | ||
913 | #define mem_cgroup_sockets_enabled static_branch(&memcg_socket_limit_enabled) | ||
914 | #else | ||
915 | #define mem_cgroup_sockets_enabled 0 | ||
916 | static inline struct cg_proto *parent_cg_proto(struct proto *proto, | ||
917 | struct cg_proto *cg_proto) | ||
918 | { | ||
919 | return NULL; | ||
920 | } | ||
921 | #endif | ||
922 | |||
923 | |||
871 | static inline bool sk_has_memory_pressure(const struct sock *sk) | 924 | static inline bool sk_has_memory_pressure(const struct sock *sk) |
872 | { | 925 | { |
873 | return sk->sk_prot->memory_pressure != NULL; | 926 | return sk->sk_prot->memory_pressure != NULL; |
@@ -877,6 +930,10 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) | |||
877 | { | 930 | { |
878 | if (!sk->sk_prot->memory_pressure) | 931 | if (!sk->sk_prot->memory_pressure) |
879 | return false; | 932 | return false; |
933 | |||
934 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) | ||
935 | return !!*sk->sk_cgrp->memory_pressure; | ||
936 | |||
880 | return !!*sk->sk_prot->memory_pressure; | 937 | return !!*sk->sk_prot->memory_pressure; |
881 | } | 938 | } |
882 | 939 | ||
@@ -884,52 +941,136 @@ static inline void sk_leave_memory_pressure(struct sock *sk) | |||
884 | { | 941 | { |
885 | int *memory_pressure = sk->sk_prot->memory_pressure; | 942 | int *memory_pressure = sk->sk_prot->memory_pressure; |
886 | 943 | ||
887 | if (memory_pressure && *memory_pressure) | 944 | if (!memory_pressure) |
945 | return; | ||
946 | |||
947 | if (*memory_pressure) | ||
888 | *memory_pressure = 0; | 948 | *memory_pressure = 0; |
949 | |||
950 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { | ||
951 | struct cg_proto *cg_proto = sk->sk_cgrp; | ||
952 | struct proto *prot = sk->sk_prot; | ||
953 | |||
954 | for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) | ||
955 | if (*cg_proto->memory_pressure) | ||
956 | *cg_proto->memory_pressure = 0; | ||
957 | } | ||
958 | |||
889 | } | 959 | } |
890 | 960 | ||
891 | static inline void sk_enter_memory_pressure(struct sock *sk) | 961 | static inline void sk_enter_memory_pressure(struct sock *sk) |
892 | { | 962 | { |
893 | if (sk->sk_prot->enter_memory_pressure) | 963 | if (!sk->sk_prot->enter_memory_pressure) |
894 | sk->sk_prot->enter_memory_pressure(sk); | 964 | return; |
965 | |||
966 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { | ||
967 | struct cg_proto *cg_proto = sk->sk_cgrp; | ||
968 | struct proto *prot = sk->sk_prot; | ||
969 | |||
970 | for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) | ||
971 | cg_proto->enter_memory_pressure(sk); | ||
972 | } | ||
973 | |||
974 | sk->sk_prot->enter_memory_pressure(sk); | ||
895 | } | 975 | } |
896 | 976 | ||
897 | static inline long sk_prot_mem_limits(const struct sock *sk, int index) | 977 | static inline long sk_prot_mem_limits(const struct sock *sk, int index) |
898 | { | 978 | { |
899 | long *prot = sk->sk_prot->sysctl_mem; | 979 | long *prot = sk->sk_prot->sysctl_mem; |
980 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) | ||
981 | prot = sk->sk_cgrp->sysctl_mem; | ||
900 | return prot[index]; | 982 | return prot[index]; |
901 | } | 983 | } |
902 | 984 | ||
985 | static inline void memcg_memory_allocated_add(struct cg_proto *prot, | ||
986 | unsigned long amt, | ||
987 | int *parent_status) | ||
988 | { | ||
989 | struct res_counter *fail; | ||
990 | int ret; | ||
991 | |||
992 | ret = res_counter_charge(prot->memory_allocated, | ||
993 | amt << PAGE_SHIFT, &fail); | ||
994 | |||
995 | if (ret < 0) | ||
996 | *parent_status = OVER_LIMIT; | ||
997 | } | ||
998 | |||
999 | static inline void memcg_memory_allocated_sub(struct cg_proto *prot, | ||
1000 | unsigned long amt) | ||
1001 | { | ||
1002 | res_counter_uncharge(prot->memory_allocated, amt << PAGE_SHIFT); | ||
1003 | } | ||
1004 | |||
1005 | static inline u64 memcg_memory_allocated_read(struct cg_proto *prot) | ||
1006 | { | ||
1007 | u64 ret; | ||
1008 | ret = res_counter_read_u64(prot->memory_allocated, RES_USAGE); | ||
1009 | return ret >> PAGE_SHIFT; | ||
1010 | } | ||
1011 | |||
903 | static inline long | 1012 | static inline long |
904 | sk_memory_allocated(const struct sock *sk) | 1013 | sk_memory_allocated(const struct sock *sk) |
905 | { | 1014 | { |
906 | struct proto *prot = sk->sk_prot; | 1015 | struct proto *prot = sk->sk_prot; |
1016 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) | ||
1017 | return memcg_memory_allocated_read(sk->sk_cgrp); | ||
1018 | |||
907 | return atomic_long_read(prot->memory_allocated); | 1019 | return atomic_long_read(prot->memory_allocated); |
908 | } | 1020 | } |
909 | 1021 | ||
910 | static inline long | 1022 | static inline long |
911 | sk_memory_allocated_add(struct sock *sk, int amt) | 1023 | sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status) |
912 | { | 1024 | { |
913 | struct proto *prot = sk->sk_prot; | 1025 | struct proto *prot = sk->sk_prot; |
1026 | |||
1027 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { | ||
1028 | memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status); | ||
1029 | /* update the root cgroup regardless */ | ||
1030 | atomic_long_add_return(amt, prot->memory_allocated); | ||
1031 | return memcg_memory_allocated_read(sk->sk_cgrp); | ||
1032 | } | ||
1033 | |||
914 | return atomic_long_add_return(amt, prot->memory_allocated); | 1034 | return atomic_long_add_return(amt, prot->memory_allocated); |
915 | } | 1035 | } |
916 | 1036 | ||
917 | static inline void | 1037 | static inline void |
918 | sk_memory_allocated_sub(struct sock *sk, int amt) | 1038 | sk_memory_allocated_sub(struct sock *sk, int amt, int parent_status) |
919 | { | 1039 | { |
920 | struct proto *prot = sk->sk_prot; | 1040 | struct proto *prot = sk->sk_prot; |
1041 | |||
1042 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp && | ||
1043 | parent_status != OVER_LIMIT) /* Otherwise was uncharged already */ | ||
1044 | memcg_memory_allocated_sub(sk->sk_cgrp, amt); | ||
1045 | |||
921 | atomic_long_sub(amt, prot->memory_allocated); | 1046 | atomic_long_sub(amt, prot->memory_allocated); |
922 | } | 1047 | } |
923 | 1048 | ||
924 | static inline void sk_sockets_allocated_dec(struct sock *sk) | 1049 | static inline void sk_sockets_allocated_dec(struct sock *sk) |
925 | { | 1050 | { |
926 | struct proto *prot = sk->sk_prot; | 1051 | struct proto *prot = sk->sk_prot; |
1052 | |||
1053 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { | ||
1054 | struct cg_proto *cg_proto = sk->sk_cgrp; | ||
1055 | |||
1056 | for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) | ||
1057 | percpu_counter_dec(cg_proto->sockets_allocated); | ||
1058 | } | ||
1059 | |||
927 | percpu_counter_dec(prot->sockets_allocated); | 1060 | percpu_counter_dec(prot->sockets_allocated); |
928 | } | 1061 | } |
929 | 1062 | ||
930 | static inline void sk_sockets_allocated_inc(struct sock *sk) | 1063 | static inline void sk_sockets_allocated_inc(struct sock *sk) |
931 | { | 1064 | { |
932 | struct proto *prot = sk->sk_prot; | 1065 | struct proto *prot = sk->sk_prot; |
1066 | |||
1067 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { | ||
1068 | struct cg_proto *cg_proto = sk->sk_cgrp; | ||
1069 | |||
1070 | for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto)) | ||
1071 | percpu_counter_inc(cg_proto->sockets_allocated); | ||
1072 | } | ||
1073 | |||
933 | percpu_counter_inc(prot->sockets_allocated); | 1074 | percpu_counter_inc(prot->sockets_allocated); |
934 | } | 1075 | } |
935 | 1076 | ||
@@ -938,6 +1079,9 @@ sk_sockets_allocated_read_positive(struct sock *sk) | |||
938 | { | 1079 | { |
939 | struct proto *prot = sk->sk_prot; | 1080 | struct proto *prot = sk->sk_prot; |
940 | 1081 | ||
1082 | if (mem_cgroup_sockets_enabled && sk->sk_cgrp) | ||
1083 | return percpu_counter_sum_positive(sk->sk_cgrp->sockets_allocated); | ||
1084 | |||
941 | return percpu_counter_sum_positive(prot->sockets_allocated); | 1085 | return percpu_counter_sum_positive(prot->sockets_allocated); |
942 | } | 1086 | } |
943 | 1087 | ||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9fbcff71245e..3de3901ae0a7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -379,7 +379,48 @@ enum mem_type { | |||
379 | 379 | ||
380 | static void mem_cgroup_get(struct mem_cgroup *memcg); | 380 | static void mem_cgroup_get(struct mem_cgroup *memcg); |
381 | static void mem_cgroup_put(struct mem_cgroup *memcg); | 381 | static void mem_cgroup_put(struct mem_cgroup *memcg); |
382 | static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); | 382 | |
383 | /* Writing them here to avoid exposing memcg's inner layout */ | ||
384 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | ||
385 | #ifdef CONFIG_INET | ||
386 | #include <net/sock.h> | ||
387 | |||
388 | static bool mem_cgroup_is_root(struct mem_cgroup *memcg); | ||
389 | void sock_update_memcg(struct sock *sk) | ||
390 | { | ||
391 | /* A socket spends its whole life in the same cgroup */ | ||
392 | if (sk->sk_cgrp) { | ||
393 | WARN_ON(1); | ||
394 | return; | ||
395 | } | ||
396 | if (static_branch(&memcg_socket_limit_enabled)) { | ||
397 | struct mem_cgroup *memcg; | ||
398 | |||
399 | BUG_ON(!sk->sk_prot->proto_cgroup); | ||
400 | |||
401 | rcu_read_lock(); | ||
402 | memcg = mem_cgroup_from_task(current); | ||
403 | if (!mem_cgroup_is_root(memcg)) { | ||
404 | mem_cgroup_get(memcg); | ||
405 | sk->sk_cgrp = sk->sk_prot->proto_cgroup(memcg); | ||
406 | } | ||
407 | rcu_read_unlock(); | ||
408 | } | ||
409 | } | ||
410 | EXPORT_SYMBOL(sock_update_memcg); | ||
411 | |||
412 | void sock_release_memcg(struct sock *sk) | ||
413 | { | ||
414 | if (static_branch(&memcg_socket_limit_enabled) && sk->sk_cgrp) { | ||
415 | struct mem_cgroup *memcg; | ||
416 | WARN_ON(!sk->sk_cgrp->memcg); | ||
417 | memcg = sk->sk_cgrp->memcg; | ||
418 | mem_cgroup_put(memcg); | ||
419 | } | ||
420 | } | ||
421 | #endif /* CONFIG_INET */ | ||
422 | #endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */ | ||
423 | |||
383 | static void drain_all_stock_async(struct mem_cgroup *memcg); | 424 | static void drain_all_stock_async(struct mem_cgroup *memcg); |
384 | 425 | ||
385 | static struct mem_cgroup_per_zone * | 426 | static struct mem_cgroup_per_zone * |
@@ -4932,12 +4973,13 @@ static void mem_cgroup_put(struct mem_cgroup *memcg) | |||
4932 | /* | 4973 | /* |
4933 | * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled. | 4974 | * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled. |
4934 | */ | 4975 | */ |
4935 | static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) | 4976 | struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) |
4936 | { | 4977 | { |
4937 | if (!memcg->res.parent) | 4978 | if (!memcg->res.parent) |
4938 | return NULL; | 4979 | return NULL; |
4939 | return mem_cgroup_from_res_counter(memcg->res.parent, res); | 4980 | return mem_cgroup_from_res_counter(memcg->res.parent, res); |
4940 | } | 4981 | } |
4982 | EXPORT_SYMBOL(parent_mem_cgroup); | ||
4941 | 4983 | ||
4942 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 4984 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
4943 | static void __init enable_swap_cgroup(void) | 4985 | static void __init enable_swap_cgroup(void) |
diff --git a/net/core/sock.c b/net/core/sock.c index a3d4205e7238..6a871b8fdd20 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -111,6 +111,7 @@ | |||
111 | #include <linux/init.h> | 111 | #include <linux/init.h> |
112 | #include <linux/highmem.h> | 112 | #include <linux/highmem.h> |
113 | #include <linux/user_namespace.h> | 113 | #include <linux/user_namespace.h> |
114 | #include <linux/jump_label.h> | ||
114 | 115 | ||
115 | #include <asm/uaccess.h> | 116 | #include <asm/uaccess.h> |
116 | #include <asm/system.h> | 117 | #include <asm/system.h> |
@@ -142,6 +143,9 @@ | |||
142 | static struct lock_class_key af_family_keys[AF_MAX]; | 143 | static struct lock_class_key af_family_keys[AF_MAX]; |
143 | static struct lock_class_key af_family_slock_keys[AF_MAX]; | 144 | static struct lock_class_key af_family_slock_keys[AF_MAX]; |
144 | 145 | ||
146 | struct jump_label_key memcg_socket_limit_enabled; | ||
147 | EXPORT_SYMBOL(memcg_socket_limit_enabled); | ||
148 | |||
145 | /* | 149 | /* |
146 | * Make lock validator output more readable. (we pre-construct these | 150 | * Make lock validator output more readable. (we pre-construct these |
147 | * strings build-time, so that runtime initialization of socket | 151 | * strings build-time, so that runtime initialization of socket |
@@ -1711,23 +1715,27 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) | |||
1711 | struct proto *prot = sk->sk_prot; | 1715 | struct proto *prot = sk->sk_prot; |
1712 | int amt = sk_mem_pages(size); | 1716 | int amt = sk_mem_pages(size); |
1713 | long allocated; | 1717 | long allocated; |
1718 | int parent_status = UNDER_LIMIT; | ||
1714 | 1719 | ||
1715 | sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; | 1720 | sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; |
1716 | 1721 | ||
1717 | allocated = sk_memory_allocated_add(sk, amt); | 1722 | allocated = sk_memory_allocated_add(sk, amt, &parent_status); |
1718 | 1723 | ||
1719 | /* Under limit. */ | 1724 | /* Under limit. */ |
1720 | if (allocated <= sk_prot_mem_limits(sk, 0)) { | 1725 | if (parent_status == UNDER_LIMIT && |
1726 | allocated <= sk_prot_mem_limits(sk, 0)) { | ||
1721 | sk_leave_memory_pressure(sk); | 1727 | sk_leave_memory_pressure(sk); |
1722 | return 1; | 1728 | return 1; |
1723 | } | 1729 | } |
1724 | 1730 | ||
1725 | /* Under pressure. */ | 1731 | /* Under pressure. (we or our parents) */ |
1726 | if (allocated > sk_prot_mem_limits(sk, 1)) | 1732 | if ((parent_status > SOFT_LIMIT) || |
1733 | allocated > sk_prot_mem_limits(sk, 1)) | ||
1727 | sk_enter_memory_pressure(sk); | 1734 | sk_enter_memory_pressure(sk); |
1728 | 1735 | ||
1729 | /* Over hard limit. */ | 1736 | /* Over hard limit (we or our parents) */ |
1730 | if (allocated > sk_prot_mem_limits(sk, 2)) | 1737 | if ((parent_status == OVER_LIMIT) || |
1738 | (allocated > sk_prot_mem_limits(sk, 2))) | ||
1731 | goto suppress_allocation; | 1739 | goto suppress_allocation; |
1732 | 1740 | ||
1733 | /* guarantee minimum buffer size under pressure */ | 1741 | /* guarantee minimum buffer size under pressure */ |
@@ -1774,7 +1782,7 @@ suppress_allocation: | |||
1774 | /* Alas. Undo changes. */ | 1782 | /* Alas. Undo changes. */ |
1775 | sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; | 1783 | sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; |
1776 | 1784 | ||
1777 | sk_memory_allocated_sub(sk, amt); | 1785 | sk_memory_allocated_sub(sk, amt, parent_status); |
1778 | 1786 | ||
1779 | return 0; | 1787 | return 0; |
1780 | } | 1788 | } |
@@ -1787,7 +1795,7 @@ EXPORT_SYMBOL(__sk_mem_schedule); | |||
1787 | void __sk_mem_reclaim(struct sock *sk) | 1795 | void __sk_mem_reclaim(struct sock *sk) |
1788 | { | 1796 | { |
1789 | sk_memory_allocated_sub(sk, | 1797 | sk_memory_allocated_sub(sk, |
1790 | sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT); | 1798 | sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, 0); |
1791 | sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; | 1799 | sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; |
1792 | 1800 | ||
1793 | if (sk_under_memory_pressure(sk) && | 1801 | if (sk_under_memory_pressure(sk) && |