Diffstat (limited to 'include/net/sock.h')
 -rw-r--r--  include/net/sock.h  156
 1 file changed, 150 insertions(+), 6 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index ed0dbf034539..d5eab256167c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -54,6 +54,7 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/memcontrol.h>
+#include <linux/res_counter.h>
 
 #include <linux/filter.h>
 #include <linux/rculist_nulls.h>
@@ -168,6 +169,7 @@ struct sock_common {
 	/* public: */
 };
 
+struct cg_proto;
 /**
   * struct sock - network layer representation of sockets
   * @__sk_common: shared layout with inet_timewait_sock
@@ -228,6 +230,7 @@ struct sock_common {
   * @sk_security: used by security modules
   * @sk_mark: generic packet mark
   * @sk_classid: this socket's cgroup classid
+  * @sk_cgrp: this socket's cgroup-specific proto data
   * @sk_write_pending: a write to stream socket waits to start
   * @sk_state_change: callback to indicate change in the state of the sock
   * @sk_data_ready: callback to indicate there is data to be processed
@@ -342,6 +345,7 @@ struct sock {
 #endif
 	__u32			sk_mark;
 	u32			sk_classid;
+	struct cg_proto		*sk_cgrp;
 	void			(*sk_state_change)(struct sock *sk);
 	void			(*sk_data_ready)(struct sock *sk, int bytes);
 	void			(*sk_write_space)(struct sock *sk);
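The new sk_cgrp pointer is expected to be filled in at socket-creation time for sockets created inside a non-root memcg. A minimal sketch of what the matching setter looks like elsewhere in this series (the helper name sock_update_memcg and the memcg lookups live in mm/memcontrol.c, not in this header, and are shown here only for orientation):

    /* Sketch: cache the protocol's per-memcg cg_proto in the sock.
     * The real series also takes a reference on the memcg here.
     */
    void sock_update_memcg(struct sock *sk)
    {
            if (mem_cgroup_sockets_enabled) {
                    struct mem_cgroup *memcg;

                    BUG_ON(!sk->sk_prot->proto_cgroup);

                    rcu_read_lock();
                    memcg = mem_cgroup_from_task(current);
                    if (!mem_cgroup_is_root(memcg))
                            sk->sk_cgrp = sk->sk_prot->proto_cgroup(memcg);
                    rcu_read_unlock();
            }
    }

Sockets in the root memcg keep sk_cgrp == NULL, which is what makes the sk->sk_cgrp checks below a cheap opt-out for the common case.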
@@ -838,6 +842,37 @@ struct proto {
 #ifdef SOCK_REFCNT_DEBUG
 	atomic_t		socks;
 #endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+	/*
+	 * cgroup specific init/deinit functions, called once for each
+	 * protocol that implements them, from the cgroup populate
+	 * function. They have to set up any files the protocol wants
+	 * to appear in the kmem cgroup filesystem.
+	 */
+	int			(*init_cgroup)(struct cgroup *cgrp,
+					       struct cgroup_subsys *ss);
+	void			(*destroy_cgroup)(struct cgroup *cgrp,
+						  struct cgroup_subsys *ss);
+	struct cg_proto		*(*proto_cgroup)(struct mem_cgroup *memcg);
+#endif
+};
+
+struct cg_proto {
+	void			(*enter_memory_pressure)(struct sock *sk);
+	struct res_counter	*memory_allocated;	/* Current allocated memory. */
+	struct percpu_counter	*sockets_allocated;	/* Current number of sockets. */
+	int			*memory_pressure;
+	long			*sysctl_mem;
+	/*
+	 * The memcg field is used to find the memcg we belong to
+	 * directly. Each memcg struct can hold more than one cg_proto,
+	 * so container_of won't really cut it.
+	 *
+	 * The elegant solution would be having an inverse function to
+	 * proto_cgroup in struct proto, but that means polluting the
+	 * structure for everybody, instead of just for memcg users.
+	 */
+	struct mem_cgroup	*memcg;
 };
 
 extern int proto_register(struct proto *prot, int alloc_slab);
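For orientation, this is roughly how a protocol wires up the three new hooks; the TCP side of this series does so in tcp_memcontrol.c. The tcp_mem field layout and helper names below are assumptions for illustration, not part of this header:

    /* Sketch: the inverse lookup that proto_cgroup() provides, plus
     * the hookup in the protocol's struct proto. Names illustrative.
     */
    struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
    {
            return &memcg->tcp_mem.cg_proto;	/* assumed field layout */
    }

    struct proto tcp_prot = {
            .name		= "TCP",
            /* ... existing ops ... */
    #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
            .init_cgroup	= tcp_init_cgroup,
            .destroy_cgroup	= tcp_destroy_cgroup,
            .proto_cgroup	= tcp_proto_cgroup,
    #endif
    };

This is also why cg_proto carries an explicit memcg back-pointer: one mem_cgroup can embed several cg_proto instances (one per protocol), so container_of() cannot recover the owning memcg.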
@@ -856,7 +891,7 @@ static inline void sk_refcnt_debug_dec(struct sock *sk)
 		       sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks));
 }
 
-static inline void sk_refcnt_debug_release(const struct sock *sk)
+inline void sk_refcnt_debug_release(const struct sock *sk)
 {
 	if (atomic_read(&sk->sk_refcnt) != 1)
 		printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
@@ -868,6 +903,24 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
 #define sk_refcnt_debug_release(sk) do { } while (0)
 #endif /* SOCK_REFCNT_DEBUG */
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+extern struct jump_label_key memcg_socket_limit_enabled;
+static inline struct cg_proto *parent_cg_proto(struct proto *proto,
+					       struct cg_proto *cg_proto)
+{
+	return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg));
+}
+#define mem_cgroup_sockets_enabled static_branch(&memcg_socket_limit_enabled)
+#else
+#define mem_cgroup_sockets_enabled 0
+static inline struct cg_proto *parent_cg_proto(struct proto *proto,
+					       struct cg_proto *cg_proto)
+{
+	return NULL;
+}
+#endif
+
+
 static inline bool sk_has_memory_pressure(const struct sock *sk)
 {
 	return sk->sk_prot->memory_pressure != NULL;
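mem_cgroup_sockets_enabled expands to a static jump label, so every memcg branch below costs a patched no-op until some cgroup actually sets a socket-memory limit. A sketch of how the key would be flipped, using the jump_label_inc()/jump_label_dec() API of this era (the wrapper below is hypothetical; in the series the increment happens when a limit is written):

    /* Hypothetical wrapper: enable the memcg socket paths when a
     * cgroup gets a socket-memory limit, disable when it is removed.
     */
    static void memcg_socket_limit_toggle(bool limited)
    {
            if (limited)
                    jump_label_inc(&memcg_socket_limit_enabled);
            else
                    jump_label_dec(&memcg_socket_limit_enabled);
    }

Note also that parent_cg_proto() returns NULL once the walk passes the root memcg, terminating the hierarchy loops used below (this relies on proto_cgroup() handling a NULL memcg).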
@@ -877,6 +930,10 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
 {
 	if (!sk->sk_prot->memory_pressure)
 		return false;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		return !!*sk->sk_cgrp->memory_pressure;
+
 	return !!*sk->sk_prot->memory_pressure;
 }
 
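Note the precedence: for a socket in a limited cgroup, only the cgroup's pressure state is consulted and the global *sk->sk_prot->memory_pressure is bypassed, so protocol-wide pressure no longer throttles sockets whose own cgroup is comfortable.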
@@ -884,52 +941,136 @@ static inline void sk_leave_memory_pressure(struct sock *sk)
 {
 	int *memory_pressure = sk->sk_prot->memory_pressure;
 
-	if (memory_pressure && *memory_pressure)
+	if (!memory_pressure)
+		return;
+
+	if (*memory_pressure)
 		*memory_pressure = 0;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		struct cg_proto *cg_proto = sk->sk_cgrp;
+		struct proto *prot = sk->sk_prot;
+
+		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+			if (*cg_proto->memory_pressure)
+				*cg_proto->memory_pressure = 0;
+	}
+
 }
 
 static inline void sk_enter_memory_pressure(struct sock *sk)
 {
-	if (sk->sk_prot->enter_memory_pressure)
-		sk->sk_prot->enter_memory_pressure(sk);
+	if (!sk->sk_prot->enter_memory_pressure)
+		return;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		struct cg_proto *cg_proto = sk->sk_cgrp;
+		struct proto *prot = sk->sk_prot;
+
+		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+			cg_proto->enter_memory_pressure(sk);
+	}
+
+	sk->sk_prot->enter_memory_pressure(sk);
 }
 
 static inline long sk_prot_mem_limits(const struct sock *sk, int index)
 {
 	long *prot = sk->sk_prot->sysctl_mem;
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		prot = sk->sk_cgrp->sysctl_mem;
 	return prot[index];
 }
 
+static inline void memcg_memory_allocated_add(struct cg_proto *prot,
+					      unsigned long amt,
+					      int *parent_status)
+{
+	struct res_counter *fail;
+	int ret;
+
+	ret = res_counter_charge(prot->memory_allocated,
+				 amt << PAGE_SHIFT, &fail);
+
+	if (ret < 0)
+		*parent_status = OVER_LIMIT;
+}
+
+static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
+					      unsigned long amt)
+{
+	res_counter_uncharge(prot->memory_allocated, amt << PAGE_SHIFT);
+}
+
+static inline u64 memcg_memory_allocated_read(struct cg_proto *prot)
+{
+	u64 ret;
+	ret = res_counter_read_u64(prot->memory_allocated, RES_USAGE);
+	return ret >> PAGE_SHIFT;
+}
+
 static inline long
 sk_memory_allocated(const struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		return memcg_memory_allocated_read(sk->sk_cgrp);
+
 	return atomic_long_read(prot->memory_allocated);
 }
 
 static inline long
-sk_memory_allocated_add(struct sock *sk, int amt)
+sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
 {
 	struct proto *prot = sk->sk_prot;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status);
+		/* update the root cgroup regardless */
+		atomic_long_add_return(amt, prot->memory_allocated);
+		return memcg_memory_allocated_read(sk->sk_cgrp);
+	}
+
 	return atomic_long_add_return(amt, prot->memory_allocated);
 }
 
 static inline void
-sk_memory_allocated_sub(struct sock *sk, int amt)
+sk_memory_allocated_sub(struct sock *sk, int amt, int parent_status)
 {
 	struct proto *prot = sk->sk_prot;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
+	    parent_status != OVER_LIMIT) /* Otherwise was uncharged already */
+		memcg_memory_allocated_sub(sk->sk_cgrp, amt);
+
 	atomic_long_sub(amt, prot->memory_allocated);
 }
 
 static inline void sk_sockets_allocated_dec(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		struct cg_proto *cg_proto = sk->sk_cgrp;
+
+		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+			percpu_counter_dec(cg_proto->sockets_allocated);
+	}
+
 	percpu_counter_dec(prot->sockets_allocated);
 }
 
 static inline void sk_sockets_allocated_inc(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		struct cg_proto *cg_proto = sk->sk_cgrp;
+
+		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+			percpu_counter_inc(cg_proto->sockets_allocated);
+	}
+
 	percpu_counter_inc(prot->sockets_allocated);
 }
 
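The new parent_status in/out argument exists for the caller's benefit: res_counter_charge() can fail at any ancestor in the hierarchy, and when it does the charge has already been rolled back, so the eventual sk_memory_allocated_sub() must not uncharge it a second time. A condensed sketch of the caller contract, modeled on the companion change to __sk_mem_schedule() in net/core/sock.c (UNDER_LIMIT and SOFT_LIMIT are assumed to come from the same res_counter interface that supplies OVER_LIMIT):

    /* Condensed, illustrative caller of the _add()/_sub() pair. */
    static int example_mem_schedule(struct sock *sk, int amt)
    {
            int parent_status = UNDER_LIMIT;
            long allocated = sk_memory_allocated_add(sk, amt, &parent_status);

            /* Whole hierarchy under its limits: accept the charge. */
            if (parent_status == UNDER_LIMIT &&
                allocated <= sk_prot_mem_limits(sk, 0)) {
                    sk_leave_memory_pressure(sk);
                    return 1;
            }

            /* We, or an ancestor, crossed the pressure threshold. */
            if (parent_status > SOFT_LIMIT ||
                allocated > sk_prot_mem_limits(sk, 1))
                    sk_enter_memory_pressure(sk);

            /* Reject: parent_status tells _sub() whether the memcg part
             * was already uncharged (OVER_LIMIT) and must be left alone.
             */
            sk_memory_allocated_sub(sk, amt, parent_status);
            return 0;
    }

Note also the asymmetry in sk_memory_allocated_add(): the root counter prot->memory_allocated is updated unconditionally, so global accounting stays correct, but the returned value is the cgroup's usage, which is what gets compared against the cgroup's limits.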
@@ -938,6 +1079,9 @@ sk_sockets_allocated_read_positive(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
 
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		return percpu_counter_sum_positive(sk->sk_cgrp->sockets_allocated);
+
 	return percpu_counter_sum_positive(prot->sockets_allocated);
 }
 
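Taken together, the accessors keep protocol code oblivious to whether it is limited globally or per cgroup: the same index convention applies in both cases (sysctl_mem[0] is the low watermark, [1] the pressure threshold, [2] the hard limit, mirroring sysctl_tcp_mem). An illustrative helper written against this API, not part of the patch:

    /* Illustrative only: one test covers both the global sysctl_mem[]
     * limits and a cgroup's res_counter-backed limits.
     */
    static inline bool sk_over_hard_limit(const struct sock *sk)
    {
            return sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2);
    }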