author    Glauber Costa <glommer@parallels.com>    2011-12-11 16:47:03 -0500
committer David S. Miller <davem@davemloft.net>    2011-12-12 19:04:10 -0500
commit    e1aab161e0135aafcd439be20b4f35e4b0922d95 (patch)
tree      d0bcdf7a34a34020079238027b431ffc6dece307 /include/net/sock.h
parent    180d8cd942ce336b2c869d324855c40c5db478ad (diff)
socket: initial cgroup code.
The goal of this work is to move the memory pressure tcp controls to a
cgroup, instead of just relying on global conditions.

To avoid excessive overhead in the network fast paths, the code that
accounts allocated memory to a cgroup is hidden inside a
static_branch(). This branch is patched out until the first non-root
cgroup is created. So when nobody is using cgroups, even if it is
mounted, no significant performance penalty should be seen.

This patch handles the generic part of the code, and has nothing
tcp-specific.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujtsu.com>
CC: Kirill A. Shutemov <kirill@shutemov.name>
CC: David S. Miller <davem@davemloft.net>
CC: Eric W. Biederman <ebiederm@xmission.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
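A note on the fast-path claim in the message: static_branch() is the jump-label primitive of this kernel generation, so the conditional it guards costs a no-op until somebody increments the key. A minimal sketch of the pattern, using the key this patch declares; the charge call is a hypothetical placeholder, not part of the patch:

/* Sketch: zero-cost opt-in accounting via a jump label. */
static inline void sk_charge_example(struct sock *sk, int amt)
{
	/*
	 * mem_cgroup_sockets_enabled expands to
	 * static_branch(&memcg_socket_limit_enabled); until the first
	 * non-root cgroup calls jump_label_inc() on that key, this
	 * whole block is patched out of the instruction stream.
	 */
	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
		memcg_charge_example(sk->sk_cgrp, amt); /* hypothetical */
}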
Diffstat (limited to 'include/net/sock.h')
-rw-r--r--  include/net/sock.h | 156
1 file changed, 150 insertions(+), 6 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index ed0dbf034539..d5eab256167c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -54,6 +54,7 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/memcontrol.h>
+#include <linux/res_counter.h>
 
 #include <linux/filter.h>
 #include <linux/rculist_nulls.h>
@@ -168,6 +169,7 @@ struct sock_common {
 	/* public: */
 };
 
+struct cg_proto;
 /**
  * struct sock - network layer representation of sockets
  * @__sk_common: shared layout with inet_timewait_sock
@@ -228,6 +230,7 @@ struct sock_common {
  * @sk_security: used by security modules
  * @sk_mark: generic packet mark
  * @sk_classid: this socket's cgroup classid
+ * @sk_cgrp: this socket's cgroup-specific proto data
  * @sk_write_pending: a write to stream socket waits to start
  * @sk_state_change: callback to indicate change in the state of the sock
  * @sk_data_ready: callback to indicate there is data to be processed
@@ -342,6 +345,7 @@ struct sock {
 #endif
 	__u32			sk_mark;
 	u32			sk_classid;
+	struct cg_proto		*sk_cgrp;
 	void			(*sk_state_change)(struct sock *sk);
 	void			(*sk_data_ready)(struct sock *sk, int bytes);
 	void			(*sk_write_space)(struct sock *sk);
@@ -838,6 +842,37 @@ struct proto {
 #ifdef SOCK_REFCNT_DEBUG
 	atomic_t		socks;
 #endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+	/*
+	 * cgroup specific init/deinit functions. Called once for all
+	 * protocols that implement it, from cgroups populate function.
+	 * This function has to setup any files the protocol want to
+	 * appear in the kmem cgroup filesystem.
+	 */
+	int			(*init_cgroup)(struct cgroup *cgrp,
+					       struct cgroup_subsys *ss);
+	void			(*destroy_cgroup)(struct cgroup *cgrp,
+						  struct cgroup_subsys *ss);
+	struct cg_proto		*(*proto_cgroup)(struct mem_cgroup *memcg);
+#endif
+};
+
+struct cg_proto {
+	void			(*enter_memory_pressure)(struct sock *sk);
+	struct res_counter	*memory_allocated;	/* Current allocated memory. */
+	struct percpu_counter	*sockets_allocated;	/* Current number of sockets. */
+	int			*memory_pressure;
+	long			*sysctl_mem;
+	/*
+	 * memcg field is used to find which memcg we belong directly
+	 * Each memcg struct can hold more than one cg_proto, so container_of
+	 * won't really cut.
+	 *
+	 * The elegant solution would be having an inverse function to
+	 * proto_cgroup in struct proto, but that means polluting the structure
+	 * for everybody, instead of just for memcg users.
+	 */
+	struct mem_cgroup	*memcg;
 };
 
 extern int proto_register(struct proto *prot, int alloc_slab);
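To make the new hooks concrete: a protocol that opts in is expected to fill the three CONFIG_CGROUP_MEM_RES_CTLR_KMEM members. Below is a hedged sketch of what a later tcp-specific patch might look like; every tcp_*/memcg_tcp_* name is an illustrative placeholder, only the struct proto fields come from this commit:

#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
/* Map a memcg to this protocol's per-cgroup state, NULL at the root. */
static struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
{
	/* hypothetical accessor into tcp's per-memcg state */
	return memcg ? &memcg_tcp_state(memcg)->cg_proto : NULL;
}

struct proto tcp_prot = {
	.name		= "TCP",
	/* ... the usual protocol ops ... */
	.init_cgroup	= tcp_init_cgroup,	/* creates this protocol's kmem cgroup files */
	.destroy_cgroup	= tcp_destroy_cgroup,
	.proto_cgroup	= tcp_proto_cgroup,
};
#endif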
@@ -856,7 +891,7 @@ static inline void sk_refcnt_debug_dec(struct sock *sk)
 		       sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks));
 }
 
-static inline void sk_refcnt_debug_release(const struct sock *sk)
+inline void sk_refcnt_debug_release(const struct sock *sk)
 {
 	if (atomic_read(&sk->sk_refcnt) != 1)
 		printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
@@ -868,6 +903,24 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
 #define sk_refcnt_debug_release(sk) do { } while (0)
 #endif /* SOCK_REFCNT_DEBUG */
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+extern struct jump_label_key memcg_socket_limit_enabled;
+static inline struct cg_proto *parent_cg_proto(struct proto *proto,
+					       struct cg_proto *cg_proto)
+{
+	return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg));
+}
+#define mem_cgroup_sockets_enabled static_branch(&memcg_socket_limit_enabled)
+#else
+#define mem_cgroup_sockets_enabled 0
+static inline struct cg_proto *parent_cg_proto(struct proto *proto,
+					       struct cg_proto *cg_proto)
+{
+	return NULL;
+}
+#endif
+
+
 static inline bool sk_has_memory_pressure(const struct sock *sk)
 {
 	return sk->sk_prot->memory_pressure != NULL;
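Worth spelling out why the ancestor walks further down terminate: parent_cg_proto() maps a cg_proto to its parent cgroup's cg_proto, and the chain ends at the root. This assumes parent_mem_cgroup() returns NULL for the root memcg and the protocol's proto_cgroup() returns NULL for a NULL/root memcg (both assumptions here, consistent with the root cgroup keeping its accounting in the plain struct proto counters):

/* Sketch: the walk used by the pressure/counter helpers below. */
struct cg_proto *cg_proto;

for (cg_proto = sk->sk_cgrp;	/* socket's own group */
     cg_proto;			/* NULL once we step past the root */
     cg_proto = parent_cg_proto(sk->sk_prot, cg_proto))
	; /* visit cg_proto and, transitively, every ancestor */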
@@ -877,6 +930,10 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
 {
 	if (!sk->sk_prot->memory_pressure)
 		return false;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		return !!*sk->sk_cgrp->memory_pressure;
+
 	return !!*sk->sk_prot->memory_pressure;
 }
 
@@ -884,52 +941,136 @@ static inline void sk_leave_memory_pressure(struct sock *sk)
 {
 	int *memory_pressure = sk->sk_prot->memory_pressure;
 
-	if (memory_pressure && *memory_pressure)
+	if (!memory_pressure)
+		return;
+
+	if (*memory_pressure)
 		*memory_pressure = 0;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		struct cg_proto *cg_proto = sk->sk_cgrp;
+		struct proto *prot = sk->sk_prot;
+
+		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+			if (*cg_proto->memory_pressure)
+				*cg_proto->memory_pressure = 0;
+	}
+
 }
 
 static inline void sk_enter_memory_pressure(struct sock *sk)
 {
-	if (sk->sk_prot->enter_memory_pressure)
-		sk->sk_prot->enter_memory_pressure(sk);
+	if (!sk->sk_prot->enter_memory_pressure)
+		return;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		struct cg_proto *cg_proto = sk->sk_cgrp;
+		struct proto *prot = sk->sk_prot;
+
+		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+			cg_proto->enter_memory_pressure(sk);
+	}
+
+	sk->sk_prot->enter_memory_pressure(sk);
 }
 
 static inline long sk_prot_mem_limits(const struct sock *sk, int index)
 {
 	long *prot = sk->sk_prot->sysctl_mem;
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		prot = sk->sk_cgrp->sysctl_mem;
 	return prot[index];
 }
 
+static inline void memcg_memory_allocated_add(struct cg_proto *prot,
+					      unsigned long amt,
+					      int *parent_status)
+{
+	struct res_counter *fail;
+	int ret;
+
+	ret = res_counter_charge(prot->memory_allocated,
+				 amt << PAGE_SHIFT, &fail);
+
+	if (ret < 0)
+		*parent_status = OVER_LIMIT;
+}
+
+static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
+					      unsigned long amt)
+{
+	res_counter_uncharge(prot->memory_allocated, amt << PAGE_SHIFT);
+}
+
+static inline u64 memcg_memory_allocated_read(struct cg_proto *prot)
+{
+	u64 ret;
+	ret = res_counter_read_u64(prot->memory_allocated, RES_USAGE);
+	return ret >> PAGE_SHIFT;
+}
+
 static inline long
 sk_memory_allocated(const struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		return memcg_memory_allocated_read(sk->sk_cgrp);
+
 	return atomic_long_read(prot->memory_allocated);
 }
 
 static inline long
-sk_memory_allocated_add(struct sock *sk, int amt)
+sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
 {
 	struct proto *prot = sk->sk_prot;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status);
+		/* update the root cgroup regardless */
+		atomic_long_add_return(amt, prot->memory_allocated);
+		return memcg_memory_allocated_read(sk->sk_cgrp);
+	}
+
 	return atomic_long_add_return(amt, prot->memory_allocated);
 }
 
 static inline void
-sk_memory_allocated_sub(struct sock *sk, int amt)
+sk_memory_allocated_sub(struct sock *sk, int amt, int parent_status)
 {
 	struct proto *prot = sk->sk_prot;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
+	    parent_status != OVER_LIMIT) /* Otherwise was uncharged already */
+		memcg_memory_allocated_sub(sk->sk_cgrp, amt);
+
 	atomic_long_sub(amt, prot->memory_allocated);
 }
 
 static inline void sk_sockets_allocated_dec(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		struct cg_proto *cg_proto = sk->sk_cgrp;
+
+		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+			percpu_counter_dec(cg_proto->sockets_allocated);
+	}
+
 	percpu_counter_dec(prot->sockets_allocated);
 }
 
 static inline void sk_sockets_allocated_inc(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		struct cg_proto *cg_proto = sk->sk_cgrp;
+
+		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+			percpu_counter_inc(cg_proto->sockets_allocated);
+	}
+
 	percpu_counter_inc(prot->sockets_allocated);
 }
 
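The parent_status in/out parameter only makes sense next to a caller; the callers live in net/core/sock.c, outside this diffstat. Here is a hedged sketch of the intended flow; the surrounding policy is illustrative, only the sk_* helpers and the OVER_LIMIT contract come from this patch (UNDER_LIMIT/OVER_LIMIT being the res_counter charge states, which is why <linux/res_counter.h> is now included):

/* Sketch: charging with the new signatures. */
static int mem_schedule_example(struct sock *sk, int amt)
{
	int parent_status = UNDER_LIMIT;
	long allocated;

	allocated = sk_memory_allocated_add(sk, amt, &parent_status);

	/*
	 * A cgroup (or ancestor) over its hard limit vetoes the charge
	 * even if the global protocol limit would still allow it.
	 */
	if (parent_status == OVER_LIMIT ||
	    allocated > sk_prot_mem_limits(sk, 2)) {
		/*
		 * Undo. On OVER_LIMIT the res_counter charge already
		 * failed and was rolled back, so sk_memory_allocated_sub()
		 * only takes back the unconditional global update.
		 */
		sk_memory_allocated_sub(sk, amt, parent_status);
		return -ENOMEM;
	}
	return 0;
}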
@@ -938,6 +1079,9 @@ sk_sockets_allocated_read_positive(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
 
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		return percpu_counter_sum_positive(sk->sk_cgrp->sockets_allocated);
+
 	return percpu_counter_sum_positive(prot->sockets_allocated);
 }
 
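Finally, the PAGE_SHIFT shifts in memcg_memory_allocated_{add,sub,read}: the socket core counts in pages while res_counter accounts bytes, so charges are converted on the way in and usage converted back on the way out. A worked example, assuming 4 KiB pages (PAGE_SHIFT == 12):

/* Sketch: the page <-> byte conversion done by the memcg helpers. */
unsigned long amt = 3;				/* pages requested by the socket layer */
unsigned long bytes = amt << PAGE_SHIFT;	/* 3 << 12 = 12288 bytes charged */
u64 pages_seen = bytes >> PAGE_SHIFT;		/* read back as 3 pages */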