author		Glauber Costa <glommer@parallels.com>	2011-12-11 16:47:02 -0500
committer	David S. Miller <davem@davemloft.net>	2011-12-12 19:04:10 -0500
commit		180d8cd942ce336b2c869d324855c40c5db478ad (patch)
tree		2424d854345d81464d6030ef8090a8e22bd414b0 /net
parent		e5671dfae59b165e2adfd4dfbdeab11ac8db5bda (diff)
foundations of per-cgroup memory pressure controlling.
This patch replaces all direct uses of the struct proto fields
memory_pressure, memory_allocated, sockets_allocated, and sysctl_mem
with accessor macros. Those macros can receive either a socket
argument or a mem_cgroup argument, depending on the context they live
in.

Since this is only macro wrapping, no performance impact at all is
expected when cgroups are disabled.
Signed-off-by: Glauber Costa <glommer@parallels.com>
Reviewed-by: Hiroyuki Kamezawa <kamezawa.hiroyu@jp.fujitsu.com>
CC: David S. Miller <davem@davemloft.net>
CC: Eric W. Biederman <ebiederm@xmission.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
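
The accessor definitions themselves land in include/net/sock.h and are
outside this diffstat, which is limited to 'net'. As a minimal sketch,
assuming each macro simply reduces to the expression it replaces in the
hunks below, the plain (non-cgroup) variants would look roughly like
this; the bodies are inferred, not quoted from the patch:

static inline long sk_prot_mem_limits(const struct sock *sk, int index)
{
        return sk->sk_prot->sysctl_mem[index];
}

static inline bool sk_has_memory_pressure(const struct sock *sk)
{
        return sk->sk_prot->memory_pressure != NULL;
}

static inline bool sk_under_memory_pressure(const struct sock *sk)
{
        if (!sk->sk_prot->memory_pressure)
                return false;
        return !!*sk->sk_prot->memory_pressure;
}

static inline void sk_enter_memory_pressure(struct sock *sk)
{
        if (sk->sk_prot->enter_memory_pressure)
                sk->sk_prot->enter_memory_pressure(sk);
}

static inline void sk_leave_memory_pressure(struct sock *sk)
{
        if (sk->sk_prot->memory_pressure)
                *sk->sk_prot->memory_pressure = 0;
}

A cgroup-aware build can then consult the socket's cgroup state first
and fall back to these globals; see the sketch after the tcp_input.c
hunks.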
Diffstat (limited to 'net')
-rw-r--r--	net/core/sock.c		57
-rw-r--r--	net/ipv4/proc.c		 6
-rw-r--r--	net/ipv4/tcp_input.c	12
-rw-r--r--	net/ipv4/tcp_ipv4.c	 4
-rw-r--r--	net/ipv4/tcp_output.c	 2
-rw-r--r--	net/ipv4/tcp_timer.c	 2
-rw-r--r--	net/ipv6/tcp_ipv6.c	 2
7 files changed, 48 insertions, 37 deletions
diff --git a/net/core/sock.c b/net/core/sock.c
index 9777da86aeac..a3d4205e7238 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1323,7 +1323,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		newsk->sk_wq = NULL;
 
 		if (newsk->sk_prot->sockets_allocated)
-			percpu_counter_inc(newsk->sk_prot->sockets_allocated);
+			sk_sockets_allocated_inc(newsk);
 
 		if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
 			net_enable_timestamp();
@@ -1713,28 +1713,28 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 	long allocated;
 
 	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
-	allocated = atomic_long_add_return(amt, prot->memory_allocated);
+
+	allocated = sk_memory_allocated_add(sk, amt);
 
 	/* Under limit. */
-	if (allocated <= prot->sysctl_mem[0]) {
-		if (prot->memory_pressure && *prot->memory_pressure)
-			*prot->memory_pressure = 0;
+	if (allocated <= sk_prot_mem_limits(sk, 0)) {
+		sk_leave_memory_pressure(sk);
 		return 1;
 	}
 
 	/* Under pressure. */
-	if (allocated > prot->sysctl_mem[1])
-		if (prot->enter_memory_pressure)
-			prot->enter_memory_pressure(sk);
+	if (allocated > sk_prot_mem_limits(sk, 1))
+		sk_enter_memory_pressure(sk);
 
 	/* Over hard limit. */
-	if (allocated > prot->sysctl_mem[2])
+	if (allocated > sk_prot_mem_limits(sk, 2))
 		goto suppress_allocation;
 
 	/* guarantee minimum buffer size under pressure */
 	if (kind == SK_MEM_RECV) {
 		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
 			return 1;
+
 	} else { /* SK_MEM_SEND */
 		if (sk->sk_type == SOCK_STREAM) {
 			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
@@ -1744,13 +1744,13 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 			return 1;
 	}
 
-	if (prot->memory_pressure) {
+	if (sk_has_memory_pressure(sk)) {
 		int alloc;
 
-		if (!*prot->memory_pressure)
+		if (!sk_under_memory_pressure(sk))
 			return 1;
-		alloc = percpu_counter_read_positive(prot->sockets_allocated);
-		if (prot->sysctl_mem[2] > alloc *
+		alloc = sk_sockets_allocated_read_positive(sk);
+		if (sk_prot_mem_limits(sk, 2) > alloc *
 		    sk_mem_pages(sk->sk_wmem_queued +
 				 atomic_read(&sk->sk_rmem_alloc) +
 				 sk->sk_forward_alloc))
@@ -1773,7 +1773,9 @@ suppress_allocation:
 
 	/* Alas. Undo changes. */
 	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
-	atomic_long_sub(amt, prot->memory_allocated);
+
+	sk_memory_allocated_sub(sk, amt);
+
 	return 0;
 }
 EXPORT_SYMBOL(__sk_mem_schedule);
@@ -1784,15 +1786,13 @@ EXPORT_SYMBOL(__sk_mem_schedule);
  */
 void __sk_mem_reclaim(struct sock *sk)
 {
-	struct proto *prot = sk->sk_prot;
-
-	atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
-		   prot->memory_allocated);
+	sk_memory_allocated_sub(sk,
+				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
 	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
 
-	if (prot->memory_pressure && *prot->memory_pressure &&
-	    (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0]))
-		*prot->memory_pressure = 0;
+	if (sk_under_memory_pressure(sk) &&
+	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
+		sk_leave_memory_pressure(sk);
 }
 EXPORT_SYMBOL(__sk_mem_reclaim);
 
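Judging by the one-to-one substitutions in __sk_mem_schedule() and
__sk_mem_reclaim() above, the non-cgroup fallbacks for the
memory_allocated accessors would presumably be thin wrappers over the
existing atomic ops; a sketch with inferred bodies, not quoted from
the patch:

static inline long sk_memory_allocated_add(struct sock *sk, int amt)
{
        return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
}

static inline void sk_memory_allocated_sub(struct sock *sk, int amt)
{
        atomic_long_sub(amt, sk->sk_prot->memory_allocated);
}

static inline long sk_memory_allocated(const struct sock *sk)
{
        return atomic_long_read(sk->sk_prot->memory_allocated);
}
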
@@ -2507,16 +2507,27 @@ static char proto_method_implemented(const void *method)
 {
 	return method == NULL ? 'n' : 'y';
 }
+static long sock_prot_memory_allocated(struct proto *proto)
+{
+	return proto->memory_allocated != NULL ? proto_memory_allocated(proto): -1L;
+}
+
+static char *sock_prot_memory_pressure(struct proto *proto)
+{
+	return proto->memory_pressure != NULL ?
+	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
+}
 
 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
 {
+
 	seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
 			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
 		   proto->name,
 		   proto->obj_size,
 		   sock_prot_inuse_get(seq_file_net(seq), proto),
-		   proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L,
-		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
+		   sock_prot_memory_allocated(proto),
+		   sock_prot_memory_pressure(proto),
 		   proto->max_header,
 		   proto->slab == NULL ? "no" : "yes",
 		   module_name(proto->owner),
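
Factoring the two ternaries out of proto_seq_printf() into
sock_prot_memory_allocated() and sock_prot_memory_pressure() keeps the
seq_printf() argument list readable and gives later cgroup code a
single place to hook. The /proc/net/protocols output is unchanged: the
mem column still reads -1 for protocols that do not account memory,
and the pressure column still reads "NI" (not implemented) where there
is no pressure callback. An illustrative line, with all values
invented:

TCP        1680     10      2 no      40 yes  kernel      y  y  y  y  y  y  y  y  y  y  y  y  y  y  y  y  n  y  y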
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 961eed4f510a..3569d8ecaeac 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -56,17 +56,17 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 
 	local_bh_disable();
 	orphans = percpu_counter_sum_positive(&tcp_orphan_count);
-	sockets = percpu_counter_sum_positive(&tcp_sockets_allocated);
+	sockets = proto_sockets_allocated_sum_positive(&tcp_prot);
 	local_bh_enable();
 
 	socket_seq_show(seq);
 	seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
 		   sock_prot_inuse_get(net, &tcp_prot), orphans,
 		   tcp_death_row.tw_count, sockets,
-		   atomic_long_read(&tcp_memory_allocated));
+		   proto_memory_allocated(&tcp_prot));
 	seq_printf(seq, "UDP: inuse %d mem %ld\n",
 		   sock_prot_inuse_get(net, &udp_prot),
-		   atomic_long_read(&udp_memory_allocated));
+		   proto_memory_allocated(&udp_prot));
 	seq_printf(seq, "UDPLITE: inuse %d\n",
 		   sock_prot_inuse_get(net, &udplite_prot));
 	seq_printf(seq, "RAW: inuse %d\n",
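
proto_memory_allocated() and proto_sockets_allocated_sum_positive()
are the proto-level counterparts for contexts like this one, where no
socket is at hand. By the same substitution logic they would
presumably reduce to the following sketch (inferred, not quoted from
the patch):

static inline long proto_memory_allocated(struct proto *prot)
{
        return atomic_long_read(prot->memory_allocated);
}

static inline long proto_sockets_allocated_sum_positive(struct proto *prot)
{
        return percpu_counter_sum_positive(prot->sockets_allocated);
}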
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b9cbc351c511..f131d92d25ee 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -322,7 +322,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 	/* Check #1 */
 	if (tp->rcv_ssthresh < tp->window_clamp &&
 	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
-	    !tcp_memory_pressure) {
+	    !sk_under_memory_pressure(sk)) {
 		int incr;
 
 		/* Check #2. Increase window, if skb with such overhead
@@ -411,8 +411,8 @@ static void tcp_clamp_window(struct sock *sk)
 
 	if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
-	    !tcp_memory_pressure &&
-	    atomic_long_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+	    !sk_under_memory_pressure(sk) &&
+	    sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
 		sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
 				    sysctl_tcp_rmem[2]);
 	}
@@ -4866,7 +4866,7 @@ static int tcp_prune_queue(struct sock *sk)
 
 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 		tcp_clamp_window(sk);
-	else if (tcp_memory_pressure)
+	else if (sk_under_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
 	tcp_collapse_ofo_queue(sk);
@@ -4932,11 +4932,11 @@ static int tcp_should_expand_sndbuf(const struct sock *sk)
 		return 0;
 
 	/* If we are under global TCP memory pressure, do not expand. */
-	if (tcp_memory_pressure)
+	if (sk_under_memory_pressure(sk))
 		return 0;
 
 	/* If we are under soft global TCP memory pressure, do not expand. */
-	if (atomic_long_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
+	if (sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
 		return 0;
 
 	/* If we filled the congestion window, do not expand. */
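
Every pressure test in this file now goes through the socket, which is
the point of the series: once the accessor takes a struct sock, a
cgroup-aware build can route the lookup through the socket's cgroup
before falling back to the protocol-wide state. A hypothetical
cgroup-aware variant might dispatch like the sketch below; the sk_cgrp
field, the mem_cgroup_sockets_enabled test, and the cg_proto layout
implied here are illustrative assumptions, not something this patch
introduces:

static inline bool sk_under_memory_pressure(const struct sock *sk)
{
        if (!sk->sk_prot->memory_pressure)
                return false;

        /* Illustrative: consult per-cgroup state first when the socket
         * is tracked by a memory cgroup. */
        if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
                return !!*sk->sk_cgrp->memory_pressure;

        return !!*sk->sk_prot->memory_pressure;
}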
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c4b8b09db9f5..f48bf312cfe8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1917,7 +1917,7 @@ static int tcp_v4_init_sock(struct sock *sk)
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
 	local_bh_disable();
-	percpu_counter_inc(&tcp_sockets_allocated);
+	sk_sockets_allocated_inc(sk);
 	local_bh_enable();
 
 	return 0;
@@ -1973,7 +1973,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 		tp->cookie_values = NULL;
 	}
 
-	percpu_counter_dec(&tcp_sockets_allocated);
+	sk_sockets_allocated_dec(sk);
 }
 EXPORT_SYMBOL(tcp_v4_destroy_sock);
 
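The socket-count accessors used here and in sk_clone_lock() would, by
the same reasoning, wrap the protocol's per-CPU counter; a sketch with
inferred bodies (callers such as sk_clone_lock() keep their own NULL
check on prot->sockets_allocated, so none is repeated here):

static inline void sk_sockets_allocated_inc(struct sock *sk)
{
        percpu_counter_inc(sk->sk_prot->sockets_allocated);
}

static inline void sk_sockets_allocated_dec(struct sock *sk)
{
        percpu_counter_dec(sk->sk_prot->sockets_allocated);
}

static inline long sk_sockets_allocated_read_positive(struct sock *sk)
{
        return percpu_counter_read_positive(sk->sk_prot->sockets_allocated);
}

Note that the local_bh_disable()/local_bh_enable() pair around the
init-path increment stays as it was: the accessor is a drop-in
replacement for the percpu op, so the existing locking discipline
still applies.
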
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cf3068038942..8c8de2780c7a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1922,7 +1922,7 @@ u32 __tcp_select_window(struct sock *sk)
 	if (free_space < (full_space >> 1)) {
 		icsk->icsk_ack.quick = 0;
 
-		if (tcp_memory_pressure)
+		if (sk_under_memory_pressure(sk))
 			tp->rcv_ssthresh = min(tp->rcv_ssthresh,
 					       4U * tp->advmss);
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index aa39a692f4c8..40a41f077981 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -261,7 +261,7 @@ static void tcp_delack_timer(unsigned long data)
 	}
 
 out:
-	if (tcp_memory_pressure)
+	if (sk_under_memory_pressure(sk))
 		sk_mem_reclaim(sk);
 out_unlock:
 	bh_unlock_sock(sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9d74eee334d6..b69c7030aba9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1994,7 +1994,7 @@ static int tcp_v6_init_sock(struct sock *sk)
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
 	local_bh_disable();
-	percpu_counter_inc(&tcp_sockets_allocated);
+	sk_sockets_allocated_inc(sk);
 	local_bh_enable();
 
 	return 0;