author     Alexei Starovoitov <ast@plumgrid.com>   2014-07-30 23:34:12 -0400
committer  David S. Miller <davem@davemloft.net>   2014-08-02 18:02:37 -0400
commit     278571baca2aecf5fb5cb5c8b002dbfa0a6c524c
tree       51ff4143d22b2e550308681c938afc7c9c10dcdc /net/core
parent     4330487acfff0cf1d7b14d238583a182e0a444bb
net: filter: simplify socket charging
Attaching a bpf program to a socket involves multiple rounds of socket memory
accounting, since the size of 'sk_filter' changes when classic BPF is
converted to eBPF. The common path of program creation also has to deal with
two different ways of freeing the memory.

Simplify the code by delaying socket charging until the program is ready and
its size is known.
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
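
To make the new ordering concrete, here is a minimal user-space sketch of the
attach path after this patch. It is a model, not kernel code: the struct sock
and struct prog below are simplified stand-ins for the kernel's struct sock
and struct sk_filter, optmem_max stands in for sysctl_optmem_max, and
prepare() only mimics the size growth that sk_convert_filter() can cause. The
point is the ordering the commit message describes: build and possibly resize
the program first, charge the socket once at the final size, and keep a
single free path on error.

/* user-space model of the post-patch attach ordering; names are stand-ins */
#include <stdbool.h>
#include <stdlib.h>

static unsigned int optmem_max = 20480;   /* stand-in for sysctl_optmem_max */

struct sock { unsigned int omem_alloc; }; /* stand-in for sk->sk_omem_alloc */
struct prog { unsigned int size; };       /* stand-in for struct sk_filter  */

/* mirrors the new sk_filter_charge(): charge only at the final size */
static bool charge(struct sock *sk, const struct prog *fp)
{
        if (fp->size <= optmem_max &&
            sk->omem_alloc + fp->size < optmem_max) {
                sk->omem_alloc += fp->size;
                return true;
        }
        return false;
}

/* mimics classic->eBPF conversion growing the image; nothing has been
 * charged yet, so the size change needs no accounting fixups */
static void prepare(struct prog *fp)
{
        fp->size *= 2;
}

static int attach(struct sock *sk, unsigned int initial_size)
{
        struct prog *fp = malloc(sizeof(*fp));

        if (!fp)
                return -1;
        fp->size = initial_size;

        prepare(fp);               /* size may change here */

        if (!charge(sk, fp)) {     /* one charge, at one known size */
                free(fp);          /* and one way to free on error  */
                return -1;
        }
        /* the real code would now publish fp via rcu_assign_pointer() */
        return 0;
}

int main(void)
{
        struct sock sk = { .omem_alloc = 0 };

        return attach(&sk, 512) ? 1 : 0;
}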
Diffstat (limited to 'net/core')
 -rw-r--r--  net/core/filter.c | 87
 -rw-r--r--  net/core/sock.c   |  9
 2 files changed, 44 insertions(+), 52 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index 42c1944b0c63..5a6aeb1d40b8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -872,41 +872,30 @@ static void sk_filter_release(struct sk_filter *fp)
 
 void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
 {
-        atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
-        sk_filter_release(fp);
-}
+        u32 filter_size = sk_filter_size(fp->len);
 
-void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
-{
-        atomic_inc(&fp->refcnt);
-        atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
+        atomic_sub(filter_size, &sk->sk_omem_alloc);
+        sk_filter_release(fp);
 }
 
-static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp,
-                                              struct sock *sk,
-                                              unsigned int len)
+/* try to charge the socket memory if there is space available
+ * return true on success
+ */
+bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
 {
-        struct sk_filter *fp_new;
+        u32 filter_size = sk_filter_size(fp->len);
 
-        if (sk == NULL)
-                return krealloc(fp, len, GFP_KERNEL);
-
-        fp_new = sock_kmalloc(sk, len, GFP_KERNEL);
-        if (fp_new) {
-                *fp_new = *fp;
-                /* As we're keeping orig_prog in fp_new along,
-                 * we need to make sure we're not evicting it
-                 * from the old fp.
-                 */
-                fp->orig_prog = NULL;
-                sk_filter_uncharge(sk, fp);
+        /* same check as in sock_kmalloc() */
+        if (filter_size <= sysctl_optmem_max &&
+            atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
+                atomic_inc(&fp->refcnt);
+                atomic_add(filter_size, &sk->sk_omem_alloc);
+                return true;
         }
-
-        return fp_new;
+        return false;
 }
 
-static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
-                                             struct sock *sk)
+static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp)
 {
         struct sock_filter *old_prog;
         struct sk_filter *old_fp;
@@ -938,7 +927,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
 
         /* Expand fp for appending the new filter representation. */
         old_fp = fp;
-        fp = __sk_migrate_realloc(old_fp, sk, sk_filter_size(new_len));
+        fp = krealloc(old_fp, sk_filter_size(new_len), GFP_KERNEL);
         if (!fp) {
                 /* The old_fp is still around in case we couldn't
                  * allocate new memory, so uncharge on that one.
@@ -956,7 +945,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
                 /* 2nd sk_convert_filter() can fail only if it fails
                  * to allocate memory, remapping must succeed. Note,
                  * that at this time old_fp has already been released
-                 * by __sk_migrate_realloc().
+                 * by krealloc().
                  */
                 goto out_err_free;
 
@@ -968,16 +957,11 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
 out_err_free:
         kfree(old_prog);
 out_err:
-        /* Rollback filter setup. */
-        if (sk != NULL)
-                sk_filter_uncharge(sk, fp);
-        else
-                kfree(fp);
+        __sk_filter_release(fp);
         return ERR_PTR(err);
 }
 
-static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
-                                             struct sock *sk)
+static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp)
 {
         int err;
 
@@ -986,10 +970,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
 
         err = sk_chk_filter(fp->insns, fp->len);
         if (err) {
-                if (sk != NULL)
-                        sk_filter_uncharge(sk, fp);
-                else
-                        kfree(fp);
+                __sk_filter_release(fp);
                 return ERR_PTR(err);
         }
 
@@ -1002,7 +983,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
          * internal BPF translation for the optimized interpreter.
          */
         if (!fp->jited)
-                fp = __sk_migrate_filter(fp, sk);
+                fp = __sk_migrate_filter(fp);
 
         return fp;
 }
@@ -1041,10 +1022,10 @@ int sk_unattached_filter_create(struct sk_filter **pfp,
          */
         fp->orig_prog = NULL;
 
-        /* __sk_prepare_filter() already takes care of uncharging
+        /* __sk_prepare_filter() already takes care of freeing
          * memory in case something goes wrong.
          */
-        fp = __sk_prepare_filter(fp, NULL);
+        fp = __sk_prepare_filter(fp);
         if (IS_ERR(fp))
                 return PTR_ERR(fp);
 
@@ -1083,31 +1064,37 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
         if (fprog->filter == NULL)
                 return -EINVAL;
 
-        fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL);
+        fp = kmalloc(sk_fsize, GFP_KERNEL);
         if (!fp)
                 return -ENOMEM;
 
         if (copy_from_user(fp->insns, fprog->filter, fsize)) {
-                sock_kfree_s(sk, fp, sk_fsize);
+                kfree(fp);
                 return -EFAULT;
         }
 
-        atomic_set(&fp->refcnt, 1);
         fp->len = fprog->len;
 
         err = sk_store_orig_filter(fp, fprog);
         if (err) {
-                sk_filter_uncharge(sk, fp);
+                kfree(fp);
                 return -ENOMEM;
         }
 
-        /* __sk_prepare_filter() already takes care of uncharging
+        /* __sk_prepare_filter() already takes care of freeing
          * memory in case something goes wrong.
          */
-        fp = __sk_prepare_filter(fp, sk);
+        fp = __sk_prepare_filter(fp);
         if (IS_ERR(fp))
                 return PTR_ERR(fp);
 
+        atomic_set(&fp->refcnt, 0);
+
+        if (!sk_filter_charge(sk, fp)) {
+                __sk_filter_release(fp);
+                return -ENOMEM;
+        }
+
         old_fp = rcu_dereference_protected(sk->sk_filter,
                                            sock_owned_by_user(sk));
         rcu_assign_pointer(sk->sk_filter, fp);
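
A note on the two-part test in the new sk_filter_charge(): as its in-code
comment says, it is the same check sock_kmalloc() performs. The first clause
rejects any single filter larger than the budget, which also keeps the
unsigned addition in the second clause from wrapping; the second clause caps
the socket's total option memory. The standalone sketch below restates the
predicate with hypothetical values (20480 is used as a plausible optmem_max,
and the helper name may_charge() is invented for illustration):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* the sk_filter_charge() admission test, lifted out for illustration;
 * "charged" stands in for sk_omem_alloc, "budget" for sysctl_optmem_max */
static bool may_charge(uint32_t charged, uint32_t size, uint32_t budget)
{
        return size <= budget && charged + size < budget;
}

int main(void)
{
        uint32_t budget = 20480;                 /* hypothetical optmem_max */

        assert(may_charge(0, 512, budget));      /* small filter fits */
        assert(!may_charge(20400, 512, budget)); /* total would exceed budget */
        /* an oversize filter fails the first clause before the addition
         * in the second clause can wrap around */
        assert(!may_charge(0, UINT32_MAX, budget));
        return 0;
}

Also visible in the sk_attach_filter() hunk above: the refcount is now
initialized to 0 only after __sk_prepare_filter() succeeds, and
sk_filter_charge() takes the first reference, so a program that cannot be
charged is released before it is ever published to the socket.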
diff --git a/net/core/sock.c b/net/core/sock.c
index 134291d73fcd..a741163568fa 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1474,6 +1474,7 @@ static void sk_update_clone(const struct sock *sk, struct sock *newsk)
 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 {
         struct sock *newsk;
+        bool is_charged = true;
 
         newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
         if (newsk != NULL) {
@@ -1518,9 +1519,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 
         filter = rcu_dereference_protected(newsk->sk_filter, 1);
         if (filter != NULL)
-                sk_filter_charge(newsk, filter);
+                /* though it's an empty new sock, the charging may fail
+                 * if sysctl_optmem_max was changed between creation of
+                 * original socket and cloning
+                 */
+                is_charged = sk_filter_charge(newsk, filter);
 
-        if (unlikely(xfrm_sk_clone_policy(newsk))) {
+        if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk))) {
                 /* It is still raw copy of parent, so invalidate
                  * destructor and make plain sk_free() */
                 newsk->sk_destruct = NULL;
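
The sock.c hunk folds a possible charge failure into the error path
sk_clone_lock() already had for xfrm_sk_clone_policy(). A rough standalone
sketch of that caller-side pattern follows (simplified types and invented
names, not the real sock API):

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

struct filter { unsigned int size; };

struct sock {
        struct filter *filter;
        void (*sk_destruct)(struct sock *);
};

/* stand-in for sk_filter_charge(); may fail if the optmem budget shrank
 * between the parent attaching the filter and this clone */
static bool filter_charge(struct sock *newsk, struct filter *f)
{
        (void)newsk;
        return f->size < 20480;   /* hypothetical budget */
}

struct sock *clone_sock(const struct sock *sk)
{
        struct sock *newsk = malloc(sizeof(*newsk));
        bool is_charged = true;

        if (!newsk)
                return NULL;
        memcpy(newsk, sk, sizeof(*newsk)); /* still a raw copy of the parent */

        if (newsk->filter)
                is_charged = filter_charge(newsk, newsk->filter);

        if (!is_charged) {
                /* same unwinding the xfrm failure path already used:
                 * invalidate the destructor and plain-free the copy */
                newsk->sk_destruct = NULL;
                free(newsk);
                return NULL;
        }
        return newsk;
}

int main(void)
{
        struct filter f = { .size = 256 };
        struct sock parent = { .filter = &f, .sk_destruct = NULL };
        struct sock *child = clone_sock(&parent);

        free(child);
        return 0;
}

Keeping the failure handling in the caller means sk_filter_charge() itself
stays a pure accounting primitive, with the unwinding policy left to each
call site.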