author	Stanislav Fomichev <sdf@google.com>	2019-08-14 13:37:49 -0400
committer	Daniel Borkmann <daniel@iogearbox.net>	2019-08-17 17:18:54 -0400
commit	8f51dfc73bf181f2304e1498f55d5f452e060cbe (patch)
tree	1b3877adff06cd12dbeac725521cef44d2ebf7d7 /net/core
parent	b0e4701ce15d0381cdea0643c7f0a35dc529cec2 (diff)
bpf: support cloning sk storage on accept()
Add new helper bpf_sk_storage_clone which optionally clones sk storage
and call it from sk_clone_lock.

Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
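As a usage sketch (not part of this change; the map name and value type are illustrative), a BPF program opting its socket storage into cloning would set BPF_F_CLONE alongside the still-mandatory BPF_F_NO_PREALLOC and the BTF key/value types enforced by bpf_sk_storage_map_alloc_check:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* libbpf-style map definition; "cloned_sk_stg" and the __u64 value type
 * are made up for illustration, only the flags follow this patch.
 */
struct {
	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
	__type(key, int);
	__type(value, __u64);
} cloned_sk_stg SEC(".maps");

Without BPF_F_CLONE the map behaves as before: values stay on the socket they were created on and are simply freed when that socket is destructed.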
Diffstat (limited to 'net/core')
-rw-r--r--	net/core/bpf_sk_storage.c	104
-rw-r--r--	net/core/sock.c	9
2 files changed, 107 insertions, 6 deletions
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 94c7f77ecb6b..da5639a5bd3b 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -12,6 +12,9 @@
 
 static atomic_t cache_idx;
 
+#define SK_STORAGE_CREATE_FLAG_MASK \
+	(BPF_F_NO_PREALLOC | BPF_F_CLONE)
+
 struct bucket {
 	struct hlist_head list;
 	raw_spinlock_t lock;
@@ -209,7 +212,6 @@ static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
 	kfree_rcu(sk_storage, rcu);
 }
 
-/* sk_storage->lock must be held and sk_storage->list cannot be empty */
 static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
 			    struct bpf_sk_storage_elem *selem)
 {
@@ -509,7 +511,7 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
 	return 0;
 }
 
-/* Called by __sk_destruct() */
+/* Called by __sk_destruct() & bpf_sk_storage_clone() */
 void bpf_sk_storage_free(struct sock *sk)
 {
 	struct bpf_sk_storage_elem *selem;
@@ -557,6 +559,11 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
 
 	smap = (struct bpf_sk_storage_map *)map;
 
+	/* Note that this map might be concurrently cloned from
+	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
+	 * RCU read section to finish before proceeding. New RCU
+	 * read sections should be prevented via bpf_map_inc_not_zero.
+	 */
 	synchronize_rcu();
 
 	/* bpf prog and the userspace can no longer access this map
@@ -601,7 +608,9 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
 
 static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
 {
-	if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries ||
+	if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
+	    !(attr->map_flags & BPF_F_NO_PREALLOC) ||
+	    attr->max_entries ||
 	    attr->key_size != sizeof(int) || !attr->value_size ||
 	    /* Enforce BTF for userspace sk dumping */
 	    !attr->btf_key_type_id || !attr->btf_value_type_id)
@@ -739,6 +748,95 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
 	return err;
 }
 
+static struct bpf_sk_storage_elem *
+bpf_sk_storage_clone_elem(struct sock *newsk,
+			  struct bpf_sk_storage_map *smap,
+			  struct bpf_sk_storage_elem *selem)
+{
+	struct bpf_sk_storage_elem *copy_selem;
+
+	copy_selem = selem_alloc(smap, newsk, NULL, true);
+	if (!copy_selem)
+		return NULL;
+
+	if (map_value_has_spin_lock(&smap->map))
+		copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
+				      SDATA(selem)->data, true);
+	else
+		copy_map_value(&smap->map, SDATA(copy_selem)->data,
+			       SDATA(selem)->data);
+
+	return copy_selem;
+}
+
+int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
+{
+	struct bpf_sk_storage *new_sk_storage = NULL;
+	struct bpf_sk_storage *sk_storage;
+	struct bpf_sk_storage_elem *selem;
+	int ret = 0;
+
+	RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
+
+	rcu_read_lock();
+	sk_storage = rcu_dereference(sk->sk_bpf_storage);
+
+	if (!sk_storage || hlist_empty(&sk_storage->list))
+		goto out;
+
+	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
+		struct bpf_sk_storage_elem *copy_selem;
+		struct bpf_sk_storage_map *smap;
+		struct bpf_map *map;
+
+		smap = rcu_dereference(SDATA(selem)->smap);
+		if (!(smap->map.map_flags & BPF_F_CLONE))
+			continue;
+
+		/* Note that for lockless listeners adding new element
+		 * here can race with cleanup in bpf_sk_storage_map_free.
+		 * Try to grab map refcnt to make sure that it's still
+		 * alive and prevent concurrent removal.
+		 */
+		map = bpf_map_inc_not_zero(&smap->map, false);
+		if (IS_ERR(map))
+			continue;
+
+		copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
+		if (!copy_selem) {
+			ret = -ENOMEM;
+			bpf_map_put(map);
+			goto out;
+		}
+
+		if (new_sk_storage) {
+			selem_link_map(smap, copy_selem);
+			__selem_link_sk(new_sk_storage, copy_selem);
+		} else {
+			ret = sk_storage_alloc(newsk, smap, copy_selem);
+			if (ret) {
+				kfree(copy_selem);
+				atomic_sub(smap->elem_size,
+					   &newsk->sk_omem_alloc);
+				bpf_map_put(map);
+				goto out;
+			}
+
+			new_sk_storage = rcu_dereference(copy_selem->sk_storage);
+		}
+		bpf_map_put(map);
+	}
+
+out:
+	rcu_read_unlock();
+
+	/* In case of an error, don't free anything explicitly here, the
+	 * caller is responsible to call bpf_sk_storage_free.
+	 */
+
+	return ret;
+}
+
 BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
 	   void *, value, u64, flags)
 {
diff --git a/net/core/sock.c b/net/core/sock.c
index d57b0cc995a0..f5e801a9cea4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		goto out;
 	}
 	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
-#ifdef CONFIG_BPF_SYSCALL
-	RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
-#endif
+
+	if (bpf_sk_storage_clone(sk, newsk)) {
+		sk_free_unlock_clone(newsk);
+		newsk = NULL;
+		goto out;
+	}
 
 	newsk->sk_err = 0;
 	newsk->sk_err_soft = 0;
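For completeness, a hypothetical userspace sketch (the fd names and the value are assumptions, not from this patch): because sk storage maps are keyed by a socket file descriptor from userspace, a value installed on a listening socket in a BPF_F_CLONE map is copied into every socket returned by accept():

#include <bpf/bpf.h>
#include <linux/types.h>

/* map_fd: a BPF_MAP_TYPE_SK_STORAGE map created with BPF_F_CLONE;
 * listen_fd: a listening TCP socket. Sockets produced by accept()
 * then start out with their own copy of 'val'.
 */
static int tag_listener(int map_fd, int listen_fd)
{
	__u64 val = 1;

	return bpf_map_update_elem(map_fd, &listen_fd, &val, BPF_NOEXIST);
}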