author    David S. Miller <davem@davemloft.net>  2016-09-08 16:09:41 -0400
committer David S. Miller <davem@davemloft.net>  2016-09-08 16:09:41 -0400
commit    575f9c43e709ab5008047eb5c68b99fe04592400 (patch)
tree      b9b4dffd0a31afa68a89b1ca7c585485926d7c79 /net/xfrm
parent    0f76d2564469fd3a337de088f533364cef206130 (diff)
parent    35db57bbc4b7ab810bba6e6d6954a0faf5a842cf (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next
Steffen Klassert says:

====================
ipsec-next 2016-09-08

1) Constify the xfrm_replay structures. From Julia Lawall.

2) Protect xfrm state hash tables with rcu, lookups can be done
   now without acquiring xfrm_state_lock. From Florian Westphal.

3) Protect xfrm policy hash tables with rcu, lookups can be done
   now without acquiring xfrm_policy_lock. From Florian Westphal.

4) We don't need to have a garbage collector list per namespace
   anymore, so use a global one instead. From Florian Westphal.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
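As a quick orientation for points 2)-4), the sketch below shows the general lockless-lookup pattern the series applies to the xfrm hash tables: readers run under rcu_read_lock(), sample a seqcount so a concurrent table resize can be detected, walk the bucket with hlist_for_each_entry_rcu(), and take a reference with atomic_inc_not_zero() so an object already queued for freeing is skipped. This is a simplified illustration, not code from the patches; the my_obj/my_table/my_lookup names are invented for the example, only the kernel primitives are real.

/*
 * Simplified reader-side sketch (illustrative only; my_obj, my_table and
 * my_lookup are not part of this merge, the kernel primitives are).
 */
#include <linux/types.h>
#include <linux/rculist.h>
#include <linux/seqlock.h>
#include <linux/atomic.h>

struct my_obj {
	struct hlist_node node;
	atomic_t refcnt;
	u32 key;
};

static struct hlist_head __rcu *my_table;	/* swapped by the resize path */
static unsigned int my_hmask;
static seqcount_t my_hash_generation = SEQCNT_ZERO(my_hash_generation);

static struct my_obj *my_lookup(u32 key)
{
	struct hlist_head *chain;
	unsigned int sequence;
	struct my_obj *obj, *ret;

	rcu_read_lock();
retry:
	/* Sample a stable table pointer; a resize bumps the seqcount. */
	do {
		sequence = read_seqcount_begin(&my_hash_generation);
		chain = rcu_dereference(my_table) + (key & my_hmask);
	} while (read_seqcount_retry(&my_hash_generation, sequence));

	ret = NULL;
	hlist_for_each_entry_rcu(obj, chain, node) {
		if (obj->key == key) {
			ret = obj;
			break;
		}
	}

	/* The table may have been swapped while we walked it. */
	if (read_seqcount_retry(&my_hash_generation, sequence))
		goto retry;

	/* Skip objects whose refcount already hit zero (about to be freed). */
	if (ret && !atomic_inc_not_zero(&ret->refcnt))
		goto retry;

	rcu_read_unlock();
	return ret;
}

On the writer side, the resize paths in the diff below (xfrm_bydst_resize(), xfrm_hash_resize()) take the spinlock, wrap the rcu_assign_pointer() of the new table in write_seqcount_begin()/write_seqcount_end(), and call synchronize_rcu() before freeing the old buckets.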
Diffstat (limited to 'net/xfrm')
-rw-r--r--  net/xfrm/xfrm_policy.c  145
-rw-r--r--  net/xfrm/xfrm_replay.c    6
-rw-r--r--  net/xfrm/xfrm_state.c   121
3 files changed, 167 insertions, 105 deletions
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b5e665b3cfb0..f7ce6265961a 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -49,6 +49,7 @@ static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
 						__read_mostly;
 
 static struct kmem_cache *xfrm_dst_cache __read_mostly;
+static __read_mostly seqcount_t xfrm_policy_hash_generation;
 
 static void xfrm_init_pmtu(struct dst_entry *dst);
 static int stale_bundle(struct dst_entry *dst);
@@ -59,6 +60,11 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 						int dir);
 
+static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy)
+{
+	return atomic_inc_not_zero(&policy->refcnt);
+}
+
 static inline bool
 __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
 {
@@ -385,9 +391,11 @@ static struct hlist_head *policy_hash_bysel(struct net *net,
 	__get_hash_thresh(net, family, dir, &dbits, &sbits);
 	hash = __sel_hash(sel, family, hmask, dbits, sbits);
 
-	return (hash == hmask + 1 ?
-		&net->xfrm.policy_inexact[dir] :
-		net->xfrm.policy_bydst[dir].table + hash);
+	if (hash == hmask + 1)
+		return &net->xfrm.policy_inexact[dir];
+
+	return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
+		     lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
 }
 
 static struct hlist_head *policy_hash_direct(struct net *net,
@@ -403,7 +411,8 @@ static struct hlist_head *policy_hash_direct(struct net *net,
 	__get_hash_thresh(net, family, dir, &dbits, &sbits);
 	hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);
 
-	return net->xfrm.policy_bydst[dir].table + hash;
+	return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
+		     lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
 }
 
 static void xfrm_dst_hash_transfer(struct net *net,
@@ -426,14 +435,14 @@ redo:
 		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
 				pol->family, nhashmask, dbits, sbits);
 		if (!entry0) {
-			hlist_del(&pol->bydst);
-			hlist_add_head(&pol->bydst, ndsttable+h);
+			hlist_del_rcu(&pol->bydst);
+			hlist_add_head_rcu(&pol->bydst, ndsttable + h);
 			h0 = h;
 		} else {
 			if (h != h0)
 				continue;
-			hlist_del(&pol->bydst);
-			hlist_add_behind(&pol->bydst, entry0);
+			hlist_del_rcu(&pol->bydst);
+			hlist_add_behind_rcu(&pol->bydst, entry0);
 		}
 		entry0 = &pol->bydst;
 	}
@@ -468,22 +477,32 @@ static void xfrm_bydst_resize(struct net *net, int dir)
 	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
 	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
 	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
-	struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
 	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
+	struct hlist_head *odst;
 	int i;
 
 	if (!ndst)
 		return;
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+	write_seqcount_begin(&xfrm_policy_hash_generation);
+
+	odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
+				lockdep_is_held(&net->xfrm.xfrm_policy_lock));
+
+	odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
+				lockdep_is_held(&net->xfrm.xfrm_policy_lock));
 
 	for (i = hmask; i >= 0; i--)
 		xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);
 
-	net->xfrm.policy_bydst[dir].table = ndst;
+	rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst);
 	net->xfrm.policy_bydst[dir].hmask = nhashmask;
 
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	write_seqcount_end(&xfrm_policy_hash_generation);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+
+	synchronize_rcu();
 
 	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
 }
@@ -500,7 +519,7 @@ static void xfrm_byidx_resize(struct net *net, int total)
 	if (!nidx)
 		return;
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 
 	for (i = hmask; i >= 0; i--)
 		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
@@ -508,7 +527,7 @@ static void xfrm_byidx_resize(struct net *net, int total)
 	net->xfrm.policy_byidx = nidx;
 	net->xfrm.policy_idx_hmask = nhashmask;
 
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
 }
@@ -541,7 +560,6 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total)
 
 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
 {
-	read_lock_bh(&net->xfrm.xfrm_policy_lock);
 	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
 	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
 	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
@@ -550,7 +568,6 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
 	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
 	si->spdhcnt = net->xfrm.policy_idx_hmask;
 	si->spdhmcnt = xfrm_policy_hashmax;
-	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
 }
 EXPORT_SYMBOL(xfrm_spd_getinfo);
 
@@ -600,7 +617,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
 		rbits6 = net->xfrm.policy_hthresh.rbits6;
 	} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 
 	/* reset the bydst and inexact table in all directions */
 	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
@@ -642,7 +659,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
 		hlist_add_head(&policy->bydst, chain);
 	}
 
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	mutex_unlock(&hash_resize_mutex);
 }
@@ -753,7 +770,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	struct hlist_head *chain;
 	struct hlist_node *newpos;
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
 	delpol = NULL;
 	newpos = NULL;
@@ -764,7 +781,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 		    xfrm_sec_ctx_match(pol->security, policy->security) &&
 		    !WARN_ON(delpol)) {
 			if (excl) {
-				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+				spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 				return -EEXIST;
 			}
 			delpol = pol;
@@ -800,7 +817,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	policy->curlft.use_time = 0;
 	if (!mod_timer(&policy->timer, jiffies + HZ))
 		xfrm_pol_hold(policy);
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	if (delpol)
 		xfrm_policy_kill(delpol);
@@ -820,7 +837,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
 	struct hlist_head *chain;
 
 	*err = 0;
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 	chain = policy_hash_bysel(net, sel, sel->family, dir);
 	ret = NULL;
 	hlist_for_each_entry(pol, chain, bydst) {
@@ -833,7 +850,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
 				*err = security_xfrm_policy_delete(
 								pol->security);
 				if (*err) {
-					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+					spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 					return pol;
 				}
 				__xfrm_policy_unlink(pol, dir);
@@ -842,7 +859,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
 			break;
 		}
 	}
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	if (ret && delete)
 		xfrm_policy_kill(ret);
@@ -861,7 +878,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
 		return NULL;
 
 	*err = 0;
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 	chain = net->xfrm.policy_byidx + idx_hash(net, id);
 	ret = NULL;
 	hlist_for_each_entry(pol, chain, byidx) {
@@ -872,7 +889,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
 				*err = security_xfrm_policy_delete(
 								pol->security);
 				if (*err) {
-					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+					spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 					return pol;
 				}
 				__xfrm_policy_unlink(pol, dir);
@@ -881,7 +898,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
 			break;
 		}
 	}
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	if (ret && delete)
 		xfrm_policy_kill(ret);
@@ -939,7 +956,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 {
 	int dir, err = 0, cnt = 0;
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 
 	err = xfrm_policy_flush_secctx_check(net, type, task_valid);
 	if (err)
@@ -955,14 +972,14 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 			if (pol->type != type)
 				continue;
 			__xfrm_policy_unlink(pol, dir);
-			write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+			spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 			cnt++;
 
 			xfrm_audit_policy_delete(pol, 1, task_valid);
 
 			xfrm_policy_kill(pol);
 
-			write_lock_bh(&net->xfrm.xfrm_policy_lock);
+			spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 			goto again1;
 		}
 
@@ -974,13 +991,13 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 			if (pol->type != type)
 				continue;
 			__xfrm_policy_unlink(pol, dir);
-			write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+			spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 			cnt++;
 
 			xfrm_audit_policy_delete(pol, 1, task_valid);
 			xfrm_policy_kill(pol);
 
-			write_lock_bh(&net->xfrm.xfrm_policy_lock);
+			spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 			goto again2;
 		}
 	}
@@ -989,7 +1006,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 	if (!cnt)
 		err = -ESRCH;
 out:
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 	return err;
 }
 EXPORT_SYMBOL(xfrm_policy_flush);
@@ -1009,7 +1026,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
 	if (list_empty(&walk->walk.all) && walk->seq != 0)
 		return 0;
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 	if (list_empty(&walk->walk.all))
 		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
 	else
@@ -1037,7 +1054,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
 	}
 	list_del_init(&walk->walk.all);
 out:
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 	return error;
 }
 EXPORT_SYMBOL(xfrm_policy_walk);
@@ -1056,9 +1073,9 @@ void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
 	if (list_empty(&walk->walk.all))
 		return;
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
 	list_del(&walk->walk.all);
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 }
 EXPORT_SYMBOL(xfrm_policy_walk_done);
 
@@ -1096,17 +1113,24 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 	struct xfrm_policy *pol, *ret;
 	const xfrm_address_t *daddr, *saddr;
 	struct hlist_head *chain;
-	u32 priority = ~0U;
+	unsigned int sequence;
+	u32 priority;
 
 	daddr = xfrm_flowi_daddr(fl, family);
 	saddr = xfrm_flowi_saddr(fl, family);
 	if (unlikely(!daddr || !saddr))
 		return NULL;
 
-	read_lock_bh(&net->xfrm.xfrm_policy_lock);
-	chain = policy_hash_direct(net, daddr, saddr, family, dir);
+	rcu_read_lock();
+ retry:
+	do {
+		sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
+		chain = policy_hash_direct(net, daddr, saddr, family, dir);
+	} while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence));
+
+	priority = ~0U;
 	ret = NULL;
-	hlist_for_each_entry(pol, chain, bydst) {
+	hlist_for_each_entry_rcu(pol, chain, bydst) {
 		err = xfrm_policy_match(pol, fl, type, family, dir);
 		if (err) {
 			if (err == -ESRCH)
@@ -1122,7 +1146,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 		}
 	}
 	chain = &net->xfrm.policy_inexact[dir];
-	hlist_for_each_entry(pol, chain, bydst) {
+	hlist_for_each_entry_rcu(pol, chain, bydst) {
 		if ((pol->priority >= priority) && ret)
 			break;
 
@@ -1140,9 +1164,13 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 		}
 	}
 
-	xfrm_pol_hold(ret);
+	if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
+		goto retry;
+
+	if (ret && !xfrm_pol_hold_rcu(ret))
+		goto retry;
 fail:
-	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	rcu_read_unlock();
 
 	return ret;
 }
@@ -1219,10 +1247,9 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 						 const struct flowi *fl)
 {
 	struct xfrm_policy *pol;
-	struct net *net = sock_net(sk);
 
 	rcu_read_lock();
-	read_lock_bh(&net->xfrm.xfrm_policy_lock);
+ again:
 	pol = rcu_dereference(sk->sk_policy[dir]);
 	if (pol != NULL) {
 		bool match = xfrm_selector_match(&pol->selector, fl,
@@ -1237,8 +1264,8 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 			err = security_xfrm_policy_lookup(pol->security,
 						      fl->flowi_secid,
 						      policy_to_flow_dir(dir));
-			if (!err)
-				xfrm_pol_hold(pol);
+			if (!err && !xfrm_pol_hold_rcu(pol))
+				goto again;
 			else if (err == -ESRCH)
 				pol = NULL;
 			else
@@ -1247,7 +1274,6 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 			pol = NULL;
 	}
 out:
-	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
 	rcu_read_unlock();
 	return pol;
 }
@@ -1271,7 +1297,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 
 	/* Socket policies are not hashed. */
 	if (!hlist_unhashed(&pol->bydst)) {
-		hlist_del(&pol->bydst);
+		hlist_del_rcu(&pol->bydst);
 		hlist_del(&pol->byidx);
 	}
 
@@ -1295,9 +1321,9 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
 {
 	struct net *net = xp_net(pol);
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 	pol = __xfrm_policy_unlink(pol, dir);
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 	if (pol) {
 		xfrm_policy_kill(pol);
 		return 0;
@@ -1316,7 +1342,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 		return -EINVAL;
 #endif
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 	old_pol = rcu_dereference_protected(sk->sk_policy[dir],
 				lockdep_is_held(&net->xfrm.xfrm_policy_lock));
 	if (pol) {
@@ -1334,7 +1360,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 		 */
 		xfrm_sk_policy_unlink(old_pol, dir);
 	}
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	if (old_pol) {
 		xfrm_policy_kill(old_pol);
@@ -1364,9 +1390,9 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
 		newp->type = old->type;
 		memcpy(newp->xfrm_vec, old->xfrm_vec,
 		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
-		write_lock_bh(&net->xfrm.xfrm_policy_lock);
+		spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 		xfrm_sk_policy_link(newp, dir);
-		write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+		spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 		xfrm_pol_put(newp);
 	}
 	return newp;
@@ -3048,7 +3074,7 @@ static int __net_init xfrm_net_init(struct net *net)
 
 	/* Initialize the per-net locks here */
 	spin_lock_init(&net->xfrm.xfrm_state_lock);
-	rwlock_init(&net->xfrm.xfrm_policy_lock);
+	spin_lock_init(&net->xfrm.xfrm_policy_lock);
 	mutex_init(&net->xfrm.xfrm_cfg_mutex);
 
 	return 0;
@@ -3082,6 +3108,7 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
 void __init xfrm_init(void)
 {
 	register_pernet_subsys(&xfrm_net_ops);
+	seqcount_init(&xfrm_policy_hash_generation);
 	xfrm_input_init();
 }
 
@@ -3179,7 +3206,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
 	struct hlist_head *chain;
 	u32 priority = ~0U;
 
-	read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 	chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
 	hlist_for_each_entry(pol, chain, bydst) {
 		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
@@ -3203,7 +3230,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
 
 	xfrm_pol_hold(ret);
 
-	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	return ret;
 }
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 4fd725a0c500..cdc2e2e71bff 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -558,7 +558,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
 		x->repl->notify(x, XFRM_REPLAY_UPDATE);
 }
 
-static struct xfrm_replay xfrm_replay_legacy = {
+static const struct xfrm_replay xfrm_replay_legacy = {
 	.advance = xfrm_replay_advance,
 	.check = xfrm_replay_check,
 	.recheck = xfrm_replay_check,
@@ -566,7 +566,7 @@ static struct xfrm_replay xfrm_replay_legacy = {
 	.overflow = xfrm_replay_overflow,
 };
 
-static struct xfrm_replay xfrm_replay_bmp = {
+static const struct xfrm_replay xfrm_replay_bmp = {
 	.advance = xfrm_replay_advance_bmp,
 	.check = xfrm_replay_check_bmp,
 	.recheck = xfrm_replay_check_bmp,
@@ -574,7 +574,7 @@ static struct xfrm_replay xfrm_replay_bmp = {
 	.overflow = xfrm_replay_overflow_bmp,
 };
 
-static struct xfrm_replay xfrm_replay_esn = {
+static const struct xfrm_replay xfrm_replay_esn = {
 	.advance = xfrm_replay_advance_esn,
 	.check = xfrm_replay_check_esn,
 	.recheck = xfrm_replay_recheck_esn,
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 9895a8c56d8c..ba8bf518ba14 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -28,6 +28,11 @@
 
 #include "xfrm_hash.h"
 
+#define xfrm_state_deref_prot(table, net) \
+	rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
+
+static void xfrm_state_gc_task(struct work_struct *work);
+
 /* Each xfrm_state may be linked to two tables:
 
    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
@@ -36,6 +41,15 @@
  */
 
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
+static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
+
+static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
+static HLIST_HEAD(xfrm_state_gc_list);
+
+static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
+{
+	return atomic_inc_not_zero(&x->refcnt);
+}
 
 static inline unsigned int xfrm_dst_hash(struct net *net,
 					 const xfrm_address_t *daddr,
@@ -76,18 +90,18 @@ static void xfrm_hash_transfer(struct hlist_head *list,
 		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
 				    x->props.reqid, x->props.family,
 				    nhashmask);
-		hlist_add_head(&x->bydst, ndsttable+h);
+		hlist_add_head_rcu(&x->bydst, ndsttable + h);
 
 		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
 				    x->props.family,
 				    nhashmask);
-		hlist_add_head(&x->bysrc, nsrctable+h);
+		hlist_add_head_rcu(&x->bysrc, nsrctable + h);
 
 		if (x->id.spi) {
 			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
 					    x->id.proto, x->props.family,
 					    nhashmask);
-			hlist_add_head(&x->byspi, nspitable+h);
+			hlist_add_head_rcu(&x->byspi, nspitable + h);
 		}
 	}
 }
@@ -122,25 +136,29 @@ static void xfrm_hash_resize(struct work_struct *work)
 	}
 
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
+	write_seqcount_begin(&xfrm_state_hash_generation);
 
 	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
+	odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
 	for (i = net->xfrm.state_hmask; i >= 0; i--)
-		xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi,
-				   nhashmask);
+		xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
 
-	odst = net->xfrm.state_bydst;
-	osrc = net->xfrm.state_bysrc;
-	ospi = net->xfrm.state_byspi;
+	osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
+	ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
 	ohashmask = net->xfrm.state_hmask;
 
-	net->xfrm.state_bydst = ndst;
-	net->xfrm.state_bysrc = nsrc;
-	net->xfrm.state_byspi = nspi;
+	rcu_assign_pointer(net->xfrm.state_bydst, ndst);
+	rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
+	rcu_assign_pointer(net->xfrm.state_byspi, nspi);
 	net->xfrm.state_hmask = nhashmask;
 
+	write_seqcount_end(&xfrm_state_hash_generation);
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 
 	osize = (ohashmask + 1) * sizeof(struct hlist_head);
+
+	synchronize_rcu();
+
 	xfrm_hash_free(odst, osize);
 	xfrm_hash_free(osrc, osize);
 	xfrm_hash_free(ospi, osize);
@@ -355,15 +373,16 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 
 static void xfrm_state_gc_task(struct work_struct *work)
 {
-	struct net *net = container_of(work, struct net, xfrm.state_gc_work);
 	struct xfrm_state *x;
 	struct hlist_node *tmp;
 	struct hlist_head gc_list;
 
 	spin_lock_bh(&xfrm_state_gc_lock);
-	hlist_move_list(&net->xfrm.state_gc_list, &gc_list);
+	hlist_move_list(&xfrm_state_gc_list, &gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 
+	synchronize_rcu();
+
 	hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
 		xfrm_state_gc_destroy(x);
 }
@@ -500,14 +519,12 @@ EXPORT_SYMBOL(xfrm_state_alloc);
 
 void __xfrm_state_destroy(struct xfrm_state *x)
 {
-	struct net *net = xs_net(x);
-
 	WARN_ON(x->km.state != XFRM_STATE_DEAD);
 
 	spin_lock_bh(&xfrm_state_gc_lock);
-	hlist_add_head(&x->gclist, &net->xfrm.state_gc_list);
+	hlist_add_head(&x->gclist, &xfrm_state_gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
-	schedule_work(&net->xfrm.state_gc_work);
+	schedule_work(&xfrm_state_gc_work);
 }
 EXPORT_SYMBOL(__xfrm_state_destroy);
 
@@ -520,10 +537,10 @@ int __xfrm_state_delete(struct xfrm_state *x)
 		x->km.state = XFRM_STATE_DEAD;
 		spin_lock(&net->xfrm.xfrm_state_lock);
 		list_del(&x->km.all);
-		hlist_del(&x->bydst);
-		hlist_del(&x->bysrc);
+		hlist_del_rcu(&x->bydst);
+		hlist_del_rcu(&x->bysrc);
 		if (x->id.spi)
-			hlist_del(&x->byspi);
+			hlist_del_rcu(&x->byspi);
 		net->xfrm.state_num--;
 		spin_unlock(&net->xfrm.xfrm_state_lock);
 
@@ -659,7 +676,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
 	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
 	struct xfrm_state *x;
 
-	hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) {
+	hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
 		if (x->props.family != family ||
 		    x->id.spi != spi ||
 		    x->id.proto != proto ||
@@ -668,7 +685,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
 
 		if ((mark & x->mark.m) != x->mark.v)
 			continue;
-		xfrm_state_hold(x);
+		if (!xfrm_state_hold_rcu(x))
+			continue;
 		return x;
 	}
 
@@ -683,7 +701,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 	unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
 	struct xfrm_state *x;
 
-	hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
+	hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
 		if (x->props.family != family ||
 		    x->id.proto != proto ||
 		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
@@ -692,7 +710,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 
 		if ((mark & x->mark.m) != x->mark.v)
 			continue;
-		xfrm_state_hold(x);
+		if (!xfrm_state_hold_rcu(x))
+			continue;
 		return x;
 	}
 
@@ -775,13 +794,16 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	struct xfrm_state *best = NULL;
 	u32 mark = pol->mark.v & pol->mark.m;
 	unsigned short encap_family = tmpl->encap_family;
+	unsigned int sequence;
 	struct km_event c;
 
 	to_put = NULL;
 
-	spin_lock_bh(&net->xfrm.xfrm_state_lock);
+	sequence = read_seqcount_begin(&xfrm_state_hash_generation);
+
+	rcu_read_lock();
 	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
-	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
+	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
 		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
@@ -797,7 +819,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		goto found;
 
 	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
-	hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) {
+	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
 		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
@@ -850,19 +872,21 @@ found:
 		}
 
 		if (km_query(x, tmpl, pol) == 0) {
+			spin_lock_bh(&net->xfrm.xfrm_state_lock);
 			x->km.state = XFRM_STATE_ACQ;
 			list_add(&x->km.all, &net->xfrm.state_all);
-			hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+			hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
 			h = xfrm_src_hash(net, daddr, saddr, encap_family);
-			hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+			hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
 			if (x->id.spi) {
 				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
-				hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+				hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 			}
 			x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
 			tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
 			net->xfrm.state_num++;
 			xfrm_hash_grow_check(net, x->bydst.next != NULL);
+			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 		} else {
 			x->km.state = XFRM_STATE_DEAD;
 			to_put = x;
@@ -871,13 +895,26 @@ found:
 		}
 	}
 out:
-	if (x)
-		xfrm_state_hold(x);
-	else
+	if (x) {
+		if (!xfrm_state_hold_rcu(x)) {
+			*err = -EAGAIN;
+			x = NULL;
+		}
+	} else {
 		*err = acquire_in_progress ? -EAGAIN : error;
-	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+	}
+	rcu_read_unlock();
 	if (to_put)
 		xfrm_state_put(to_put);
+
+	if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
+		*err = -EAGAIN;
+		if (x) {
+			xfrm_state_put(x);
+			x = NULL;
+		}
+	}
+
 	return x;
 }
 
@@ -945,16 +982,16 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 
 	h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
 			  x->props.reqid, x->props.family);
-	hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+	hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
 
 	h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
-	hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+	hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
 
 	if (x->id.spi) {
 		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
 				  x->props.family);
 
-		hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+		hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 	}
 
 	tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
@@ -1063,9 +1100,9 @@ static struct xfrm_state *__find_acq_core(struct net *net,
 		xfrm_state_hold(x);
 		tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
 		list_add(&x->km.all, &net->xfrm.state_all);
-		hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+		hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
 		h = xfrm_src_hash(net, daddr, saddr, family);
-		hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+		hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
 
 		net->xfrm.state_num++;
 
@@ -1581,7 +1618,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
 	if (x->id.spi) {
 		spin_lock_bh(&net->xfrm.xfrm_state_lock);
 		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
-		hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+		hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 
 		err = 0;
@@ -2099,8 +2136,6 @@ int __net_init xfrm_state_init(struct net *net)
 
 	net->xfrm.state_num = 0;
 	INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
-	INIT_HLIST_HEAD(&net->xfrm.state_gc_list);
-	INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task);
 	spin_lock_init(&net->xfrm.xfrm_state_lock);
 	return 0;
 
@@ -2118,7 +2153,7 @@ void xfrm_state_fini(struct net *net)
 
 	flush_work(&net->xfrm.state_hash_work);
 	xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
-	flush_work(&net->xfrm.state_gc_work);
+	flush_work(&xfrm_state_gc_work);
 
 	WARN_ON(!list_empty(&net->xfrm.state_all));
 