Diffstat (limited to 'net/netlink/af_netlink.c')
-rw-r--r-- | net/netlink/af_netlink.c | 313
1 file changed, 120 insertions, 193 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e6fac7e3db52..c416725d28c4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -58,7 +58,9 @@
58 | #include <linux/mutex.h> | 58 | #include <linux/mutex.h> |
59 | #include <linux/vmalloc.h> | 59 | #include <linux/vmalloc.h> |
60 | #include <linux/if_arp.h> | 60 | #include <linux/if_arp.h> |
61 | #include <linux/rhashtable.h> | ||
61 | #include <asm/cacheflush.h> | 62 | #include <asm/cacheflush.h> |
63 | #include <linux/hash.h> | ||
62 | 64 | ||
63 | #include <net/net_namespace.h> | 65 | #include <net/net_namespace.h> |
64 | #include <net/sock.h> | 66 | #include <net/sock.h> |
@@ -100,6 +102,19 @@ static atomic_t nl_table_users = ATOMIC_INIT(0);
100 | 102 | ||
101 | #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); | 103 | #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); |
102 | 104 | ||
105 | /* Protects netlink socket hash table mutations */ | ||
106 | DEFINE_MUTEX(nl_sk_hash_lock); | ||
107 | EXPORT_SYMBOL_GPL(nl_sk_hash_lock); | ||
108 | |||
109 | static int lockdep_nl_sk_hash_is_held(void) | ||
110 | { | ||
111 | #ifdef CONFIG_LOCKDEP | ||
112 | return (debug_locks) ? lockdep_is_held(&nl_sk_hash_lock) : 1; | ||
113 | #else | ||
114 | return 1; | ||
115 | #endif | ||
116 | } | ||
117 | |||
103 | static ATOMIC_NOTIFIER_HEAD(netlink_chain); | 118 | static ATOMIC_NOTIFIER_HEAD(netlink_chain); |
104 | 119 | ||
105 | static DEFINE_SPINLOCK(netlink_tap_lock); | 120 | static DEFINE_SPINLOCK(netlink_tap_lock); |
@@ -110,11 +125,6 @@ static inline u32 netlink_group_mask(u32 group)
110 | return group ? 1 << (group - 1) : 0; | 125 | return group ? 1 << (group - 1) : 0; |
111 | } | 126 | } |
112 | 127 | ||
113 | static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid) | ||
114 | { | ||
115 | return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; | ||
116 | } | ||
117 | |||
118 | int netlink_add_tap(struct netlink_tap *nt) | 128 | int netlink_add_tap(struct netlink_tap *nt) |
119 | { | 129 | { |
120 | if (unlikely(nt->dev->type != ARPHRD_NETLINK)) | 130 | if (unlikely(nt->dev->type != ARPHRD_NETLINK)) |
@@ -170,7 +180,6 @@ EXPORT_SYMBOL_GPL(netlink_remove_tap);
170 | static bool netlink_filter_tap(const struct sk_buff *skb) | 180 | static bool netlink_filter_tap(const struct sk_buff *skb) |
171 | { | 181 | { |
172 | struct sock *sk = skb->sk; | 182 | struct sock *sk = skb->sk; |
173 | bool pass = false; | ||
174 | 183 | ||
175 | /* We take the more conservative approach and | 184 | /* We take the more conservative approach and |
176 | * whitelist socket protocols that may pass. | 185 | * whitelist socket protocols that may pass. |
@@ -184,11 +193,10 @@ static bool netlink_filter_tap(const struct sk_buff *skb)
184 | case NETLINK_FIB_LOOKUP: | 193 | case NETLINK_FIB_LOOKUP: |
185 | case NETLINK_NETFILTER: | 194 | case NETLINK_NETFILTER: |
186 | case NETLINK_GENERIC: | 195 | case NETLINK_GENERIC: |
187 | pass = true; | 196 | return true; |
188 | break; | ||
189 | } | 197 | } |
190 | 198 | ||
191 | return pass; | 199 | return false; |
192 | } | 200 | } |
193 | 201 | ||
194 | static int __netlink_deliver_tap_skb(struct sk_buff *skb, | 202 | static int __netlink_deliver_tap_skb(struct sk_buff *skb, |
@@ -205,7 +213,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
205 | nskb->protocol = htons((u16) sk->sk_protocol); | 213 | nskb->protocol = htons((u16) sk->sk_protocol); |
206 | nskb->pkt_type = netlink_is_kernel(sk) ? | 214 | nskb->pkt_type = netlink_is_kernel(sk) ? |
207 | PACKET_KERNEL : PACKET_USER; | 215 | PACKET_KERNEL : PACKET_USER; |
208 | 216 | skb_reset_network_header(nskb); | |
209 | ret = dev_queue_xmit(nskb); | 217 | ret = dev_queue_xmit(nskb); |
210 | if (unlikely(ret > 0)) | 218 | if (unlikely(ret > 0)) |
211 | ret = net_xmit_errno(ret); | 219 | ret = net_xmit_errno(ret); |
@@ -376,7 +384,7 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
376 | 384 | ||
377 | if ((int)req->nm_block_size <= 0) | 385 | if ((int)req->nm_block_size <= 0) |
378 | return -EINVAL; | 386 | return -EINVAL; |
379 | if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE)) | 387 | if (!PAGE_ALIGNED(req->nm_block_size)) |
380 | return -EINVAL; | 388 | return -EINVAL; |
381 | if (req->nm_frame_size < NL_MMAP_HDRLEN) | 389 | if (req->nm_frame_size < NL_MMAP_HDRLEN) |
382 | return -EINVAL; | 390 | return -EINVAL; |
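The block-size check above only changes in spelling: PAGE_ALIGNED(x) is the kernel's shorthand for IS_ALIGNED(x, PAGE_SIZE). A standalone sketch of the same power-of-two alignment test, using a fixed toy page size instead of the kernel macros:

```c
/* Toy re-statement of the alignment check; TOY_PAGE_SIZE stands in for PAGE_SIZE. */
#include <stdio.h>

#define TOY_PAGE_SIZE   4096u
#define IS_ALIGNED(x, a)  (((x) & ((a) - 1)) == 0)   /* a must be a power of two */
#define PAGE_ALIGNED(x)   IS_ALIGNED(x, TOY_PAGE_SIZE)

int main(void)
{
	printf("%d %d\n", PAGE_ALIGNED(8192u), PAGE_ALIGNED(8200u));  /* prints "1 0" */
	return 0;
}
```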
@@ -985,105 +993,48 @@ netlink_unlock_table(void)
985 | wake_up(&nl_table_wait); | 993 | wake_up(&nl_table_wait); |
986 | } | 994 | } |
987 | 995 | ||
988 | static bool netlink_compare(struct net *net, struct sock *sk) | 996 | struct netlink_compare_arg |
989 | { | 997 | { |
990 | return net_eq(sock_net(sk), net); | 998 | struct net *net; |
991 | } | 999 | u32 portid; |
992 | 1000 | }; | |
993 | static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) | ||
994 | { | ||
995 | struct netlink_table *table = &nl_table[protocol]; | ||
996 | struct nl_portid_hash *hash = &table->hash; | ||
997 | struct hlist_head *head; | ||
998 | struct sock *sk; | ||
999 | |||
1000 | read_lock(&nl_table_lock); | ||
1001 | head = nl_portid_hashfn(hash, portid); | ||
1002 | sk_for_each(sk, head) { | ||
1003 | if (table->compare(net, sk) && | ||
1004 | (nlk_sk(sk)->portid == portid)) { | ||
1005 | sock_hold(sk); | ||
1006 | goto found; | ||
1007 | } | ||
1008 | } | ||
1009 | sk = NULL; | ||
1010 | found: | ||
1011 | read_unlock(&nl_table_lock); | ||
1012 | return sk; | ||
1013 | } | ||
1014 | 1001 | ||
1015 | static struct hlist_head *nl_portid_hash_zalloc(size_t size) | 1002 | static bool netlink_compare(void *ptr, void *arg) |
1016 | { | 1003 | { |
1017 | if (size <= PAGE_SIZE) | 1004 | struct netlink_compare_arg *x = arg; |
1018 | return kzalloc(size, GFP_ATOMIC); | 1005 | struct sock *sk = ptr; |
1019 | else | ||
1020 | return (struct hlist_head *) | ||
1021 | __get_free_pages(GFP_ATOMIC | __GFP_ZERO, | ||
1022 | get_order(size)); | ||
1023 | } | ||
1024 | 1006 | ||
1025 | static void nl_portid_hash_free(struct hlist_head *table, size_t size) | 1007 | return nlk_sk(sk)->portid == x->portid && |
1026 | { | 1008 | net_eq(sock_net(sk), x->net); |
1027 | if (size <= PAGE_SIZE) | ||
1028 | kfree(table); | ||
1029 | else | ||
1030 | free_pages((unsigned long)table, get_order(size)); | ||
1031 | } | 1009 | } |
1032 | 1010 | ||
1033 | static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow) | 1011 | static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, |
1012 | struct net *net) | ||
1034 | { | 1013 | { |
1035 | unsigned int omask, mask, shift; | 1014 | struct netlink_compare_arg arg = { |
1036 | size_t osize, size; | 1015 | .net = net, |
1037 | struct hlist_head *otable, *table; | 1016 | .portid = portid, |
1038 | int i; | 1017 | }; |
1039 | 1018 | u32 hash; | |
1040 | omask = mask = hash->mask; | ||
1041 | osize = size = (mask + 1) * sizeof(*table); | ||
1042 | shift = hash->shift; | ||
1043 | |||
1044 | if (grow) { | ||
1045 | if (++shift > hash->max_shift) | ||
1046 | return 0; | ||
1047 | mask = mask * 2 + 1; | ||
1048 | size *= 2; | ||
1049 | } | ||
1050 | |||
1051 | table = nl_portid_hash_zalloc(size); | ||
1052 | if (!table) | ||
1053 | return 0; | ||
1054 | |||
1055 | otable = hash->table; | ||
1056 | hash->table = table; | ||
1057 | hash->mask = mask; | ||
1058 | hash->shift = shift; | ||
1059 | get_random_bytes(&hash->rnd, sizeof(hash->rnd)); | ||
1060 | 1019 | ||
1061 | for (i = 0; i <= omask; i++) { | 1020 | hash = rhashtable_hashfn(&table->hash, &portid, sizeof(portid)); |
1062 | struct sock *sk; | ||
1063 | struct hlist_node *tmp; | ||
1064 | |||
1065 | sk_for_each_safe(sk, tmp, &otable[i]) | ||
1066 | __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid)); | ||
1067 | } | ||
1068 | 1021 | ||
1069 | nl_portid_hash_free(otable, osize); | 1022 | return rhashtable_lookup_compare(&table->hash, hash, |
1070 | hash->rehash_time = jiffies + 10 * 60 * HZ; | 1023 | &netlink_compare, &arg); |
1071 | return 1; | ||
1072 | } | 1024 | } |
1073 | 1025 | ||
1074 | static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len) | 1026 | static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) |
1075 | { | 1027 | { |
1076 | int avg = hash->entries >> hash->shift; | 1028 | struct netlink_table *table = &nl_table[protocol]; |
1077 | 1029 | struct sock *sk; | |
1078 | if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1)) | ||
1079 | return 1; | ||
1080 | 1030 | ||
1081 | if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) { | 1031 | rcu_read_lock(); |
1082 | nl_portid_hash_rehash(hash, 0); | 1032 | sk = __netlink_lookup(table, portid, net); |
1083 | return 1; | 1033 | if (sk) |
1084 | } | 1034 | sock_hold(sk); |
1035 | rcu_read_unlock(); | ||
1085 | 1036 | ||
1086 | return 0; | 1037 | return sk; |
1087 | } | 1038 | } |
1088 | 1039 | ||
1089 | static const struct proto_ops netlink_ops; | 1040 | static const struct proto_ops netlink_ops; |
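The hunk above replaces the open-coded portid chain walk with a hash lookup plus a compare callback that must match both the portid and the network namespace, running under rcu_read_lock() instead of nl_table_lock. Below is a standalone userspace sketch of that compare-callback lookup pattern; the toy_sock structure, hashfn() and the fixed-size chained table are illustrative stand-ins, not the kernel's rhashtable API:

```c
/* Standalone sketch of a compare-callback hash lookup, loosely modelled on
 * netlink_compare()/__netlink_lookup(); not the kernel rhashtable API. */
#include <stdbool.h>
#include <stdio.h>

#define TABLE_SIZE 16

struct toy_sock {
	unsigned int portid;
	int netns_id;              /* stand-in for struct net * */
	struct toy_sock *next;     /* hash chain */
};

struct lookup_arg {
	int netns_id;
	unsigned int portid;
};

static struct toy_sock *buckets[TABLE_SIZE];

static unsigned int hashfn(unsigned int portid)
{
	return portid % TABLE_SIZE;   /* the kernel uses a seeded fast hash here */
}

/* Mirrors netlink_compare(): key equality plus namespace equality. */
static bool compare(const struct toy_sock *sk, const struct lookup_arg *x)
{
	return sk->portid == x->portid && sk->netns_id == x->netns_id;
}

static struct toy_sock *lookup(unsigned int portid, int netns_id)
{
	struct lookup_arg arg = { .netns_id = netns_id, .portid = portid };
	struct toy_sock *sk;

	for (sk = buckets[hashfn(portid)]; sk; sk = sk->next)
		if (compare(sk, &arg))
			return sk;
	return NULL;
}

static void insert(struct toy_sock *sk)
{
	unsigned int h = hashfn(sk->portid);

	sk->next = buckets[h];
	buckets[h] = sk;
}

int main(void)
{
	struct toy_sock a = { .portid = 100, .netns_id = 1 };
	struct toy_sock b = { .portid = 100, .netns_id = 2 };  /* same portid, other netns */

	insert(&a);
	insert(&b);

	printf("found netns %d\n", lookup(100, 2)->netns_id);  /* prints 2 */
	printf("miss: %p\n", (void *)lookup(100, 3));          /* typically prints (nil) */
	return 0;
}
```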
@@ -1115,22 +1066,10 @@ netlink_update_listeners(struct sock *sk)
1115 | static int netlink_insert(struct sock *sk, struct net *net, u32 portid) | 1066 | static int netlink_insert(struct sock *sk, struct net *net, u32 portid) |
1116 | { | 1067 | { |
1117 | struct netlink_table *table = &nl_table[sk->sk_protocol]; | 1068 | struct netlink_table *table = &nl_table[sk->sk_protocol]; |
1118 | struct nl_portid_hash *hash = &table->hash; | ||
1119 | struct hlist_head *head; | ||
1120 | int err = -EADDRINUSE; | 1069 | int err = -EADDRINUSE; |
1121 | struct sock *osk; | ||
1122 | int len; | ||
1123 | 1070 | ||
1124 | netlink_table_grab(); | 1071 | mutex_lock(&nl_sk_hash_lock); |
1125 | head = nl_portid_hashfn(hash, portid); | 1072 | if (__netlink_lookup(table, portid, net)) |
1126 | len = 0; | ||
1127 | sk_for_each(osk, head) { | ||
1128 | if (table->compare(net, osk) && | ||
1129 | (nlk_sk(osk)->portid == portid)) | ||
1130 | break; | ||
1131 | len++; | ||
1132 | } | ||
1133 | if (osk) | ||
1134 | goto err; | 1073 | goto err; |
1135 | 1074 | ||
1136 | err = -EBUSY; | 1075 | err = -EBUSY; |
@@ -1138,26 +1077,31 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
1138 | goto err; | 1077 | goto err; |
1139 | 1078 | ||
1140 | err = -ENOMEM; | 1079 | err = -ENOMEM; |
1141 | if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX)) | 1080 | if (BITS_PER_LONG > 32 && unlikely(table->hash.nelems >= UINT_MAX)) |
1142 | goto err; | 1081 | goto err; |
1143 | 1082 | ||
1144 | if (len && nl_portid_hash_dilute(hash, len)) | ||
1145 | head = nl_portid_hashfn(hash, portid); | ||
1146 | hash->entries++; | ||
1147 | nlk_sk(sk)->portid = portid; | 1083 | nlk_sk(sk)->portid = portid; |
1148 | sk_add_node(sk, head); | 1084 | sock_hold(sk); |
1085 | rhashtable_insert(&table->hash, &nlk_sk(sk)->node, GFP_KERNEL); | ||
1149 | err = 0; | 1086 | err = 0; |
1150 | |||
1151 | err: | 1087 | err: |
1152 | netlink_table_ungrab(); | 1088 | mutex_unlock(&nl_sk_hash_lock); |
1153 | return err; | 1089 | return err; |
1154 | } | 1090 | } |
1155 | 1091 | ||
1156 | static void netlink_remove(struct sock *sk) | 1092 | static void netlink_remove(struct sock *sk) |
1157 | { | 1093 | { |
1094 | struct netlink_table *table; | ||
1095 | |||
1096 | mutex_lock(&nl_sk_hash_lock); | ||
1097 | table = &nl_table[sk->sk_protocol]; | ||
1098 | if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node, GFP_KERNEL)) { | ||
1099 | WARN_ON(atomic_read(&sk->sk_refcnt) == 1); | ||
1100 | __sock_put(sk); | ||
1101 | } | ||
1102 | mutex_unlock(&nl_sk_hash_lock); | ||
1103 | |||
1158 | netlink_table_grab(); | 1104 | netlink_table_grab(); |
1159 | if (sk_del_node_init(sk)) | ||
1160 | nl_table[sk->sk_protocol].hash.entries--; | ||
1161 | if (nlk_sk(sk)->subscriptions) | 1105 | if (nlk_sk(sk)->subscriptions) |
1162 | __sk_del_bind_node(sk); | 1106 | __sk_del_bind_node(sk); |
1163 | netlink_table_ungrab(); | 1107 | netlink_table_ungrab(); |
@@ -1313,6 +1257,9 @@ static int netlink_release(struct socket *sock)
1313 | } | 1257 | } |
1314 | netlink_table_ungrab(); | 1258 | netlink_table_ungrab(); |
1315 | 1259 | ||
1260 | /* Wait for readers to complete */ | ||
1261 | synchronize_net(); | ||
1262 | |||
1316 | kfree(nlk->groups); | 1263 | kfree(nlk->groups); |
1317 | nlk->groups = NULL; | 1264 | nlk->groups = NULL; |
1318 | 1265 | ||
@@ -1328,30 +1275,22 @@ static int netlink_autobind(struct socket *sock)
1328 | struct sock *sk = sock->sk; | 1275 | struct sock *sk = sock->sk; |
1329 | struct net *net = sock_net(sk); | 1276 | struct net *net = sock_net(sk); |
1330 | struct netlink_table *table = &nl_table[sk->sk_protocol]; | 1277 | struct netlink_table *table = &nl_table[sk->sk_protocol]; |
1331 | struct nl_portid_hash *hash = &table->hash; | ||
1332 | struct hlist_head *head; | ||
1333 | struct sock *osk; | ||
1334 | s32 portid = task_tgid_vnr(current); | 1278 | s32 portid = task_tgid_vnr(current); |
1335 | int err; | 1279 | int err; |
1336 | static s32 rover = -4097; | 1280 | static s32 rover = -4097; |
1337 | 1281 | ||
1338 | retry: | 1282 | retry: |
1339 | cond_resched(); | 1283 | cond_resched(); |
1340 | netlink_table_grab(); | 1284 | rcu_read_lock(); |
1341 | head = nl_portid_hashfn(hash, portid); | 1285 | if (__netlink_lookup(table, portid, net)) { |
1342 | sk_for_each(osk, head) { | 1286 | /* Bind collision, search negative portid values. */ |
1343 | if (!table->compare(net, osk)) | 1287 | portid = rover--; |
1344 | continue; | 1288 | if (rover > -4097) |
1345 | if (nlk_sk(osk)->portid == portid) { | 1289 | rover = -4097; |
1346 | /* Bind collision, search negative portid values. */ | 1290 | rcu_read_unlock(); |
1347 | portid = rover--; | 1291 | goto retry; |
1348 | if (rover > -4097) | ||
1349 | rover = -4097; | ||
1350 | netlink_table_ungrab(); | ||
1351 | goto retry; | ||
1352 | } | ||
1353 | } | 1292 | } |
1354 | netlink_table_ungrab(); | 1293 | rcu_read_unlock(); |
1355 | 1294 | ||
1356 | err = netlink_insert(sk, net, portid); | 1295 | err = netlink_insert(sk, net, portid); |
1357 | if (err == -EADDRINUSE) | 1296 | if (err == -EADDRINUSE) |
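With the table under RCU, autobind collisions in the hunk above are detected via __netlink_lookup(), and the portid search walks downward through negative values starting at -4097. A toy model of that rover, where is_taken() is a hypothetical stand-in for the lookup and the retry is collapsed into a simple loop:

```c
/* Toy model of the netlink_autobind() rover: try the caller's id first, then
 * search negative portids from -4097 on collision. is_taken() is a
 * hypothetical stand-in for __netlink_lookup(). */
#include <stdbool.h>
#include <stdio.h>

static bool is_taken(int portid)
{
	/* pretend these portids are already bound */
	return portid == 4242 || portid == -4097 || portid == -4098;
}

static int autobind(int tgid)
{
	static int rover = -4097;
	int portid = tgid;

	while (is_taken(portid)) {
		/* Bind collision, search negative portid values. */
		portid = rover--;
		if (rover > -4097)   /* reset if the rover wrapped around */
			rover = -4097;
	}
	return portid;
}

int main(void)
{
	printf("%d\n", autobind(1234));   /* free: gets 1234 */
	printf("%d\n", autobind(4242));   /* taken: falls through to -4099 */
	return 0;
}
```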
@@ -1961,25 +1900,25 @@ struct netlink_broadcast_data {
1961 | void *tx_data; | 1900 | void *tx_data; |
1962 | }; | 1901 | }; |
1963 | 1902 | ||
1964 | static int do_one_broadcast(struct sock *sk, | 1903 | static void do_one_broadcast(struct sock *sk, |
1965 | struct netlink_broadcast_data *p) | 1904 | struct netlink_broadcast_data *p) |
1966 | { | 1905 | { |
1967 | struct netlink_sock *nlk = nlk_sk(sk); | 1906 | struct netlink_sock *nlk = nlk_sk(sk); |
1968 | int val; | 1907 | int val; |
1969 | 1908 | ||
1970 | if (p->exclude_sk == sk) | 1909 | if (p->exclude_sk == sk) |
1971 | goto out; | 1910 | return; |
1972 | 1911 | ||
1973 | if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || | 1912 | if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || |
1974 | !test_bit(p->group - 1, nlk->groups)) | 1913 | !test_bit(p->group - 1, nlk->groups)) |
1975 | goto out; | 1914 | return; |
1976 | 1915 | ||
1977 | if (!net_eq(sock_net(sk), p->net)) | 1916 | if (!net_eq(sock_net(sk), p->net)) |
1978 | goto out; | 1917 | return; |
1979 | 1918 | ||
1980 | if (p->failure) { | 1919 | if (p->failure) { |
1981 | netlink_overrun(sk); | 1920 | netlink_overrun(sk); |
1982 | goto out; | 1921 | return; |
1983 | } | 1922 | } |
1984 | 1923 | ||
1985 | sock_hold(sk); | 1924 | sock_hold(sk); |
@@ -2017,9 +1956,6 @@ static int do_one_broadcast(struct sock *sk,
2017 | p->skb2 = NULL; | 1956 | p->skb2 = NULL; |
2018 | } | 1957 | } |
2019 | sock_put(sk); | 1958 | sock_put(sk); |
2020 | |||
2021 | out: | ||
2022 | return 0; | ||
2023 | } | 1959 | } |
2024 | 1960 | ||
2025 | int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, | 1961 | int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, |
@@ -2958,14 +2894,18 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
2958 | { | 2894 | { |
2959 | struct nl_seq_iter *iter = seq->private; | 2895 | struct nl_seq_iter *iter = seq->private; |
2960 | int i, j; | 2896 | int i, j; |
2897 | struct netlink_sock *nlk; | ||
2961 | struct sock *s; | 2898 | struct sock *s; |
2962 | loff_t off = 0; | 2899 | loff_t off = 0; |
2963 | 2900 | ||
2964 | for (i = 0; i < MAX_LINKS; i++) { | 2901 | for (i = 0; i < MAX_LINKS; i++) { |
2965 | struct nl_portid_hash *hash = &nl_table[i].hash; | 2902 | struct rhashtable *ht = &nl_table[i].hash; |
2903 | const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); | ||
2904 | |||
2905 | for (j = 0; j < tbl->size; j++) { | ||
2906 | rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) { | ||
2907 | s = (struct sock *)nlk; | ||
2966 | 2908 | ||
2967 | for (j = 0; j <= hash->mask; j++) { | ||
2968 | sk_for_each(s, &hash->table[j]) { | ||
2969 | if (sock_net(s) != seq_file_net(seq)) | 2909 | if (sock_net(s) != seq_file_net(seq)) |
2970 | continue; | 2910 | continue; |
2971 | if (off == pos) { | 2911 | if (off == pos) { |
@@ -2981,15 +2921,15 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
2981 | } | 2921 | } |
2982 | 2922 | ||
2983 | static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) | 2923 | static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) |
2984 | __acquires(nl_table_lock) | 2924 | __acquires(RCU) |
2985 | { | 2925 | { |
2986 | read_lock(&nl_table_lock); | 2926 | rcu_read_lock(); |
2987 | return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; | 2927 | return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; |
2988 | } | 2928 | } |
2989 | 2929 | ||
2990 | static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 2930 | static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
2991 | { | 2931 | { |
2992 | struct sock *s; | 2932 | struct netlink_sock *nlk; |
2993 | struct nl_seq_iter *iter; | 2933 | struct nl_seq_iter *iter; |
2994 | struct net *net; | 2934 | struct net *net; |
2995 | int i, j; | 2935 | int i, j; |
@@ -3001,28 +2941,26 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3001 | 2941 | ||
3002 | net = seq_file_net(seq); | 2942 | net = seq_file_net(seq); |
3003 | iter = seq->private; | 2943 | iter = seq->private; |
3004 | s = v; | 2944 | nlk = v; |
3005 | do { | 2945 | |
3006 | s = sk_next(s); | 2946 | rht_for_each_entry_rcu(nlk, nlk->node.next, node) |
3007 | } while (s && !nl_table[s->sk_protocol].compare(net, s)); | 2947 | if (net_eq(sock_net((struct sock *)nlk), net)) |
3008 | if (s) | 2948 | return nlk; |
3009 | return s; | ||
3010 | 2949 | ||
3011 | i = iter->link; | 2950 | i = iter->link; |
3012 | j = iter->hash_idx + 1; | 2951 | j = iter->hash_idx + 1; |
3013 | 2952 | ||
3014 | do { | 2953 | do { |
3015 | struct nl_portid_hash *hash = &nl_table[i].hash; | 2954 | struct rhashtable *ht = &nl_table[i].hash; |
3016 | 2955 | const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); | |
3017 | for (; j <= hash->mask; j++) { | ||
3018 | s = sk_head(&hash->table[j]); | ||
3019 | 2956 | ||
3020 | while (s && !nl_table[s->sk_protocol].compare(net, s)) | 2957 | for (; j < tbl->size; j++) { |
3021 | s = sk_next(s); | 2958 | rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) { |
3022 | if (s) { | 2959 | if (net_eq(sock_net((struct sock *)nlk), net)) { |
3023 | iter->link = i; | 2960 | iter->link = i; |
3024 | iter->hash_idx = j; | 2961 | iter->hash_idx = j; |
3025 | return s; | 2962 | return nlk; |
2963 | } | ||
3026 | } | 2964 | } |
3027 | } | 2965 | } |
3028 | 2966 | ||
@@ -3033,9 +2971,9 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3033 | } | 2971 | } |
3034 | 2972 | ||
3035 | static void netlink_seq_stop(struct seq_file *seq, void *v) | 2973 | static void netlink_seq_stop(struct seq_file *seq, void *v) |
3036 | __releases(nl_table_lock) | 2974 | __releases(RCU) |
3037 | { | 2975 | { |
3038 | read_unlock(&nl_table_lock); | 2976 | rcu_read_unlock(); |
3039 | } | 2977 | } |
3040 | 2978 | ||
3041 | 2979 | ||
@@ -3173,9 +3111,17 @@ static struct pernet_operations __net_initdata netlink_net_ops = {
3173 | static int __init netlink_proto_init(void) | 3111 | static int __init netlink_proto_init(void) |
3174 | { | 3112 | { |
3175 | int i; | 3113 | int i; |
3176 | unsigned long limit; | ||
3177 | unsigned int order; | ||
3178 | int err = proto_register(&netlink_proto, 0); | 3114 | int err = proto_register(&netlink_proto, 0); |
3115 | struct rhashtable_params ht_params = { | ||
3116 | .head_offset = offsetof(struct netlink_sock, node), | ||
3117 | .key_offset = offsetof(struct netlink_sock, portid), | ||
3118 | .key_len = sizeof(u32), /* portid */ | ||
3119 | .hashfn = arch_fast_hash, | ||
3120 | .max_shift = 16, /* 64K */ | ||
3121 | .grow_decision = rht_grow_above_75, | ||
3122 | .shrink_decision = rht_shrink_below_30, | ||
3123 | .mutex_is_held = lockdep_nl_sk_hash_is_held, | ||
3124 | }; | ||
3179 | 3125 | ||
3180 | if (err != 0) | 3126 | if (err != 0) |
3181 | goto out; | 3127 | goto out; |
@@ -3186,32 +3132,13 @@ static int __init netlink_proto_init(void)
3186 | if (!nl_table) | 3132 | if (!nl_table) |
3187 | goto panic; | 3133 | goto panic; |
3188 | 3134 | ||
3189 | if (totalram_pages >= (128 * 1024)) | ||
3190 | limit = totalram_pages >> (21 - PAGE_SHIFT); | ||
3191 | else | ||
3192 | limit = totalram_pages >> (23 - PAGE_SHIFT); | ||
3193 | |||
3194 | order = get_bitmask_order(limit) - 1 + PAGE_SHIFT; | ||
3195 | limit = (1UL << order) / sizeof(struct hlist_head); | ||
3196 | order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1; | ||
3197 | |||
3198 | for (i = 0; i < MAX_LINKS; i++) { | 3135 | for (i = 0; i < MAX_LINKS; i++) { |
3199 | struct nl_portid_hash *hash = &nl_table[i].hash; | 3136 | if (rhashtable_init(&nl_table[i].hash, &ht_params) < 0) { |
3200 | 3137 | while (--i > 0) | |
3201 | hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table)); | 3138 | rhashtable_destroy(&nl_table[i].hash); |
3202 | if (!hash->table) { | ||
3203 | while (i-- > 0) | ||
3204 | nl_portid_hash_free(nl_table[i].hash.table, | ||
3205 | 1 * sizeof(*hash->table)); | ||
3206 | kfree(nl_table); | 3139 | kfree(nl_table); |
3207 | goto panic; | 3140 | goto panic; |
3208 | } | 3141 | } |
3209 | hash->max_shift = order; | ||
3210 | hash->shift = 0; | ||
3211 | hash->mask = 0; | ||
3212 | hash->rehash_time = jiffies; | ||
3213 | |||
3214 | nl_table[i].compare = netlink_compare; | ||
3215 | } | 3142 | } |
3216 | 3143 | ||
3217 | INIT_LIST_HEAD(&netlink_tap_all); | 3144 | INIT_LIST_HEAD(&netlink_tap_all); |
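All per-protocol tables are now initialised from a single rhashtable_params block: head_offset and key_offset tell the generic table where the hash node and the 32-bit portid key live inside struct netlink_sock. A minimal userspace sketch of that offset-driven layout (toy types and helpers, not the kernel rhashtable implementation):

```c
/* Sketch of offset-driven key/object recovery, in the spirit of
 * rhashtable_params.head_offset / .key_offset; illustrative only. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct node {                 /* stand-in for struct rhash_head */
	struct node *next;
};

struct params {
	size_t head_offset;   /* where the hash node lives in the object */
	size_t key_offset;    /* where the key lives in the object */
	size_t key_len;
};

struct toy_sock {             /* stand-in for struct netlink_sock */
	uint32_t portid;
	int other_state;
	struct node node;
};

/* Given a node pointer, recover the containing object (container_of style). */
static const void *obj_from_node(const struct node *n, const struct params *p)
{
	return (const char *)n - p->head_offset;
}

static const void *key_of(const void *obj, const struct params *p)
{
	return (const char *)obj + p->key_offset;
}

int main(void)
{
	const struct params p = {
		.head_offset = offsetof(struct toy_sock, node),
		.key_offset  = offsetof(struct toy_sock, portid),
		.key_len     = sizeof(uint32_t),
	};
	struct toy_sock sk = { .portid = 4242, .other_state = 7 };
	const struct node *n = &sk.node;
	uint32_t key;

	memcpy(&key, key_of(obj_from_node(n, &p), &p), p.key_len);
	printf("recovered portid %u\n", key);   /* prints 4242 */
	return 0;
}
```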