2 files changed, 22 insertions, 8 deletions
|
|
@@ -421,6 +421,16 @@ static int kernfs_file_mmap(struct file *file, struct vm_area_struct *vma) |
421 | const struct kernfs_ops *ops; |
421 | const struct kernfs_ops *ops; |
422 | int rc; |
422 | int rc; |
423 | |
423 | |
| |
424 | /* |
| |
425 | * mmap path and of->mutex are prone to triggering spurious lockdep |
| |
426 | * warnings and we don't want to add spurious locking dependency |
| |
427 | * between the two. Check whether mmap is actually implemented |
| |
428 | * without grabbing @of->mutex by testing HAS_MMAP flag. See the |
| |
429 | * comment in kernfs_file_open() for more details. |
| |
430 | */ |
| |
431 | if (!(of->sd->s_flags & SYSFS_FLAG_HAS_MMAP)) |
| |
432 | return -ENODEV; |
| |
433 | |
424 | mutex_lock(&of->mutex); |
434 | mutex_lock(&of->mutex); |
425 | |
435 | |
426 | rc = -ENODEV; |
436 | rc = -ENODEV; |
@@ -428,10 +438,7 @@ static int kernfs_file_mmap(struct file *file, struct vm_area_struct *vma) |
428 | goto out_unlock; |
438 | goto out_unlock; |
429 | |
439 | |
430 | ops = kernfs_ops(of->sd); |
440 | ops = kernfs_ops(of->sd); |
431 | if (ops->mmap) |
441 | rc = ops->mmap(of, vma); |
432 | rc = ops->mmap(of, vma); |
| |
433 | if (rc) |
| |
434 | goto out_put; |
| |
435 | |
442 | |
436 | /* |
443 | /* |
437 | * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() |
444 | * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() |
@@ -596,6 +603,9 @@ static int kernfs_file_open(struct inode *inode, struct file *file) |
596 | * happen on the same file. At this point, we can't easily give |
603 | * happen on the same file. At this point, we can't easily give |
597 | * each file a separate locking class. Let's differentiate on |
604 | * each file a separate locking class. Let's differentiate on |
598 | * whether the file has mmap or not for now. |
605 | * whether the file has mmap or not for now. |
| |
606 | * |
| |
607 | * Both paths of the branch look the same. They're supposed to |
| |
608 | * look that way and give @of->mutex different static lockdep keys. |
599 | */ |
609 | */ |
600 | if (has_mmap) |
610 | if (has_mmap) |
601 | mutex_init(&of->mutex); |
611 | mutex_init(&of->mutex); |
|
|
|
@@ -142,9 +142,6 @@ static int sysfs_kf_bin_mmap(struct sysfs_open_file *of, |
142 | struct bin_attribute *battr = of->sd->priv; |
142 | struct bin_attribute *battr = of->sd->priv; |
143 | struct kobject *kobj = of->sd->s_parent->priv; |
143 | struct kobject *kobj = of->sd->s_parent->priv; |
144 | |
144 | |
145 | if (!battr->mmap) |
| |
146 | return -ENODEV; |
| |
147 | |
| |
148 | return battr->mmap(of->file, kobj, battr, vma); |
145 | return battr->mmap(of->file, kobj, battr, vma); |
149 | } |
146 | } |
150 | |
147 | |
@@ -197,6 +194,11 @@ static const struct kernfs_ops sysfs_bin_kfops_wo = { |
197 | static const struct kernfs_ops sysfs_bin_kfops_rw = { |
194 | static const struct kernfs_ops sysfs_bin_kfops_rw = { |
198 | .read = sysfs_kf_bin_read, |
195 | .read = sysfs_kf_bin_read, |
199 | .write = sysfs_kf_bin_write, |
196 | .write = sysfs_kf_bin_write, |
| |
197 | }; |
| |
198 | |
| |
199 | static const struct kernfs_ops sysfs_bin_kfops_mmap = { |
| |
200 | .read = sysfs_kf_bin_read, |
| |
201 | .write = sysfs_kf_bin_write, |
200 | .mmap = sysfs_kf_bin_mmap, |
202 | .mmap = sysfs_kf_bin_mmap, |
201 | }; |
203 | }; |
202 | |
204 | |
@@ -232,7 +234,9 @@ int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd, |
232 | } else { |
234 | } else { |
233 | struct bin_attribute *battr = (void *)attr; |
235 | struct bin_attribute *battr = (void *)attr; |
234 | |
236 | |
235 | if ((battr->read && battr->write) || battr->mmap) |
237 | if (battr->mmap) |
| |
238 | ops = &sysfs_bin_kfops_mmap; |
| |
239 | else if (battr->read && battr->write) |
236 | ops = &sysfs_bin_kfops_rw; |
240 | ops = &sysfs_bin_kfops_rw; |
237 | else if (battr->read) |
241 | else if (battr->read) |
238 | ops = &sysfs_bin_kfops_ro; |
242 | ops = &sysfs_bin_kfops_ro; |
|
id='n933' href='#n933'>933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
/*
* UDP over IPv6
* Linux INET6 implementation
*
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
* Based on linux/ipv4/udp.c
*
* Fixes:
* Hideaki YOSHIFUJI : sin6_scope_id support
* YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
* Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
* a single port at the same time.
* Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data
* YOSHIFUJI Hideaki @USAGI: convert /proc/net/udp6 to seq_file.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/raw.h>
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include "udp_impl.h"
int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
{
const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
__be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr;
__be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
int sk_ipv6only = ipv6_only_sock(sk);
int sk2_ipv6only = inet_v6_ipv6only(sk2);
int addr_type = ipv6_addr_type(sk_rcv_saddr6);
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
/* if both are mapped, treat as IPv4 */
if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
return (!sk2_ipv6only &&
(!sk1_rcv_saddr || !sk2_rcv_saddr ||
sk1_rcv_saddr == sk2_rcv_saddr));
if (addr_type2 == IPV6_ADDR_ANY &&
!(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
return 1;
if (addr_type == IPV6_ADDR_ANY &&
!(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
return 1;
if (sk2_rcv_saddr6 &&
ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
return 1;
return 0;
}
static unsigned int udp6_portaddr_hash(struct net *net,
const struct in6_addr *addr6,
unsigned int port)
{
unsigned int hash, mix = net_hash_mix(net);
if (ipv6_addr_any(addr6))
hash = jhash_1word(0, mix);
else if (ipv6_addr_v4mapped(addr6))
hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
else
hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
return hash ^ port;
}
int udp_v6_get_port(struct sock *sk, unsigned short snum)
{
unsigned int hash2_nulladdr =
udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
unsigned int hash2_partial =
udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0);
/* precompute partial secondary hash */
udp_sk(sk)->udp_portaddr_hash = hash2_partial;
return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);
}
static inline int compute_score(struct sock *sk, struct net *net,
unsigned short hnum,
struct in6_addr *saddr, __be16 sport,
struct in6_addr *daddr, __be16 dport,
int dif)
{
int score = -1;
if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
sk->sk_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
score = 0;
if (inet->inet_dport) {
if (inet->inet_dport != sport)
return -1;
score++;
}
if (!ipv6_addr_any(&np->rcv_saddr)) {
if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
return -1;
score++;
}
if (!ipv6_addr_any(&np->daddr)) {
if (!ipv6_addr_equal(&np->daddr, saddr))
return -1;
score++;
}
if (sk->sk_bound_dev_if) {
if (sk->sk_bound_dev_if != dif)
return -1;
score++;
}
}
return score;
}
#define SCORE2_MAX (1 + 1 + 1)
static inline int compute_score2(struct sock *sk, struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned short hnum,
int dif)
{
int score = -1;
if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
sk->sk_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
return -1;
score = 0;
if (inet->inet_dport) {
if (inet->inet_dport != sport)
return -1;
score++;
}
if (!ipv6_addr_any(&np->daddr)) {
if (!ipv6_addr_equal(&np->daddr, saddr))
return -1;
score++;
}
if (sk->sk_bound_dev_if) {
if (sk->sk_bound_dev_if != dif)
return -1;
score++;
}
}
return score;
}
/* called with read_rcu_lock() */
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned int hnum, int dif,
struct udp_hslot *hslot2, unsigned int slot2)
{
struct sock *sk, *result;
struct hlist_nulls_node *node;
int score, badness;
begin:
result = NULL;
badness = -1;
udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
score = compute_score2(sk, net, saddr, sport,
daddr, hnum, dif);
if (score > badness) {
result = sk;
badness = score;
if (score == SCORE2_MAX)
goto exact_match;
}
}
/*
* if the nulls value we got at the end of this lookup is
* not the expected one, we must restart lookup.
* We probably met an item that was moved to another chain.
*/
if (get_nulls_value(node) != slot2)
goto begin;
if (result) {
exact_match:
if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
result = NULL;
else if (unlikely(compute_score2(result, net, saddr, sport,
daddr, hnum, dif) < badness)) {
sock_put(result);
goto begin;
}
}
return result;
}
static struct sock *__udp6_lib_lookup(struct net *net,
struct in6_addr *saddr, __be16 sport,
struct in6_addr *daddr, __be16 dport,
int dif, struct udp_table *udptable)
{
struct sock *sk, *result;
struct hlist_nulls_node *node;
unsigned short hnum = ntohs(dport);
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
int score, badness;
rcu_read_lock();
if (hslot->count > 10) {
hash2 = udp6_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
if (hslot->count < hslot2->count)
goto begin;
result = udp6_lib_lookup2(net, saddr, sport,
daddr, hnum, dif,
hslot2, slot2);
if (!result) {
hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
if (hslot->count < hslot2->count)
goto begin;
result = udp6_lib_lookup2(net, saddr, sport,
&in6addr_any, hnum, dif,
hslot2, slot2);
}
rcu_read_unlock();
return result;
}
begin:
result = NULL;
badness = -1;
sk_nulls_for_each_rcu(sk, node, &hslot->head) {
score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
if (score > badness) {
result = sk;
badness = score;
}
}
/*
* if the nulls value we got at the end of this lookup is
* not the expected one, we must restart lookup.
* We probably met an item that was moved to another chain.
*/
if (get_nulls_value(node) != slot)
goto begin;
if (result) {
if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
result = NULL;
else if (unlikely(compute_score(result, net, hnum, saddr, sport,
daddr, dport, dif) < badness)) {
sock_put(result);
goto begin;
}
}
rcu_read_unlock();
return result;
}
static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport,
struct udp_table *udptable)
{
struct sock *sk;
struct ipv6hdr *iph = ipv6_hdr(skb);
if (unlikely(sk = skb_steal_sock(skb)))
return sk;
return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport,
&iph->daddr, dport, inet6_iif(skb),
udptable);
}
/*
* This should be easy, if there is something there we
* return it, otherwise we block.
*/
int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len,
int noblock, int flags, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
unsigned int ulen;
int peeked;
int err;
int is_udplite = IS_UDPLITE(sk);
int is_udp4;
if (addr_len)
*addr_len=sizeof(struct sockaddr_in6);
if (flags & MSG_ERRQUEUE)
return ipv6_recv_error(sk, msg, len);
if (np->rxpmtu && np->rxopt.bits.rxpmtu)
return ipv6_recv_rxpmtu(sk, msg, len);
try_again:
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
&peeked, &err);
if (!skb)
goto out;
ulen = skb->len - sizeof(struct udphdr);
if (len > ulen)
len = ulen;
else if (len < ulen)
msg->msg_flags |= MSG_TRUNC;
is_udp4 = (skb->protocol == htons(ETH_P_IP));
/*
* If checksum is needed at all, try to do it while copying the
* data. If the data is truncated, or if we only want a partial
* coverage checksum (UDP-Lite), do it before the copy.
*/
if (len < ulen || UDP_SKB_CB(skb)->partial_cov) {
if (udp_lib_checksum_complete(skb))
goto csum_copy_err;
}
if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
msg->msg_iov,len);
else {
err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
if (err == -EINVAL)
goto csum_copy_err;
}
if (err)
goto out_free;
if (!peeked) {
if (is_udp4)
UDP_INC_STATS_USER(sock_net(sk),
UDP_MIB_INDATAGRAMS, is_udplite);
else
UDP6_INC_STATS_USER(sock_net(sk),
UDP_MIB_INDATAGRAMS, is_udplite);
}
sock_recv_ts_and_drops(msg, sk, skb);
/* Copy the address. */
if (msg->msg_name) {
struct sockaddr_in6 *sin6;
sin6 = (struct sockaddr_in6 *) msg->msg_name;
sin6->sin6_family = AF_INET6;
sin6->sin6_port = udp_hdr(skb)->source;
sin6->sin6_flowinfo = 0;
sin6->sin6_scope_id = 0;
if (is_udp4)
ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
&sin6->sin6_addr);
else {
ipv6_addr_copy(&sin6->sin6_addr,
&ipv6_hdr(skb)->saddr);
if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
sin6->sin6_scope_id = IP6CB(skb)->iif;
}
}
if (is_udp4) {
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
} else {
if (np->rxopt.all)
datagram_recv_ctl(sk, msg, skb);
}
err = len;
if (flags & MSG_TRUNC)
err = ulen;
out_free:
skb_free_datagram_locked(sk, skb);
out:
return err;
csum_copy_err:
lock_sock(sk);
if (!skb_kill_datagram(sk, skb, flags)) {
if (is_udp4)
UDP_INC_STATS_USER(sock_net(sk),
UDP_MIB_INERRORS, is_udplite);
else
UDP6_INC_STATS_USER(sock_net(sk),
UDP_MIB_INERRORS, is_udplite);
}
release_sock(sk);
if (flags & MSG_DONTWAIT)
return -EAGAIN;
goto try_again;
}
void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info,
struct udp_table *udptable)
{
struct ipv6_pinfo *np;
struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
struct in6_addr *saddr = &hdr->saddr;
struct in6_addr *daddr = &hdr->daddr;
struct udphdr *uh = (struct udphdr*)(skb->data+offset);
struct sock *sk;
int err;
sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest,
saddr, uh->source, inet6_iif(skb), udptable);
if (sk == NULL)
return;
np = inet6_sk(sk);
if (!icmpv6_err_convert(type, code, &err) && !np->recverr)
goto out;
if (sk->sk_state != TCP_ESTABLISHED && !np->recverr)
goto out;
if (np->recverr)
ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));
sk->sk_err = err;
sk->sk_error_report(sk);
out:
sock_put(sk);
}
static __inline__ void udpv6_err(struct sk_buff *skb,
struct inet6_skb_parm *opt, u8 type,
u8 code, int offset, __be32 info )
{
__udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
}
int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
int rc;
int is_udplite = IS_UDPLITE(sk);
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto drop;
/*
* UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
*/
if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
if (up->pcrlen == 0) { /* full coverage was set */
LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage"
" %d while full coverage %d requested\n",
UDP_SKB_CB(skb)->cscov, skb->len);
goto drop;
}
if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d "
"too small, need min %d\n",
UDP_SKB_CB(skb)->cscov, up->pcrlen);
goto drop;
}
}
if (sk->sk_filter) {
if (udp_lib_checksum_complete(skb))
goto drop;
}
if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
UDP6_INC_STATS_BH(sock_net(sk),
UDP_MIB_RCVBUFERRORS, is_udplite);
goto drop_no_sk_drops_inc;
}
return 0;
drop:
atomic_inc(&sk->sk_drops);
drop_no_sk_drops_inc:
UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
return -1;
}
static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
__be16 loc_port, struct in6_addr *loc_addr,
__be16 rmt_port, struct in6_addr *rmt_addr,
int dif)
{
struct hlist_nulls_node *node;
struct sock *s = sk;
unsigned short num = ntohs(loc_port);
sk_nulls_for_each_from(s, node) {
struct inet_sock *inet = inet_sk(s);
if (!net_eq(sock_net(s), net))
continue;
if (udp_sk(s)->udp_port_hash == num &&
s->sk_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(s);
if (inet->inet_dport) {
if (inet->inet_dport != rmt_port)
continue;
}
if (!ipv6_addr_any(&np->daddr) &&
!ipv6_addr_equal(&np->daddr, rmt_addr))
continue;
if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
continue;
if (!ipv6_addr_any(&np->rcv_saddr)) {
if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
continue;
}
if (!inet6_mc_check(s, loc_addr, rmt_addr))
continue;
return s;
}
}
return NULL;
}
static void flush_stack(struct sock **stack, unsigned int count,
struct sk_buff *skb, unsigned int final)
{
unsigned int i;
struct sock *sk;
struct sk_buff *skb1;
for (i = 0; i < count; i++) {
skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
sk = stack[i];
if (skb1) {
bh_lock_sock(sk);
if (!sock_owned_by_user(sk))
udpv6_queue_rcv_skb(sk, skb1);
else if (sk_add_backlog(sk, skb1)) {
kfree_skb(skb1);
bh_unlock_sock(sk);
goto drop;
}
bh_unlock_sock(sk);
continue;
}
drop:
atomic_inc(&sk->sk_drops);
UDP6_INC_STATS_BH(sock_net(sk),
UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk));
UDP6_INC_STATS_BH(sock_net(sk),
UDP_MIB_INERRORS, IS_UDPLITE(sk));
}
}
/*
* Note: called only from the BH handler context,
* so we don't need to lock the hashes.
*/
static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct in6_addr *saddr, struct in6_addr *daddr,
struct udp_table *udptable)
{
struct sock *sk, *stack[256 / sizeof(struct sock *)];
const struct udphdr *uh = udp_hdr(skb);
struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
int dif;
unsigned int i, count = 0;
spin_lock(&hslot->lock);
sk = sk_nulls_head(&hslot->head);
dif = inet6_iif(skb);
sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
while (sk) {
stack[count++] = sk;
sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
uh->source, saddr, dif);
if (unlikely(count == ARRAY_SIZE(stack))) {
if (!sk)
break;
flush_stack(stack, count, skb, ~0);
count = 0;
}
}
/*
* before releasing the lock, we must take reference on sockets
*/
for (i = 0; i < count; i++)
sock_hold(stack[i]);
spin_unlock(&hslot->lock);
if (count) {
flush_stack(stack, count, skb, count - 1);
for (i = 0; i < count; i++)
sock_put(stack[i]);
} else {
kfree_skb(skb);
}
return 0;
}
static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
int proto)
{
int err;
UDP_SKB_CB(skb)->partial_cov = 0;
UDP_SKB_CB(skb)->cscov = skb->len;
if (proto == IPPROTO_UDPLITE) {
err = udplite_checksum_init(skb, uh);
if (err)
return err;
}
if (uh->check == 0) {
/* RFC 2460 section 8.1 says that we SHOULD log
this error. Well, it is reasonable.
*/
LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
return 1;
}
if (skb->ip_summed == CHECKSUM_COMPLETE &&
!csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
skb->len, proto, skb->csum))
skb->ip_summed = CHECKSUM_UNNECESSARY;
if (!skb_csum_unnecessary(skb))
skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr,
skb->len, proto, 0));
return 0;
}
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int proto)
{
struct net *net = dev_net(skb->dev);
struct sock *sk;
struct udphdr *uh;
struct in6_addr *saddr, *daddr;
u32 ulen = 0;
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto short_packet;
saddr = &ipv6_hdr(skb)->saddr;
daddr = &ipv6_hdr(skb)->daddr;
uh = udp_hdr(skb);
ulen = ntohs(uh->len);
if (ulen > skb->len)
goto short_packet;
if (proto == IPPROTO_UDP) {
/* UDP validates ulen. */
/* Check for jumbo payload */
if (ulen == 0)
ulen = skb->len;
if (ulen < sizeof(*uh))
goto short_packet;
if (ulen < skb->len) {
if (pskb_trim_rcsum(skb, ulen))
goto short_packet;
saddr = &ipv6_hdr(skb)->saddr;
daddr = &ipv6_hdr(skb)->daddr;
uh = udp_hdr(skb);
}
}
if (udp6_csum_init(skb, uh, proto))
goto discard;
/*
* Multicast receive code
*/
if (ipv6_addr_is_multicast(daddr))
return __udp6_lib_mcast_deliver(net, skb,
saddr, daddr, udptable);
/* Unicast */
/*
* check socket cache ... must talk to Alan about his plans
* for sock caches... i'll skip this for now.
*/
sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
if (sk == NULL) {
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard;
if (udp_lib_checksum_complete(skb))
goto discard;
UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS,
proto == IPPROTO_UDPLITE);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
kfree_skb(skb);
return 0;
}
/* deliver */
bh_lock_sock(sk);
if (!sock_owned_by_user(sk))
udpv6_queue_rcv_skb(sk, skb);
else if (sk_add_backlog(sk, skb)) {
atomic_inc(&sk->sk_drops);
bh_unlock_sock(sk);
sock_put(sk);
goto discard;
}
bh_unlock_sock(sk);
sock_put(sk);
return 0;
short_packet:
LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n",
proto == IPPROTO_UDPLITE ? "-Lite" : "",
ulen, skb->len);
discard:
UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb);
return 0;
}
static __inline__ int udpv6_rcv(struct sk_buff *skb)
{
return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
}
/*
* Throw away all pending data and cancel the corking. Socket is locked.
*/
static void udp_v6_flush_pending_frames(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
if (up->pending == AF_INET)
udp_flush_pending_frames(sk);
else if (up->pending) {
up->len = 0;
up->pending = 0;
ip6_flush_pending_frames(sk);
}
}
/**
* udp6_hwcsum_outgoing - handle outgoing HW checksumming
* @sk: socket we are sending on
* @skb: sk_buff containing the filled-in UDP header
* (checksum field must be zeroed out)
*/
static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
const struct in6_addr *saddr,
const struct in6_addr *daddr, int len)
{
unsigned int offset;
struct udphdr *uh = udp_hdr(skb);
__wsum csum = 0;
if (skb_queue_len(&sk->sk_write_queue) == 1) {
/* Only one fragment on the socket. */
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
uh->check = ~csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, 0);
} else {
/*
* HW-checksum won't work as there are two or more
* fragments on the socket so that all csums of sk_buffs
* should be together
*/
offset = skb_transport_offset(skb);
skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
skb->ip_summed = CHECKSUM_NONE;
skb_queue_walk(&sk->sk_write_queue, skb) {
csum = csum_add(csum, skb->csum);
}
uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP,
csum);
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
}
}
/*
* Sending
*/
static int udp_v6_push_pending_frames(struct sock *sk)
{
struct sk_buff *skb;
struct udphdr *uh;
struct udp_sock *up = udp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
struct flowi *fl = &inet->cork.fl;
int err = 0;
int is_udplite = IS_UDPLITE(sk);
__wsum csum = 0;
/* Grab the skbuff where UDP header space exists. */
if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
goto out;
/*
* Create a UDP header
*/
uh = udp_hdr(skb);
uh->source = fl->fl_ip_sport;
uh->dest = fl->fl_ip_dport;
uh->len = htons(up->len);
uh->check = 0;
if (is_udplite)
csum = udplite_csum_outgoing(sk, skb);
else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
udp6_hwcsum_outgoing(sk, skb, &fl->fl6_src, &fl->fl6_dst,
up->len);
goto send;
} else
csum = udp_csum_outgoing(sk, skb);
/* add protocol-dependent pseudo-header */
uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst,
up->len, fl->proto, csum );
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
send:
err = ip6_push_pending_frames(sk);
if (err) {
if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
UDP6_INC_STATS_USER(sock_net(sk),
UDP_MIB_SNDBUFERRORS, is_udplite);
err = 0;
}
} else
UDP6_INC_STATS_USER(sock_net(sk),
UDP_MIB_OUTDATAGRAMS, is_udplite);
out:
up->len = 0;
up->pending = 0;
return err;
}
int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len)
{
struct ipv6_txoptions opt_space;
struct udp_sock *up = udp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
struct in6_addr *daddr, *final_p = NULL, final;
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct flowi fl;
struct dst_entry *dst;
int addr_len = msg->msg_namelen;
int ulen = len;
int hlimit = -1;
int tclass = -1;
int dontfrag = -1;
int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
int err;
int connected = 0;
int is_udplite = IS_UDPLITE(sk);
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
/* destination address check */
if (sin6) {
if (addr_len < offsetof(struct sockaddr, sa_data))
return -EINVAL;
switch (sin6->sin6_family) {
case AF_INET6:
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
daddr = &sin6->sin6_addr;
break;
case AF_INET:
goto do_udp_sendmsg;
case AF_UNSPEC:
msg->msg_name = sin6 = NULL;
msg->msg_namelen = addr_len = 0;
daddr = NULL;
break;
default:
return -EINVAL;
}
} else if (!up->pending) {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
daddr = &np->daddr;
} else
daddr = NULL;
if (daddr) {
if (ipv6_addr_v4mapped(daddr)) {
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport;
sin.sin_addr.s_addr = daddr->s6_addr32[3];
msg->msg_name = &sin;
msg->msg_namelen = sizeof(sin);
do_udp_sendmsg:
if (__ipv6_only_sock(sk))
return -ENETUNREACH;
return udp_sendmsg(iocb, sk, msg, len);
}
}
if (up->pending == AF_INET)
return udp_sendmsg(iocb, sk, msg, len);
/* Rough check on arithmetic overflow,
better check is made in ip6_append_data().
*/
if (len > INT_MAX - sizeof(struct udphdr))
return -EMSGSIZE;
if (up->pending) {
/*
* There are pending frames.
* The socket lock must be held while it's corked.
*/
lock_sock(sk);
if (likely(up->pending)) {
if (unlikely(up->pending != AF_INET6)) {
release_sock(sk);
return -EAFNOSUPPORT;
}
dst = NULL;
goto do_append_data;
}
release_sock(sk);
}
ulen += sizeof(struct udphdr);
memset(&fl, 0, sizeof(fl));
if (sin6) {
if (sin6->sin6_port == 0)
return -EINVAL;
fl.fl_ip_dport = sin6->sin6_port;
daddr = &sin6->sin6_addr;
if (np->sndflow) {
fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
if (flowlabel == NULL)
return -EINVAL;
daddr = &flowlabel->dst;
}
}
/*
* Otherwise it will be difficult to maintain
* sk->sk_dst_cache.
*/
if (sk->sk_state == TCP_ESTABLISHED &&
ipv6_addr_equal(daddr, &np->daddr))
daddr = &np->daddr;
if (addr_len >= sizeof(struct sockaddr_in6) &&
sin6->sin6_scope_id &&
ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
fl.oif = sin6->sin6_scope_id;
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
fl.fl_ip_dport = inet->inet_dport;
daddr = &np->daddr;
fl.fl6_flowlabel = np->flow_label;
connected = 1;
}
if (!fl.oif)
fl.oif = sk->sk_bound_dev_if;
if (!fl.oif)
fl.oif = np->sticky_pktinfo.ipi6_ifindex;
fl.mark = sk->sk_mark;
if (msg->msg_controllen) {
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(*opt);
err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit,
&tclass, &dontfrag);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
}
if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
if (flowlabel == NULL)
return -EINVAL;
}
if (!(opt->opt_nflen|opt->opt_flen))
opt = NULL;
connected = 0;
}
if (opt == NULL)
opt = np->opt;
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
fl.proto = sk->sk_protocol;
if (!ipv6_addr_any(daddr))
ipv6_addr_copy(&fl.fl6_dst, daddr);
else
fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
ipv6_addr_copy(&fl.fl6_src, &np->saddr);
fl.fl_ip_sport = inet->inet_sport;
/* merge ip6_build_xmit from ip6_output */
if (opt && opt->srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
ipv6_addr_copy(&final, &fl.fl6_dst);
ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
final_p = &final;
connected = 0;
}
if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) {
fl.oif = np->mcast_oif;
connected = 0;
}
security_sk_classify_flow(sk, &fl);
err = ip6_sk_dst_lookup(sk, &dst, &fl);
if (err)
goto out;
if (final_p)
ipv6_addr_copy(&fl.fl6_dst, final_p);
err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
if (err < 0) {
if (err == -EREMOTE)
err = ip6_dst_blackhole(sk, &dst, &fl);
if (err < 0)
goto out;
}
if (hlimit < 0) {
if (ipv6_addr_is_multicast(&fl.fl6_dst))
hlimit = np->mcast_hops;
else
hlimit = np->hop_limit;
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
}
if (tclass < 0)
tclass = np->tclass;
if (dontfrag < 0)
dontfrag = np->dontfrag;
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
back_from_confirm:
lock_sock(sk);
if (unlikely(up->pending)) {
/* The socket is already corked while preparing it. */
/* ... which is an evident application bug. --ANK */
release_sock(sk);
LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
err = -EINVAL;
goto out;
}
up->pending = AF_INET6;
do_append_data:
up->len += ulen;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
sizeof(struct udphdr), hlimit, tclass, opt, &fl,
(struct rt6_info*)dst,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag);
if (err)
udp_v6_flush_pending_frames(sk);
else if (!corkreq)
err = udp_v6_push_pending_frames(sk);
else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
up->pending = 0;
if (dst) {
if (connected) {
ip6_dst_store(sk, dst,
ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
&np->daddr : NULL,
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_equal(&fl.fl6_src, &np->saddr) ?
&np->saddr :
#endif
NULL);
} else {
dst_release(dst);
}