aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
authorPablo Neira Ayuso <pablo@netfilter.org>2012-09-03 09:28:30 -0400
committerPablo Neira Ayuso <pablo@netfilter.org>2012-09-03 09:34:51 -0400
commitace1fe1231bdfffd60b5e703aa5b7283fbf98dbd (patch)
tree06c7492a8f3cc65f916768616ca24c6bc7171761 /net/core
parentce9f3f31efb88841e4df98794b13dbac8c4901da (diff)
parenta2dc375e12334b3d8f787a48b2fb6172ccfb80ae (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
This merges (3f509c6 netfilter: nf_nat_sip: fix incorrect handling of EBUSY for RTCP expectation) to Patrick McHardy's IPv6 NAT changes.
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c24
-rw-r--r--net/core/fib_rules.c3
-rw-r--r--net/core/link_watch.c8
-rw-r--r--net/core/netpoll.c13
-rw-r--r--net/core/request_sock.c95
-rw-r--r--net/core/scm.c31
-rw-r--r--net/core/sock.c12
7 files changed, 152 insertions, 34 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 0640d2a859c6..b1e6d6385516 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1466,8 +1466,7 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
1466 1466
1467int call_netdevice_notifiers(unsigned long val, struct net_device *dev) 1467int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1468{ 1468{
1469 if (val != NETDEV_UNREGISTER_FINAL) 1469 ASSERT_RTNL();
1470 ASSERT_RTNL();
1471 return raw_notifier_call_chain(&netdev_chain, val, dev); 1470 return raw_notifier_call_chain(&netdev_chain, val, dev);
1472} 1471}
1473EXPORT_SYMBOL(call_netdevice_notifiers); 1472EXPORT_SYMBOL(call_netdevice_notifiers);
@@ -2185,9 +2184,7 @@ EXPORT_SYMBOL(netif_skb_features);
2185/* 2184/*
2186 * Returns true if either: 2185 * Returns true if either:
2187 * 1. skb has frag_list and the device doesn't support FRAGLIST, or 2186 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
2188 * 2. skb is fragmented and the device does not support SG, or if 2187 * 2. skb is fragmented and the device does not support SG.
2189 * at least one of fragments is in highmem and device does not
2190 * support DMA from it.
2191 */ 2188 */
2192static inline int skb_needs_linearize(struct sk_buff *skb, 2189static inline int skb_needs_linearize(struct sk_buff *skb,
2193 int features) 2190 int features)
@@ -4521,8 +4518,8 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
4521static int __dev_set_promiscuity(struct net_device *dev, int inc) 4518static int __dev_set_promiscuity(struct net_device *dev, int inc)
4522{ 4519{
4523 unsigned int old_flags = dev->flags; 4520 unsigned int old_flags = dev->flags;
4524 uid_t uid; 4521 kuid_t uid;
4525 gid_t gid; 4522 kgid_t gid;
4526 4523
4527 ASSERT_RTNL(); 4524 ASSERT_RTNL();
4528 4525
@@ -4554,7 +4551,8 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
4554 dev->name, (dev->flags & IFF_PROMISC), 4551 dev->name, (dev->flags & IFF_PROMISC),
4555 (old_flags & IFF_PROMISC), 4552 (old_flags & IFF_PROMISC),
4556 audit_get_loginuid(current), 4553 audit_get_loginuid(current),
4557 uid, gid, 4554 from_kuid(&init_user_ns, uid),
4555 from_kgid(&init_user_ns, gid),
4558 audit_get_sessionid(current)); 4556 audit_get_sessionid(current));
4559 } 4557 }
4560 4558
@@ -5649,6 +5647,8 @@ int register_netdevice(struct net_device *dev)
5649 5647
5650 set_bit(__LINK_STATE_PRESENT, &dev->state); 5648 set_bit(__LINK_STATE_PRESENT, &dev->state);
5651 5649
5650 linkwatch_init_dev(dev);
5651
5652 dev_init_scheduler(dev); 5652 dev_init_scheduler(dev);
5653 dev_hold(dev); 5653 dev_hold(dev);
5654 list_netdevice(dev); 5654 list_netdevice(dev);
@@ -5782,7 +5782,11 @@ static void netdev_wait_allrefs(struct net_device *dev)
5782 5782
5783 /* Rebroadcast unregister notification */ 5783 /* Rebroadcast unregister notification */
5784 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 5784 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5785
5786 __rtnl_unlock();
5785 rcu_barrier(); 5787 rcu_barrier();
5788 rtnl_lock();
5789
5786 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 5790 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
5787 if (test_bit(__LINK_STATE_LINKWATCH_PENDING, 5791 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
5788 &dev->state)) { 5792 &dev->state)) {
@@ -5855,7 +5859,9 @@ void netdev_run_todo(void)
5855 = list_first_entry(&list, struct net_device, todo_list); 5859 = list_first_entry(&list, struct net_device, todo_list);
5856 list_del(&dev->todo_list); 5860 list_del(&dev->todo_list);
5857 5861
5862 rtnl_lock();
5858 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); 5863 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
5864 __rtnl_unlock();
5859 5865
5860 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { 5866 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
5861 pr_err("network todo '%s' but state %d\n", 5867 pr_err("network todo '%s' but state %d\n",
@@ -6251,6 +6257,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
6251 the device is just moving and can keep their slaves up. 6257 the device is just moving and can keep their slaves up.
6252 */ 6258 */
6253 call_netdevice_notifiers(NETDEV_UNREGISTER, dev); 6259 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6260 rcu_barrier();
6261 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6254 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); 6262 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
6255 6263
6256 /* 6264 /*
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 585093755c23..ab7db83236c9 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -711,16 +711,15 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
711 struct net *net = dev_net(dev); 711 struct net *net = dev_net(dev);
712 struct fib_rules_ops *ops; 712 struct fib_rules_ops *ops;
713 713
714 ASSERT_RTNL();
714 715
715 switch (event) { 716 switch (event) {
716 case NETDEV_REGISTER: 717 case NETDEV_REGISTER:
717 ASSERT_RTNL();
718 list_for_each_entry(ops, &net->rules_ops, list) 718 list_for_each_entry(ops, &net->rules_ops, list)
719 attach_rules(&ops->rules_list, dev); 719 attach_rules(&ops->rules_list, dev);
720 break; 720 break;
721 721
722 case NETDEV_UNREGISTER: 722 case NETDEV_UNREGISTER:
723 ASSERT_RTNL();
724 list_for_each_entry(ops, &net->rules_ops, list) 723 list_for_each_entry(ops, &net->rules_ops, list)
725 detach_rules(&ops->rules_list, dev); 724 detach_rules(&ops->rules_list, dev);
726 break; 725 break;
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index c3519c6d1b16..a01922219a23 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -76,6 +76,14 @@ static void rfc2863_policy(struct net_device *dev)
76} 76}
77 77
78 78
79void linkwatch_init_dev(struct net_device *dev)
80{
81 /* Handle pre-registration link state changes */
82 if (!netif_carrier_ok(dev) || netif_dormant(dev))
83 rfc2863_policy(dev);
84}
85
86
79static bool linkwatch_urgent_event(struct net_device *dev) 87static bool linkwatch_urgent_event(struct net_device *dev)
80{ 88{
81 if (!netif_running(dev)) 89 if (!netif_running(dev))
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 346b1eb83a1f..dd67818025d1 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -168,24 +168,16 @@ static void poll_napi(struct net_device *dev)
168 struct napi_struct *napi; 168 struct napi_struct *napi;
169 int budget = 16; 169 int budget = 16;
170 170
171 WARN_ON_ONCE(!irqs_disabled());
172
173 list_for_each_entry(napi, &dev->napi_list, dev_list) { 171 list_for_each_entry(napi, &dev->napi_list, dev_list) {
174 local_irq_enable();
175 if (napi->poll_owner != smp_processor_id() && 172 if (napi->poll_owner != smp_processor_id() &&
176 spin_trylock(&napi->poll_lock)) { 173 spin_trylock(&napi->poll_lock)) {
177 rcu_read_lock_bh();
178 budget = poll_one_napi(rcu_dereference_bh(dev->npinfo), 174 budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
179 napi, budget); 175 napi, budget);
180 rcu_read_unlock_bh();
181 spin_unlock(&napi->poll_lock); 176 spin_unlock(&napi->poll_lock);
182 177
183 if (!budget) { 178 if (!budget)
184 local_irq_disable();
185 break; 179 break;
186 }
187 } 180 }
188 local_irq_disable();
189 } 181 }
190} 182}
191 183
@@ -388,6 +380,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
388 struct udphdr *udph; 380 struct udphdr *udph;
389 struct iphdr *iph; 381 struct iphdr *iph;
390 struct ethhdr *eth; 382 struct ethhdr *eth;
383 static atomic_t ip_ident;
391 384
392 udp_len = len + sizeof(*udph); 385 udp_len = len + sizeof(*udph);
393 ip_len = udp_len + sizeof(*iph); 386 ip_len = udp_len + sizeof(*iph);
@@ -423,7 +416,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
423 put_unaligned(0x45, (unsigned char *)iph); 416 put_unaligned(0x45, (unsigned char *)iph);
424 iph->tos = 0; 417 iph->tos = 0;
425 put_unaligned(htons(ip_len), &(iph->tot_len)); 418 put_unaligned(htons(ip_len), &(iph->tot_len));
426 iph->id = 0; 419 iph->id = htons(atomic_inc_return(&ip_ident));
427 iph->frag_off = 0; 420 iph->frag_off = 0;
428 iph->ttl = 64; 421 iph->ttl = 64;
429 iph->protocol = IPPROTO_UDP; 422 iph->protocol = IPPROTO_UDP;
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 9b570a6a33c5..c31d9e8668c3 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -15,6 +15,7 @@
15#include <linux/random.h> 15#include <linux/random.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/string.h> 17#include <linux/string.h>
18#include <linux/tcp.h>
18#include <linux/vmalloc.h> 19#include <linux/vmalloc.h>
19 20
20#include <net/request_sock.h> 21#include <net/request_sock.h>
@@ -130,3 +131,97 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
130 kfree(lopt); 131 kfree(lopt);
131} 132}
132 133
134/*
135 * This function is called to set a Fast Open socket's "fastopen_rsk" field
136 * to NULL when a TFO socket no longer needs to access the request_sock.
137 * This happens only after 3WHS has been either completed or aborted (e.g.,
138 * RST is received).
139 *
140 * Before TFO, a child socket is created only after 3WHS is completed,
141 * hence it never needs to access the request_sock. Things get a lot more
142 * complex with TFO. A child socket, accepted or not, has to access its
143 * request_sock for 3WHS processing, e.g., to retransmit SYN-ACK pkts,
144 * until 3WHS is either completed or aborted. Afterwards the req will stay
145 * until either the child socket is accepted, or in the rare case when the
146 * listener is closed before the child is accepted.
147 *
148 * In short, a request socket is only freed after BOTH 3WHS has completed
149 * (or aborted) and the child socket has been accepted (or listener closed).
150 * When a child socket is accepted, its corresponding req->sk is set to
151 * NULL since it's no longer needed. More importantly, "req->sk == NULL"
152 * will be used by the code below to determine if a child socket has been
153 * accepted or not, and the check is protected by the fastopenq->lock
154 * described below.
155 *
156 * Note that fastopen_rsk is only accessed from the child socket's context
157 * with its socket lock held. But a request_sock (req) can be accessed by
158 * both its child socket through fastopen_rsk, and a listener socket through
159 * icsk_accept_queue.rskq_accept_head. To protect the access a simple spin
160 * lock per listener "icsk->icsk_accept_queue.fastopenq->lock" is created.
161 * Only in the rare case when both the listener and the child locks are held,
162 * e.g., in inet_csk_listen_stop() do we not need to acquire the lock.
163 * The lock also protects other fields such as fastopenq->qlen, which is
164 * decremented by this function when fastopen_rsk is no longer needed.
165 *
166 * Note that another solution was to simply use the existing socket lock
167 * from the listener. But first, the socket lock is difficult to use. It is not
168 * a simple spin lock - one must consider sock_owned_by_user() and arrange
169 * to use sk_add_backlog() stuff. But what really makes it infeasible is the
170 * locking hierarchy violation. E.g., inet_csk_listen_stop() may try to
171 * acquire a child's lock while holding listener's socket lock. A corner
172 * case might also exist in tcp_v4_hnd_req() that will trigger this locking
173 * order.
174 *
175 * When a TFO req is created, it needs to sock_hold its listener to prevent
176 * the latter data structure from going away.
177 *
178 * This function also sets "treq->listener" to NULL and unreferences the listener
179 * socket. treq->listener is used by the listener so it is protected by the
180 * fastopenq->lock in this function.
181 */
182void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
183 bool reset)
184{
185 struct sock *lsk = tcp_rsk(req)->listener;
186 struct fastopen_queue *fastopenq =
187 inet_csk(lsk)->icsk_accept_queue.fastopenq;
188
189 BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk));
190
191 tcp_sk(sk)->fastopen_rsk = NULL;
192 spin_lock_bh(&fastopenq->lock);
193 fastopenq->qlen--;
194 tcp_rsk(req)->listener = NULL;
195 if (req->sk) /* the child socket hasn't been accepted yet */
196 goto out;
197
198 if (!reset || lsk->sk_state != TCP_LISTEN) {
199 /* If the listener has been closed don't bother with the
200 * special RST handling below.
201 */
202 spin_unlock_bh(&fastopenq->lock);
203 sock_put(lsk);
204 reqsk_free(req);
205 return;
206 }
207 /* Wait for 60secs before removing a req that has triggered RST.
208 * This is a simple defense against TFO spoofing attack - by
209 * counting the req against fastopen.max_qlen, and disabling
210 * TFO when the qlen exceeds max_qlen.
211 *
212 * For more details see CoNext'11 "TCP Fast Open" paper.
213 */
214 req->expires = jiffies + 60*HZ;
215 if (fastopenq->rskq_rst_head == NULL)
216 fastopenq->rskq_rst_head = req;
217 else
218 fastopenq->rskq_rst_tail->dl_next = req;
219
220 req->dl_next = NULL;
221 fastopenq->rskq_rst_tail = req;
222 fastopenq->qlen++;
223out:
224 spin_unlock_bh(&fastopenq->lock);
225 sock_put(lsk);
226 return;
227}
diff --git a/net/core/scm.c b/net/core/scm.c
index 040cebeed45b..6ab491d6c26f 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -45,12 +45,17 @@
45static __inline__ int scm_check_creds(struct ucred *creds) 45static __inline__ int scm_check_creds(struct ucred *creds)
46{ 46{
47 const struct cred *cred = current_cred(); 47 const struct cred *cred = current_cred();
48 kuid_t uid = make_kuid(cred->user_ns, creds->uid);
49 kgid_t gid = make_kgid(cred->user_ns, creds->gid);
50
51 if (!uid_valid(uid) || !gid_valid(gid))
52 return -EINVAL;
48 53
49 if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && 54 if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
50 ((creds->uid == cred->uid || creds->uid == cred->euid || 55 ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
51 creds->uid == cred->suid) || capable(CAP_SETUID)) && 56 uid_eq(uid, cred->suid)) || capable(CAP_SETUID)) &&
52 ((creds->gid == cred->gid || creds->gid == cred->egid || 57 ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
53 creds->gid == cred->sgid) || capable(CAP_SETGID))) { 58 gid_eq(gid, cred->sgid)) || capable(CAP_SETGID))) {
54 return 0; 59 return 0;
55 } 60 }
56 return -EPERM; 61 return -EPERM;
@@ -149,6 +154,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
149 goto error; 154 goto error;
150 break; 155 break;
151 case SCM_CREDENTIALS: 156 case SCM_CREDENTIALS:
157 {
158 kuid_t uid;
159 kgid_t gid;
152 if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred))) 160 if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
153 goto error; 161 goto error;
154 memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred)); 162 memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred));
@@ -166,22 +174,29 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
166 p->pid = pid; 174 p->pid = pid;
167 } 175 }
168 176
177 err = -EINVAL;
178 uid = make_kuid(current_user_ns(), p->creds.uid);
179 gid = make_kgid(current_user_ns(), p->creds.gid);
180 if (!uid_valid(uid) || !gid_valid(gid))
181 goto error;
182
169 if (!p->cred || 183 if (!p->cred ||
170 (p->cred->euid != p->creds.uid) || 184 !uid_eq(p->cred->euid, uid) ||
171 (p->cred->egid != p->creds.gid)) { 185 !gid_eq(p->cred->egid, gid)) {
172 struct cred *cred; 186 struct cred *cred;
173 err = -ENOMEM; 187 err = -ENOMEM;
174 cred = prepare_creds(); 188 cred = prepare_creds();
175 if (!cred) 189 if (!cred)
176 goto error; 190 goto error;
177 191
178 cred->uid = cred->euid = p->creds.uid; 192 cred->uid = cred->euid = uid;
179 cred->gid = cred->egid = p->creds.gid; 193 cred->gid = cred->egid = gid;
180 if (p->cred) 194 if (p->cred)
181 put_cred(p->cred); 195 put_cred(p->cred);
182 p->cred = cred; 196 p->cred = cred;
183 } 197 }
184 break; 198 break;
199 }
185 default: 200 default:
186 goto error; 201 goto error;
187 } 202 }
diff --git a/net/core/sock.c b/net/core/sock.c
index 8f67ced8d6a8..d765156eab65 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -868,8 +868,8 @@ void cred_to_ucred(struct pid *pid, const struct cred *cred,
868 if (cred) { 868 if (cred) {
869 struct user_namespace *current_ns = current_user_ns(); 869 struct user_namespace *current_ns = current_user_ns();
870 870
871 ucred->uid = from_kuid(current_ns, cred->euid); 871 ucred->uid = from_kuid_munged(current_ns, cred->euid);
872 ucred->gid = from_kgid(current_ns, cred->egid); 872 ucred->gid = from_kgid_munged(current_ns, cred->egid);
873 } 873 }
874} 874}
875EXPORT_SYMBOL_GPL(cred_to_ucred); 875EXPORT_SYMBOL_GPL(cred_to_ucred);
@@ -1230,7 +1230,7 @@ void sock_update_classid(struct sock *sk)
1230 rcu_read_lock(); /* doing current task, which cannot vanish. */ 1230 rcu_read_lock(); /* doing current task, which cannot vanish. */
1231 classid = task_cls_classid(current); 1231 classid = task_cls_classid(current);
1232 rcu_read_unlock(); 1232 rcu_read_unlock();
1233 if (classid && classid != sk->sk_classid) 1233 if (classid != sk->sk_classid)
1234 sk->sk_classid = classid; 1234 sk->sk_classid = classid;
1235} 1235}
1236EXPORT_SYMBOL(sock_update_classid); 1236EXPORT_SYMBOL(sock_update_classid);
@@ -1527,12 +1527,12 @@ void sock_edemux(struct sk_buff *skb)
1527} 1527}
1528EXPORT_SYMBOL(sock_edemux); 1528EXPORT_SYMBOL(sock_edemux);
1529 1529
1530int sock_i_uid(struct sock *sk) 1530kuid_t sock_i_uid(struct sock *sk)
1531{ 1531{
1532 int uid; 1532 kuid_t uid;
1533 1533
1534 read_lock_bh(&sk->sk_callback_lock); 1534 read_lock_bh(&sk->sk_callback_lock);
1535 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0; 1535 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
1536 read_unlock_bh(&sk->sk_callback_lock); 1536 read_unlock_bh(&sk->sk_callback_lock);
1537 return uid; 1537 return uid;
1538} 1538}