author	Jonathan Herman <hermanjl@cs.unc.edu>	2013-01-22 10:38:37 -0500
committer	Jonathan Herman <hermanjl@cs.unc.edu>	2013-01-22 10:38:37 -0500
commit	fcc9d2e5a6c89d22b8b773a64fb4ad21ac318446 (patch)
tree	a57612d1888735a2ec7972891b68c1ac5ec8faea /net
parent	8dea78da5cee153b8af9c07a2745f6c55057fe12 (diff)
Added missing tegra files. (HEAD, master)
Diffstat (limited to 'net')
-rw-r--r--net/802/tr.c677
-rw-r--r--net/activity_stats.c115
-rw-r--r--net/batman-adv/aggregation.c293
-rw-r--r--net/batman-adv/aggregation.h46
-rw-r--r--net/batman-adv/bat_debugfs.c359
-rw-r--r--net/batman-adv/bat_debugfs.h33
-rw-r--r--net/batman-adv/bat_sysfs.c674
-rw-r--r--net/batman-adv/bat_sysfs.h44
-rw-r--r--net/core/kmap_skb.h19
-rw-r--r--net/dsa/mv88e6060.c288
-rw-r--r--net/dsa/mv88e6123_61_65.c447
-rw-r--r--net/dsa/mv88e6131.c443
-rw-r--r--net/dsa/mv88e6xxx.c522
-rw-r--r--net/dsa/mv88e6xxx.h95
-rw-r--r--net/econet/Kconfig36
-rw-r--r--net/econet/Makefile7
-rw-r--r--net/econet/af_econet.c1170
-rw-r--r--net/ethernet/pe2.c37
-rw-r--r--net/ipv4/netfilter/ip_queue.c637
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c516
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c98
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c110
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c127
-rw-r--r--net/ipv4/netfilter/nf_nat_amanda.c85
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c779
-rw-r--r--net/ipv4/netfilter/nf_nat_ftp.c137
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c451
-rw-r--r--net/ipv4/netfilter/nf_nat_irc.c99
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_common.c125
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_dccp.c108
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_sctp.c97
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_tcp.c92
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udp.c83
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udplite.c99
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_unknown.c53
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c214
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c561
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c326
-rw-r--r--net/ipv4/netfilter/nf_nat_tftp.c51
-rw-r--r--net/ipv4/sysfs_net_ipv4.c88
-rw-r--r--net/ipv6/netfilter/ip6_queue.c638
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c527
-rw-r--r--net/mac80211/driver-trace.c9
-rw-r--r--net/mac80211/driver-trace.h1492
-rw-r--r--net/mac80211/work.c1302
-rw-r--r--net/netfilter/nfnetlink_queue.c1028
-rw-r--r--net/netfilter/xt_NOTRACK.c53
-rw-r--r--net/netfilter/xt_qtaguid.c2785
-rw-r--r--net/netfilter/xt_qtaguid_internal.h330
-rw-r--r--net/netfilter/xt_qtaguid_print.c556
-rw-r--r--net/netfilter/xt_qtaguid_print.h120
-rw-r--r--net/netfilter/xt_quota2.c381
-rw-r--r--net/rds/xlist.h80
-rw-r--r--net/tipc/log.h67
54 files changed, 19609 insertions, 0 deletions
diff --git a/net/802/tr.c b/net/802/tr.c
new file mode 100644
index 00000000000..5e20cf8a074
--- /dev/null
+++ b/net/802/tr.c
@@ -0,0 +1,677 @@
1/*
2 * NET3: Token ring device handling subroutines
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Fixes: 3 Feb 97 Paul Norton <pnorton@cts.com> Minor routing fixes.
10 * Added rif table to /proc/net/tr_rif and rif timeout to
11 * /proc/sys/net/token-ring/rif_timeout.
12 * 22 Jun 98 Paul Norton <p.norton@computer.org> Rearranged
13 * tr_header and tr_type_trans to handle passing IPX SNAP and
14 * 802.2 through the correct layers. Eliminated tr_reformat.
15 *
16 */
17
18#include <asm/uaccess.h>
19#include <asm/system.h>
20#include <linux/module.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/jiffies.h>
24#include <linux/string.h>
25#include <linux/mm.h>
26#include <linux/socket.h>
27#include <linux/in.h>
28#include <linux/inet.h>
29#include <linux/netdevice.h>
30#include <linux/trdevice.h>
31#include <linux/skbuff.h>
32#include <linux/errno.h>
33#include <linux/timer.h>
34#include <linux/net.h>
35#include <linux/proc_fs.h>
36#include <linux/seq_file.h>
37#include <linux/init.h>
38#include <linux/sysctl.h>
39#include <linux/slab.h>
40#include <net/arp.h>
41#include <net/net_namespace.h>
42
43static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev);
44static void rif_check_expire(unsigned long dummy);
45
46#define TR_SR_DEBUG 0
47
48/*
49 * Each RIF entry we learn is kept this way
50 */
51
52struct rif_cache {
53 unsigned char addr[TR_ALEN];
54 int iface;
55 __be16 rcf;
56 __be16 rseg[8];
57 struct rif_cache *next;
58 unsigned long last_used;
59 unsigned char local_ring;
60};
61
62#define RIF_TABLE_SIZE 32
63
64/*
65 * We hash the RIF cache 32 ways. We do after all have to look it
66 * up a lot.
67 */
68
69static struct rif_cache *rif_table[RIF_TABLE_SIZE];
70
71static DEFINE_SPINLOCK(rif_lock);
72
73
74/*
75 * Garbage disposal timer.
76 */
77
78static struct timer_list rif_timer;
79
80static int sysctl_tr_rif_timeout = 60*10*HZ;
81
82static inline unsigned long rif_hash(const unsigned char *addr)
83{
84 unsigned long x;
85
86 x = addr[0];
87 x = (x << 2) ^ addr[1];
88 x = (x << 2) ^ addr[2];
89 x = (x << 2) ^ addr[3];
90 x = (x << 2) ^ addr[4];
91 x = (x << 2) ^ addr[5];
92
93 x ^= x >> 8;
94
95 return x & (RIF_TABLE_SIZE - 1);
96}
97
98/*
99 * Put the headers on a token ring packet. Token ring source routing
100 * makes this a little more exciting than on ethernet.
101 */
102
103static int tr_header(struct sk_buff *skb, struct net_device *dev,
104 unsigned short type,
105 const void *daddr, const void *saddr, unsigned len)
106{
107 struct trh_hdr *trh;
108 int hdr_len;
109
110 /*
111 * Add the 802.2 SNAP header if IP as the IPv4/IPv6 code calls
112 * dev->hard_header directly.
113 */
114 if (type == ETH_P_IP || type == ETH_P_IPV6 || type == ETH_P_ARP)
115 {
116 struct trllc *trllc;
117
118 hdr_len = sizeof(struct trh_hdr) + sizeof(struct trllc);
119 trh = (struct trh_hdr *)skb_push(skb, hdr_len);
120 trllc = (struct trllc *)(trh+1);
121 trllc->dsap = trllc->ssap = EXTENDED_SAP;
122 trllc->llc = UI_CMD;
123 trllc->protid[0] = trllc->protid[1] = trllc->protid[2] = 0x00;
124 trllc->ethertype = htons(type);
125 }
126 else
127 {
128 hdr_len = sizeof(struct trh_hdr);
129 trh = (struct trh_hdr *)skb_push(skb, hdr_len);
130 }
131
132 trh->ac=AC;
133 trh->fc=LLC_FRAME;
134
135 if(saddr)
136 memcpy(trh->saddr,saddr,dev->addr_len);
137 else
138 memcpy(trh->saddr,dev->dev_addr,dev->addr_len);
139
140 /*
141 * Build the destination and then source route the frame
142 */
143
144 if(daddr)
145 {
146 memcpy(trh->daddr,daddr,dev->addr_len);
147 tr_source_route(skb, trh, dev);
148 return hdr_len;
149 }
150
151 return -hdr_len;
152}
153
154/*
155 * A neighbour discovery of some species (eg arp) has completed. We
156 * can now send the packet.
157 */
158
159static int tr_rebuild_header(struct sk_buff *skb)
160{
161 struct trh_hdr *trh=(struct trh_hdr *)skb->data;
162 struct trllc *trllc=(struct trllc *)(skb->data+sizeof(struct trh_hdr));
163 struct net_device *dev = skb->dev;
164
165 /*
166 * FIXME: We don't yet support IPv6 over token rings
167 */
168
169 if(trllc->ethertype != htons(ETH_P_IP)) {
170 printk("tr_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(trllc->ethertype));
171 return 0;
172 }
173
174#ifdef CONFIG_INET
175 if(arp_find(trh->daddr, skb)) {
176 return 1;
177 }
178 else
179#endif
180 {
181 tr_source_route(skb,trh,dev);
182 return 0;
183 }
184}
185
186/*
187 * Some of this is a bit hackish. We intercept RIF information
188 * used for source routing. We also grab IP directly and don't feed
189 * it via SNAP.
190 */
191
192__be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev)
193{
194
195 struct trh_hdr *trh;
196 struct trllc *trllc;
197 unsigned riflen=0;
198
199 skb->dev = dev;
200 skb_reset_mac_header(skb);
201 trh = tr_hdr(skb);
202
203 if(trh->saddr[0] & TR_RII)
204 riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8;
205
206 trllc = (struct trllc *)(skb->data+sizeof(struct trh_hdr)-TR_MAXRIFLEN+riflen);
207
208 skb_pull(skb,sizeof(struct trh_hdr)-TR_MAXRIFLEN+riflen);
209
210 if(*trh->daddr & 0x80)
211 {
212 if(!memcmp(trh->daddr,dev->broadcast,TR_ALEN))
213 skb->pkt_type=PACKET_BROADCAST;
214 else
215 skb->pkt_type=PACKET_MULTICAST;
216 }
217 else if ( (trh->daddr[0] & 0x01) && (trh->daddr[1] & 0x00) && (trh->daddr[2] & 0x5E))
218 {
219 skb->pkt_type=PACKET_MULTICAST;
220 }
221 else if(dev->flags & IFF_PROMISC)
222 {
223 if(memcmp(trh->daddr, dev->dev_addr, TR_ALEN))
224 skb->pkt_type=PACKET_OTHERHOST;
225 }
226
227 if ((skb->pkt_type != PACKET_BROADCAST) &&
228 (skb->pkt_type != PACKET_MULTICAST))
229 tr_add_rif_info(trh,dev) ;
230
231 /*
232 * Strip the SNAP header from ARP packets since we don't
233 * pass them through to the 802.2/SNAP layers.
234 */
235
236 if (trllc->dsap == EXTENDED_SAP &&
237 (trllc->ethertype == htons(ETH_P_IP) ||
238 trllc->ethertype == htons(ETH_P_IPV6) ||
239 trllc->ethertype == htons(ETH_P_ARP)))
240 {
241 skb_pull(skb, sizeof(struct trllc));
242 return trllc->ethertype;
243 }
244
245 return htons(ETH_P_TR_802_2);
246}
247
248/*
249 * We try to do source routing...
250 */
251
252void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,
253 struct net_device *dev)
254{
255 int slack;
256 unsigned int hash;
257 struct rif_cache *entry;
258 unsigned char *olddata;
259 unsigned long flags;
260 static const unsigned char mcast_func_addr[]
261 = {0xC0,0x00,0x00,0x04,0x00,0x00};
262
263 spin_lock_irqsave(&rif_lock, flags);
264
265 /*
266 * Broadcasts are single route as stated in RFC 1042
267 */
268 if( (!memcmp(&(trh->daddr[0]),&(dev->broadcast[0]),TR_ALEN)) ||
269 (!memcmp(&(trh->daddr[0]),&(mcast_func_addr[0]), TR_ALEN)) )
270 {
271 trh->rcf=htons((((sizeof(trh->rcf)) << 8) & TR_RCF_LEN_MASK)
272 | TR_RCF_FRAME2K | TR_RCF_LIMITED_BROADCAST);
273 trh->saddr[0]|=TR_RII;
274 }
275 else
276 {
277 hash = rif_hash(trh->daddr);
278 /*
279 * Walk the hash table and look for an entry
280 */
281 for(entry=rif_table[hash];entry && memcmp(&(entry->addr[0]),&(trh->daddr[0]),TR_ALEN);entry=entry->next);
282
283 /*
284 * If we found an entry we can route the frame.
285 */
286 if(entry)
287 {
288#if TR_SR_DEBUG
289printk("source routing for %pM\n", trh->daddr);
290#endif
291 if(!entry->local_ring && (ntohs(entry->rcf) & TR_RCF_LEN_MASK) >> 8)
292 {
293 trh->rcf=entry->rcf;
294 memcpy(&trh->rseg[0],&entry->rseg[0],8*sizeof(unsigned short));
295 trh->rcf^=htons(TR_RCF_DIR_BIT);
296 trh->rcf&=htons(0x1fff); /* Issam Chehab <ichehab@madge1.demon.co.uk> */
297
298 trh->saddr[0]|=TR_RII;
299#if TR_SR_DEBUG
300 printk("entry found with rcf %04x\n", entry->rcf);
301 }
302 else
303 {
304 printk("entry found but without rcf length, local=%02x\n", entry->local_ring);
305#endif
306 }
307 entry->last_used=jiffies;
308 }
309 else
310 {
311 /*
312 * Without the information we simply have to shout
313 * on the wire. The replies should rapidly clean this
314 * situation up.
315 */
316 trh->rcf=htons((((sizeof(trh->rcf)) << 8) & TR_RCF_LEN_MASK)
317 | TR_RCF_FRAME2K | TR_RCF_LIMITED_BROADCAST);
318 trh->saddr[0]|=TR_RII;
319#if TR_SR_DEBUG
320 printk("no entry in rif table found - broadcasting frame\n");
321#endif
322 }
323 }
324
325 /* Compress the RIF here so we don't have to do it in the driver(s) */
326 if (!(trh->saddr[0] & 0x80))
327 slack = 18;
328 else
329 slack = 18 - ((ntohs(trh->rcf) & TR_RCF_LEN_MASK)>>8);
330 olddata = skb->data;
331 spin_unlock_irqrestore(&rif_lock, flags);
332
333 skb_pull(skb, slack);
334 memmove(skb->data, olddata, sizeof(struct trh_hdr) - slack);
335}
336
337/*
338 * We have learned some new RIF information for our source
339 * routing.
340 */
341
342static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev)
343{
344 unsigned int hash, rii_p = 0;
345 unsigned long flags;
346 struct rif_cache *entry;
347 unsigned char saddr0;
348
349 spin_lock_irqsave(&rif_lock, flags);
350 saddr0 = trh->saddr[0];
351
352 /*
353 * Firstly see if the entry exists
354 */
355
356 if(trh->saddr[0] & TR_RII)
357 {
358 trh->saddr[0]&=0x7f;
359 if (((ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8) > 2)
360 {
361 rii_p = 1;
362 }
363 }
364
365 hash = rif_hash(trh->saddr);
366 for(entry=rif_table[hash];entry && memcmp(&(entry->addr[0]),&(trh->saddr[0]),TR_ALEN);entry=entry->next);
367
368 if(entry==NULL)
369 {
370#if TR_SR_DEBUG
371 printk("adding rif_entry: addr:%pM rcf:%04X\n",
372 trh->saddr, ntohs(trh->rcf));
373#endif
374 /*
375 * Allocate our new entry. A failure to allocate loses
376 * us the information. This is harmless.
377 *
378 * FIXME: We ought to keep some kind of cache size
379 * limiting and adjust the timers to suit.
380 */
381 entry=kmalloc(sizeof(struct rif_cache),GFP_ATOMIC);
382
383 if(!entry)
384 {
385 printk(KERN_DEBUG "tr.c: Couldn't malloc rif cache entry !\n");
386 spin_unlock_irqrestore(&rif_lock, flags);
387 return;
388 }
389
390 memcpy(&(entry->addr[0]),&(trh->saddr[0]),TR_ALEN);
391 entry->iface = dev->ifindex;
392 entry->next=rif_table[hash];
393 entry->last_used=jiffies;
394 rif_table[hash]=entry;
395
396 if (rii_p)
397 {
398 entry->rcf = trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK);
399 memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short));
400 entry->local_ring = 0;
401 }
402 else
403 {
404 entry->local_ring = 1;
405 }
406 }
407 else /* Y. Tahara added */
408 {
409 /*
410 * Update existing entries
411 */
412 if (!entry->local_ring)
413 if (entry->rcf != (trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK)) &&
414 !(trh->rcf & htons(TR_RCF_BROADCAST_MASK)))
415 {
416#if TR_SR_DEBUG
417printk("updating rif_entry: addr:%pM rcf:%04X\n",
418 trh->saddr, ntohs(trh->rcf));
419#endif
420 entry->rcf = trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK);
421 memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short));
422 }
423 entry->last_used=jiffies;
424 }
425 trh->saddr[0]=saddr0; /* put the routing indicator back for tcpdump */
426 spin_unlock_irqrestore(&rif_lock, flags);
427}
428
429/*
430 * Scan the cache with a timer and see what we need to throw out.
431 */
432
433static void rif_check_expire(unsigned long dummy)
434{
435 int i;
436 unsigned long flags, next_interval = jiffies + sysctl_tr_rif_timeout/2;
437
438 spin_lock_irqsave(&rif_lock, flags);
439
440 for(i =0; i < RIF_TABLE_SIZE; i++) {
441 struct rif_cache *entry, **pentry;
442
443 pentry = rif_table+i;
444 while((entry=*pentry) != NULL) {
445 unsigned long expires
446 = entry->last_used + sysctl_tr_rif_timeout;
447
448 if (time_before_eq(expires, jiffies)) {
449 *pentry = entry->next;
450 kfree(entry);
451 } else {
452 pentry = &entry->next;
453
454 if (time_before(expires, next_interval))
455 next_interval = expires;
456 }
457 }
458 }
459
460 spin_unlock_irqrestore(&rif_lock, flags);
461
462 mod_timer(&rif_timer, next_interval);
463
464}
465
466/*
467 * Generate the /proc/net information for the token ring RIF
468 * routing.
469 */
470
471#ifdef CONFIG_PROC_FS
472
473static struct rif_cache *rif_get_idx(loff_t pos)
474{
475 int i;
476 struct rif_cache *entry;
477 loff_t off = 0;
478
479 for(i = 0; i < RIF_TABLE_SIZE; i++)
480 for(entry = rif_table[i]; entry; entry = entry->next) {
481 if (off == pos)
482 return entry;
483 ++off;
484 }
485
486 return NULL;
487}
488
489static void *rif_seq_start(struct seq_file *seq, loff_t *pos)
490 __acquires(&rif_lock)
491{
492 spin_lock_irq(&rif_lock);
493
494 return *pos ? rif_get_idx(*pos - 1) : SEQ_START_TOKEN;
495}
496
497static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
498{
499 int i;
500 struct rif_cache *ent = v;
501
502 ++*pos;
503
504 if (v == SEQ_START_TOKEN) {
505 i = -1;
506 goto scan;
507 }
508
509 if (ent->next)
510 return ent->next;
511
512 i = rif_hash(ent->addr);
513 scan:
514 while (++i < RIF_TABLE_SIZE) {
515 if ((ent = rif_table[i]) != NULL)
516 return ent;
517 }
518 return NULL;
519}
520
521static void rif_seq_stop(struct seq_file *seq, void *v)
522 __releases(&rif_lock)
523{
524 spin_unlock_irq(&rif_lock);
525}
526
527static int rif_seq_show(struct seq_file *seq, void *v)
528{
529 int j, rcf_len, segment, brdgnmb;
530 struct rif_cache *entry = v;
531
532 if (v == SEQ_START_TOKEN)
533 seq_puts(seq,
534 "if TR address TTL rcf routing segments\n");
535 else {
536 struct net_device *dev = dev_get_by_index(&init_net, entry->iface);
537 long ttl = (long) (entry->last_used + sysctl_tr_rif_timeout)
538 - (long) jiffies;
539
540 seq_printf(seq, "%s %pM %7li ",
541 dev?dev->name:"?",
542 entry->addr,
543 ttl/HZ);
544
545 if (entry->local_ring)
546 seq_puts(seq, "local\n");
547 else {
548
549 seq_printf(seq, "%04X", ntohs(entry->rcf));
550 rcf_len = ((ntohs(entry->rcf) & TR_RCF_LEN_MASK)>>8)-2;
551 if (rcf_len)
552 rcf_len >>= 1;
553 for(j = 1; j < rcf_len; j++) {
554 if(j==1) {
555 segment=ntohs(entry->rseg[j-1])>>4;
556 seq_printf(seq," %03X",segment);
557 }
558
559 segment=ntohs(entry->rseg[j])>>4;
560 brdgnmb=ntohs(entry->rseg[j-1])&0x00f;
561 seq_printf(seq,"-%01X-%03X",brdgnmb,segment);
562 }
563 seq_putc(seq, '\n');
564 }
565
566 if (dev)
567 dev_put(dev);
568 }
569 return 0;
570}
571
572
573static const struct seq_operations rif_seq_ops = {
574 .start = rif_seq_start,
575 .next = rif_seq_next,
576 .stop = rif_seq_stop,
577 .show = rif_seq_show,
578};
579
580static int rif_seq_open(struct inode *inode, struct file *file)
581{
582 return seq_open(file, &rif_seq_ops);
583}
584
585static const struct file_operations rif_seq_fops = {
586 .owner = THIS_MODULE,
587 .open = rif_seq_open,
588 .read = seq_read,
589 .llseek = seq_lseek,
590 .release = seq_release,
591};
592
593#endif
594
595static const struct header_ops tr_header_ops = {
596 .create = tr_header,
597 .rebuild= tr_rebuild_header,
598};
599
600static void tr_setup(struct net_device *dev)
601{
602 /*
603 * Configure and register
604 */
605
606 dev->header_ops = &tr_header_ops;
607
608 dev->type = ARPHRD_IEEE802_TR;
609 dev->hard_header_len = TR_HLEN;
610 dev->mtu = 2000;
611 dev->addr_len = TR_ALEN;
612 dev->tx_queue_len = 100; /* Long queues on tr */
613
614 memset(dev->broadcast,0xFF, TR_ALEN);
615
616 /* New-style flags. */
617 dev->flags = IFF_BROADCAST | IFF_MULTICAST ;
618}
619
620/**
621 * alloc_trdev - Register token ring device
622 * @sizeof_priv: Size of additional driver-private structure to be allocated
623 * for this token ring device
624 *
625 * Fill in the fields of the device structure with token ring-generic values.
626 *
627 * Constructs a new net device, complete with a private data area of
628 * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for
629 * this private data area.
630 */
631struct net_device *alloc_trdev(int sizeof_priv)
632{
633 return alloc_netdev(sizeof_priv, "tr%d", tr_setup);
634}
635
636#ifdef CONFIG_SYSCTL
637static struct ctl_table tr_table[] = {
638 {
639 .procname = "rif_timeout",
640 .data = &sysctl_tr_rif_timeout,
641 .maxlen = sizeof(int),
642 .mode = 0644,
643 .proc_handler = proc_dointvec
644 },
645 { },
646};
647
648static __initdata struct ctl_path tr_path[] = {
649 { .procname = "net", },
650 { .procname = "token-ring", },
651 { }
652};
653#endif
654
655/*
656 * Called during bootup. We don't actually have to initialise
657 * too much for this.
658 */
659
660static int __init rif_init(void)
661{
662 rif_timer.expires = jiffies + sysctl_tr_rif_timeout;
663 setup_timer(&rif_timer, rif_check_expire, 0);
664 add_timer(&rif_timer);
665#ifdef CONFIG_SYSCTL
666 register_sysctl_paths(tr_path, tr_table);
667#endif
668 proc_net_fops_create(&init_net, "tr_rif", S_IRUGO, &rif_seq_fops);
669 return 0;
670}
671
672module_init(rif_init);
673
674EXPORT_SYMBOL(tr_type_trans);
675EXPORT_SYMBOL(alloc_trdev);
676
677MODULE_LICENSE("GPL");
diff --git a/net/activity_stats.c b/net/activity_stats.c
new file mode 100644
index 00000000000..8a3e9347006
--- /dev/null
+++ b/net/activity_stats.c
@@ -0,0 +1,115 @@
1/* net/activity_stats.c
2 *
3 * Copyright (C) 2010 Google, Inc.
4 *
5 * This software is licensed under the terms of the GNU General Public
6 * License version 2, as published by the Free Software Foundation, and
7 * may be copied, distributed, and modified under those terms.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * Author: Mike Chan (mike@android.com)
15 */
16
17#include <linux/proc_fs.h>
18#include <linux/suspend.h>
19#include <net/net_namespace.h>
20
21/*
22 * Track transmission rates in buckets (power of 2).
23 * 1,2,4,8...512 seconds.
24 *
25 * Buckets represent the count of network transmissions at least
26 * N seconds apart, where N is 1 << bucket index.
27 */
28#define BUCKET_MAX 10
29
30/* Track network activity frequency */
31static unsigned long activity_stats[BUCKET_MAX];
32static ktime_t last_transmit;
33static ktime_t suspend_time;
34static DEFINE_SPINLOCK(activity_lock);
35
36void activity_stats_update(void)
37{
38 int i;
39 unsigned long flags;
40 ktime_t now;
41 s64 delta;
42
43 spin_lock_irqsave(&activity_lock, flags);
44 now = ktime_get();
45 delta = ktime_to_ns(ktime_sub(now, last_transmit));
46
47 for (i = BUCKET_MAX - 1; i >= 0; i--) {
48 /*
49 * Check if the time delta between network activity is within the
50 * minimum bucket range.
51 */
52 if (delta < (1000000000ULL << i))
53 continue;
54
55 activity_stats[i]++;
56 last_transmit = now;
57 break;
58 }
59 spin_unlock_irqrestore(&activity_lock, flags);
60}
61
62static int activity_stats_read_proc(char *page, char **start, off_t off,
63 int count, int *eof, void *data)
64{
65 int i;
66 int len;
67 char *p = page;
68
69 /* Only print if offset is 0, or we have enough buffer space */
70 if (off || count < (30 * BUCKET_MAX + 22))
71 return -ENOMEM;
72
73 len = snprintf(p, count, "Min Bucket(sec) Count\n");
74 count -= len;
75 p += len;
76
77 for (i = 0; i < BUCKET_MAX; i++) {
78 len = snprintf(p, count, "%15d %lu\n", 1 << i, activity_stats[i]);
79 count -= len;
80 p += len;
81 }
82 *eof = 1;
83
84 return p - page;
85}
86
87static int activity_stats_notifier(struct notifier_block *nb,
88 unsigned long event, void *dummy)
89{
90 switch (event) {
91 case PM_SUSPEND_PREPARE:
92 suspend_time = ktime_get_real();
93 break;
94
95 case PM_POST_SUSPEND:
96 suspend_time = ktime_sub(ktime_get_real(), suspend_time);
97 last_transmit = ktime_sub(last_transmit, suspend_time);
98 }
99
100 return 0;
101}
102
103static struct notifier_block activity_stats_notifier_block = {
104 .notifier_call = activity_stats_notifier,
105};
106
107static int __init activity_stats_init(void)
108{
109 create_proc_read_entry("activity", S_IRUGO,
110 init_net.proc_net_stat, activity_stats_read_proc, NULL);
111 return register_pm_notifier(&activity_stats_notifier_block);
112}
113
114subsys_initcall(activity_stats_init);
115
diff --git a/net/batman-adv/aggregation.c b/net/batman-adv/aggregation.c
new file mode 100644
index 00000000000..69467fe71ff
--- /dev/null
+++ b/net/batman-adv/aggregation.c
@@ -0,0 +1,293 @@
1/*
2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 *
4 * Marek Lindner, Simon Wunderlich
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA
19 *
20 */
21
22#include "main.h"
23#include "translation-table.h"
24#include "aggregation.h"
25#include "send.h"
26#include "routing.h"
27#include "hard-interface.h"
28
29/* return true if new_packet can be aggregated with forw_packet */
30static bool can_aggregate_with(const struct batman_packet *new_batman_packet,
31 struct bat_priv *bat_priv,
32 int packet_len,
33 unsigned long send_time,
34 bool directlink,
35 const struct hard_iface *if_incoming,
36 const struct forw_packet *forw_packet)
37{
38 struct batman_packet *batman_packet =
39 (struct batman_packet *)forw_packet->skb->data;
40 int aggregated_bytes = forw_packet->packet_len + packet_len;
41 struct hard_iface *primary_if = NULL;
42 bool res = false;
43
44 /**
45 * we can aggregate the current packet to this aggregated packet
46 * if:
47 *
48 * - the send time is within our MAX_AGGREGATION_MS time
49 * - the resulting packet wont be bigger than
50 * MAX_AGGREGATION_BYTES
51 */
52
53 if (time_before(send_time, forw_packet->send_time) &&
54 time_after_eq(send_time + msecs_to_jiffies(MAX_AGGREGATION_MS),
55 forw_packet->send_time) &&
56 (aggregated_bytes <= MAX_AGGREGATION_BYTES)) {
57
58 /**
59 * check aggregation compatibility
60 * -> direct link packets are broadcasted on
61 * their interface only
62 * -> aggregate packet if the current packet is
63 * a "global" packet as well as the base
64 * packet
65 */
66
67 primary_if = primary_if_get_selected(bat_priv);
68 if (!primary_if)
69 goto out;
70
71 /* packets without direct link flag and high TTL
72 * are flooded through the net */
73 if ((!directlink) &&
74 (!(batman_packet->flags & DIRECTLINK)) &&
75 (batman_packet->ttl != 1) &&
76
77 /* own packets originating non-primary
78 * interfaces leave only that interface */
79 ((!forw_packet->own) ||
80 (forw_packet->if_incoming == primary_if))) {
81 res = true;
82 goto out;
83 }
84
85 /* if the incoming packet is sent via this one
86 * interface only - we still can aggregate */
87 if ((directlink) &&
88 (new_batman_packet->ttl == 1) &&
89 (forw_packet->if_incoming == if_incoming) &&
90
91 /* packets from direct neighbors or
92 * own secondary interface packets
93 * (= secondary interface packets in general) */
94 (batman_packet->flags & DIRECTLINK ||
95 (forw_packet->own &&
96 forw_packet->if_incoming != primary_if))) {
97 res = true;
98 goto out;
99 }
100 }
101
102out:
103 if (primary_if)
104 hardif_free_ref(primary_if);
105 return res;
106}
107
108/* create a new aggregated packet and add this packet to it */
109static void new_aggregated_packet(const unsigned char *packet_buff,
110 int packet_len, unsigned long send_time,
111 bool direct_link,
112 struct hard_iface *if_incoming,
113 int own_packet)
114{
115 struct bat_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
116 struct forw_packet *forw_packet_aggr;
117 unsigned char *skb_buff;
118
119 if (!atomic_inc_not_zero(&if_incoming->refcount))
120 return;
121
122 /* own packet should always be scheduled */
123 if (!own_packet) {
124 if (!atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
125 bat_dbg(DBG_BATMAN, bat_priv,
126 "batman packet queue full\n");
127 goto out;
128 }
129 }
130
131 forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC);
132 if (!forw_packet_aggr) {
133 if (!own_packet)
134 atomic_inc(&bat_priv->batman_queue_left);
135 goto out;
136 }
137
138 if ((atomic_read(&bat_priv->aggregated_ogms)) &&
139 (packet_len < MAX_AGGREGATION_BYTES))
140 forw_packet_aggr->skb = dev_alloc_skb(MAX_AGGREGATION_BYTES +
141 sizeof(struct ethhdr));
142 else
143 forw_packet_aggr->skb = dev_alloc_skb(packet_len +
144 sizeof(struct ethhdr));
145
146 if (!forw_packet_aggr->skb) {
147 if (!own_packet)
148 atomic_inc(&bat_priv->batman_queue_left);
149 kfree(forw_packet_aggr);
150 goto out;
151 }
152 skb_reserve(forw_packet_aggr->skb, sizeof(struct ethhdr));
153
154 INIT_HLIST_NODE(&forw_packet_aggr->list);
155
156 skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
157 forw_packet_aggr->packet_len = packet_len;
158 memcpy(skb_buff, packet_buff, packet_len);
159
160 forw_packet_aggr->own = own_packet;
161 forw_packet_aggr->if_incoming = if_incoming;
162 forw_packet_aggr->num_packets = 0;
163 forw_packet_aggr->direct_link_flags = NO_FLAGS;
164 forw_packet_aggr->send_time = send_time;
165
166 /* save packet direct link flag status */
167 if (direct_link)
168 forw_packet_aggr->direct_link_flags |= 1;
169
170 /* add new packet to packet list */
171 spin_lock_bh(&bat_priv->forw_bat_list_lock);
172 hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list);
173 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
174
175 /* start timer for this packet */
176 INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work,
177 send_outstanding_bat_packet);
178 queue_delayed_work(bat_event_workqueue,
179 &forw_packet_aggr->delayed_work,
180 send_time - jiffies);
181
182 return;
183out:
184 hardif_free_ref(if_incoming);
185}
186
187/* aggregate a new packet into the existing aggregation */
188static void aggregate(struct forw_packet *forw_packet_aggr,
189 const unsigned char *packet_buff, int packet_len,
190 bool direct_link)
191{
192 unsigned char *skb_buff;
193
194 skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
195 memcpy(skb_buff, packet_buff, packet_len);
196 forw_packet_aggr->packet_len += packet_len;
197 forw_packet_aggr->num_packets++;
198
199 /* save packet direct link flag status */
200 if (direct_link)
201 forw_packet_aggr->direct_link_flags |=
202 (1 << forw_packet_aggr->num_packets);
203}
204
205void add_bat_packet_to_list(struct bat_priv *bat_priv,
206 unsigned char *packet_buff, int packet_len,
207 struct hard_iface *if_incoming, int own_packet,
208 unsigned long send_time)
209{
210 /**
211 * _aggr -> pointer to the packet we want to aggregate with
212 * _pos -> pointer to the position in the queue
213 */
214 struct forw_packet *forw_packet_aggr = NULL, *forw_packet_pos = NULL;
215 struct hlist_node *tmp_node;
216 struct batman_packet *batman_packet =
217 (struct batman_packet *)packet_buff;
218 bool direct_link = batman_packet->flags & DIRECTLINK ? 1 : 0;
219
220 /* find position for the packet in the forward queue */
221 spin_lock_bh(&bat_priv->forw_bat_list_lock);
222 /* own packets are not to be aggregated */
223 if ((atomic_read(&bat_priv->aggregated_ogms)) && (!own_packet)) {
224 hlist_for_each_entry(forw_packet_pos, tmp_node,
225 &bat_priv->forw_bat_list, list) {
226 if (can_aggregate_with(batman_packet,
227 bat_priv,
228 packet_len,
229 send_time,
230 direct_link,
231 if_incoming,
232 forw_packet_pos)) {
233 forw_packet_aggr = forw_packet_pos;
234 break;
235 }
236 }
237 }
238
239 /* nothing to aggregate with - either aggregation disabled or no
240 * suitable aggregation packet found */
241 if (!forw_packet_aggr) {
242 /* the following section can run without the lock */
243 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
244
245 /**
246 * if we could not aggregate this packet with one of the others
247 * we hold it back for a while, so that it might be aggregated
248 * later on
249 */
250 if ((!own_packet) &&
251 (atomic_read(&bat_priv->aggregated_ogms)))
252 send_time += msecs_to_jiffies(MAX_AGGREGATION_MS);
253
254 new_aggregated_packet(packet_buff, packet_len,
255 send_time, direct_link,
256 if_incoming, own_packet);
257 } else {
258 aggregate(forw_packet_aggr,
259 packet_buff, packet_len,
260 direct_link);
261 spin_unlock_bh(&bat_priv->forw_bat_list_lock);
262 }
263}
264
265/* unpack the aggregated packets and process them one by one */
266void receive_aggr_bat_packet(const struct ethhdr *ethhdr,
267 unsigned char *packet_buff, int packet_len,
268 struct hard_iface *if_incoming)
269{
270 struct batman_packet *batman_packet;
271 int buff_pos = 0;
272 unsigned char *tt_buff;
273
274 batman_packet = (struct batman_packet *)packet_buff;
275
276 do {
277 /* network to host order for our 32bit seqno and the
278 orig_interval */
279 batman_packet->seqno = ntohl(batman_packet->seqno);
280 batman_packet->tt_crc = ntohs(batman_packet->tt_crc);
281
282 tt_buff = packet_buff + buff_pos + BAT_PACKET_LEN;
283
284 receive_bat_packet(ethhdr, batman_packet, tt_buff, if_incoming);
285
286 buff_pos += BAT_PACKET_LEN +
287 tt_len(batman_packet->tt_num_changes);
288
289 batman_packet = (struct batman_packet *)
290 (packet_buff + buff_pos);
291 } while (aggregated_packet(buff_pos, packet_len,
292 batman_packet->tt_num_changes));
293}
diff --git a/net/batman-adv/aggregation.h b/net/batman-adv/aggregation.h
new file mode 100644
index 00000000000..216337bb841
--- /dev/null
+++ b/net/batman-adv/aggregation.h
@@ -0,0 +1,46 @@
1/*
2 * Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
3 *
4 * Marek Lindner, Simon Wunderlich
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA
19 *
20 */
21
22#ifndef _NET_BATMAN_ADV_AGGREGATION_H_
23#define _NET_BATMAN_ADV_AGGREGATION_H_
24
25#include "main.h"
26
27/* is there another aggregated packet here? */
28static inline int aggregated_packet(int buff_pos, int packet_len,
29 int tt_num_changes)
30{
31 int next_buff_pos = buff_pos + BAT_PACKET_LEN + (tt_num_changes *
32 sizeof(struct tt_change));
33
34 return (next_buff_pos <= packet_len) &&
35 (next_buff_pos <= MAX_AGGREGATION_BYTES);
36}
37
38void add_bat_packet_to_list(struct bat_priv *bat_priv,
39 unsigned char *packet_buff, int packet_len,
40 struct hard_iface *if_incoming, int own_packet,
41 unsigned long send_time);
42void receive_aggr_bat_packet(const struct ethhdr *ethhdr,
43 unsigned char *packet_buff, int packet_len,
44 struct hard_iface *if_incoming);
45
46#endif /* _NET_BATMAN_ADV_AGGREGATION_H_ */
diff --git a/net/batman-adv/bat_debugfs.c b/net/batman-adv/bat_debugfs.c
new file mode 100644
index 00000000000..d0af9bf69e4
--- /dev/null
+++ b/net/batman-adv/bat_debugfs.c
@@ -0,0 +1,359 @@
1/*
2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 *
4 * Marek Lindner
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA
19 *
20 */
21
22#include "main.h"
23
24#include <linux/debugfs.h>
25
26#include "bat_debugfs.h"
27#include "translation-table.h"
28#include "originator.h"
29#include "hard-interface.h"
30#include "gateway_common.h"
31#include "gateway_client.h"
32#include "soft-interface.h"
33#include "vis.h"
34#include "icmp_socket.h"
35
36static struct dentry *bat_debugfs;
37
38#ifdef CONFIG_BATMAN_ADV_DEBUG
39#define LOG_BUFF_MASK (log_buff_len-1)
40#define LOG_BUFF(idx) (debug_log->log_buff[(idx) & LOG_BUFF_MASK])
41
42static int log_buff_len = LOG_BUF_LEN;
43
44static void emit_log_char(struct debug_log *debug_log, char c)
45{
46 LOG_BUFF(debug_log->log_end) = c;
47 debug_log->log_end++;
48
49 if (debug_log->log_end - debug_log->log_start > log_buff_len)
50 debug_log->log_start = debug_log->log_end - log_buff_len;
51}
52
53__printf(2, 3)
54static int fdebug_log(struct debug_log *debug_log, const char *fmt, ...)
55{
56 va_list args;
57 static char debug_log_buf[256];
58 char *p;
59
60 if (!debug_log)
61 return 0;
62
63 spin_lock_bh(&debug_log->lock);
64 va_start(args, fmt);
65 vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args);
66 va_end(args);
67
68 for (p = debug_log_buf; *p != 0; p++)
69 emit_log_char(debug_log, *p);
70
71 spin_unlock_bh(&debug_log->lock);
72
73 wake_up(&debug_log->queue_wait);
74
75 return 0;
76}
77
78int debug_log(struct bat_priv *bat_priv, const char *fmt, ...)
79{
80 va_list args;
81 char tmp_log_buf[256];
82
83 va_start(args, fmt);
84 vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args);
85 fdebug_log(bat_priv->debug_log, "[%10lu] %s",
86 (jiffies / HZ), tmp_log_buf);
87 va_end(args);
88
89 return 0;
90}
91
92static int log_open(struct inode *inode, struct file *file)
93{
94 nonseekable_open(inode, file);
95 file->private_data = inode->i_private;
96 inc_module_count();
97 return 0;
98}
99
100static int log_release(struct inode *inode, struct file *file)
101{
102 dec_module_count();
103 return 0;
104}
105
106static ssize_t log_read(struct file *file, char __user *buf,
107 size_t count, loff_t *ppos)
108{
109 struct bat_priv *bat_priv = file->private_data;
110 struct debug_log *debug_log = bat_priv->debug_log;
111 int error, i = 0;
112 char c;
113
114 if ((file->f_flags & O_NONBLOCK) &&
115 !(debug_log->log_end - debug_log->log_start))
116 return -EAGAIN;
117
118 if (!buf)
119 return -EINVAL;
120
121 if (count == 0)
122 return 0;
123
124 if (!access_ok(VERIFY_WRITE, buf, count))
125 return -EFAULT;
126
127 error = wait_event_interruptible(debug_log->queue_wait,
128 (debug_log->log_start - debug_log->log_end));
129
130 if (error)
131 return error;
132
133 spin_lock_bh(&debug_log->lock);
134
135 while ((!error) && (i < count) &&
136 (debug_log->log_start != debug_log->log_end)) {
137 c = LOG_BUFF(debug_log->log_start);
138
139 debug_log->log_start++;
140
141 spin_unlock_bh(&debug_log->lock);
142
143 error = __put_user(c, buf);
144
145 spin_lock_bh(&debug_log->lock);
146
147 buf++;
148 i++;
149
150 }
151
152 spin_unlock_bh(&debug_log->lock);
153
154 if (!error)
155 return i;
156
157 return error;
158}
159
160static unsigned int log_poll(struct file *file, poll_table *wait)
161{
162 struct bat_priv *bat_priv = file->private_data;
163 struct debug_log *debug_log = bat_priv->debug_log;
164
165 poll_wait(file, &debug_log->queue_wait, wait);
166
167 if (debug_log->log_end - debug_log->log_start)
168 return POLLIN | POLLRDNORM;
169
170 return 0;
171}
172
173static const struct file_operations log_fops = {
174 .open = log_open,
175 .release = log_release,
176 .read = log_read,
177 .poll = log_poll,
178 .llseek = no_llseek,
179};
180
181static int debug_log_setup(struct bat_priv *bat_priv)
182{
183 struct dentry *d;
184
185 if (!bat_priv->debug_dir)
186 goto err;
187
188 bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC);
189 if (!bat_priv->debug_log)
190 goto err;
191
192 spin_lock_init(&bat_priv->debug_log->lock);
193 init_waitqueue_head(&bat_priv->debug_log->queue_wait);
194
195 d = debugfs_create_file("log", S_IFREG | S_IRUSR,
196 bat_priv->debug_dir, bat_priv, &log_fops);
197 if (!d)
198 goto err;
199
200 return 0;
201
202err:
203 return 1;
204}
205
206static void debug_log_cleanup(struct bat_priv *bat_priv)
207{
208 kfree(bat_priv->debug_log);
209 bat_priv->debug_log = NULL;
210}
211#else /* CONFIG_BATMAN_ADV_DEBUG */
212static int debug_log_setup(struct bat_priv *bat_priv)
213{
214 bat_priv->debug_log = NULL;
215 return 0;
216}
217
218static void debug_log_cleanup(struct bat_priv *bat_priv)
219{
220 return;
221}
222#endif
223
224static int originators_open(struct inode *inode, struct file *file)
225{
226 struct net_device *net_dev = (struct net_device *)inode->i_private;
227 return single_open(file, orig_seq_print_text, net_dev);
228}
229
230static int gateways_open(struct inode *inode, struct file *file)
231{
232 struct net_device *net_dev = (struct net_device *)inode->i_private;
233 return single_open(file, gw_client_seq_print_text, net_dev);
234}
235
236static int softif_neigh_open(struct inode *inode, struct file *file)
237{
238 struct net_device *net_dev = (struct net_device *)inode->i_private;
239 return single_open(file, softif_neigh_seq_print_text, net_dev);
240}
241
242static int transtable_global_open(struct inode *inode, struct file *file)
243{
244 struct net_device *net_dev = (struct net_device *)inode->i_private;
245 return single_open(file, tt_global_seq_print_text, net_dev);
246}
247
248static int transtable_local_open(struct inode *inode, struct file *file)
249{
250 struct net_device *net_dev = (struct net_device *)inode->i_private;
251 return single_open(file, tt_local_seq_print_text, net_dev);
252}
253
254static int vis_data_open(struct inode *inode, struct file *file)
255{
256 struct net_device *net_dev = (struct net_device *)inode->i_private;
257 return single_open(file, vis_seq_print_text, net_dev);
258}
259
260struct bat_debuginfo {
261 struct attribute attr;
262 const struct file_operations fops;
263};
264
265#define BAT_DEBUGINFO(_name, _mode, _open) \
266struct bat_debuginfo bat_debuginfo_##_name = { \
267 .attr = { .name = __stringify(_name), \
268 .mode = _mode, }, \
269 .fops = { .owner = THIS_MODULE, \
270 .open = _open, \
271 .read = seq_read, \
272 .llseek = seq_lseek, \
273 .release = single_release, \
274 } \
275};
276
277static BAT_DEBUGINFO(originators, S_IRUGO, originators_open);
278static BAT_DEBUGINFO(gateways, S_IRUGO, gateways_open);
279static BAT_DEBUGINFO(softif_neigh, S_IRUGO, softif_neigh_open);
280static BAT_DEBUGINFO(transtable_global, S_IRUGO, transtable_global_open);
281static BAT_DEBUGINFO(transtable_local, S_IRUGO, transtable_local_open);
282static BAT_DEBUGINFO(vis_data, S_IRUGO, vis_data_open);
283
284static struct bat_debuginfo *mesh_debuginfos[] = {
285 &bat_debuginfo_originators,
286 &bat_debuginfo_gateways,
287 &bat_debuginfo_softif_neigh,
288 &bat_debuginfo_transtable_global,
289 &bat_debuginfo_transtable_local,
290 &bat_debuginfo_vis_data,
291 NULL,
292};
293
294void debugfs_init(void)
295{
296 bat_debugfs = debugfs_create_dir(DEBUGFS_BAT_SUBDIR, NULL);
297 if (bat_debugfs == ERR_PTR(-ENODEV))
298 bat_debugfs = NULL;
299}
300
301void debugfs_destroy(void)
302{
303 if (bat_debugfs) {
304 debugfs_remove_recursive(bat_debugfs);
305 bat_debugfs = NULL;
306 }
307}
308
309int debugfs_add_meshif(struct net_device *dev)
310{
311 struct bat_priv *bat_priv = netdev_priv(dev);
312 struct bat_debuginfo **bat_debug;
313 struct dentry *file;
314
315 if (!bat_debugfs)
316 goto out;
317
318 bat_priv->debug_dir = debugfs_create_dir(dev->name, bat_debugfs);
319 if (!bat_priv->debug_dir)
320 goto out;
321
322 bat_socket_setup(bat_priv);
323 debug_log_setup(bat_priv);
324
325 for (bat_debug = mesh_debuginfos; *bat_debug; ++bat_debug) {
326 file = debugfs_create_file(((*bat_debug)->attr).name,
327 S_IFREG | ((*bat_debug)->attr).mode,
328 bat_priv->debug_dir,
329 dev, &(*bat_debug)->fops);
330 if (!file) {
331 bat_err(dev, "Can't add debugfs file: %s/%s\n",
332 dev->name, ((*bat_debug)->attr).name);
333 goto rem_attr;
334 }
335 }
336
337 return 0;
338rem_attr:
339 debugfs_remove_recursive(bat_priv->debug_dir);
340 bat_priv->debug_dir = NULL;
341out:
342#ifdef CONFIG_DEBUG_FS
343 return -ENOMEM;
344#else
345 return 0;
346#endif /* CONFIG_DEBUG_FS */
347}
348
349void debugfs_del_meshif(struct net_device *dev)
350{
351 struct bat_priv *bat_priv = netdev_priv(dev);
352
353 debug_log_cleanup(bat_priv);
354
355 if (bat_debugfs) {
356 debugfs_remove_recursive(bat_priv->debug_dir);
357 bat_priv->debug_dir = NULL;
358 }
359}
diff --git a/net/batman-adv/bat_debugfs.h b/net/batman-adv/bat_debugfs.h
new file mode 100644
index 00000000000..bc9cda3f01e
--- /dev/null
+++ b/net/batman-adv/bat_debugfs.h
@@ -0,0 +1,33 @@
1/*
2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 *
4 * Marek Lindner
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA
19 *
20 */
21
22
23#ifndef _NET_BATMAN_ADV_DEBUGFS_H_
24#define _NET_BATMAN_ADV_DEBUGFS_H_
25
26#define DEBUGFS_BAT_SUBDIR "batman_adv"
27
28void debugfs_init(void);
29void debugfs_destroy(void);
30int debugfs_add_meshif(struct net_device *dev);
31void debugfs_del_meshif(struct net_device *dev);
32
33#endif /* _NET_BATMAN_ADV_DEBUGFS_H_ */
diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c
new file mode 100644
index 00000000000..cd15deba60a
--- /dev/null
+++ b/net/batman-adv/bat_sysfs.c
@@ -0,0 +1,674 @@
1/*
2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 *
4 * Marek Lindner
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA
19 *
20 */
21
22#include "main.h"
23#include "bat_sysfs.h"
24#include "translation-table.h"
25#include "originator.h"
26#include "hard-interface.h"
27#include "gateway_common.h"
28#include "gateway_client.h"
29#include "vis.h"
30
31static struct net_device *kobj_to_netdev(struct kobject *obj)
32{
33 struct device *dev = container_of(obj->parent, struct device, kobj);
34 return to_net_dev(dev);
35}
36
37static struct bat_priv *kobj_to_batpriv(struct kobject *obj)
38{
39 struct net_device *net_dev = kobj_to_netdev(obj);
40 return netdev_priv(net_dev);
41}
42
43#define UEV_TYPE_VAR "BATTYPE="
44#define UEV_ACTION_VAR "BATACTION="
45#define UEV_DATA_VAR "BATDATA="
46
47static char *uev_action_str[] = {
48 "add",
49 "del",
50 "change"
51};
52
53static char *uev_type_str[] = {
54 "gw"
55};
56
57/* Use this, if you have customized show and store functions */
58#define BAT_ATTR(_name, _mode, _show, _store) \
59struct bat_attribute bat_attr_##_name = { \
60 .attr = {.name = __stringify(_name), \
61 .mode = _mode }, \
62 .show = _show, \
63 .store = _store, \
64};
65
66#define BAT_ATTR_STORE_BOOL(_name, _post_func) \
67ssize_t store_##_name(struct kobject *kobj, struct attribute *attr, \
68 char *buff, size_t count) \
69{ \
70 struct net_device *net_dev = kobj_to_netdev(kobj); \
71 struct bat_priv *bat_priv = netdev_priv(net_dev); \
72 return __store_bool_attr(buff, count, _post_func, attr, \
73 &bat_priv->_name, net_dev); \
74}
75
76#define BAT_ATTR_SHOW_BOOL(_name) \
77ssize_t show_##_name(struct kobject *kobj, struct attribute *attr, \
78 char *buff) \
79{ \
80 struct bat_priv *bat_priv = kobj_to_batpriv(kobj); \
81 return sprintf(buff, "%s\n", \
82 atomic_read(&bat_priv->_name) == 0 ? \
83 "disabled" : "enabled"); \
84} \
85
86/* Use this, if you are going to turn a [name] in bat_priv on or off */
87#define BAT_ATTR_BOOL(_name, _mode, _post_func) \
88 static BAT_ATTR_STORE_BOOL(_name, _post_func) \
89 static BAT_ATTR_SHOW_BOOL(_name) \
90 static BAT_ATTR(_name, _mode, show_##_name, store_##_name)
91
92
93#define BAT_ATTR_STORE_UINT(_name, _min, _max, _post_func) \
94ssize_t store_##_name(struct kobject *kobj, struct attribute *attr, \
95 char *buff, size_t count) \
96{ \
97 struct net_device *net_dev = kobj_to_netdev(kobj); \
98 struct bat_priv *bat_priv = netdev_priv(net_dev); \
99 return __store_uint_attr(buff, count, _min, _max, _post_func, \
100 attr, &bat_priv->_name, net_dev); \
101}
102
103#define BAT_ATTR_SHOW_UINT(_name) \
104ssize_t show_##_name(struct kobject *kobj, struct attribute *attr, \
105 char *buff) \
106{ \
107 struct bat_priv *bat_priv = kobj_to_batpriv(kobj); \
108 return sprintf(buff, "%i\n", atomic_read(&bat_priv->_name)); \
109} \
110
111/* Use this, if you are going to set [name] in bat_priv to unsigned integer
112 * values only */
113#define BAT_ATTR_UINT(_name, _mode, _min, _max, _post_func) \
114 static BAT_ATTR_STORE_UINT(_name, _min, _max, _post_func) \
115 static BAT_ATTR_SHOW_UINT(_name) \
116 static BAT_ATTR(_name, _mode, show_##_name, store_##_name)
117
118
119static int store_bool_attr(char *buff, size_t count,
120 struct net_device *net_dev,
121 const char *attr_name, atomic_t *attr)
122{
123 int enabled = -1;
124
125 if (buff[count - 1] == '\n')
126 buff[count - 1] = '\0';
127
128 if ((strncmp(buff, "1", 2) == 0) ||
129 (strncmp(buff, "enable", 7) == 0) ||
130 (strncmp(buff, "enabled", 8) == 0))
131 enabled = 1;
132
133 if ((strncmp(buff, "0", 2) == 0) ||
134 (strncmp(buff, "disable", 8) == 0) ||
135 (strncmp(buff, "disabled", 9) == 0))
136 enabled = 0;
137
138 if (enabled < 0) {
139 bat_info(net_dev,
140 "%s: Invalid parameter received: %s\n",
141 attr_name, buff);
142 return -EINVAL;
143 }
144
145 if (atomic_read(attr) == enabled)
146 return count;
147
148 bat_info(net_dev, "%s: Changing from: %s to: %s\n", attr_name,
149 atomic_read(attr) == 1 ? "enabled" : "disabled",
150 enabled == 1 ? "enabled" : "disabled");
151
152 atomic_set(attr, (unsigned)enabled);
153 return count;
154}
155
156static inline ssize_t __store_bool_attr(char *buff, size_t count,
157 void (*post_func)(struct net_device *),
158 struct attribute *attr,
159 atomic_t *attr_store, struct net_device *net_dev)
160{
161 int ret;
162
163 ret = store_bool_attr(buff, count, net_dev, attr->name, attr_store);
164 if (post_func && ret)
165 post_func(net_dev);
166
167 return ret;
168}
169
170static int store_uint_attr(const char *buff, size_t count,
171 struct net_device *net_dev, const char *attr_name,
172 unsigned int min, unsigned int max, atomic_t *attr)
173{
174 unsigned long uint_val;
175 int ret;
176
177 ret = strict_strtoul(buff, 10, &uint_val);
178 if (ret) {
179 bat_info(net_dev,
180 "%s: Invalid parameter received: %s\n",
181 attr_name, buff);
182 return -EINVAL;
183 }
184
185 if (uint_val < min) {
186 bat_info(net_dev, "%s: Value is too small: %lu min: %u\n",
187 attr_name, uint_val, min);
188 return -EINVAL;
189 }
190
191 if (uint_val > max) {
192 bat_info(net_dev, "%s: Value is too big: %lu max: %u\n",
193 attr_name, uint_val, max);
194 return -EINVAL;
195 }
196
197 if (atomic_read(attr) == uint_val)
198 return count;
199
200 bat_info(net_dev, "%s: Changing from: %i to: %lu\n",
201 attr_name, atomic_read(attr), uint_val);
202
203 atomic_set(attr, uint_val);
204 return count;
205}
206
207static inline ssize_t __store_uint_attr(const char *buff, size_t count,
208 int min, int max,
209 void (*post_func)(struct net_device *),
210 const struct attribute *attr,
211 atomic_t *attr_store, struct net_device *net_dev)
212{
213 int ret;
214
215 ret = store_uint_attr(buff, count, net_dev, attr->name,
216 min, max, attr_store);
217 if (post_func && ret)
218 post_func(net_dev);
219
220 return ret;
221}
222
223static ssize_t show_vis_mode(struct kobject *kobj, struct attribute *attr,
224 char *buff)
225{
226 struct bat_priv *bat_priv = kobj_to_batpriv(kobj);
227 int vis_mode = atomic_read(&bat_priv->vis_mode);
228
229 return sprintf(buff, "%s\n",
230 vis_mode == VIS_TYPE_CLIENT_UPDATE ?
231 "client" : "server");
232}
233
234static ssize_t store_vis_mode(struct kobject *kobj, struct attribute *attr,
235 char *buff, size_t count)
236{
237 struct net_device *net_dev = kobj_to_netdev(kobj);
238 struct bat_priv *bat_priv = netdev_priv(net_dev);
239 unsigned long val;
240 int ret, vis_mode_tmp = -1;
241
242 ret = strict_strtoul(buff, 10, &val);
243
244 if (((count == 2) && (!ret) && (val == VIS_TYPE_CLIENT_UPDATE)) ||
245 (strncmp(buff, "client", 6) == 0) ||
246 (strncmp(buff, "off", 3) == 0))
247 vis_mode_tmp = VIS_TYPE_CLIENT_UPDATE;
248
249 if (((count == 2) && (!ret) && (val == VIS_TYPE_SERVER_SYNC)) ||
250 (strncmp(buff, "server", 6) == 0))
251 vis_mode_tmp = VIS_TYPE_SERVER_SYNC;
252
253 if (vis_mode_tmp < 0) {
254 if (buff[count - 1] == '\n')
255 buff[count - 1] = '\0';
256
257 bat_info(net_dev,
258 "Invalid parameter for 'vis mode' setting received: "
259 "%s\n", buff);
260 return -EINVAL;
261 }
262
263 if (atomic_read(&bat_priv->vis_mode) == vis_mode_tmp)
264 return count;
265
266 bat_info(net_dev, "Changing vis mode from: %s to: %s\n",
267 atomic_read(&bat_priv->vis_mode) == VIS_TYPE_CLIENT_UPDATE ?
268 "client" : "server", vis_mode_tmp == VIS_TYPE_CLIENT_UPDATE ?
269 "client" : "server");
270
271 atomic_set(&bat_priv->vis_mode, (unsigned)vis_mode_tmp);
272 return count;
273}
274
275static void post_gw_deselect(struct net_device *net_dev)
276{
277 struct bat_priv *bat_priv = netdev_priv(net_dev);
278 gw_deselect(bat_priv);
279}
280
281static ssize_t show_gw_mode(struct kobject *kobj, struct attribute *attr,
282 char *buff)
283{
284 struct bat_priv *bat_priv = kobj_to_batpriv(kobj);
285 int bytes_written;
286
287 switch (atomic_read(&bat_priv->gw_mode)) {
288 case GW_MODE_CLIENT:
289 bytes_written = sprintf(buff, "%s\n", GW_MODE_CLIENT_NAME);
290 break;
291 case GW_MODE_SERVER:
292 bytes_written = sprintf(buff, "%s\n", GW_MODE_SERVER_NAME);
293 break;
294 default:
295 bytes_written = sprintf(buff, "%s\n", GW_MODE_OFF_NAME);
296 break;
297 }
298
299 return bytes_written;
300}
301
302static ssize_t store_gw_mode(struct kobject *kobj, struct attribute *attr,
303 char *buff, size_t count)
304{
305 struct net_device *net_dev = kobj_to_netdev(kobj);
306 struct bat_priv *bat_priv = netdev_priv(net_dev);
307 char *curr_gw_mode_str;
308 int gw_mode_tmp = -1;
309
310 if (buff[count - 1] == '\n')
311 buff[count - 1] = '\0';
312
313 if (strncmp(buff, GW_MODE_OFF_NAME, strlen(GW_MODE_OFF_NAME)) == 0)
314 gw_mode_tmp = GW_MODE_OFF;
315
316 if (strncmp(buff, GW_MODE_CLIENT_NAME,
317 strlen(GW_MODE_CLIENT_NAME)) == 0)
318 gw_mode_tmp = GW_MODE_CLIENT;
319
320 if (strncmp(buff, GW_MODE_SERVER_NAME,
321 strlen(GW_MODE_SERVER_NAME)) == 0)
322 gw_mode_tmp = GW_MODE_SERVER;
323
324 if (gw_mode_tmp < 0) {
325 bat_info(net_dev,
326 "Invalid parameter for 'gw mode' setting received: "
327 "%s\n", buff);
328 return -EINVAL;
329 }
330
331 if (atomic_read(&bat_priv->gw_mode) == gw_mode_tmp)
332 return count;
333
334 switch (atomic_read(&bat_priv->gw_mode)) {
335 case GW_MODE_CLIENT:
336 curr_gw_mode_str = GW_MODE_CLIENT_NAME;
337 break;
338 case GW_MODE_SERVER:
339 curr_gw_mode_str = GW_MODE_SERVER_NAME;
340 break;
341 default:
342 curr_gw_mode_str = GW_MODE_OFF_NAME;
343 break;
344 }
345
346 bat_info(net_dev, "Changing gw mode from: %s to: %s\n",
347 curr_gw_mode_str, buff);
348
349 gw_deselect(bat_priv);
350 atomic_set(&bat_priv->gw_mode, (unsigned)gw_mode_tmp);
351 return count;
352}
353
354static ssize_t show_gw_bwidth(struct kobject *kobj, struct attribute *attr,
355 char *buff)
356{
357 struct bat_priv *bat_priv = kobj_to_batpriv(kobj);
358 int down, up;
359 int gw_bandwidth = atomic_read(&bat_priv->gw_bandwidth);
360
361 gw_bandwidth_to_kbit(gw_bandwidth, &down, &up);
362 return sprintf(buff, "%i%s/%i%s\n",
363 (down > 2048 ? down / 1024 : down),
364 (down > 2048 ? "MBit" : "KBit"),
365 (up > 2048 ? up / 1024 : up),
366 (up > 2048 ? "MBit" : "KBit"));
367}
368
369static ssize_t store_gw_bwidth(struct kobject *kobj, struct attribute *attr,
370 char *buff, size_t count)
371{
372 struct net_device *net_dev = kobj_to_netdev(kobj);
373
374 if (buff[count - 1] == '\n')
375 buff[count - 1] = '\0';
376
377 return gw_bandwidth_set(net_dev, buff, count);
378}
379
380BAT_ATTR_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL);
381BAT_ATTR_BOOL(bonding, S_IRUGO | S_IWUSR, NULL);
382BAT_ATTR_BOOL(fragmentation, S_IRUGO | S_IWUSR, update_min_mtu);
383static BAT_ATTR(vis_mode, S_IRUGO | S_IWUSR, show_vis_mode, store_vis_mode);
384static BAT_ATTR(gw_mode, S_IRUGO | S_IWUSR, show_gw_mode, store_gw_mode);
385BAT_ATTR_UINT(orig_interval, S_IRUGO | S_IWUSR, 2 * JITTER, INT_MAX, NULL);
386BAT_ATTR_UINT(hop_penalty, S_IRUGO | S_IWUSR, 0, TQ_MAX_VALUE, NULL);
387BAT_ATTR_UINT(gw_sel_class, S_IRUGO | S_IWUSR, 1, TQ_MAX_VALUE,
388 post_gw_deselect);
389static BAT_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, show_gw_bwidth,
390 store_gw_bwidth);
391#ifdef CONFIG_BATMAN_ADV_DEBUG
392BAT_ATTR_UINT(log_level, S_IRUGO | S_IWUSR, 0, 7, NULL);
393#endif
394
395static struct bat_attribute *mesh_attrs[] = {
396 &bat_attr_aggregated_ogms,
397 &bat_attr_bonding,
398 &bat_attr_fragmentation,
399 &bat_attr_vis_mode,
400 &bat_attr_gw_mode,
401 &bat_attr_orig_interval,
402 &bat_attr_hop_penalty,
403 &bat_attr_gw_sel_class,
404 &bat_attr_gw_bandwidth,
405#ifdef CONFIG_BATMAN_ADV_DEBUG
406 &bat_attr_log_level,
407#endif
408 NULL,
409};
410
411int sysfs_add_meshif(struct net_device *dev)
412{
413 struct kobject *batif_kobject = &dev->dev.kobj;
414 struct bat_priv *bat_priv = netdev_priv(dev);
415 struct bat_attribute **bat_attr;
416 int err;
417
418 bat_priv->mesh_obj = kobject_create_and_add(SYSFS_IF_MESH_SUBDIR,
419 batif_kobject);
420 if (!bat_priv->mesh_obj) {
421 bat_err(dev, "Can't add sysfs directory: %s/%s\n", dev->name,
422 SYSFS_IF_MESH_SUBDIR);
423 goto out;
424 }
425
426 for (bat_attr = mesh_attrs; *bat_attr; ++bat_attr) {
427 err = sysfs_create_file(bat_priv->mesh_obj,
428 &((*bat_attr)->attr));
429 if (err) {
430 bat_err(dev, "Can't add sysfs file: %s/%s/%s\n",
431 dev->name, SYSFS_IF_MESH_SUBDIR,
432 ((*bat_attr)->attr).name);
433 goto rem_attr;
434 }
435 }
436
437 return 0;
438
439rem_attr:
440 for (bat_attr = mesh_attrs; *bat_attr; ++bat_attr)
441 sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
442
443 kobject_put(bat_priv->mesh_obj);
444 bat_priv->mesh_obj = NULL;
445out:
446 return -ENOMEM;
447}
448
449void sysfs_del_meshif(struct net_device *dev)
450{
451 struct bat_priv *bat_priv = netdev_priv(dev);
452 struct bat_attribute **bat_attr;
453
454 for (bat_attr = mesh_attrs; *bat_attr; ++bat_attr)
455 sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
456
457 kobject_put(bat_priv->mesh_obj);
458 bat_priv->mesh_obj = NULL;
459}
460
461static ssize_t show_mesh_iface(struct kobject *kobj, struct attribute *attr,
462 char *buff)
463{
464 struct net_device *net_dev = kobj_to_netdev(kobj);
465 struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
466 ssize_t length;
467
468 if (!hard_iface)
469 return 0;
470
471 length = sprintf(buff, "%s\n", hard_iface->if_status == IF_NOT_IN_USE ?
472 "none" : hard_iface->soft_iface->name);
473
474 hardif_free_ref(hard_iface);
475
476 return length;
477}
478
479static ssize_t store_mesh_iface(struct kobject *kobj, struct attribute *attr,
480 char *buff, size_t count)
481{
482 struct net_device *net_dev = kobj_to_netdev(kobj);
483 struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
484 int status_tmp = -1;
485 int ret = count;
486
487 if (!hard_iface)
488 return count;
489
490 if (buff[count - 1] == '\n')
491 buff[count - 1] = '\0';
492
493 if (strlen(buff) >= IFNAMSIZ) {
494 pr_err("Invalid parameter for 'mesh_iface' setting received: "
495 "interface name too long '%s'\n", buff);
496 hardif_free_ref(hard_iface);
497 return -EINVAL;
498 }
499
500 if (strncmp(buff, "none", 4) == 0)
501 status_tmp = IF_NOT_IN_USE;
502 else
503 status_tmp = IF_I_WANT_YOU;
504
505 if (hard_iface->if_status == status_tmp)
506 goto out;
507
508 if ((hard_iface->soft_iface) &&
509 (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0))
510 goto out;
511
512 if (!rtnl_trylock()) {
513 ret = -ERESTARTSYS;
514 goto out;
515 }
516
517 if (status_tmp == IF_NOT_IN_USE) {
518 hardif_disable_interface(hard_iface);
519 goto unlock;
520 }
521
522	/* if the interface is already in use */
523 if (hard_iface->if_status != IF_NOT_IN_USE)
524 hardif_disable_interface(hard_iface);
525
526 ret = hardif_enable_interface(hard_iface, buff);
527
528unlock:
529 rtnl_unlock();
530out:
531 hardif_free_ref(hard_iface);
532 return ret;
533}
534
535static ssize_t show_iface_status(struct kobject *kobj, struct attribute *attr,
536 char *buff)
537{
538 struct net_device *net_dev = kobj_to_netdev(kobj);
539 struct hard_iface *hard_iface = hardif_get_by_netdev(net_dev);
540 ssize_t length;
541
542 if (!hard_iface)
543 return 0;
544
545 switch (hard_iface->if_status) {
546 case IF_TO_BE_REMOVED:
547 length = sprintf(buff, "disabling\n");
548 break;
549 case IF_INACTIVE:
550 length = sprintf(buff, "inactive\n");
551 break;
552 case IF_ACTIVE:
553 length = sprintf(buff, "active\n");
554 break;
555 case IF_TO_BE_ACTIVATED:
556 length = sprintf(buff, "enabling\n");
557 break;
558 case IF_NOT_IN_USE:
559 default:
560 length = sprintf(buff, "not in use\n");
561 break;
562 }
563
564 hardif_free_ref(hard_iface);
565
566 return length;
567}
568
569static BAT_ATTR(mesh_iface, S_IRUGO | S_IWUSR,
570 show_mesh_iface, store_mesh_iface);
571static BAT_ATTR(iface_status, S_IRUGO, show_iface_status, NULL);
572
573static struct bat_attribute *batman_attrs[] = {
574 &bat_attr_mesh_iface,
575 &bat_attr_iface_status,
576 NULL,
577};
578
579int sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev)
580{
581 struct kobject *hardif_kobject = &dev->dev.kobj;
582 struct bat_attribute **bat_attr;
583 int err;
584
585 *hardif_obj = kobject_create_and_add(SYSFS_IF_BAT_SUBDIR,
586 hardif_kobject);
587
588 if (!*hardif_obj) {
589 bat_err(dev, "Can't add sysfs directory: %s/%s\n", dev->name,
590 SYSFS_IF_BAT_SUBDIR);
591 goto out;
592 }
593
594 for (bat_attr = batman_attrs; *bat_attr; ++bat_attr) {
595 err = sysfs_create_file(*hardif_obj, &((*bat_attr)->attr));
596 if (err) {
597 bat_err(dev, "Can't add sysfs file: %s/%s/%s\n",
598 dev->name, SYSFS_IF_BAT_SUBDIR,
599 ((*bat_attr)->attr).name);
600 goto rem_attr;
601 }
602 }
603
604 return 0;
605
606rem_attr:
607 for (bat_attr = batman_attrs; *bat_attr; ++bat_attr)
608 sysfs_remove_file(*hardif_obj, &((*bat_attr)->attr));
609out:
610 return -ENOMEM;
611}
612
613void sysfs_del_hardif(struct kobject **hardif_obj)
614{
615 kobject_put(*hardif_obj);
616 *hardif_obj = NULL;
617}
618
619int throw_uevent(struct bat_priv *bat_priv, enum uev_type type,
620 enum uev_action action, const char *data)
621{
622 int ret = -1;
623 struct hard_iface *primary_if = NULL;
624 struct kobject *bat_kobj;
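	/*
	 * Note: kobject_uevent_env() expects a NULL-terminated array of
	 * "KEY=value" strings, so four slots leave room for at most three
	 * environment variables plus the terminating NULL.
	 */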
625 char *uevent_env[4] = { NULL, NULL, NULL, NULL };
626
627 primary_if = primary_if_get_selected(bat_priv);
628 if (!primary_if)
629 goto out;
630
631 bat_kobj = &primary_if->soft_iface->dev.kobj;
632
633 uevent_env[0] = kmalloc(strlen(UEV_TYPE_VAR) +
634 strlen(uev_type_str[type]) + 1,
635 GFP_ATOMIC);
636 if (!uevent_env[0])
637 goto out;
638
639 sprintf(uevent_env[0], "%s%s", UEV_TYPE_VAR, uev_type_str[type]);
640
641 uevent_env[1] = kmalloc(strlen(UEV_ACTION_VAR) +
642 strlen(uev_action_str[action]) + 1,
643 GFP_ATOMIC);
644 if (!uevent_env[1])
645 goto out;
646
647 sprintf(uevent_env[1], "%s%s", UEV_ACTION_VAR, uev_action_str[action]);
648
649 /* If the event is DEL, ignore the data field */
650 if (action != UEV_DEL) {
651 uevent_env[2] = kmalloc(strlen(UEV_DATA_VAR) +
652 strlen(data) + 1, GFP_ATOMIC);
653 if (!uevent_env[2])
654 goto out;
655
656 sprintf(uevent_env[2], "%s%s", UEV_DATA_VAR, data);
657 }
658
659 ret = kobject_uevent_env(bat_kobj, KOBJ_CHANGE, uevent_env);
660out:
661 kfree(uevent_env[0]);
662 kfree(uevent_env[1]);
663 kfree(uevent_env[2]);
664
665 if (primary_if)
666 hardif_free_ref(primary_if);
667
668 if (ret)
669		bat_dbg(DBG_BATMAN, bat_priv, "Unable to send "
670 "uevent for (%s,%s,%s) event (err: %d)\n",
671 uev_type_str[type], uev_action_str[action],
672 (action == UEV_DEL ? "NULL" : data), ret);
673 return ret;
674}
diff --git a/net/batman-adv/bat_sysfs.h b/net/batman-adv/bat_sysfs.h
new file mode 100644
index 00000000000..a3f75a723c5
--- /dev/null
+++ b/net/batman-adv/bat_sysfs.h
@@ -0,0 +1,44 @@
1/*
2 * Copyright (C) 2010-2011 B.A.T.M.A.N. contributors:
3 *
4 * Marek Lindner
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of version 2 of the GNU General Public
8 * License as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA
19 *
20 */
21
22
23#ifndef _NET_BATMAN_ADV_SYSFS_H_
24#define _NET_BATMAN_ADV_SYSFS_H_
25
26#define SYSFS_IF_MESH_SUBDIR "mesh"
27#define SYSFS_IF_BAT_SUBDIR "batman_adv"
28
29struct bat_attribute {
30 struct attribute attr;
31 ssize_t (*show)(struct kobject *kobj, struct attribute *attr,
32 char *buf);
33 ssize_t (*store)(struct kobject *kobj, struct attribute *attr,
34 char *buf, size_t count);
35};
36
37int sysfs_add_meshif(struct net_device *dev);
38void sysfs_del_meshif(struct net_device *dev);
39int sysfs_add_hardif(struct kobject **hardif_obj, struct net_device *dev);
40void sysfs_del_hardif(struct kobject **hardif_obj);
41int throw_uevent(struct bat_priv *bat_priv, enum uev_type type,
42 enum uev_action action, const char *data);
43
44#endif /* _NET_BATMAN_ADV_SYSFS_H_ */
diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h
new file mode 100644
index 00000000000..283c2b993fb
--- /dev/null
+++ b/net/core/kmap_skb.h
@@ -0,0 +1,19 @@
1#include <linux/highmem.h>
2
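/*
 * Editorial note: KM_SKB_DATA_SOFTIRQ is a per-CPU atomic kmap slot that is
 * also used from softirq context, so on HIGHMEM kernels bottom halves are
 * disabled around the mapping to keep a softirq on the same CPU from reusing
 * the slot while it is held; the BUG_ON() guards against hardirq callers.
 */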
3static inline void *kmap_skb_frag(const skb_frag_t *frag)
4{
5#ifdef CONFIG_HIGHMEM
6 BUG_ON(in_irq());
7
8 local_bh_disable();
9#endif
10 return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
11}
12
13static inline void kunmap_skb_frag(void *vaddr)
14{
15 kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
16#ifdef CONFIG_HIGHMEM
17 local_bh_enable();
18#endif
19}
diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c
new file mode 100644
index 00000000000..8f4ff5a2c81
--- /dev/null
+++ b/net/dsa/mv88e6060.c
@@ -0,0 +1,288 @@
1/*
2 * net/dsa/mv88e6060.c - Driver for Marvell 88e6060 switch chips
3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 */
10
11#include <linux/list.h>
12#include <linux/netdevice.h>
13#include <linux/phy.h>
14#include "dsa_priv.h"
15
16#define REG_PORT(p) (8 + (p))
17#define REG_GLOBAL 0x0f
18
19static int reg_read(struct dsa_switch *ds, int addr, int reg)
20{
21 return mdiobus_read(ds->master_mii_bus, ds->pd->sw_addr + addr, reg);
22}
23
24#define REG_READ(addr, reg) \
25 ({ \
26 int __ret; \
27 \
28 __ret = reg_read(ds, addr, reg); \
29 if (__ret < 0) \
30 return __ret; \
31 __ret; \
32 })
33
34
35static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
36{
37 return mdiobus_write(ds->master_mii_bus, ds->pd->sw_addr + addr,
38 reg, val);
39}
40
41#define REG_WRITE(addr, reg, val) \
42 ({ \
43 int __ret; \
44 \
45 __ret = reg_write(ds, addr, reg, val); \
46 if (__ret < 0) \
47 return __ret; \
48 })
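/*
 * Note: REG_READ()/REG_WRITE() are GCC statement expressions containing a
 * bare "return", so an MDIO error makes the *calling* function return the
 * error code; every caller below therefore returns int.  REG_READ()
 * additionally evaluates to the register value that was read.
 */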
49
50static char *mv88e6060_probe(struct mii_bus *bus, int sw_addr)
51{
52 int ret;
53
54 ret = mdiobus_read(bus, sw_addr + REG_PORT(0), 0x03);
55 if (ret >= 0) {
56 ret &= 0xfff0;
57 if (ret == 0x0600)
58 return "Marvell 88E6060";
59 }
60
61 return NULL;
62}
63
64static int mv88e6060_switch_reset(struct dsa_switch *ds)
65{
66 int i;
67 int ret;
68
69 /*
70 * Set all ports to the disabled state.
71 */
72 for (i = 0; i < 6; i++) {
73 ret = REG_READ(REG_PORT(i), 0x04);
74 REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
75 }
76
77 /*
78 * Wait for transmit queues to drain.
79 */
80 msleep(2);
81
82 /*
83 * Reset the switch.
84 */
85 REG_WRITE(REG_GLOBAL, 0x0a, 0xa130);
86
87 /*
88 * Wait up to one second for reset to complete.
89 */
90 for (i = 0; i < 1000; i++) {
91 ret = REG_READ(REG_GLOBAL, 0x00);
92 if ((ret & 0x8000) == 0x0000)
93 break;
94
95 msleep(1);
96 }
97 if (i == 1000)
98 return -ETIMEDOUT;
99
100 return 0;
101}
102
103static int mv88e6060_setup_global(struct dsa_switch *ds)
104{
105 /*
106 * Disable discarding of frames with excessive collisions,
107 * set the maximum frame size to 1536 bytes, and mask all
108 * interrupt sources.
109 */
110 REG_WRITE(REG_GLOBAL, 0x04, 0x0800);
111
112 /*
113 * Enable automatic address learning, set the address
114 * database size to 1024 entries, and set the default aging
115 * time to 5 minutes.
116 */
117 REG_WRITE(REG_GLOBAL, 0x0a, 0x2130);
118
119 return 0;
120}
121
122static int mv88e6060_setup_port(struct dsa_switch *ds, int p)
123{
124 int addr = REG_PORT(p);
125
126 /*
127 * Do not force flow control, disable Ingress and Egress
128 * Header tagging, disable VLAN tunneling, and set the port
129 * state to Forwarding. Additionally, if this is the CPU
130 * port, enable Ingress and Egress Trailer tagging mode.
131 */
132 REG_WRITE(addr, 0x04, dsa_is_cpu_port(ds, p) ? 0x4103 : 0x0003);
133
134 /*
135 * Port based VLAN map: give each port its own address
136 * database, allow the CPU port to talk to each of the 'real'
137 * ports, and allow each of the 'real' ports to only talk to
138 * the CPU port.
139 */
140 REG_WRITE(addr, 0x06,
141 ((p & 0xf) << 12) |
142 (dsa_is_cpu_port(ds, p) ?
143 ds->phys_port_mask :
144 (1 << ds->dst->cpu_port)));
145
146 /*
147 * Port Association Vector: when learning source addresses
148 * of packets, add the address to the address database using
149 * a port bitmap that has only the bit for this port set and
150 * the other bits clear.
151 */
152 REG_WRITE(addr, 0x0b, 1 << p);
153
154 return 0;
155}
156
157static int mv88e6060_setup(struct dsa_switch *ds)
158{
159 int i;
160 int ret;
161
162 ret = mv88e6060_switch_reset(ds);
163 if (ret < 0)
164 return ret;
165
166 /* @@@ initialise atu */
167
168 ret = mv88e6060_setup_global(ds);
169 if (ret < 0)
170 return ret;
171
172 for (i = 0; i < 6; i++) {
173 ret = mv88e6060_setup_port(ds, i);
174 if (ret < 0)
175 return ret;
176 }
177
178 return 0;
179}
180
181static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr)
182{
183 REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]);
184 REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]);
185 REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]);
186
187 return 0;
188}
189
190static int mv88e6060_port_to_phy_addr(int port)
191{
192 if (port >= 0 && port <= 5)
193 return port;
194 return -1;
195}
196
197static int mv88e6060_phy_read(struct dsa_switch *ds, int port, int regnum)
198{
199 int addr;
200
201 addr = mv88e6060_port_to_phy_addr(port);
202 if (addr == -1)
203 return 0xffff;
204
205 return reg_read(ds, addr, regnum);
206}
207
208static int
209mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val)
210{
211 int addr;
212
213 addr = mv88e6060_port_to_phy_addr(port);
214 if (addr == -1)
215 return 0xffff;
216
217 return reg_write(ds, addr, regnum, val);
218}
219
220static void mv88e6060_poll_link(struct dsa_switch *ds)
221{
222 int i;
223
224 for (i = 0; i < DSA_MAX_PORTS; i++) {
225 struct net_device *dev;
226 int uninitialized_var(port_status);
227 int link;
228 int speed;
229 int duplex;
230 int fc;
231
232 dev = ds->ports[i];
233 if (dev == NULL)
234 continue;
235
236 link = 0;
237 if (dev->flags & IFF_UP) {
238 port_status = reg_read(ds, REG_PORT(i), 0x00);
239 if (port_status < 0)
240 continue;
241
242 link = !!(port_status & 0x1000);
243 }
244
245 if (!link) {
246 if (netif_carrier_ok(dev)) {
247 printk(KERN_INFO "%s: link down\n", dev->name);
248 netif_carrier_off(dev);
249 }
250 continue;
251 }
252
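		/*
		 * Decode the remaining port status bits (per the reads
		 * above): 0x0100 selects 100 vs 10 Mb/s, 0x0200 full vs
		 * half duplex, and both 0xc000 bits set are treated as
		 * flow control being enabled.
		 */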
253 speed = (port_status & 0x0100) ? 100 : 10;
254 duplex = (port_status & 0x0200) ? 1 : 0;
255 fc = ((port_status & 0xc000) == 0xc000) ? 1 : 0;
256
257 if (!netif_carrier_ok(dev)) {
258 printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, "
259 "flow control %sabled\n", dev->name,
260 speed, duplex ? "full" : "half",
261 fc ? "en" : "dis");
262 netif_carrier_on(dev);
263 }
264 }
265}
266
267static struct dsa_switch_driver mv88e6060_switch_driver = {
268 .tag_protocol = htons(ETH_P_TRAILER),
269 .probe = mv88e6060_probe,
270 .setup = mv88e6060_setup,
271 .set_addr = mv88e6060_set_addr,
272 .phy_read = mv88e6060_phy_read,
273 .phy_write = mv88e6060_phy_write,
274 .poll_link = mv88e6060_poll_link,
275};
276
277static int __init mv88e6060_init(void)
278{
279 register_switch_driver(&mv88e6060_switch_driver);
280 return 0;
281}
282module_init(mv88e6060_init);
283
284static void __exit mv88e6060_cleanup(void)
285{
286 unregister_switch_driver(&mv88e6060_switch_driver);
287}
288module_exit(mv88e6060_cleanup);
diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c
new file mode 100644
index 00000000000..52faaa21a4d
--- /dev/null
+++ b/net/dsa/mv88e6123_61_65.c
@@ -0,0 +1,447 @@
1/*
2 * net/dsa/mv88e6123_61_65.c - Marvell 88e6123/6161/6165 switch chip support
3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 */
10
11#include <linux/list.h>
12#include <linux/netdevice.h>
13#include <linux/phy.h>
14#include "dsa_priv.h"
15#include "mv88e6xxx.h"
16
17static char *mv88e6123_61_65_probe(struct mii_bus *bus, int sw_addr)
18{
19 int ret;
20
21 ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03);
22 if (ret >= 0) {
23 ret &= 0xfff0;
24 if (ret == 0x1210)
25 return "Marvell 88E6123";
26 if (ret == 0x1610)
27 return "Marvell 88E6161";
28 if (ret == 0x1650)
29 return "Marvell 88E6165";
30 }
31
32 return NULL;
33}
34
35static int mv88e6123_61_65_switch_reset(struct dsa_switch *ds)
36{
37 int i;
38 int ret;
39
40 /*
41 * Set all ports to the disabled state.
42 */
43 for (i = 0; i < 8; i++) {
44 ret = REG_READ(REG_PORT(i), 0x04);
45 REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
46 }
47
48 /*
49 * Wait for transmit queues to drain.
50 */
51 msleep(2);
52
53 /*
54 * Reset the switch.
55 */
56 REG_WRITE(REG_GLOBAL, 0x04, 0xc400);
57
58 /*
59 * Wait up to one second for reset to complete.
60 */
61 for (i = 0; i < 1000; i++) {
62 ret = REG_READ(REG_GLOBAL, 0x00);
63 if ((ret & 0xc800) == 0xc800)
64 break;
65
66 msleep(1);
67 }
68 if (i == 1000)
69 return -ETIMEDOUT;
70
71 return 0;
72}
73
74static int mv88e6123_61_65_setup_global(struct dsa_switch *ds)
75{
76 int ret;
77 int i;
78
79 /*
80 * Disable the PHY polling unit (since there won't be any
81 * external PHYs to poll), don't discard packets with
82 * excessive collisions, and mask all interrupt sources.
83 */
84 REG_WRITE(REG_GLOBAL, 0x04, 0x0000);
85
86 /*
87 * Set the default address aging time to 5 minutes, and
88 * enable address learn messages to be sent to all message
89 * ports.
90 */
91 REG_WRITE(REG_GLOBAL, 0x0a, 0x0148);
92
93 /*
94 * Configure the priority mapping registers.
95 */
96 ret = mv88e6xxx_config_prio(ds);
97 if (ret < 0)
98 return ret;
99
100 /*
101 * Configure the upstream port, and configure the upstream
102 * port as the port to which ingress and egress monitor frames
103 * are to be sent.
104 */
105 REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1110));
106
107 /*
108 * Disable remote management for now, and set the switch's
109 * DSA device number.
110 */
111 REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f);
112
113 /*
114 * Send all frames with destination addresses matching
115 * 01:80:c2:00:00:2x to the CPU port.
116 */
117 REG_WRITE(REG_GLOBAL2, 0x02, 0xffff);
118
119 /*
120 * Send all frames with destination addresses matching
121 * 01:80:c2:00:00:0x to the CPU port.
122 */
123 REG_WRITE(REG_GLOBAL2, 0x03, 0xffff);
124
125 /*
126 * Disable the loopback filter, disable flow control
127 * messages, disable flood broadcast override, disable
128 * removing of provider tags, disable ATU age violation
129 * interrupts, disable tag flow control, force flow
130 * control priority to the highest, and send all special
131 * multicast frames to the CPU at the highest priority.
132 */
133 REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff);
134
135 /*
136 * Program the DSA routing table.
137 */
138 for (i = 0; i < 32; i++) {
139 int nexthop;
140
141 nexthop = 0x1f;
142 if (i != ds->index && i < ds->dst->pd->nr_chips)
143 nexthop = ds->pd->rtable[i] & 0x1f;
144
145 REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop);
146 }
147
148 /*
149 * Clear all trunk masks.
150 */
151 for (i = 0; i < 8; i++)
152 REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff);
153
154 /*
155 * Clear all trunk mappings.
156 */
157 for (i = 0; i < 16; i++)
158 REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11));
159
160 /*
161 * Disable ingress rate limiting by resetting all ingress
162 * rate limit registers to their initial state.
163 */
164 for (i = 0; i < 6; i++)
165 REG_WRITE(REG_GLOBAL2, 0x09, 0x9000 | (i << 8));
166
167 /*
168 * Initialise cross-chip port VLAN table to reset defaults.
169 */
170 REG_WRITE(REG_GLOBAL2, 0x0b, 0x9000);
171
172 /*
173 * Clear the priority override table.
174 */
175 for (i = 0; i < 16; i++)
176 REG_WRITE(REG_GLOBAL2, 0x0f, 0x8000 | (i << 8));
177
178 /* @@@ initialise AVB (22/23) watchdog (27) sdet (29) registers */
179
180 return 0;
181}
182
183static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p)
184{
185 int addr = REG_PORT(p);
186 u16 val;
187
188 /*
189 * MAC Forcing register: don't force link, speed, duplex
190 * or flow control state to any particular values on physical
191 * ports, but force the CPU port and all DSA ports to 1000 Mb/s
192 * full duplex.
193 */
194 if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p))
195 REG_WRITE(addr, 0x01, 0x003e);
196 else
197 REG_WRITE(addr, 0x01, 0x0003);
198
199 /*
200 * Do not limit the period of time that this port can be
201 * paused for by the remote end or the period of time that
202 * this port can pause the remote end.
203 */
204 REG_WRITE(addr, 0x02, 0x0000);
205
206 /*
207 * Port Control: disable Drop-on-Unlock, disable Drop-on-Lock,
208 * disable Header mode, enable IGMP/MLD snooping, disable VLAN
209 * tunneling, determine priority by looking at 802.1p and IP
210 * priority fields (IP prio has precedence), and set STP state
211 * to Forwarding.
212 *
213 * If this is the CPU link, use DSA or EDSA tagging depending
214 * on which tagging mode was configured.
215 *
216 * If this is a link to another switch, use DSA tagging mode.
217 *
218 * If this is the upstream port for this switch, enable
219 * forwarding of unknown unicasts and multicasts.
220 */
221 val = 0x0433;
222 if (dsa_is_cpu_port(ds, p)) {
223 if (ds->dst->tag_protocol == htons(ETH_P_EDSA))
224 val |= 0x3300;
225 else
226 val |= 0x0100;
227 }
228 if (ds->dsa_port_mask & (1 << p))
229 val |= 0x0100;
230 if (p == dsa_upstream_port(ds))
231 val |= 0x000c;
232 REG_WRITE(addr, 0x04, val);
233
234 /*
235 * Port Control 1: disable trunking. Also, if this is the
236 * CPU port, enable learn messages to be sent to this port.
237 */
238 REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000);
239
240 /*
241 * Port based VLAN map: give each port its own address
242 * database, allow the CPU port to talk to each of the 'real'
243 * ports, and allow each of the 'real' ports to only talk to
244 * the upstream port.
245 */
246 val = (p & 0xf) << 12;
247 if (dsa_is_cpu_port(ds, p))
248 val |= ds->phys_port_mask;
249 else
250 val |= 1 << dsa_upstream_port(ds);
251 REG_WRITE(addr, 0x06, val);
252
253 /*
254 * Default VLAN ID and priority: don't set a default VLAN
255 * ID, and set the default packet priority to zero.
256 */
257 REG_WRITE(addr, 0x07, 0x0000);
258
259 /*
260 * Port Control 2: don't force a good FCS, set the maximum
261 * frame size to 10240 bytes, don't let the switch add or
262 * strip 802.1q tags, don't discard tagged or untagged frames
263 * on this port, do a destination address lookup on all
264 * received packets as usual, disable ARP mirroring and don't
265 * send a copy of all transmitted/received frames on this port
266 * to the CPU.
267 */
268 REG_WRITE(addr, 0x08, 0x2080);
269
270 /*
271 * Egress rate control: disable egress rate control.
272 */
273 REG_WRITE(addr, 0x09, 0x0001);
274
275 /*
276 * Egress rate control 2: disable egress rate control.
277 */
278 REG_WRITE(addr, 0x0a, 0x0000);
279
280 /*
281 * Port Association Vector: when learning source addresses
282 * of packets, add the address to the address database using
283 * a port bitmap that has only the bit for this port set and
284 * the other bits clear.
285 */
286 REG_WRITE(addr, 0x0b, 1 << p);
287
288 /*
289 * Port ATU control: disable limiting the number of address
290 * database entries that this port is allowed to use.
291 */
292 REG_WRITE(addr, 0x0c, 0x0000);
293
294 /*
295	 * Priority Override: disable DA, SA and VTU priority override.
296 */
297 REG_WRITE(addr, 0x0d, 0x0000);
298
299 /*
300 * Port Ethertype: use the Ethertype DSA Ethertype value.
301 */
302 REG_WRITE(addr, 0x0f, ETH_P_EDSA);
303
304 /*
305 * Tag Remap: use an identity 802.1p prio -> switch prio
306 * mapping.
307 */
308 REG_WRITE(addr, 0x18, 0x3210);
309
310 /*
311 * Tag Remap 2: use an identity 802.1p prio -> switch prio
312 * mapping.
313 */
314 REG_WRITE(addr, 0x19, 0x7654);
315
316 return 0;
317}
318
319static int mv88e6123_61_65_setup(struct dsa_switch *ds)
320{
321 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
322 int i;
323 int ret;
324
325 mutex_init(&ps->smi_mutex);
326 mutex_init(&ps->stats_mutex);
327
328 ret = mv88e6123_61_65_switch_reset(ds);
329 if (ret < 0)
330 return ret;
331
332 /* @@@ initialise vtu and atu */
333
334 ret = mv88e6123_61_65_setup_global(ds);
335 if (ret < 0)
336 return ret;
337
338 for (i = 0; i < 6; i++) {
339 ret = mv88e6123_61_65_setup_port(ds, i);
340 if (ret < 0)
341 return ret;
342 }
343
344 return 0;
345}
346
347static int mv88e6123_61_65_port_to_phy_addr(int port)
348{
349 if (port >= 0 && port <= 4)
350 return port;
351 return -1;
352}
353
354static int
355mv88e6123_61_65_phy_read(struct dsa_switch *ds, int port, int regnum)
356{
357 int addr = mv88e6123_61_65_port_to_phy_addr(port);
358 return mv88e6xxx_phy_read(ds, addr, regnum);
359}
360
361static int
362mv88e6123_61_65_phy_write(struct dsa_switch *ds,
363 int port, int regnum, u16 val)
364{
365 int addr = mv88e6123_61_65_port_to_phy_addr(port);
366 return mv88e6xxx_phy_write(ds, addr, regnum, val);
367}
368
369static struct mv88e6xxx_hw_stat mv88e6123_61_65_hw_stats[] = {
370 { "in_good_octets", 8, 0x00, },
371 { "in_bad_octets", 4, 0x02, },
372 { "in_unicast", 4, 0x04, },
373 { "in_broadcasts", 4, 0x06, },
374 { "in_multicasts", 4, 0x07, },
375 { "in_pause", 4, 0x16, },
376 { "in_undersize", 4, 0x18, },
377 { "in_fragments", 4, 0x19, },
378 { "in_oversize", 4, 0x1a, },
379 { "in_jabber", 4, 0x1b, },
380 { "in_rx_error", 4, 0x1c, },
381 { "in_fcs_error", 4, 0x1d, },
382 { "out_octets", 8, 0x0e, },
383 { "out_unicast", 4, 0x10, },
384 { "out_broadcasts", 4, 0x13, },
385 { "out_multicasts", 4, 0x12, },
386 { "out_pause", 4, 0x15, },
387 { "excessive", 4, 0x11, },
388 { "collisions", 4, 0x1e, },
389 { "deferred", 4, 0x05, },
390 { "single", 4, 0x14, },
391 { "multiple", 4, 0x17, },
392 { "out_fcs_error", 4, 0x03, },
393 { "late", 4, 0x1f, },
394 { "hist_64bytes", 4, 0x08, },
395 { "hist_65_127bytes", 4, 0x09, },
396 { "hist_128_255bytes", 4, 0x0a, },
397 { "hist_256_511bytes", 4, 0x0b, },
398 { "hist_512_1023bytes", 4, 0x0c, },
399 { "hist_1024_max_bytes", 4, 0x0d, },
400};
401
402static void
403mv88e6123_61_65_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
404{
405 mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats),
406 mv88e6123_61_65_hw_stats, port, data);
407}
408
409static void
410mv88e6123_61_65_get_ethtool_stats(struct dsa_switch *ds,
411 int port, uint64_t *data)
412{
413 mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats),
414 mv88e6123_61_65_hw_stats, port, data);
415}
416
417static int mv88e6123_61_65_get_sset_count(struct dsa_switch *ds)
418{
419 return ARRAY_SIZE(mv88e6123_61_65_hw_stats);
420}
421
422static struct dsa_switch_driver mv88e6123_61_65_switch_driver = {
423 .tag_protocol = cpu_to_be16(ETH_P_EDSA),
424 .priv_size = sizeof(struct mv88e6xxx_priv_state),
425 .probe = mv88e6123_61_65_probe,
426 .setup = mv88e6123_61_65_setup,
427 .set_addr = mv88e6xxx_set_addr_indirect,
428 .phy_read = mv88e6123_61_65_phy_read,
429 .phy_write = mv88e6123_61_65_phy_write,
430 .poll_link = mv88e6xxx_poll_link,
431 .get_strings = mv88e6123_61_65_get_strings,
432 .get_ethtool_stats = mv88e6123_61_65_get_ethtool_stats,
433 .get_sset_count = mv88e6123_61_65_get_sset_count,
434};
435
436static int __init mv88e6123_61_65_init(void)
437{
438 register_switch_driver(&mv88e6123_61_65_switch_driver);
439 return 0;
440}
441module_init(mv88e6123_61_65_init);
442
443static void __exit mv88e6123_61_65_cleanup(void)
444{
445 unregister_switch_driver(&mv88e6123_61_65_switch_driver);
446}
447module_exit(mv88e6123_61_65_cleanup);
diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c
new file mode 100644
index 00000000000..9bd1061fa4e
--- /dev/null
+++ b/net/dsa/mv88e6131.c
@@ -0,0 +1,443 @@
1/*
2 * net/dsa/mv88e6131.c - Marvell 88e6095/6095f/6131 switch chip support
3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 */
10
11#include <linux/list.h>
12#include <linux/netdevice.h>
13#include <linux/phy.h>
14#include "dsa_priv.h"
15#include "mv88e6xxx.h"
16
17/*
18 * Switch product IDs
19 */
20#define ID_6085 0x04a0
21#define ID_6095 0x0950
22#define ID_6131 0x1060
23
24static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr)
25{
26 int ret;
27
28 ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03);
29 if (ret >= 0) {
30 ret &= 0xfff0;
31 if (ret == ID_6085)
32 return "Marvell 88E6085";
33 if (ret == ID_6095)
34 return "Marvell 88E6095/88E6095F";
35 if (ret == ID_6131)
36 return "Marvell 88E6131";
37 }
38
39 return NULL;
40}
41
42static int mv88e6131_switch_reset(struct dsa_switch *ds)
43{
44 int i;
45 int ret;
46
47 /*
48 * Set all ports to the disabled state.
49 */
50 for (i = 0; i < 11; i++) {
51 ret = REG_READ(REG_PORT(i), 0x04);
52 REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
53 }
54
55 /*
56 * Wait for transmit queues to drain.
57 */
58 msleep(2);
59
60 /*
61 * Reset the switch.
62 */
63 REG_WRITE(REG_GLOBAL, 0x04, 0xc400);
64
65 /*
66 * Wait up to one second for reset to complete.
67 */
68 for (i = 0; i < 1000; i++) {
69 ret = REG_READ(REG_GLOBAL, 0x00);
70 if ((ret & 0xc800) == 0xc800)
71 break;
72
73 msleep(1);
74 }
75 if (i == 1000)
76 return -ETIMEDOUT;
77
78 return 0;
79}
80
81static int mv88e6131_setup_global(struct dsa_switch *ds)
82{
83 int ret;
84 int i;
85
86 /*
87 * Enable the PHY polling unit, don't discard packets with
88 * excessive collisions, use a weighted fair queueing scheme
89 * to arbitrate between packet queues, set the maximum frame
90 * size to 1632, and mask all interrupt sources.
91 */
92 REG_WRITE(REG_GLOBAL, 0x04, 0x4400);
93
94 /*
95 * Set the default address aging time to 5 minutes, and
96 * enable address learn messages to be sent to all message
97 * ports.
98 */
99 REG_WRITE(REG_GLOBAL, 0x0a, 0x0148);
100
101 /*
102 * Configure the priority mapping registers.
103 */
104 ret = mv88e6xxx_config_prio(ds);
105 if (ret < 0)
106 return ret;
107
108 /*
109 * Set the VLAN ethertype to 0x8100.
110 */
111 REG_WRITE(REG_GLOBAL, 0x19, 0x8100);
112
113 /*
114 * Disable ARP mirroring, and configure the upstream port as
115 * the port to which ingress and egress monitor frames are to
116 * be sent.
117 */
118 REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1100) | 0x00f0);
119
120 /*
121 * Disable cascade port functionality unless this device
122 * is used in a cascade configuration, and set the switch's
123 * DSA device number.
124 */
125 if (ds->dst->pd->nr_chips > 1)
126 REG_WRITE(REG_GLOBAL, 0x1c, 0xf000 | (ds->index & 0x1f));
127 else
128 REG_WRITE(REG_GLOBAL, 0x1c, 0xe000 | (ds->index & 0x1f));
129
130 /*
131 * Send all frames with destination addresses matching
132 * 01:80:c2:00:00:0x to the CPU port.
133 */
134 REG_WRITE(REG_GLOBAL2, 0x03, 0xffff);
135
136 /*
137 * Ignore removed tag data on doubly tagged packets, disable
138 * flow control messages, force flow control priority to the
139 * highest, and send all special multicast frames to the CPU
140 * port at the highest priority.
141 */
142 REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff);
143
144 /*
145 * Program the DSA routing table.
146 */
147 for (i = 0; i < 32; i++) {
148 int nexthop;
149
150 nexthop = 0x1f;
151 if (i != ds->index && i < ds->dst->pd->nr_chips)
152 nexthop = ds->pd->rtable[i] & 0x1f;
153
154 REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop);
155 }
156
157 /*
158 * Clear all trunk masks.
159 */
160 for (i = 0; i < 8; i++)
161 REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0x7ff);
162
163 /*
164 * Clear all trunk mappings.
165 */
166 for (i = 0; i < 16; i++)
167 REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11));
168
169 /*
170 * Force the priority of IGMP/MLD snoop frames and ARP frames
171 * to the highest setting.
172 */
173 REG_WRITE(REG_GLOBAL2, 0x0f, 0x00ff);
174
175 return 0;
176}
177
178static int mv88e6131_setup_port(struct dsa_switch *ds, int p)
179{
180 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
181 int addr = REG_PORT(p);
182 u16 val;
183
184 /*
185 * MAC Forcing register: don't force link, speed, duplex
186 * or flow control state to any particular values on physical
187 * ports, but force the CPU port and all DSA ports to 1000 Mb/s
188 * (100 Mb/s on 6085) full duplex.
189 */
190 if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p))
191 if (ps->id == ID_6085)
192 REG_WRITE(addr, 0x01, 0x003d); /* 100 Mb/s */
193 else
194 REG_WRITE(addr, 0x01, 0x003e); /* 1000 Mb/s */
195 else
196 REG_WRITE(addr, 0x01, 0x0003);
197
198 /*
199 * Port Control: disable Core Tag, disable Drop-on-Lock,
200 * transmit frames unmodified, disable Header mode,
201 * enable IGMP/MLD snoop, disable DoubleTag, disable VLAN
202 * tunneling, determine priority by looking at 802.1p and
203 * IP priority fields (IP prio has precedence), and set STP
204 * state to Forwarding.
205 *
206 * If this is the upstream port for this switch, enable
207 * forwarding of unknown unicasts, and enable DSA tagging
208 * mode.
209 *
210 * If this is the link to another switch, use DSA tagging
211 * mode, but do not enable forwarding of unknown unicasts.
212 */
213 val = 0x0433;
214 if (p == dsa_upstream_port(ds)) {
215 val |= 0x0104;
216 /*
217		 * On the 6085, unknown multicast forwarding is controlled
218		 * here rather than in the Port Control 2 register.
219 */
220 if (ps->id == ID_6085)
221 val |= 0x0008;
222 }
223 if (ds->dsa_port_mask & (1 << p))
224 val |= 0x0100;
225 REG_WRITE(addr, 0x04, val);
226
227 /*
228 * Port Control 1: disable trunking. Also, if this is the
229 * CPU port, enable learn messages to be sent to this port.
230 */
231 REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000);
232
233 /*
234 * Port based VLAN map: give each port its own address
235 * database, allow the CPU port to talk to each of the 'real'
236 * ports, and allow each of the 'real' ports to only talk to
237 * the upstream port.
238 */
239 val = (p & 0xf) << 12;
240 if (dsa_is_cpu_port(ds, p))
241 val |= ds->phys_port_mask;
242 else
243 val |= 1 << dsa_upstream_port(ds);
244 REG_WRITE(addr, 0x06, val);
245
246 /*
247 * Default VLAN ID and priority: don't set a default VLAN
248 * ID, and set the default packet priority to zero.
249 */
250 REG_WRITE(addr, 0x07, 0x0000);
251
252 /*
253 * Port Control 2: don't force a good FCS, don't use
254 * VLAN-based, source address-based or destination
255 * address-based priority overrides, don't let the switch
256 * add or strip 802.1q tags, don't discard tagged or
257 * untagged frames on this port, do a destination address
258 * lookup on received packets as usual, don't send a copy
259 * of all transmitted/received frames on this port to the
260 * CPU, and configure the upstream port number.
261 *
262 * If this is the upstream port for this switch, enable
263 * forwarding of unknown multicast addresses.
264 */
265 if (ps->id == ID_6085)
266 /*
267		 * On the 6085, bits 3:0 are reserved, bit 6 controls ARP
268		 * mirroring, and unknown multicast forwarding is handled
269		 * in the Port Control register.
270 */
271 REG_WRITE(addr, 0x08, 0x0080);
272 else {
273 val = 0x0080 | dsa_upstream_port(ds);
274 if (p == dsa_upstream_port(ds))
275 val |= 0x0040;
276 REG_WRITE(addr, 0x08, val);
277 }
278
279 /*
280 * Rate Control: disable ingress rate limiting.
281 */
282 REG_WRITE(addr, 0x09, 0x0000);
283
284 /*
285 * Rate Control 2: disable egress rate limiting.
286 */
287 REG_WRITE(addr, 0x0a, 0x0000);
288
289 /*
290 * Port Association Vector: when learning source addresses
291 * of packets, add the address to the address database using
292 * a port bitmap that has only the bit for this port set and
293 * the other bits clear.
294 */
295 REG_WRITE(addr, 0x0b, 1 << p);
296
297 /*
298 * Tag Remap: use an identity 802.1p prio -> switch prio
299 * mapping.
300 */
301 REG_WRITE(addr, 0x18, 0x3210);
302
303 /*
304 * Tag Remap 2: use an identity 802.1p prio -> switch prio
305 * mapping.
306 */
307 REG_WRITE(addr, 0x19, 0x7654);
308
309 return 0;
310}
311
312static int mv88e6131_setup(struct dsa_switch *ds)
313{
314 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
315 int i;
316 int ret;
317
318 mutex_init(&ps->smi_mutex);
319 mv88e6xxx_ppu_state_init(ds);
320 mutex_init(&ps->stats_mutex);
321
322 ps->id = REG_READ(REG_PORT(0), 0x03) & 0xfff0;
323
324 ret = mv88e6131_switch_reset(ds);
325 if (ret < 0)
326 return ret;
327
328 /* @@@ initialise vtu and atu */
329
330 ret = mv88e6131_setup_global(ds);
331 if (ret < 0)
332 return ret;
333
334 for (i = 0; i < 11; i++) {
335 ret = mv88e6131_setup_port(ds, i);
336 if (ret < 0)
337 return ret;
338 }
339
340 return 0;
341}
342
343static int mv88e6131_port_to_phy_addr(int port)
344{
345 if (port >= 0 && port <= 11)
346 return port;
347 return -1;
348}
349
350static int
351mv88e6131_phy_read(struct dsa_switch *ds, int port, int regnum)
352{
353 int addr = mv88e6131_port_to_phy_addr(port);
354 return mv88e6xxx_phy_read_ppu(ds, addr, regnum);
355}
356
357static int
358mv88e6131_phy_write(struct dsa_switch *ds,
359 int port, int regnum, u16 val)
360{
361 int addr = mv88e6131_port_to_phy_addr(port);
362 return mv88e6xxx_phy_write_ppu(ds, addr, regnum, val);
363}
364
365static struct mv88e6xxx_hw_stat mv88e6131_hw_stats[] = {
366 { "in_good_octets", 8, 0x00, },
367 { "in_bad_octets", 4, 0x02, },
368 { "in_unicast", 4, 0x04, },
369 { "in_broadcasts", 4, 0x06, },
370 { "in_multicasts", 4, 0x07, },
371 { "in_pause", 4, 0x16, },
372 { "in_undersize", 4, 0x18, },
373 { "in_fragments", 4, 0x19, },
374 { "in_oversize", 4, 0x1a, },
375 { "in_jabber", 4, 0x1b, },
376 { "in_rx_error", 4, 0x1c, },
377 { "in_fcs_error", 4, 0x1d, },
378 { "out_octets", 8, 0x0e, },
379 { "out_unicast", 4, 0x10, },
380 { "out_broadcasts", 4, 0x13, },
381 { "out_multicasts", 4, 0x12, },
382 { "out_pause", 4, 0x15, },
383 { "excessive", 4, 0x11, },
384 { "collisions", 4, 0x1e, },
385 { "deferred", 4, 0x05, },
386 { "single", 4, 0x14, },
387 { "multiple", 4, 0x17, },
388 { "out_fcs_error", 4, 0x03, },
389 { "late", 4, 0x1f, },
390 { "hist_64bytes", 4, 0x08, },
391 { "hist_65_127bytes", 4, 0x09, },
392 { "hist_128_255bytes", 4, 0x0a, },
393 { "hist_256_511bytes", 4, 0x0b, },
394 { "hist_512_1023bytes", 4, 0x0c, },
395 { "hist_1024_max_bytes", 4, 0x0d, },
396};
397
398static void
399mv88e6131_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
400{
401 mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6131_hw_stats),
402 mv88e6131_hw_stats, port, data);
403}
404
405static void
406mv88e6131_get_ethtool_stats(struct dsa_switch *ds,
407 int port, uint64_t *data)
408{
409 mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6131_hw_stats),
410 mv88e6131_hw_stats, port, data);
411}
412
413static int mv88e6131_get_sset_count(struct dsa_switch *ds)
414{
415 return ARRAY_SIZE(mv88e6131_hw_stats);
416}
417
418static struct dsa_switch_driver mv88e6131_switch_driver = {
419 .tag_protocol = cpu_to_be16(ETH_P_DSA),
420 .priv_size = sizeof(struct mv88e6xxx_priv_state),
421 .probe = mv88e6131_probe,
422 .setup = mv88e6131_setup,
423 .set_addr = mv88e6xxx_set_addr_direct,
424 .phy_read = mv88e6131_phy_read,
425 .phy_write = mv88e6131_phy_write,
426 .poll_link = mv88e6xxx_poll_link,
427 .get_strings = mv88e6131_get_strings,
428 .get_ethtool_stats = mv88e6131_get_ethtool_stats,
429 .get_sset_count = mv88e6131_get_sset_count,
430};
431
432static int __init mv88e6131_init(void)
433{
434 register_switch_driver(&mv88e6131_switch_driver);
435 return 0;
436}
437module_init(mv88e6131_init);
438
439static void __exit mv88e6131_cleanup(void)
440{
441 unregister_switch_driver(&mv88e6131_switch_driver);
442}
443module_exit(mv88e6131_cleanup);
diff --git a/net/dsa/mv88e6xxx.c b/net/dsa/mv88e6xxx.c
new file mode 100644
index 00000000000..efe661a9def
--- /dev/null
+++ b/net/dsa/mv88e6xxx.c
@@ -0,0 +1,522 @@
1/*
2 * net/dsa/mv88e6xxx.c - Marvell 88e6xxx switch chip support
3 * Copyright (c) 2008 Marvell Semiconductor
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 */
10
11#include <linux/list.h>
12#include <linux/netdevice.h>
13#include <linux/phy.h>
14#include "dsa_priv.h"
15#include "mv88e6xxx.h"
16
17/*
18 * If the switch's ADDR[4:0] strap pins are strapped to zero, it will
19 * use all 32 SMI bus addresses on its SMI bus, and all switch registers
20 * will be directly accessible on some {device address,register address}
21 * pair. If the ADDR[4:0] pins are not strapped to zero, the switch
22 * will only respond to SMI transactions to that specific address, and
23 * an indirect addressing mechanism needs to be used to access its
24 * registers.
25 */
26static int mv88e6xxx_reg_wait_ready(struct mii_bus *bus, int sw_addr)
27{
28 int ret;
29 int i;
30
31 for (i = 0; i < 16; i++) {
32 ret = mdiobus_read(bus, sw_addr, 0);
33 if (ret < 0)
34 return ret;
35
36 if ((ret & 0x8000) == 0)
37 return 0;
38 }
39
40 return -ETIMEDOUT;
41}
42
43int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg)
44{
45 int ret;
46
47 if (sw_addr == 0)
48 return mdiobus_read(bus, addr, reg);
49
50 /*
51 * Wait for the bus to become free.
52 */
53 ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
54 if (ret < 0)
55 return ret;
56
57 /*
58 * Transmit the read command.
59 */
60 ret = mdiobus_write(bus, sw_addr, 0, 0x9800 | (addr << 5) | reg);
61 if (ret < 0)
62 return ret;
63
64 /*
65 * Wait for the read command to complete.
66 */
67 ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
68 if (ret < 0)
69 return ret;
70
71 /*
72 * Read the data.
73 */
74 ret = mdiobus_read(bus, sw_addr, 1);
75 if (ret < 0)
76 return ret;
77
78 return ret & 0xffff;
79}
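/*
 * Command word layout, as inferred from the constants above: bit 15 is the
 * busy/start flag, 0x9800 encodes an indirect read (0x9400 an indirect write
 * below), bits 9:5 carry the target device address, bits 4:0 the register
 * number, and the data itself moves through SMI data register 1.
 */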
80
81int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg)
82{
83 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
84 int ret;
85
86 mutex_lock(&ps->smi_mutex);
87 ret = __mv88e6xxx_reg_read(ds->master_mii_bus,
88 ds->pd->sw_addr, addr, reg);
89 mutex_unlock(&ps->smi_mutex);
90
91 return ret;
92}
93
94int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr,
95 int reg, u16 val)
96{
97 int ret;
98
99 if (sw_addr == 0)
100 return mdiobus_write(bus, addr, reg, val);
101
102 /*
103 * Wait for the bus to become free.
104 */
105 ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
106 if (ret < 0)
107 return ret;
108
109 /*
110 * Transmit the data to write.
111 */
112 ret = mdiobus_write(bus, sw_addr, 1, val);
113 if (ret < 0)
114 return ret;
115
116 /*
117 * Transmit the write command.
118 */
119 ret = mdiobus_write(bus, sw_addr, 0, 0x9400 | (addr << 5) | reg);
120 if (ret < 0)
121 return ret;
122
123 /*
124 * Wait for the write command to complete.
125 */
126 ret = mv88e6xxx_reg_wait_ready(bus, sw_addr);
127 if (ret < 0)
128 return ret;
129
130 return 0;
131}
132
133int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
134{
135 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
136 int ret;
137
138 mutex_lock(&ps->smi_mutex);
139 ret = __mv88e6xxx_reg_write(ds->master_mii_bus,
140 ds->pd->sw_addr, addr, reg, val);
141 mutex_unlock(&ps->smi_mutex);
142
143 return ret;
144}
145
146int mv88e6xxx_config_prio(struct dsa_switch *ds)
147{
148 /*
149 * Configure the IP ToS mapping registers.
150 */
151 REG_WRITE(REG_GLOBAL, 0x10, 0x0000);
152 REG_WRITE(REG_GLOBAL, 0x11, 0x0000);
153 REG_WRITE(REG_GLOBAL, 0x12, 0x5555);
154 REG_WRITE(REG_GLOBAL, 0x13, 0x5555);
155 REG_WRITE(REG_GLOBAL, 0x14, 0xaaaa);
156 REG_WRITE(REG_GLOBAL, 0x15, 0xaaaa);
157 REG_WRITE(REG_GLOBAL, 0x16, 0xffff);
158 REG_WRITE(REG_GLOBAL, 0x17, 0xffff);
159
160 /*
161 * Configure the IEEE 802.1p priority mapping register.
162 */
163 REG_WRITE(REG_GLOBAL, 0x18, 0xfa41);
164
165 return 0;
166}
167
168int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr)
169{
170 REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]);
171 REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]);
172 REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]);
173
174 return 0;
175}
176
177int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr)
178{
179 int i;
180 int ret;
181
182 for (i = 0; i < 6; i++) {
183 int j;
184
185 /*
186 * Write the MAC address byte.
187 */
188 REG_WRITE(REG_GLOBAL2, 0x0d, 0x8000 | (i << 8) | addr[i]);
189
190 /*
191 * Wait for the write to complete.
192 */
193 for (j = 0; j < 16; j++) {
194 ret = REG_READ(REG_GLOBAL2, 0x0d);
195 if ((ret & 0x8000) == 0)
196 break;
197 }
198 if (j == 16)
199 return -ETIMEDOUT;
200 }
201
202 return 0;
203}
204
205int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum)
206{
207 if (addr >= 0)
208 return mv88e6xxx_reg_read(ds, addr, regnum);
209 return 0xffff;
210}
211
212int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val)
213{
214 if (addr >= 0)
215 return mv88e6xxx_reg_write(ds, addr, regnum, val);
216 return 0;
217}
218
219#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU
220static int mv88e6xxx_ppu_disable(struct dsa_switch *ds)
221{
222 int ret;
223 int i;
224
225 ret = REG_READ(REG_GLOBAL, 0x04);
226 REG_WRITE(REG_GLOBAL, 0x04, ret & ~0x4000);
227
228 for (i = 0; i < 1000; i++) {
229 ret = REG_READ(REG_GLOBAL, 0x00);
230 msleep(1);
231 if ((ret & 0xc000) != 0xc000)
232 return 0;
233 }
234
235 return -ETIMEDOUT;
236}
237
238static int mv88e6xxx_ppu_enable(struct dsa_switch *ds)
239{
240 int ret;
241 int i;
242
243 ret = REG_READ(REG_GLOBAL, 0x04);
244 REG_WRITE(REG_GLOBAL, 0x04, ret | 0x4000);
245
246 for (i = 0; i < 1000; i++) {
247 ret = REG_READ(REG_GLOBAL, 0x00);
248 msleep(1);
249 if ((ret & 0xc000) == 0xc000)
250 return 0;
251 }
252
253 return -ETIMEDOUT;
254}
255
256static void mv88e6xxx_ppu_reenable_work(struct work_struct *ugly)
257{
258 struct mv88e6xxx_priv_state *ps;
259
260 ps = container_of(ugly, struct mv88e6xxx_priv_state, ppu_work);
261 if (mutex_trylock(&ps->ppu_mutex)) {
262 struct dsa_switch *ds = ((struct dsa_switch *)ps) - 1;
263
264 if (mv88e6xxx_ppu_enable(ds) == 0)
265 ps->ppu_disabled = 0;
266 mutex_unlock(&ps->ppu_mutex);
267 }
268}
269
270static void mv88e6xxx_ppu_reenable_timer(unsigned long _ps)
271{
272 struct mv88e6xxx_priv_state *ps = (void *)_ps;
273
274 schedule_work(&ps->ppu_work);
275}
276
277static int mv88e6xxx_ppu_access_get(struct dsa_switch *ds)
278{
279 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
280 int ret;
281
282 mutex_lock(&ps->ppu_mutex);
283
284 /*
285 * If the PHY polling unit is enabled, disable it so that
286 * we can access the PHY registers. If it was already
287 * disabled, cancel the timer that is going to re-enable
288 * it.
289 */
290 if (!ps->ppu_disabled) {
291 ret = mv88e6xxx_ppu_disable(ds);
292 if (ret < 0) {
293 mutex_unlock(&ps->ppu_mutex);
294 return ret;
295 }
296 ps->ppu_disabled = 1;
297 } else {
298 del_timer(&ps->ppu_timer);
299 ret = 0;
300 }
301
302 return ret;
303}
304
305static void mv88e6xxx_ppu_access_put(struct dsa_switch *ds)
306{
307 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
308
309 /*
310 * Schedule a timer to re-enable the PHY polling unit.
311 */
312 mod_timer(&ps->ppu_timer, jiffies + msecs_to_jiffies(10));
313 mutex_unlock(&ps->ppu_mutex);
314}
315
316void mv88e6xxx_ppu_state_init(struct dsa_switch *ds)
317{
318 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
319
320 mutex_init(&ps->ppu_mutex);
321 INIT_WORK(&ps->ppu_work, mv88e6xxx_ppu_reenable_work);
322 init_timer(&ps->ppu_timer);
323 ps->ppu_timer.data = (unsigned long)ps;
324 ps->ppu_timer.function = mv88e6xxx_ppu_reenable_timer;
325}
326
327int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum)
328{
329 int ret;
330
331 ret = mv88e6xxx_ppu_access_get(ds);
332 if (ret >= 0) {
333 ret = mv88e6xxx_reg_read(ds, addr, regnum);
334 mv88e6xxx_ppu_access_put(ds);
335 }
336
337 return ret;
338}
339
340int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr,
341 int regnum, u16 val)
342{
343 int ret;
344
345 ret = mv88e6xxx_ppu_access_get(ds);
346 if (ret >= 0) {
347 ret = mv88e6xxx_reg_write(ds, addr, regnum, val);
348 mv88e6xxx_ppu_access_put(ds);
349 }
350
351 return ret;
352}
353#endif
354
355void mv88e6xxx_poll_link(struct dsa_switch *ds)
356{
357 int i;
358
359 for (i = 0; i < DSA_MAX_PORTS; i++) {
360 struct net_device *dev;
361 int uninitialized_var(port_status);
362 int link;
363 int speed;
364 int duplex;
365 int fc;
366
367 dev = ds->ports[i];
368 if (dev == NULL)
369 continue;
370
371 link = 0;
372 if (dev->flags & IFF_UP) {
373 port_status = mv88e6xxx_reg_read(ds, REG_PORT(i), 0x00);
374 if (port_status < 0)
375 continue;
376
377 link = !!(port_status & 0x0800);
378 }
379
380 if (!link) {
381 if (netif_carrier_ok(dev)) {
382 printk(KERN_INFO "%s: link down\n", dev->name);
383 netif_carrier_off(dev);
384 }
385 continue;
386 }
387
388 switch (port_status & 0x0300) {
389 case 0x0000:
390 speed = 10;
391 break;
392 case 0x0100:
393 speed = 100;
394 break;
395 case 0x0200:
396 speed = 1000;
397 break;
398 default:
399 speed = -1;
400 break;
401 }
402 duplex = (port_status & 0x0400) ? 1 : 0;
403 fc = (port_status & 0x8000) ? 1 : 0;
404
405 if (!netif_carrier_ok(dev)) {
406 printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, "
407 "flow control %sabled\n", dev->name,
408 speed, duplex ? "full" : "half",
409 fc ? "en" : "dis");
410 netif_carrier_on(dev);
411 }
412 }
413}
414
415static int mv88e6xxx_stats_wait(struct dsa_switch *ds)
416{
417 int ret;
418 int i;
419
420 for (i = 0; i < 10; i++) {
421 ret = REG_READ(REG_GLOBAL, 0x1d);
422 if ((ret & 0x8000) == 0)
423 return 0;
424 }
425
426 return -ETIMEDOUT;
427}
428
429static int mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port)
430{
431 int ret;
432
433 /*
434 * Snapshot the hardware statistics counters for this port.
435 */
436 REG_WRITE(REG_GLOBAL, 0x1d, 0xdc00 | port);
437
438 /*
439 * Wait for the snapshotting to complete.
440 */
441 ret = mv88e6xxx_stats_wait(ds);
442 if (ret < 0)
443 return ret;
444
445 return 0;
446}
447
448static void mv88e6xxx_stats_read(struct dsa_switch *ds, int stat, u32 *val)
449{
450 u32 _val;
451 int ret;
452
453 *val = 0;
454
455 ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x1d, 0xcc00 | stat);
456 if (ret < 0)
457 return;
458
459 ret = mv88e6xxx_stats_wait(ds);
460 if (ret < 0)
461 return;
462
463 ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1e);
464 if (ret < 0)
465 return;
466
467 _val = ret << 16;
468
469 ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1f);
470 if (ret < 0)
471 return;
472
473 *val = _val | ret;
474}
475
476void mv88e6xxx_get_strings(struct dsa_switch *ds,
477 int nr_stats, struct mv88e6xxx_hw_stat *stats,
478 int port, uint8_t *data)
479{
480 int i;
481
482 for (i = 0; i < nr_stats; i++) {
483 memcpy(data + i * ETH_GSTRING_LEN,
484 stats[i].string, ETH_GSTRING_LEN);
485 }
486}
487
488void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds,
489 int nr_stats, struct mv88e6xxx_hw_stat *stats,
490 int port, uint64_t *data)
491{
492 struct mv88e6xxx_priv_state *ps = (void *)(ds + 1);
493 int ret;
494 int i;
495
496 mutex_lock(&ps->stats_mutex);
497
498 ret = mv88e6xxx_stats_snapshot(ds, port);
499 if (ret < 0) {
500 mutex_unlock(&ps->stats_mutex);
501 return;
502 }
503
504 /*
505 * Read each of the counters.
506 */
507 for (i = 0; i < nr_stats; i++) {
508 struct mv88e6xxx_hw_stat *s = stats + i;
509 u32 low;
510 u32 high;
511
512 mv88e6xxx_stats_read(ds, s->reg, &low);
513 if (s->sizeof_stat == 8)
514 mv88e6xxx_stats_read(ds, s->reg + 1, &high);
515 else
516 high = 0;
517
518 data[i] = (((u64)high) << 32) | low;
519 }
520
521 mutex_unlock(&ps->stats_mutex);
522}
diff --git a/net/dsa/mv88e6xxx.h b/net/dsa/mv88e6xxx.h
new file mode 100644
index 00000000000..61156ca26a0
--- /dev/null
+++ b/net/dsa/mv88e6xxx.h
@@ -0,0 +1,95 @@
1/*
2 * net/dsa/mv88e6xxx.h - Marvell 88e6xxx switch chip support
3 * Copyright (c) 2008 Marvell Semiconductor
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 */
10
11#ifndef __MV88E6XXX_H
12#define __MV88E6XXX_H
13
14#define REG_PORT(p) (0x10 + (p))
15#define REG_GLOBAL 0x1b
16#define REG_GLOBAL2 0x1c
17
18struct mv88e6xxx_priv_state {
19 /*
20 * When using multi-chip addressing, this mutex protects
21 * access to the indirect access registers. (In single-chip
22 * mode, this mutex is effectively useless.)
23 */
24 struct mutex smi_mutex;
25
26#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU
27 /*
28 * Handles automatic disabling and re-enabling of the PHY
29 * polling unit.
30 */
31 struct mutex ppu_mutex;
32 int ppu_disabled;
33 struct work_struct ppu_work;
34 struct timer_list ppu_timer;
35#endif
36
37 /*
38 * This mutex serialises access to the statistics unit.
39 * Hold this mutex over snapshot + dump sequences.
40 */
41 struct mutex stats_mutex;
42
43 int id; /* switch product id */
44};
45
46struct mv88e6xxx_hw_stat {
47 char string[ETH_GSTRING_LEN];
48 int sizeof_stat;
49 int reg;
50};
51
52int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg);
53int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg);
54int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr,
55 int reg, u16 val);
56int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val);
57int mv88e6xxx_config_prio(struct dsa_switch *ds);
58int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr);
59int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr);
60int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum);
61int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val);
62void mv88e6xxx_ppu_state_init(struct dsa_switch *ds);
63int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum);
64int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr,
65 int regnum, u16 val);
66void mv88e6xxx_poll_link(struct dsa_switch *ds);
67void mv88e6xxx_get_strings(struct dsa_switch *ds,
68 int nr_stats, struct mv88e6xxx_hw_stat *stats,
69 int port, uint8_t *data);
70void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds,
71 int nr_stats, struct mv88e6xxx_hw_stat *stats,
72 int port, uint64_t *data);
73
74#define REG_READ(addr, reg) \
75 ({ \
76 int __ret; \
77 \
78 __ret = mv88e6xxx_reg_read(ds, addr, reg); \
79 if (__ret < 0) \
80 return __ret; \
81 __ret; \
82 })
83
84#define REG_WRITE(addr, reg, val) \
85 ({ \
86 int __ret; \
87 \
88 __ret = mv88e6xxx_reg_write(ds, addr, reg, val); \
89 if (__ret < 0) \
90 return __ret; \
91 })
92
93
94
95#endif
diff --git a/net/econet/Kconfig b/net/econet/Kconfig
new file mode 100644
index 00000000000..39a2d2975e0
--- /dev/null
+++ b/net/econet/Kconfig
@@ -0,0 +1,36 @@
1#
2# Acorn Econet/AUN protocols
3#
4
5config ECONET
6 tristate "Acorn Econet/AUN protocols (EXPERIMENTAL)"
7 depends on EXPERIMENTAL && INET
8 ---help---
9 Econet is a fairly old and slow networking protocol mainly used by
10 Acorn computers to access file and print servers. It uses native
11 Econet network cards. AUN is an implementation of the higher level
12 parts of Econet that runs over ordinary Ethernet connections, on
13 top of the UDP packet protocol, which in turn runs on top of the
14 Internet protocol IP.
15
16 If you say Y here, you can choose with the next two options whether
17 to send Econet/AUN traffic over a UDP Ethernet connection or over
18 a native Econet network card.
19
20 To compile this driver as a module, choose M here: the module
21 will be called econet.
22
23config ECONET_AUNUDP
24 bool "AUN over UDP"
25 depends on ECONET
26 help
27 Say Y here if you want to send Econet/AUN traffic over a UDP
28 connection (UDP is a packet based protocol that runs on top of the
29 Internet protocol IP) using an ordinary Ethernet network card.
30
31config ECONET_NATIVE
32 bool "Native Econet"
33 depends on ECONET
34 help
35 Say Y here if you have a native Econet network card installed in
36 your computer.
diff --git a/net/econet/Makefile b/net/econet/Makefile
new file mode 100644
index 00000000000..05fae8be2fe
--- /dev/null
+++ b/net/econet/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for Econet support code.
3#
4
5obj-$(CONFIG_ECONET) += econet.o
6
7econet-y := af_econet.o
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
new file mode 100644
index 00000000000..1c1f26c5d67
--- /dev/null
+++ b/net/econet/af_econet.c
@@ -0,0 +1,1170 @@
1/*
2 * An implementation of the Acorn Econet and AUN protocols.
3 * Philip Blundell <philb@gnu.org>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 *
10 */
11
12#define pr_fmt(fmt) fmt
13
14#include <linux/module.h>
15
16#include <linux/types.h>
17#include <linux/kernel.h>
18#include <linux/string.h>
19#include <linux/mm.h>
20#include <linux/socket.h>
21#include <linux/sockios.h>
22#include <linux/in.h>
23#include <linux/errno.h>
24#include <linux/interrupt.h>
25#include <linux/if_ether.h>
26#include <linux/netdevice.h>
27#include <linux/inetdevice.h>
28#include <linux/route.h>
29#include <linux/inet.h>
30#include <linux/etherdevice.h>
31#include <linux/if_arp.h>
32#include <linux/wireless.h>
33#include <linux/skbuff.h>
34#include <linux/udp.h>
35#include <linux/slab.h>
36#include <linux/vmalloc.h>
37#include <net/sock.h>
38#include <net/inet_common.h>
39#include <linux/stat.h>
40#include <linux/init.h>
41#include <linux/if_ec.h>
42#include <net/udp.h>
43#include <net/ip.h>
44#include <linux/spinlock.h>
45#include <linux/rcupdate.h>
46#include <linux/bitops.h>
47#include <linux/mutex.h>
48
49#include <linux/uaccess.h>
50#include <asm/system.h>
51
52static const struct proto_ops econet_ops;
53static struct hlist_head econet_sklist;
54static DEFINE_SPINLOCK(econet_lock);
55static DEFINE_MUTEX(econet_mutex);
56
57/* Since there are only 256 possible network numbers (or fewer, depending
58   on how you count), it makes sense to use a simple lookup table. */
59static struct net_device *net2dev_map[256];
60
61#define EC_PORT_IP 0xd2
62
63#ifdef CONFIG_ECONET_AUNUDP
64static DEFINE_SPINLOCK(aun_queue_lock);
65static struct socket *udpsock;
66#define AUN_PORT 0x8000
67
68struct aunhdr {
69 unsigned char code; /* AUN magic protocol byte */
70 unsigned char port;
71 unsigned char cb;
72 unsigned char pad;
73 unsigned long handle;
74};
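/*
 * AUN 'code' values as used on the wire by this file (see aun_send_response(),
 * aun_incoming() and aun_data_available() below): 2 = data frame,
 * 3 = positive acknowledgement, 4 = negative acknowledgement ("not listening").
 */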
75
76static unsigned long aun_seq;
77
78/* Queue of packets waiting to be transmitted. */
79static struct sk_buff_head aun_queue;
80static struct timer_list ab_cleanup_timer;
81
82#endif /* CONFIG_ECONET_AUNUDP */
83
84/* Per-packet information */
85struct ec_cb {
86 struct sockaddr_ec sec;
87 unsigned long cookie; /* Supplied by user. */
88#ifdef CONFIG_ECONET_AUNUDP
89 int done;
90 unsigned long seq; /* Sequencing */
91 unsigned long timeout; /* Timeout */
92 unsigned long start; /* jiffies */
93#endif
94#ifdef CONFIG_ECONET_NATIVE
95 void (*sent)(struct sk_buff *, int result);
96#endif
97};
98
99static void econet_remove_socket(struct hlist_head *list, struct sock *sk)
100{
101 spin_lock_bh(&econet_lock);
102 sk_del_node_init(sk);
103 spin_unlock_bh(&econet_lock);
104}
105
106static void econet_insert_socket(struct hlist_head *list, struct sock *sk)
107{
108 spin_lock_bh(&econet_lock);
109 sk_add_node(sk, list);
110 spin_unlock_bh(&econet_lock);
111}
112
113/*
114 * Pull a packet from our receive queue and hand it to the user.
115 * If necessary we block.
116 */
117
118static int econet_recvmsg(struct kiocb *iocb, struct socket *sock,
119 struct msghdr *msg, size_t len, int flags)
120{
121 struct sock *sk = sock->sk;
122 struct sk_buff *skb;
123 size_t copied;
124 int err;
125
126 msg->msg_namelen = sizeof(struct sockaddr_ec);
127
128 mutex_lock(&econet_mutex);
129
130 /*
131 * Call the generic datagram receiver. This handles all sorts
132 * of horrible races and re-entrancy so we can forget about it
133 * in the protocol layers.
134 *
135	 * Now it will return ENETDOWN, if the device has just gone down,
136 * but then it will block.
137 */
138
139 skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
140
141 /*
142	 * An error occurred, so return it. Because skb_recv_datagram()
143	 * handles the blocking for us, we don't need to see or worry about
144	 * blocking retries.
145 */
146
147 if (skb == NULL)
148 goto out;
149
150 /*
151	 * You lose any data beyond the buffer you gave. If this worries a
152	 * user program, it can ask the device for its MTU anyway.
153 */
154
155 copied = skb->len;
156 if (copied > len) {
157 copied = len;
158 msg->msg_flags |= MSG_TRUNC;
159 }
160
161 /* We can't use skb_copy_datagram here */
162 err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
163 if (err)
164 goto out_free;
165 sk->sk_stamp = skb->tstamp;
166
167 if (msg->msg_name)
168 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
169
170 /*
171 * Free or return the buffer as appropriate. Again this
172 * hides all the races and re-entrancy issues from us.
173 */
174 err = copied;
175
176out_free:
177 skb_free_datagram(sk, skb);
178out:
179 mutex_unlock(&econet_mutex);
180 return err;
181}
182
183/*
184 * Bind an Econet socket.
185 */
186
187static int econet_bind(struct socket *sock, struct sockaddr *uaddr,
188 int addr_len)
189{
190 struct sockaddr_ec *sec = (struct sockaddr_ec *)uaddr;
191 struct sock *sk;
192 struct econet_sock *eo;
193
194 /*
195 * Check legality
196 */
197
198 if (addr_len < sizeof(struct sockaddr_ec) ||
199 sec->sec_family != AF_ECONET)
200 return -EINVAL;
201
202 mutex_lock(&econet_mutex);
203
204 sk = sock->sk;
205 eo = ec_sk(sk);
206
207 eo->cb = sec->cb;
208 eo->port = sec->port;
209 eo->station = sec->addr.station;
210 eo->net = sec->addr.net;
211
212 mutex_unlock(&econet_mutex);
213
214 return 0;
215}
216
217#if defined(CONFIG_ECONET_AUNUDP) || defined(CONFIG_ECONET_NATIVE)
218/*
219 * Queue a transmit result for the user to be told about.
220 */
221
222static void tx_result(struct sock *sk, unsigned long cookie, int result)
223{
224 struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC);
225 struct ec_cb *eb;
226 struct sockaddr_ec *sec;
227
228 if (skb == NULL) {
229 pr_debug("econet: memory squeeze, transmit result dropped\n");
230 return;
231 }
232
233 eb = (struct ec_cb *)&skb->cb;
234 sec = (struct sockaddr_ec *)&eb->sec;
235 memset(sec, 0, sizeof(struct sockaddr_ec));
236 sec->cookie = cookie;
237 sec->type = ECTYPE_TRANSMIT_STATUS | result;
238 sec->sec_family = AF_ECONET;
239
240 if (sock_queue_rcv_skb(sk, skb) < 0)
241 kfree_skb(skb);
242}
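/*
 * Note: the result reaches userspace as a zero-length datagram whose address
 * (copied out of skb->cb by econet_recvmsg()) carries
 * ECTYPE_TRANSMIT_STATUS | result in sockaddr_ec.type, together with the
 * cookie the sender originally supplied.
 */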
243#endif
244
245#ifdef CONFIG_ECONET_NATIVE
246/*
247 * Called by the Econet hardware driver when a packet transmit
248 * has completed. Tell the user.
249 */
250
251static void ec_tx_done(struct sk_buff *skb, int result)
252{
253 struct ec_cb *eb = (struct ec_cb *)&skb->cb;
254 tx_result(skb->sk, eb->cookie, result);
255}
256#endif
257
258/*
259 * Send a packet. We have to work out which device it's going out on
260 * and hence whether to use real Econet or the UDP emulation.
261 */
262
263static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
264 struct msghdr *msg, size_t len)
265{
266 struct sockaddr_ec *saddr = (struct sockaddr_ec *)msg->msg_name;
267 struct net_device *dev;
268 struct ec_addr addr;
269 int err;
270 unsigned char port, cb;
271#if defined(CONFIG_ECONET_AUNUDP) || defined(CONFIG_ECONET_NATIVE)
272 struct sock *sk = sock->sk;
273 struct sk_buff *skb;
274 struct ec_cb *eb;
275#endif
276#ifdef CONFIG_ECONET_AUNUDP
277 struct msghdr udpmsg;
278 struct iovec iov[2];
279 struct aunhdr ah;
280 struct sockaddr_in udpdest;
281 __kernel_size_t size;
282 mm_segment_t oldfs;
283 char *userbuf;
284#endif
285
286 /*
287 * Check the flags.
288 */
289
290 if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
291 return -EINVAL;
292
293 /*
294 * Get and verify the address.
295 */
296
297 mutex_lock(&econet_mutex);
298
299 if (saddr == NULL || msg->msg_namelen < sizeof(struct sockaddr_ec)) {
300 mutex_unlock(&econet_mutex);
301 return -EINVAL;
302 }
303 addr.station = saddr->addr.station;
304 addr.net = saddr->addr.net;
305 port = saddr->port;
306 cb = saddr->cb;
307
308 /* Look for a device with the right network number. */
309 dev = net2dev_map[addr.net];
310
311 /* If not directly reachable, use some default */
312 if (dev == NULL) {
313 dev = net2dev_map[0];
314 /* No interfaces at all? */
315 if (dev == NULL) {
316 mutex_unlock(&econet_mutex);
317 return -ENETDOWN;
318 }
319 }
320
321 if (dev->type == ARPHRD_ECONET) {
322 /* Real hardware Econet. We're not worthy etc. */
323#ifdef CONFIG_ECONET_NATIVE
324 unsigned short proto = 0;
325 int res;
326
327 if (len + 15 > dev->mtu) {
328 mutex_unlock(&econet_mutex);
329 return -EMSGSIZE;
330 }
331
332 dev_hold(dev);
333
334 skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
335 msg->msg_flags & MSG_DONTWAIT, &err);
336 if (skb == NULL)
337 goto out_unlock;
338
339 skb_reserve(skb, LL_RESERVED_SPACE(dev));
340 skb_reset_network_header(skb);
341
342 eb = (struct ec_cb *)&skb->cb;
343
344 eb->cookie = saddr->cookie;
345 eb->sec = *saddr;
346 eb->sent = ec_tx_done;
347
348 err = -EINVAL;
349 res = dev_hard_header(skb, dev, ntohs(proto), &addr, NULL, len);
350 if (res < 0)
351 goto out_free;
352 if (res > 0) {
353 struct ec_framehdr *fh;
354 /* Poke in our control byte and
355 port number. Hack, hack. */
356 fh = (struct ec_framehdr *)skb->data;
357 fh->cb = cb;
358 fh->port = port;
359 if (sock->type != SOCK_DGRAM) {
360 skb_reset_tail_pointer(skb);
361 skb->len = 0;
362 }
363 }
364
365 /* Copy the data. Returns -EFAULT on error */
366 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
367 skb->protocol = proto;
368 skb->dev = dev;
369 skb->priority = sk->sk_priority;
370 if (err)
371 goto out_free;
372
373 err = -ENETDOWN;
374 if (!(dev->flags & IFF_UP))
375 goto out_free;
376
377 /*
378 * Now send it
379 */
380
381 dev_queue_xmit(skb);
382 dev_put(dev);
383 mutex_unlock(&econet_mutex);
384 return len;
385
386out_free:
387 kfree_skb(skb);
388out_unlock:
389 if (dev)
390 dev_put(dev);
391#else
392 err = -EPROTOTYPE;
393#endif
394 mutex_unlock(&econet_mutex);
395
396 return err;
397 }
398
399#ifdef CONFIG_ECONET_AUNUDP
400 /* AUN virtual Econet. */
401
402 if (udpsock == NULL) {
403 mutex_unlock(&econet_mutex);
404 return -ENETDOWN; /* No socket - can't send */
405 }
406
407 if (len > 32768) {
408 err = -E2BIG;
409 goto error;
410 }
411
412 /* Make up a UDP datagram and hand it off to some higher intellect. */
413
414 memset(&udpdest, 0, sizeof(udpdest));
415 udpdest.sin_family = AF_INET;
416 udpdest.sin_port = htons(AUN_PORT);
417
418	/* At the moment we use the stupid Acorn scheme where Econet address
419	   y.x maps to IP a.b.c.x. This should be replaced with something
420	   more flexible and more aware of subnet masks. */
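	/*
	 * Worked example of the mapping implemented just below: with a local
	 * interface address of, say, 192.168.0.1 and a destination station of
	 * 42, the AUN datagram is sent to UDP address 192.168.0.42 (the /24
	 * network part ORed with the station number). Addresses are
	 * illustrative only.
	 */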
421 {
422 struct in_device *idev;
423 unsigned long network = 0;
424
425 rcu_read_lock();
426 idev = __in_dev_get_rcu(dev);
427 if (idev) {
428 if (idev->ifa_list)
429 network = ntohl(idev->ifa_list->ifa_address) &
430 0xffffff00; /* !!! */
431 }
432 rcu_read_unlock();
433 udpdest.sin_addr.s_addr = htonl(network | addr.station);
434 }
435
436 memset(&ah, 0, sizeof(ah));
437 ah.port = port;
438 ah.cb = cb & 0x7f;
439 ah.code = 2; /* magic */
440
441 /* tack our header on the front of the iovec */
442 size = sizeof(struct aunhdr);
443 iov[0].iov_base = (void *)&ah;
444 iov[0].iov_len = size;
445
446 userbuf = vmalloc(len);
447 if (userbuf == NULL) {
448 err = -ENOMEM;
449 goto error;
450 }
451
452 iov[1].iov_base = userbuf;
453 iov[1].iov_len = len;
454 err = memcpy_fromiovec(userbuf, msg->msg_iov, len);
455 if (err)
456 goto error_free_buf;
457
458 /* Get a skbuff (no data, just holds our cb information) */
459 skb = sock_alloc_send_skb(sk, 0, msg->msg_flags & MSG_DONTWAIT, &err);
460 if (skb == NULL)
461 goto error_free_buf;
462
463 eb = (struct ec_cb *)&skb->cb;
464
465 eb->cookie = saddr->cookie;
466 eb->timeout = 5 * HZ;
467 eb->start = jiffies;
468 ah.handle = aun_seq;
469 eb->seq = (aun_seq++);
470 eb->sec = *saddr;
471
472 skb_queue_tail(&aun_queue, skb);
473
474 udpmsg.msg_name = (void *)&udpdest;
475 udpmsg.msg_namelen = sizeof(udpdest);
476 udpmsg.msg_iov = &iov[0];
477 udpmsg.msg_iovlen = 2;
478 udpmsg.msg_control = NULL;
479 udpmsg.msg_controllen = 0;
480 udpmsg.msg_flags = 0;
481
482 oldfs = get_fs();
483 set_fs(KERNEL_DS); /* More privs :-) */
484 err = sock_sendmsg(udpsock, &udpmsg, size);
485 set_fs(oldfs);
486
487error_free_buf:
488 vfree(userbuf);
489error:
490#else
491 err = -EPROTOTYPE;
492#endif
493 mutex_unlock(&econet_mutex);
494
495 return err;
496}
497
498/*
499 * Look up the address of a socket.
500 */
501
502static int econet_getname(struct socket *sock, struct sockaddr *uaddr,
503 int *uaddr_len, int peer)
504{
505 struct sock *sk;
506 struct econet_sock *eo;
507 struct sockaddr_ec *sec = (struct sockaddr_ec *)uaddr;
508
509 if (peer)
510 return -EOPNOTSUPP;
511
512 memset(sec, 0, sizeof(*sec));
513 mutex_lock(&econet_mutex);
514
515 sk = sock->sk;
516 eo = ec_sk(sk);
517
518 sec->sec_family = AF_ECONET;
519 sec->port = eo->port;
520 sec->addr.station = eo->station;
521 sec->addr.net = eo->net;
522
523 mutex_unlock(&econet_mutex);
524
525 *uaddr_len = sizeof(*sec);
526 return 0;
527}
528
529static void econet_destroy_timer(unsigned long data)
530{
531 struct sock *sk = (struct sock *)data;
532
533 if (!sk_has_allocations(sk)) {
534 sk_free(sk);
535 return;
536 }
537
538 sk->sk_timer.expires = jiffies + 10 * HZ;
539 add_timer(&sk->sk_timer);
540 pr_debug("econet: socket destroy delayed\n");
541}
542
543/*
544 * Close an econet socket.
545 */
546
547static int econet_release(struct socket *sock)
548{
549 struct sock *sk;
550
551 mutex_lock(&econet_mutex);
552
553 sk = sock->sk;
554 if (!sk)
555 goto out_unlock;
556
557 econet_remove_socket(&econet_sklist, sk);
558
559 /*
560 * Now the socket is dead. No more input will appear.
561 */
562
563 sk->sk_state_change(sk); /* It is useless. Just for sanity. */
564
565 sock_orphan(sk);
566
567 /* Purge queues */
568
569 skb_queue_purge(&sk->sk_receive_queue);
570
571 if (sk_has_allocations(sk)) {
572 sk->sk_timer.data = (unsigned long)sk;
573 sk->sk_timer.expires = jiffies + HZ;
574 sk->sk_timer.function = econet_destroy_timer;
575 add_timer(&sk->sk_timer);
576
577 goto out_unlock;
578 }
579
580 sk_free(sk);
581
582out_unlock:
583 mutex_unlock(&econet_mutex);
584 return 0;
585}
586
587static struct proto econet_proto = {
588 .name = "ECONET",
589 .owner = THIS_MODULE,
590 .obj_size = sizeof(struct econet_sock),
591};
592
593/*
594 * Create an Econet socket
595 */
596
597static int econet_create(struct net *net, struct socket *sock, int protocol,
598 int kern)
599{
600 struct sock *sk;
601 struct econet_sock *eo;
602 int err;
603
604 if (!net_eq(net, &init_net))
605 return -EAFNOSUPPORT;
606
607 /* Econet only provides datagram services. */
608 if (sock->type != SOCK_DGRAM)
609 return -ESOCKTNOSUPPORT;
610
611 sock->state = SS_UNCONNECTED;
612
613 err = -ENOBUFS;
614 sk = sk_alloc(net, PF_ECONET, GFP_KERNEL, &econet_proto);
615 if (sk == NULL)
616 goto out;
617
618 sk->sk_reuse = 1;
619 sock->ops = &econet_ops;
620 sock_init_data(sock, sk);
621
622 eo = ec_sk(sk);
623 sock_reset_flag(sk, SOCK_ZAPPED);
624 sk->sk_family = PF_ECONET;
625 eo->num = protocol;
626
627 econet_insert_socket(&econet_sklist, sk);
628 return 0;
629out:
630 return err;
631}
632
633/*
634 * Handle Econet specific ioctls
635 */
636
637static int ec_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg)
638{
639 struct ifreq ifr;
640 struct ec_device *edev;
641 struct net_device *dev;
642 struct sockaddr_ec *sec;
643 int err;
644
645 /*
646 * Fetch the caller's info block into kernel space
647 */
648
649 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
650 return -EFAULT;
651
652 dev = dev_get_by_name(&init_net, ifr.ifr_name);
653 if (dev == NULL)
654 return -ENODEV;
655
656 sec = (struct sockaddr_ec *)&ifr.ifr_addr;
657
658 mutex_lock(&econet_mutex);
659
660 err = 0;
661 switch (cmd) {
662 case SIOCSIFADDR:
663 if (!capable(CAP_NET_ADMIN)) {
664 err = -EPERM;
665 break;
666 }
667
668 edev = dev->ec_ptr;
669 if (edev == NULL) {
670 /* Magic up a new one. */
671 edev = kzalloc(sizeof(struct ec_device), GFP_KERNEL);
672 if (edev == NULL) {
673 err = -ENOMEM;
674 break;
675 }
676 dev->ec_ptr = edev;
677 } else
678 net2dev_map[edev->net] = NULL;
679 edev->station = sec->addr.station;
680 edev->net = sec->addr.net;
681 net2dev_map[sec->addr.net] = dev;
682 if (!net2dev_map[0])
683 net2dev_map[0] = dev;
684 break;
685
686 case SIOCGIFADDR:
687 edev = dev->ec_ptr;
688 if (edev == NULL) {
689 err = -ENODEV;
690 break;
691 }
692 memset(sec, 0, sizeof(struct sockaddr_ec));
693 sec->addr.station = edev->station;
694 sec->addr.net = edev->net;
695 sec->sec_family = AF_ECONET;
696 dev_put(dev);
697 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
698 err = -EFAULT;
699 break;
700
701 default:
702 err = -EINVAL;
703 break;
704 }
705
706 mutex_unlock(&econet_mutex);
707
708 dev_put(dev);
709
710 return err;
711}
712
713/*
714 * Handle generic ioctls
715 */
716
717static int econet_ioctl(struct socket *sock, unsigned int cmd,
718 unsigned long arg)
719{
720 struct sock *sk = sock->sk;
721 void __user *argp = (void __user *)arg;
722
723 switch (cmd) {
724 case SIOCGSTAMP:
725 return sock_get_timestamp(sk, argp);
726
727 case SIOCGSTAMPNS:
728 return sock_get_timestampns(sk, argp);
729
730 case SIOCSIFADDR:
731 case SIOCGIFADDR:
732 return ec_dev_ioctl(sock, cmd, argp);
733
734 }
735
736 return -ENOIOCTLCMD;
737}
738
739static const struct net_proto_family econet_family_ops = {
740 .family = PF_ECONET,
741 .create = econet_create,
742 .owner = THIS_MODULE,
743};
744
745static const struct proto_ops econet_ops = {
746 .family = PF_ECONET,
747 .owner = THIS_MODULE,
748 .release = econet_release,
749 .bind = econet_bind,
750 .connect = sock_no_connect,
751 .socketpair = sock_no_socketpair,
752 .accept = sock_no_accept,
753 .getname = econet_getname,
754 .poll = datagram_poll,
755 .ioctl = econet_ioctl,
756 .listen = sock_no_listen,
757 .shutdown = sock_no_shutdown,
758 .setsockopt = sock_no_setsockopt,
759 .getsockopt = sock_no_getsockopt,
760 .sendmsg = econet_sendmsg,
761 .recvmsg = econet_recvmsg,
762 .mmap = sock_no_mmap,
763 .sendpage = sock_no_sendpage,
764};
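/*
 * Illustrative userspace sketch (not part of this file): how the socket
 * interface registered above is typically driven. Only the sockaddr_ec
 * fields read by econet_bind() are filled in; AF_ECONET and struct
 * sockaddr_ec come from the kernel headers (linux/if_ec.h), and the port
 * number below is made up.
 */
#if 0	/* example only */
static int econet_example_listener(void)
{
	struct sockaddr_ec sec;
	int fd = socket(AF_ECONET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;
	memset(&sec, 0, sizeof(sec));
	sec.sec_family = AF_ECONET;
	sec.port = 0x99;	/* listen on this (made-up) Econet port */
	sec.addr.station = 0;	/* 0 acts as a wildcard in ec_listening_socket() */
	sec.addr.net = 0;
	if (bind(fd, (struct sockaddr *)&sec, sizeof(sec)) < 0)
		return -1;
	/* datagrams and transmit status reports then arrive via recvmsg() */
	return fd;
}
#endif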
765
766#if defined(CONFIG_ECONET_AUNUDP) || defined(CONFIG_ECONET_NATIVE)
767/*
768 * Find the listening socket, if any, for the given data.
769 */
770
771static struct sock *ec_listening_socket(unsigned char port, unsigned char
772 station, unsigned char net)
773{
774 struct sock *sk;
775 struct hlist_node *node;
776
777 spin_lock(&econet_lock);
778 sk_for_each(sk, node, &econet_sklist) {
779 struct econet_sock *opt = ec_sk(sk);
780 if ((opt->port == port || opt->port == 0) &&
781 (opt->station == station || opt->station == 0) &&
782 (opt->net == net || opt->net == 0)) {
783 sock_hold(sk);
784 goto found;
785 }
786 }
787 sk = NULL;
788found:
789 spin_unlock(&econet_lock);
790 return sk;
791}
792
793/*
794 * Queue a received packet for a socket.
795 */
796
797static int ec_queue_packet(struct sock *sk, struct sk_buff *skb,
798 unsigned char stn, unsigned char net,
799 unsigned char cb, unsigned char port)
800{
801 struct ec_cb *eb = (struct ec_cb *)&skb->cb;
802 struct sockaddr_ec *sec = (struct sockaddr_ec *)&eb->sec;
803
804 memset(sec, 0, sizeof(struct sockaddr_ec));
805 sec->sec_family = AF_ECONET;
806 sec->type = ECTYPE_PACKET_RECEIVED;
807 sec->port = port;
808 sec->cb = cb;
809 sec->addr.net = net;
810 sec->addr.station = stn;
811
812 return sock_queue_rcv_skb(sk, skb);
813}
814#endif
815
816#ifdef CONFIG_ECONET_AUNUDP
817/*
818 * Send an AUN protocol response.
819 */
820
821static void aun_send_response(__u32 addr, unsigned long seq, int code, int cb)
822{
823 struct sockaddr_in sin = {
824 .sin_family = AF_INET,
825 .sin_port = htons(AUN_PORT),
826 .sin_addr = {.s_addr = addr}
827 };
828 struct aunhdr ah = {.code = code, .cb = cb, .handle = seq};
829 struct kvec iov = {.iov_base = (void *)&ah, .iov_len = sizeof(ah)};
830 struct msghdr udpmsg;
831
832 udpmsg.msg_name = (void *)&sin;
833 udpmsg.msg_namelen = sizeof(sin);
834 udpmsg.msg_control = NULL;
835 udpmsg.msg_controllen = 0;
836 udpmsg.msg_flags = 0;
837
838 kernel_sendmsg(udpsock, &udpmsg, &iov, 1, sizeof(ah));
839}
840
841
842/*
843 * Handle incoming AUN packets. Work out if anybody wants them,
844 * and send positive or negative acknowledgements as appropriate.
845 */
846
847static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
848{
849 struct iphdr *ip = ip_hdr(skb);
850 unsigned char stn = ntohl(ip->saddr) & 0xff;
851 struct dst_entry *dst = skb_dst(skb);
852 struct ec_device *edev = NULL;
853 struct sock *sk = NULL;
854 struct sk_buff *newskb;
855
856 if (dst)
857 edev = dst->dev->ec_ptr;
858
859 if (!edev)
860 goto bad;
861
862 sk = ec_listening_socket(ah->port, stn, edev->net);
863 if (sk == NULL)
864 goto bad; /* Nobody wants it */
865
866 newskb = alloc_skb((len - sizeof(struct aunhdr) + 15) & ~15,
867 GFP_ATOMIC);
868 if (newskb == NULL) {
869 pr_debug("AUN: memory squeeze, dropping packet\n");
870 /* Send nack and hope sender tries again */
871 goto bad;
872 }
873
874 memcpy(skb_put(newskb, len - sizeof(struct aunhdr)), (void *)(ah + 1),
875 len - sizeof(struct aunhdr));
876
877 if (ec_queue_packet(sk, newskb, stn, edev->net, ah->cb, ah->port)) {
878 /* Socket is bankrupt. */
879 kfree_skb(newskb);
880 goto bad;
881 }
882
883 aun_send_response(ip->saddr, ah->handle, 3, 0);
884 sock_put(sk);
885 return;
886
887bad:
888 aun_send_response(ip->saddr, ah->handle, 4, 0);
889 if (sk)
890 sock_put(sk);
891}
892
893/*
894 * Handle incoming AUN transmit acknowledgements. If the sequence
895 * number matches something in our backlog then kill it and tell
896 * the user. If the remote took too long to reply then we may have
897 * dropped the packet already.
898 */
899
900static void aun_tx_ack(unsigned long seq, int result)
901{
902 struct sk_buff *skb;
903 unsigned long flags;
904 struct ec_cb *eb;
905
906 spin_lock_irqsave(&aun_queue_lock, flags);
907 skb_queue_walk(&aun_queue, skb) {
908 eb = (struct ec_cb *)&skb->cb;
909 if (eb->seq == seq)
910 goto foundit;
911 }
912 spin_unlock_irqrestore(&aun_queue_lock, flags);
913 pr_debug("AUN: unknown sequence %ld\n", seq);
914 return;
915
916foundit:
917 tx_result(skb->sk, eb->cookie, result);
918 skb_unlink(skb, &aun_queue);
919 spin_unlock_irqrestore(&aun_queue_lock, flags);
920 kfree_skb(skb);
921}
922
923/*
924 * Deal with received AUN frames - sort out what type of thing it is
925 * and hand it to the right function.
926 */
927
928static void aun_data_available(struct sock *sk, int slen)
929{
930 int err;
931 struct sk_buff *skb;
932 unsigned char *data;
933 struct aunhdr *ah;
934 size_t len;
935
936 while ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) {
937 if (err == -EAGAIN) {
938 pr_err("AUN: no data available?!\n");
939 return;
940 }
941 pr_debug("AUN: recvfrom() error %d\n", -err);
942 }
943
944 data = skb_transport_header(skb) + sizeof(struct udphdr);
945 ah = (struct aunhdr *)data;
946 len = skb->len - sizeof(struct udphdr);
947
948 switch (ah->code) {
949 case 2:
950 aun_incoming(skb, ah, len);
951 break;
952 case 3:
953 aun_tx_ack(ah->handle, ECTYPE_TRANSMIT_OK);
954 break;
955 case 4:
956 aun_tx_ack(ah->handle, ECTYPE_TRANSMIT_NOT_LISTENING);
957 break;
958 default:
959 pr_debug("AUN: unknown packet type: %d\n", data[0]);
960 }
961
962 skb_free_datagram(sk, skb);
963}
964
965/*
966 * Called by the timer to manage the AUN transmit queue. If a packet
967 * was sent to a dead or nonexistent host then we will never get an
968 * acknowledgement back. After a few seconds we need to spot this and
969 * drop the packet.
970 */
971
972static void ab_cleanup(unsigned long h)
973{
974 struct sk_buff *skb, *n;
975 unsigned long flags;
976
977 spin_lock_irqsave(&aun_queue_lock, flags);
978 skb_queue_walk_safe(&aun_queue, skb, n) {
979 struct ec_cb *eb = (struct ec_cb *)&skb->cb;
980 if ((jiffies - eb->start) > eb->timeout) {
981 tx_result(skb->sk, eb->cookie,
982 ECTYPE_TRANSMIT_NOT_PRESENT);
983 skb_unlink(skb, &aun_queue);
984 kfree_skb(skb);
985 }
986 }
987 spin_unlock_irqrestore(&aun_queue_lock, flags);
988
989 mod_timer(&ab_cleanup_timer, jiffies + (HZ * 2));
990}
991
992static int __init aun_udp_initialise(void)
993{
994 int error;
995 struct sockaddr_in sin;
996
997 skb_queue_head_init(&aun_queue);
998 setup_timer(&ab_cleanup_timer, ab_cleanup, 0);
999 ab_cleanup_timer.expires = jiffies + (HZ * 2);
1000 add_timer(&ab_cleanup_timer);
1001
1002 memset(&sin, 0, sizeof(sin));
1003 sin.sin_port = htons(AUN_PORT);
1004
1005 /* We can count ourselves lucky Acorn machines are too dim to
1006 speak IPv6. :-) */
1007 error = sock_create_kern(PF_INET, SOCK_DGRAM, 0, &udpsock);
1008 if (error < 0) {
1009 pr_err("AUN: socket error %d\n", -error);
1010 return error;
1011 }
1012
1013 udpsock->sk->sk_reuse = 1;
1014 udpsock->sk->sk_allocation = GFP_ATOMIC; /* we're going to call it
1015 from interrupts */
1016
1017 error = udpsock->ops->bind(udpsock, (struct sockaddr *)&sin,
1018 sizeof(sin));
1019 if (error < 0) {
1020 pr_err("AUN: bind error %d\n", -error);
1021 goto release;
1022 }
1023
1024 udpsock->sk->sk_data_ready = aun_data_available;
1025
1026 return 0;
1027
1028release:
1029 sock_release(udpsock);
1030 udpsock = NULL;
1031 return error;
1032}
1033#endif
1034
1035#ifdef CONFIG_ECONET_NATIVE
1036
1037/*
1038 * Receive an Econet frame from a device.
1039 */
1040
1041static int econet_rcv(struct sk_buff *skb, struct net_device *dev,
1042 struct packet_type *pt, struct net_device *orig_dev)
1043{
1044 struct ec_framehdr *hdr;
1045 struct sock *sk = NULL;
1046 struct ec_device *edev = dev->ec_ptr;
1047
1048 if (!net_eq(dev_net(dev), &init_net))
1049 goto drop;
1050
1051 if (skb->pkt_type == PACKET_OTHERHOST)
1052 goto drop;
1053
1054 if (!edev)
1055 goto drop;
1056
1057 skb = skb_share_check(skb, GFP_ATOMIC);
1058 if (skb == NULL)
1059 return NET_RX_DROP;
1060
1061 if (!pskb_may_pull(skb, sizeof(struct ec_framehdr)))
1062 goto drop;
1063
1064 hdr = (struct ec_framehdr *)skb->data;
1065
1066 /* First check for encapsulated IP */
1067 if (hdr->port == EC_PORT_IP) {
1068 skb->protocol = htons(ETH_P_IP);
1069 skb_pull(skb, sizeof(struct ec_framehdr));
1070 netif_rx(skb);
1071 return NET_RX_SUCCESS;
1072 }
1073
1074 sk = ec_listening_socket(hdr->port, hdr->src_stn, hdr->src_net);
1075 if (!sk)
1076 goto drop;
1077
1078 if (ec_queue_packet(sk, skb, edev->net, hdr->src_stn, hdr->cb,
1079 hdr->port))
1080 goto drop;
1081 sock_put(sk);
1082 return NET_RX_SUCCESS;
1083
1084drop:
1085 if (sk)
1086 sock_put(sk);
1087 kfree_skb(skb);
1088 return NET_RX_DROP;
1089}
1090
1091static struct packet_type econet_packet_type __read_mostly = {
1092 .type = cpu_to_be16(ETH_P_ECONET),
1093 .func = econet_rcv,
1094};
1095
1096static void econet_hw_initialise(void)
1097{
1098 dev_add_pack(&econet_packet_type);
1099}
1100
1101#endif
1102
1103static int econet_notifier(struct notifier_block *this, unsigned long msg,
1104 void *data)
1105{
1106 struct net_device *dev = data;
1107 struct ec_device *edev;
1108
1109 if (!net_eq(dev_net(dev), &init_net))
1110 return NOTIFY_DONE;
1111
1112 switch (msg) {
1113 case NETDEV_UNREGISTER:
1114 /* A device has gone down - kill any data we hold for it. */
1115 edev = dev->ec_ptr;
1116 if (edev) {
1117 if (net2dev_map[0] == dev)
1118 net2dev_map[0] = NULL;
1119 net2dev_map[edev->net] = NULL;
1120 kfree(edev);
1121 dev->ec_ptr = NULL;
1122 }
1123 break;
1124 }
1125
1126 return NOTIFY_DONE;
1127}
1128
1129static struct notifier_block econet_netdev_notifier = {
1130 .notifier_call = econet_notifier,
1131};
1132
1133static void __exit econet_proto_exit(void)
1134{
1135#ifdef CONFIG_ECONET_AUNUDP
1136 del_timer(&ab_cleanup_timer);
1137 if (udpsock)
1138 sock_release(udpsock);
1139#endif
1140 unregister_netdevice_notifier(&econet_netdev_notifier);
1141#ifdef CONFIG_ECONET_NATIVE
1142 dev_remove_pack(&econet_packet_type);
1143#endif
1144 sock_unregister(econet_family_ops.family);
1145 proto_unregister(&econet_proto);
1146}
1147
1148static int __init econet_proto_init(void)
1149{
1150 int err = proto_register(&econet_proto, 0);
1151
1152 if (err != 0)
1153 goto out;
1154 sock_register(&econet_family_ops);
1155#ifdef CONFIG_ECONET_AUNUDP
1156 aun_udp_initialise();
1157#endif
1158#ifdef CONFIG_ECONET_NATIVE
1159 econet_hw_initialise();
1160#endif
1161 register_netdevice_notifier(&econet_netdev_notifier);
1162out:
1163 return err;
1164}
1165
1166module_init(econet_proto_init);
1167module_exit(econet_proto_exit);
1168
1169MODULE_LICENSE("GPL");
1170MODULE_ALIAS_NETPROTO(PF_ECONET);
diff --git a/net/ethernet/pe2.c b/net/ethernet/pe2.c
new file mode 100644
index 00000000000..85d574addbc
--- /dev/null
+++ b/net/ethernet/pe2.c
@@ -0,0 +1,37 @@
1#include <linux/in.h>
2#include <linux/mm.h>
3#include <linux/module.h>
4#include <linux/netdevice.h>
5#include <linux/skbuff.h>
6#include <linux/slab.h>
7
8#include <net/datalink.h>
9
10static int pEII_request(struct datalink_proto *dl,
11 struct sk_buff *skb, unsigned char *dest_node)
12{
13 struct net_device *dev = skb->dev;
14
15 skb->protocol = htons(ETH_P_IPX);
16 dev_hard_header(skb, dev, ETH_P_IPX, dest_node, NULL, skb->len);
17 return dev_queue_xmit(skb);
18}
19
20struct datalink_proto *make_EII_client(void)
21{
22 struct datalink_proto *proto = kmalloc(sizeof(*proto), GFP_ATOMIC);
23
24 if (proto) {
25 proto->header_length = 0;
26 proto->request = pEII_request;
27 }
28
29 return proto;
30}
31EXPORT_SYMBOL(make_EII_client);
32
33void destroy_EII_client(struct datalink_proto *dl)
34{
35 kfree(dl);
36}
37EXPORT_SYMBOL(destroy_EII_client);
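/*
 * Usage note: a protocol wanting plain Ethernet II framing (in practice the
 * IPX code, given the hard-wired ETH_P_IPX above) calls make_EII_client()
 * once, transmits through proto->request(proto, skb, dest_node), and frees
 * the handle with destroy_EII_client() on teardown.
 */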
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
new file mode 100644
index 00000000000..e59aabd0eae
--- /dev/null
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -0,0 +1,637 @@
1/*
2 * This is a module which is used for queueing IPv4 packets and
3 * communicating with userspace via netlink.
4 *
5 * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
6 * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/init.h>
15#include <linux/ip.h>
16#include <linux/notifier.h>
17#include <linux/netdevice.h>
18#include <linux/netfilter.h>
19#include <linux/netfilter_ipv4/ip_queue.h>
20#include <linux/netfilter_ipv4/ip_tables.h>
21#include <linux/netlink.h>
22#include <linux/spinlock.h>
23#include <linux/sysctl.h>
24#include <linux/proc_fs.h>
25#include <linux/seq_file.h>
26#include <linux/security.h>
27#include <linux/net.h>
28#include <linux/mutex.h>
29#include <linux/slab.h>
30#include <net/net_namespace.h>
31#include <net/sock.h>
32#include <net/route.h>
33#include <net/netfilter/nf_queue.h>
34#include <net/ip.h>
35
36#define IPQ_QMAX_DEFAULT 1024
37#define IPQ_PROC_FS_NAME "ip_queue"
38#define NET_IPQ_QMAX 2088
39#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
40
41typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
42
43static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
44static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
45static DEFINE_SPINLOCK(queue_lock);
46static int peer_pid __read_mostly;
47static unsigned int copy_range __read_mostly;
48static unsigned int queue_total;
49static unsigned int queue_dropped = 0;
50static unsigned int queue_user_dropped = 0;
51static struct sock *ipqnl __read_mostly;
52static LIST_HEAD(queue_list);
53static DEFINE_MUTEX(ipqnl_mutex);
54
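/*
 * Overview of the userspace protocol implemented below: a single peer
 * (peer_pid) opens a NETLINK_FIREWALL socket and sends IPQM_MODE to select
 * none/meta/packet copying (__ipq_set_mode()). Queued packets are delivered
 * to it as IPQM_PACKET messages whose packet_id is the kernel's
 * nf_queue_entry pointer (ipq_build_packet_message()); the peer replies with
 * IPQM_VERDICT, optionally carrying a mangled payload, and ipq_set_verdict()
 * reinjects the packet via nf_reinject().
 */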
55static inline void
56__ipq_enqueue_entry(struct nf_queue_entry *entry)
57{
58 list_add_tail(&entry->list, &queue_list);
59 queue_total++;
60}
61
62static inline int
63__ipq_set_mode(unsigned char mode, unsigned int range)
64{
65 int status = 0;
66
67 switch(mode) {
68 case IPQ_COPY_NONE:
69 case IPQ_COPY_META:
70 copy_mode = mode;
71 copy_range = 0;
72 break;
73
74 case IPQ_COPY_PACKET:
75 if (range > 0xFFFF)
76 range = 0xFFFF;
77 copy_range = range;
78 copy_mode = mode;
79 break;
80
81 default:
82 status = -EINVAL;
83
84 }
85 return status;
86}
87
88static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
89
90static inline void
91__ipq_reset(void)
92{
93 peer_pid = 0;
94 net_disable_timestamp();
95 __ipq_set_mode(IPQ_COPY_NONE, 0);
96 __ipq_flush(NULL, 0);
97}
98
99static struct nf_queue_entry *
100ipq_find_dequeue_entry(unsigned long id)
101{
102 struct nf_queue_entry *entry = NULL, *i;
103
104 spin_lock_bh(&queue_lock);
105
106 list_for_each_entry(i, &queue_list, list) {
107 if ((unsigned long)i == id) {
108 entry = i;
109 break;
110 }
111 }
112
113 if (entry) {
114 list_del(&entry->list);
115 queue_total--;
116 }
117
118 spin_unlock_bh(&queue_lock);
119 return entry;
120}
121
122static void
123__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
124{
125 struct nf_queue_entry *entry, *next;
126
127 list_for_each_entry_safe(entry, next, &queue_list, list) {
128 if (!cmpfn || cmpfn(entry, data)) {
129 list_del(&entry->list);
130 queue_total--;
131 nf_reinject(entry, NF_DROP);
132 }
133 }
134}
135
136static void
137ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
138{
139 spin_lock_bh(&queue_lock);
140 __ipq_flush(cmpfn, data);
141 spin_unlock_bh(&queue_lock);
142}
143
144static struct sk_buff *
145ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
146{
147 sk_buff_data_t old_tail;
148 size_t size = 0;
149 size_t data_len = 0;
150 struct sk_buff *skb;
151 struct ipq_packet_msg *pmsg;
152 struct nlmsghdr *nlh;
153 struct timeval tv;
154
155 switch (ACCESS_ONCE(copy_mode)) {
156 case IPQ_COPY_META:
157 case IPQ_COPY_NONE:
158 size = NLMSG_SPACE(sizeof(*pmsg));
159 break;
160
161 case IPQ_COPY_PACKET:
162 if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
163 (*errp = skb_checksum_help(entry->skb)))
164 return NULL;
165
166 data_len = ACCESS_ONCE(copy_range);
167 if (data_len == 0 || data_len > entry->skb->len)
168 data_len = entry->skb->len;
169
170 size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
171 break;
172
173 default:
174 *errp = -EINVAL;
175 return NULL;
176 }
177
178 skb = alloc_skb(size, GFP_ATOMIC);
179 if (!skb)
180 goto nlmsg_failure;
181
182 old_tail = skb->tail;
183 nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
184 pmsg = NLMSG_DATA(nlh);
185 memset(pmsg, 0, sizeof(*pmsg));
186
187 pmsg->packet_id = (unsigned long )entry;
188 pmsg->data_len = data_len;
189 tv = ktime_to_timeval(entry->skb->tstamp);
190 pmsg->timestamp_sec = tv.tv_sec;
191 pmsg->timestamp_usec = tv.tv_usec;
192 pmsg->mark = entry->skb->mark;
193 pmsg->hook = entry->hook;
194 pmsg->hw_protocol = entry->skb->protocol;
195
196 if (entry->indev)
197 strcpy(pmsg->indev_name, entry->indev->name);
198 else
199 pmsg->indev_name[0] = '\0';
200
201 if (entry->outdev)
202 strcpy(pmsg->outdev_name, entry->outdev->name);
203 else
204 pmsg->outdev_name[0] = '\0';
205
206 if (entry->indev && entry->skb->dev &&
207 entry->skb->mac_header != entry->skb->network_header) {
208 pmsg->hw_type = entry->skb->dev->type;
209 pmsg->hw_addrlen = dev_parse_header(entry->skb,
210 pmsg->hw_addr);
211 }
212
213 if (data_len)
214 if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
215 BUG();
216
217 nlh->nlmsg_len = skb->tail - old_tail;
218 return skb;
219
220nlmsg_failure:
221 kfree_skb(skb);
222 *errp = -EINVAL;
223 printk(KERN_ERR "ip_queue: error creating packet message\n");
224 return NULL;
225}
226
227static int
228ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
229{
230 int status = -EINVAL;
231 struct sk_buff *nskb;
232
233 if (copy_mode == IPQ_COPY_NONE)
234 return -EAGAIN;
235
236 nskb = ipq_build_packet_message(entry, &status);
237 if (nskb == NULL)
238 return status;
239
240 spin_lock_bh(&queue_lock);
241
242 if (!peer_pid)
243 goto err_out_free_nskb;
244
245 if (queue_total >= queue_maxlen) {
246 queue_dropped++;
247 status = -ENOSPC;
248 if (net_ratelimit())
249 printk (KERN_WARNING "ip_queue: full at %d entries, "
250				"dropping packet(s). Dropped: %d\n", queue_total,
251 queue_dropped);
252 goto err_out_free_nskb;
253 }
254
255 /* netlink_unicast will either free the nskb or attach it to a socket */
256 status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
257 if (status < 0) {
258 queue_user_dropped++;
259 goto err_out_unlock;
260 }
261
262 __ipq_enqueue_entry(entry);
263
264 spin_unlock_bh(&queue_lock);
265 return status;
266
267err_out_free_nskb:
268 kfree_skb(nskb);
269
270err_out_unlock:
271 spin_unlock_bh(&queue_lock);
272 return status;
273}
274
275static int
276ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
277{
278 int diff;
279 struct iphdr *user_iph = (struct iphdr *)v->payload;
280 struct sk_buff *nskb;
281
282 if (v->data_len < sizeof(*user_iph))
283 return 0;
284 diff = v->data_len - e->skb->len;
285 if (diff < 0) {
286 if (pskb_trim(e->skb, v->data_len))
287 return -ENOMEM;
288 } else if (diff > 0) {
289 if (v->data_len > 0xFFFF)
290 return -EINVAL;
291 if (diff > skb_tailroom(e->skb)) {
292 nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
293 diff, GFP_ATOMIC);
294 if (!nskb) {
295 printk(KERN_WARNING "ip_queue: error "
296 "in mangle, dropping packet\n");
297 return -ENOMEM;
298 }
299 kfree_skb(e->skb);
300 e->skb = nskb;
301 }
302 skb_put(e->skb, diff);
303 }
304 if (!skb_make_writable(e->skb, v->data_len))
305 return -ENOMEM;
306 skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
307 e->skb->ip_summed = CHECKSUM_NONE;
308
309 return 0;
310}
311
312static int
313ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
314{
315 struct nf_queue_entry *entry;
316
317 if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
318 return -EINVAL;
319
320 entry = ipq_find_dequeue_entry(vmsg->id);
321 if (entry == NULL)
322 return -ENOENT;
323 else {
324 int verdict = vmsg->value;
325
326 if (vmsg->data_len && vmsg->data_len == len)
327 if (ipq_mangle_ipv4(vmsg, entry) < 0)
328 verdict = NF_DROP;
329
330 nf_reinject(entry, verdict);
331 return 0;
332 }
333}
334
335static int
336ipq_set_mode(unsigned char mode, unsigned int range)
337{
338 int status;
339
340 spin_lock_bh(&queue_lock);
341 status = __ipq_set_mode(mode, range);
342 spin_unlock_bh(&queue_lock);
343 return status;
344}
345
346static int
347ipq_receive_peer(struct ipq_peer_msg *pmsg,
348 unsigned char type, unsigned int len)
349{
350 int status = 0;
351
352 if (len < sizeof(*pmsg))
353 return -EINVAL;
354
355 switch (type) {
356 case IPQM_MODE:
357 status = ipq_set_mode(pmsg->msg.mode.value,
358 pmsg->msg.mode.range);
359 break;
360
361 case IPQM_VERDICT:
362 status = ipq_set_verdict(&pmsg->msg.verdict,
363 len - sizeof(*pmsg));
364 break;
365 default:
366 status = -EINVAL;
367 }
368 return status;
369}
370
371static int
372dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
373{
374 if (entry->indev)
375 if (entry->indev->ifindex == ifindex)
376 return 1;
377 if (entry->outdev)
378 if (entry->outdev->ifindex == ifindex)
379 return 1;
380#ifdef CONFIG_BRIDGE_NETFILTER
381 if (entry->skb->nf_bridge) {
382 if (entry->skb->nf_bridge->physindev &&
383 entry->skb->nf_bridge->physindev->ifindex == ifindex)
384 return 1;
385 if (entry->skb->nf_bridge->physoutdev &&
386 entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
387 return 1;
388 }
389#endif
390 return 0;
391}
392
393static void
394ipq_dev_drop(int ifindex)
395{
396 ipq_flush(dev_cmp, ifindex);
397}
398
399#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
400
401static inline void
402__ipq_rcv_skb(struct sk_buff *skb)
403{
404 int status, type, pid, flags;
405 unsigned int nlmsglen, skblen;
406 struct nlmsghdr *nlh;
407
408 skblen = skb->len;
409 if (skblen < sizeof(*nlh))
410 return;
411
412 nlh = nlmsg_hdr(skb);
413 nlmsglen = nlh->nlmsg_len;
414 if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
415 return;
416
417 pid = nlh->nlmsg_pid;
418 flags = nlh->nlmsg_flags;
419
420 if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
421 RCV_SKB_FAIL(-EINVAL);
422
423 if (flags & MSG_TRUNC)
424 RCV_SKB_FAIL(-ECOMM);
425
426 type = nlh->nlmsg_type;
427 if (type < NLMSG_NOOP || type >= IPQM_MAX)
428 RCV_SKB_FAIL(-EINVAL);
429
430 if (type <= IPQM_BASE)
431 return;
432
433 if (security_netlink_recv(skb, CAP_NET_ADMIN))
434 RCV_SKB_FAIL(-EPERM);
435
436 spin_lock_bh(&queue_lock);
437
438 if (peer_pid) {
439 if (peer_pid != pid) {
440 spin_unlock_bh(&queue_lock);
441 RCV_SKB_FAIL(-EBUSY);
442 }
443 } else {
444 net_enable_timestamp();
445 peer_pid = pid;
446 }
447
448 spin_unlock_bh(&queue_lock);
449
450 status = ipq_receive_peer(NLMSG_DATA(nlh), type,
451 nlmsglen - NLMSG_LENGTH(0));
452 if (status < 0)
453 RCV_SKB_FAIL(status);
454
455 if (flags & NLM_F_ACK)
456 netlink_ack(skb, nlh, 0);
457}
458
459static void
460ipq_rcv_skb(struct sk_buff *skb)
461{
462 mutex_lock(&ipqnl_mutex);
463 __ipq_rcv_skb(skb);
464 mutex_unlock(&ipqnl_mutex);
465}
466
467static int
468ipq_rcv_dev_event(struct notifier_block *this,
469 unsigned long event, void *ptr)
470{
471 struct net_device *dev = ptr;
472
473 if (!net_eq(dev_net(dev), &init_net))
474 return NOTIFY_DONE;
475
476 /* Drop any packets associated with the downed device */
477 if (event == NETDEV_DOWN)
478 ipq_dev_drop(dev->ifindex);
479 return NOTIFY_DONE;
480}
481
482static struct notifier_block ipq_dev_notifier = {
483 .notifier_call = ipq_rcv_dev_event,
484};
485
486static int
487ipq_rcv_nl_event(struct notifier_block *this,
488 unsigned long event, void *ptr)
489{
490 struct netlink_notify *n = ptr;
491
492 if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) {
493 spin_lock_bh(&queue_lock);
494 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
495 __ipq_reset();
496 spin_unlock_bh(&queue_lock);
497 }
498 return NOTIFY_DONE;
499}
500
501static struct notifier_block ipq_nl_notifier = {
502 .notifier_call = ipq_rcv_nl_event,
503};
504
505#ifdef CONFIG_SYSCTL
506static struct ctl_table_header *ipq_sysctl_header;
507
508static ctl_table ipq_table[] = {
509 {
510 .procname = NET_IPQ_QMAX_NAME,
511 .data = &queue_maxlen,
512 .maxlen = sizeof(queue_maxlen),
513 .mode = 0644,
514 .proc_handler = proc_dointvec
515 },
516 { }
517};
518#endif
519
520#ifdef CONFIG_PROC_FS
521static int ip_queue_show(struct seq_file *m, void *v)
522{
523 spin_lock_bh(&queue_lock);
524
525 seq_printf(m,
526 "Peer PID : %d\n"
527 "Copy mode : %hu\n"
528 "Copy range : %u\n"
529 "Queue length : %u\n"
530 "Queue max. length : %u\n"
531 "Queue dropped : %u\n"
532 "Netlink dropped : %u\n",
533 peer_pid,
534 copy_mode,
535 copy_range,
536 queue_total,
537 queue_maxlen,
538 queue_dropped,
539 queue_user_dropped);
540
541 spin_unlock_bh(&queue_lock);
542 return 0;
543}
544
545static int ip_queue_open(struct inode *inode, struct file *file)
546{
547 return single_open(file, ip_queue_show, NULL);
548}
549
550static const struct file_operations ip_queue_proc_fops = {
551 .open = ip_queue_open,
552 .read = seq_read,
553 .llseek = seq_lseek,
554 .release = single_release,
555 .owner = THIS_MODULE,
556};
557#endif
558
559static const struct nf_queue_handler nfqh = {
560 .name = "ip_queue",
561 .outfn = &ipq_enqueue_packet,
562};
563
564static int __init ip_queue_init(void)
565{
566 int status = -ENOMEM;
567 struct proc_dir_entry *proc __maybe_unused;
568
569 netlink_register_notifier(&ipq_nl_notifier);
570 ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
571 ipq_rcv_skb, NULL, THIS_MODULE);
572 if (ipqnl == NULL) {
573 printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
574 goto cleanup_netlink_notifier;
575 }
576
577#ifdef CONFIG_PROC_FS
578 proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
579 &ip_queue_proc_fops);
580 if (!proc) {
581 printk(KERN_ERR "ip_queue: failed to create proc entry\n");
582 goto cleanup_ipqnl;
583 }
584#endif
585 register_netdevice_notifier(&ipq_dev_notifier);
586#ifdef CONFIG_SYSCTL
587 ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
588#endif
589 status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);
590 if (status < 0) {
591 printk(KERN_ERR "ip_queue: failed to register queue handler\n");
592 goto cleanup_sysctl;
593 }
594 return status;
595
596cleanup_sysctl:
597#ifdef CONFIG_SYSCTL
598 unregister_sysctl_table(ipq_sysctl_header);
599#endif
600 unregister_netdevice_notifier(&ipq_dev_notifier);
601 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
602cleanup_ipqnl: __maybe_unused
603 netlink_kernel_release(ipqnl);
604 mutex_lock(&ipqnl_mutex);
605 mutex_unlock(&ipqnl_mutex);
606
607cleanup_netlink_notifier:
608 netlink_unregister_notifier(&ipq_nl_notifier);
609 return status;
610}
611
612static void __exit ip_queue_fini(void)
613{
614 nf_unregister_queue_handlers(&nfqh);
615
616 ipq_flush(NULL, 0);
617
618#ifdef CONFIG_SYSCTL
619 unregister_sysctl_table(ipq_sysctl_header);
620#endif
621 unregister_netdevice_notifier(&ipq_dev_notifier);
622 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
623
624 netlink_kernel_release(ipqnl);
625 mutex_lock(&ipqnl_mutex);
626 mutex_unlock(&ipqnl_mutex);
627
628 netlink_unregister_notifier(&ipq_nl_notifier);
629}
630
631MODULE_DESCRIPTION("IPv4 packet queue handler");
632MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
633MODULE_LICENSE("GPL");
634MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL);
635
636module_init(ip_queue_init);
637module_exit(ip_queue_fini);
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
new file mode 100644
index 00000000000..d76d6c9ed94
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -0,0 +1,516 @@
1/*
2 * This is a module which is used for logging packets.
3 */
4
5/* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/module.h>
14#include <linux/spinlock.h>
15#include <linux/skbuff.h>
16#include <linux/if_arp.h>
17#include <linux/ip.h>
18#include <net/icmp.h>
19#include <net/udp.h>
20#include <net/tcp.h>
21#include <net/route.h>
22
23#include <linux/netfilter.h>
24#include <linux/netfilter/x_tables.h>
25#include <linux/netfilter_ipv4/ipt_LOG.h>
26#include <net/netfilter/nf_log.h>
27#include <net/netfilter/xt_log.h>
28
29MODULE_LICENSE("GPL");
30MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
31MODULE_DESCRIPTION("Xtables: IPv4 packet logging to syslog");
32
33/* One level of recursion won't kill us */
34static void dump_packet(struct sbuff *m,
35 const struct nf_loginfo *info,
36 const struct sk_buff *skb,
37 unsigned int iphoff)
38{
39 struct iphdr _iph;
40 const struct iphdr *ih;
41 unsigned int logflags;
42
43 if (info->type == NF_LOG_TYPE_LOG)
44 logflags = info->u.log.logflags;
45 else
46 logflags = NF_LOG_MASK;
47
48 ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
49 if (ih == NULL) {
50 sb_add(m, "TRUNCATED");
51 return;
52 }
53
54 /* Important fields:
55 * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
56 /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
57 sb_add(m, "SRC=%pI4 DST=%pI4 ",
58 &ih->saddr, &ih->daddr);
59
60 /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
61 sb_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
62 ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
63 ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
64
65 /* Max length: 6 "CE DF MF " */
66 if (ntohs(ih->frag_off) & IP_CE)
67 sb_add(m, "CE ");
68 if (ntohs(ih->frag_off) & IP_DF)
69 sb_add(m, "DF ");
70 if (ntohs(ih->frag_off) & IP_MF)
71 sb_add(m, "MF ");
72
73 /* Max length: 11 "FRAG:65535 " */
74 if (ntohs(ih->frag_off) & IP_OFFSET)
75 sb_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
76
77 if ((logflags & IPT_LOG_IPOPT) &&
78 ih->ihl * 4 > sizeof(struct iphdr)) {
79 const unsigned char *op;
80 unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
81 unsigned int i, optsize;
82
83 optsize = ih->ihl * 4 - sizeof(struct iphdr);
84 op = skb_header_pointer(skb, iphoff+sizeof(_iph),
85 optsize, _opt);
86 if (op == NULL) {
87 sb_add(m, "TRUNCATED");
88 return;
89 }
90
91 /* Max length: 127 "OPT (" 15*4*2chars ") " */
92 sb_add(m, "OPT (");
93 for (i = 0; i < optsize; i++)
94 sb_add(m, "%02X", op[i]);
95 sb_add(m, ") ");
96 }
97
98 switch (ih->protocol) {
99 case IPPROTO_TCP: {
100 struct tcphdr _tcph;
101 const struct tcphdr *th;
102
103 /* Max length: 10 "PROTO=TCP " */
104 sb_add(m, "PROTO=TCP ");
105
106 if (ntohs(ih->frag_off) & IP_OFFSET)
107 break;
108
109 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
110 th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
111 sizeof(_tcph), &_tcph);
112 if (th == NULL) {
113 sb_add(m, "INCOMPLETE [%u bytes] ",
114 skb->len - iphoff - ih->ihl*4);
115 break;
116 }
117
118 /* Max length: 20 "SPT=65535 DPT=65535 " */
119 sb_add(m, "SPT=%u DPT=%u ",
120 ntohs(th->source), ntohs(th->dest));
121 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
122 if (logflags & IPT_LOG_TCPSEQ)
123 sb_add(m, "SEQ=%u ACK=%u ",
124 ntohl(th->seq), ntohl(th->ack_seq));
125 /* Max length: 13 "WINDOW=65535 " */
126 sb_add(m, "WINDOW=%u ", ntohs(th->window));
127 /* Max length: 9 "RES=0x3F " */
128 sb_add(m, "RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
129 /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
130 if (th->cwr)
131 sb_add(m, "CWR ");
132 if (th->ece)
133 sb_add(m, "ECE ");
134 if (th->urg)
135 sb_add(m, "URG ");
136 if (th->ack)
137 sb_add(m, "ACK ");
138 if (th->psh)
139 sb_add(m, "PSH ");
140 if (th->rst)
141 sb_add(m, "RST ");
142 if (th->syn)
143 sb_add(m, "SYN ");
144 if (th->fin)
145 sb_add(m, "FIN ");
146 /* Max length: 11 "URGP=65535 " */
147 sb_add(m, "URGP=%u ", ntohs(th->urg_ptr));
148
149 if ((logflags & IPT_LOG_TCPOPT) &&
150 th->doff * 4 > sizeof(struct tcphdr)) {
151 unsigned char _opt[4 * 15 - sizeof(struct tcphdr)];
152 const unsigned char *op;
153 unsigned int i, optsize;
154
155 optsize = th->doff * 4 - sizeof(struct tcphdr);
156 op = skb_header_pointer(skb,
157 iphoff+ih->ihl*4+sizeof(_tcph),
158 optsize, _opt);
159 if (op == NULL) {
160 sb_add(m, "TRUNCATED");
161 return;
162 }
163
164 /* Max length: 127 "OPT (" 15*4*2chars ") " */
165 sb_add(m, "OPT (");
166 for (i = 0; i < optsize; i++)
167 sb_add(m, "%02X", op[i]);
168 sb_add(m, ") ");
169 }
170 break;
171 }
172 case IPPROTO_UDP:
173 case IPPROTO_UDPLITE: {
174 struct udphdr _udph;
175 const struct udphdr *uh;
176
177 if (ih->protocol == IPPROTO_UDP)
178 /* Max length: 10 "PROTO=UDP " */
179 sb_add(m, "PROTO=UDP " );
180 else /* Max length: 14 "PROTO=UDPLITE " */
181 sb_add(m, "PROTO=UDPLITE ");
182
183 if (ntohs(ih->frag_off) & IP_OFFSET)
184 break;
185
186 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
187 uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
188 sizeof(_udph), &_udph);
189 if (uh == NULL) {
190 sb_add(m, "INCOMPLETE [%u bytes] ",
191 skb->len - iphoff - ih->ihl*4);
192 break;
193 }
194
195 /* Max length: 20 "SPT=65535 DPT=65535 " */
196 sb_add(m, "SPT=%u DPT=%u LEN=%u ",
197 ntohs(uh->source), ntohs(uh->dest),
198 ntohs(uh->len));
199 break;
200 }
201 case IPPROTO_ICMP: {
202 struct icmphdr _icmph;
203 const struct icmphdr *ich;
204 static const size_t required_len[NR_ICMP_TYPES+1]
205 = { [ICMP_ECHOREPLY] = 4,
206 [ICMP_DEST_UNREACH]
207 = 8 + sizeof(struct iphdr),
208 [ICMP_SOURCE_QUENCH]
209 = 8 + sizeof(struct iphdr),
210 [ICMP_REDIRECT]
211 = 8 + sizeof(struct iphdr),
212 [ICMP_ECHO] = 4,
213 [ICMP_TIME_EXCEEDED]
214 = 8 + sizeof(struct iphdr),
215 [ICMP_PARAMETERPROB]
216 = 8 + sizeof(struct iphdr),
217 [ICMP_TIMESTAMP] = 20,
218 [ICMP_TIMESTAMPREPLY] = 20,
219 [ICMP_ADDRESS] = 12,
220 [ICMP_ADDRESSREPLY] = 12 };
221
222 /* Max length: 11 "PROTO=ICMP " */
223 sb_add(m, "PROTO=ICMP ");
224
225 if (ntohs(ih->frag_off) & IP_OFFSET)
226 break;
227
228 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
229 ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
230 sizeof(_icmph), &_icmph);
231 if (ich == NULL) {
232 sb_add(m, "INCOMPLETE [%u bytes] ",
233 skb->len - iphoff - ih->ihl*4);
234 break;
235 }
236
237 /* Max length: 18 "TYPE=255 CODE=255 " */
238 sb_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code);
239
240 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
241 if (ich->type <= NR_ICMP_TYPES &&
242 required_len[ich->type] &&
243 skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
244 sb_add(m, "INCOMPLETE [%u bytes] ",
245 skb->len - iphoff - ih->ihl*4);
246 break;
247 }
248
249 switch (ich->type) {
250 case ICMP_ECHOREPLY:
251 case ICMP_ECHO:
252 /* Max length: 19 "ID=65535 SEQ=65535 " */
253 sb_add(m, "ID=%u SEQ=%u ",
254 ntohs(ich->un.echo.id),
255 ntohs(ich->un.echo.sequence));
256 break;
257
258 case ICMP_PARAMETERPROB:
259 /* Max length: 14 "PARAMETER=255 " */
260 sb_add(m, "PARAMETER=%u ",
261 ntohl(ich->un.gateway) >> 24);
262 break;
263 case ICMP_REDIRECT:
264 /* Max length: 24 "GATEWAY=255.255.255.255 " */
265 sb_add(m, "GATEWAY=%pI4 ", &ich->un.gateway);
266 /* Fall through */
267 case ICMP_DEST_UNREACH:
268 case ICMP_SOURCE_QUENCH:
269 case ICMP_TIME_EXCEEDED:
270 /* Max length: 3+maxlen */
271 if (!iphoff) { /* Only recurse once. */
272 sb_add(m, "[");
273 dump_packet(m, info, skb,
274 iphoff + ih->ihl*4+sizeof(_icmph));
275 sb_add(m, "] ");
276 }
277
278 /* Max length: 10 "MTU=65535 " */
279 if (ich->type == ICMP_DEST_UNREACH &&
280 ich->code == ICMP_FRAG_NEEDED)
281 sb_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu));
282 }
283 break;
284 }
285 /* Max Length */
286 case IPPROTO_AH: {
287 struct ip_auth_hdr _ahdr;
288 const struct ip_auth_hdr *ah;
289
290 if (ntohs(ih->frag_off) & IP_OFFSET)
291 break;
292
293 /* Max length: 9 "PROTO=AH " */
294 sb_add(m, "PROTO=AH ");
295
296 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
297 ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
298 sizeof(_ahdr), &_ahdr);
299 if (ah == NULL) {
300 sb_add(m, "INCOMPLETE [%u bytes] ",
301 skb->len - iphoff - ih->ihl*4);
302 break;
303 }
304
305 /* Length: 15 "SPI=0xF1234567 " */
306 sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
307 break;
308 }
309 case IPPROTO_ESP: {
310 struct ip_esp_hdr _esph;
311 const struct ip_esp_hdr *eh;
312
313 /* Max length: 10 "PROTO=ESP " */
314 sb_add(m, "PROTO=ESP ");
315
316 if (ntohs(ih->frag_off) & IP_OFFSET)
317 break;
318
319 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
320 eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
321 sizeof(_esph), &_esph);
322 if (eh == NULL) {
323 sb_add(m, "INCOMPLETE [%u bytes] ",
324 skb->len - iphoff - ih->ihl*4);
325 break;
326 }
327
328 /* Length: 15 "SPI=0xF1234567 " */
329 sb_add(m, "SPI=0x%x ", ntohl(eh->spi));
330 break;
331 }
332 /* Max length: 10 "PROTO 255 " */
333 default:
334 sb_add(m, "PROTO=%u ", ih->protocol);
335 }
336
337 /* Max length: 15 "UID=4294967295 " */
338 if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
339 read_lock_bh(&skb->sk->sk_callback_lock);
340 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
341 sb_add(m, "UID=%u GID=%u ",
342 skb->sk->sk_socket->file->f_cred->fsuid,
343 skb->sk->sk_socket->file->f_cred->fsgid);
344 read_unlock_bh(&skb->sk->sk_callback_lock);
345 }
346
347 /* Max length: 16 "MARK=0xFFFFFFFF " */
348 if (!iphoff && skb->mark)
349 sb_add(m, "MARK=0x%x ", skb->mark);
350
351 /* Proto Max log string length */
352 /* IP: 40+46+6+11+127 = 230 */
353 /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
354 /* UDP: 10+max(25,20) = 35 */
355 /* UDPLITE: 14+max(25,20) = 39 */
356 /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
357 /* ESP: 10+max(25)+15 = 50 */
358 /* AH: 9+max(25)+15 = 49 */
359 /* unknown: 10 */
360
361 /* (ICMP allows recursion one level deep) */
362 /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */
363 /* maxlen = 230+ 91 + 230 + 252 = 803 */
364}
365
366static void dump_mac_header(struct sbuff *m,
367 const struct nf_loginfo *info,
368 const struct sk_buff *skb)
369{
370 struct net_device *dev = skb->dev;
371 unsigned int logflags = 0;
372
373 if (info->type == NF_LOG_TYPE_LOG)
374 logflags = info->u.log.logflags;
375
376 if (!(logflags & IPT_LOG_MACDECODE))
377 goto fallback;
378
379 switch (dev->type) {
380 case ARPHRD_ETHER:
381 sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
382 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
383 ntohs(eth_hdr(skb)->h_proto));
384 return;
385 default:
386 break;
387 }
388
389fallback:
390 sb_add(m, "MAC=");
391 if (dev->hard_header_len &&
392 skb->mac_header != skb->network_header) {
393 const unsigned char *p = skb_mac_header(skb);
394 unsigned int i;
395
396 sb_add(m, "%02x", *p++);
397 for (i = 1; i < dev->hard_header_len; i++, p++)
398 sb_add(m, ":%02x", *p);
399 }
400 sb_add(m, " ");
401}
402
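/* Example of the two output forms above (addresses are illustrative): with
 * IPT_LOG_MACDECODE set on an Ethernet device the log entry contains
 * "MACSRC=00:11:22:33:44:55 MACDST=66:77:88:99:aa:bb MACPROTO=0800 ",
 * while the fallback path dumps the raw link-layer header bytes, e.g.
 * "MAC=66:77:88:99:aa:bb:00:11:22:33:44:55:08:00 " (destination first,
 * then source, then the protocol field). */
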
403static struct nf_loginfo default_loginfo = {
404 .type = NF_LOG_TYPE_LOG,
405 .u = {
406 .log = {
407 .level = 5,
408 .logflags = NF_LOG_MASK,
409 },
410 },
411};
412
413static void
414ipt_log_packet(u_int8_t pf,
415 unsigned int hooknum,
416 const struct sk_buff *skb,
417 const struct net_device *in,
418 const struct net_device *out,
419 const struct nf_loginfo *loginfo,
420 const char *prefix)
421{
422 struct sbuff *m = sb_open();
423
424 if (!loginfo)
425 loginfo = &default_loginfo;
426
427 sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
428 prefix,
429 in ? in->name : "",
430 out ? out->name : "");
431#ifdef CONFIG_BRIDGE_NETFILTER
432 if (skb->nf_bridge) {
433 const struct net_device *physindev;
434 const struct net_device *physoutdev;
435
436 physindev = skb->nf_bridge->physindev;
437 if (physindev && in != physindev)
438 sb_add(m, "PHYSIN=%s ", physindev->name);
439 physoutdev = skb->nf_bridge->physoutdev;
440 if (physoutdev && out != physoutdev)
441 sb_add(m, "PHYSOUT=%s ", physoutdev->name);
442 }
443#endif
444
445 if (in != NULL)
446 dump_mac_header(m, loginfo, skb);
447
448 dump_packet(m, loginfo, skb, 0);
449
450 sb_close(m);
451}
452
453static unsigned int
454log_tg(struct sk_buff *skb, const struct xt_action_param *par)
455{
456 const struct ipt_log_info *loginfo = par->targinfo;
457 struct nf_loginfo li;
458
459 li.type = NF_LOG_TYPE_LOG;
460 li.u.log.level = loginfo->level;
461 li.u.log.logflags = loginfo->logflags;
462
463 ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, par->out, &li,
464 loginfo->prefix);
465 return XT_CONTINUE;
466}
467
468static int log_tg_check(const struct xt_tgchk_param *par)
469{
470 const struct ipt_log_info *loginfo = par->targinfo;
471
472 if (loginfo->level >= 8) {
473 pr_debug("level %u >= 8\n", loginfo->level);
474 return -EINVAL;
475 }
476 if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
477 pr_debug("prefix is not null-terminated\n");
478 return -EINVAL;
479 }
480 return 0;
481}
482
483static struct xt_target log_tg_reg __read_mostly = {
484 .name = "LOG",
485 .family = NFPROTO_IPV4,
486 .target = log_tg,
487 .targetsize = sizeof(struct ipt_log_info),
488 .checkentry = log_tg_check,
489 .me = THIS_MODULE,
490};
491
492static struct nf_logger ipt_log_logger __read_mostly = {
493 .name = "ipt_LOG",
494 .logfn = &ipt_log_packet,
495 .me = THIS_MODULE,
496};
497
498static int __init log_tg_init(void)
499{
500 int ret;
501
502 ret = xt_register_target(&log_tg_reg);
503 if (ret < 0)
504 return ret;
505 nf_log_register(NFPROTO_IPV4, &ipt_log_logger);
506 return 0;
507}
508
509static void __exit log_tg_exit(void)
510{
511 nf_log_unregister(&ipt_log_logger);
512 xt_unregister_target(&log_tg_reg);
513}
514
515module_init(log_tg_init);
516module_exit(log_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
new file mode 100644
index 00000000000..6cdb298f103
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -0,0 +1,98 @@
1/* NETMAP - static NAT mapping of IP network addresses (1:1).
2 * The mapping can be applied to source (POSTROUTING),
3 * destination (PREROUTING), or both (with separate rules).
4 */
5
6/* (C) 2000-2001 Svenning Soerensen <svenning@post5.tele.dk>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/ip.h>
14#include <linux/module.h>
15#include <linux/netdevice.h>
16#include <linux/netfilter.h>
17#include <linux/netfilter_ipv4.h>
18#include <linux/netfilter/x_tables.h>
19#include <net/netfilter/nf_nat_rule.h>
20
21MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
23MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
24
25static int netmap_tg_check(const struct xt_tgchk_param *par)
26{
27 const struct nf_nat_multi_range_compat *mr = par->targinfo;
28
29 if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
30 pr_debug("bad MAP_IPS.\n");
31 return -EINVAL;
32 }
33 if (mr->rangesize != 1) {
34 pr_debug("bad rangesize %u.\n", mr->rangesize);
35 return -EINVAL;
36 }
37 return 0;
38}
39
40static unsigned int
41netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
42{
43 struct nf_conn *ct;
44 enum ip_conntrack_info ctinfo;
45 __be32 new_ip, netmask;
46 const struct nf_nat_multi_range_compat *mr = par->targinfo;
47 struct nf_nat_range newrange;
48
49 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
50 par->hooknum == NF_INET_POST_ROUTING ||
51 par->hooknum == NF_INET_LOCAL_OUT ||
52 par->hooknum == NF_INET_LOCAL_IN);
53 ct = nf_ct_get(skb, &ctinfo);
54
55 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
56
57 if (par->hooknum == NF_INET_PRE_ROUTING ||
58 par->hooknum == NF_INET_LOCAL_OUT)
59 new_ip = ip_hdr(skb)->daddr & ~netmask;
60 else
61 new_ip = ip_hdr(skb)->saddr & ~netmask;
62 new_ip |= mr->range[0].min_ip & netmask;
63
64 newrange = ((struct nf_nat_range)
65 { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
66 new_ip, new_ip,
67 mr->range[0].min, mr->range[0].max });
68
69 /* Hand modified range to generic setup. */
70 return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum));
71}
72
73static struct xt_target netmap_tg_reg __read_mostly = {
74 .name = "NETMAP",
75 .family = NFPROTO_IPV4,
76 .target = netmap_tg,
77 .targetsize = sizeof(struct nf_nat_multi_range_compat),
78 .table = "nat",
79 .hooks = (1 << NF_INET_PRE_ROUTING) |
80 (1 << NF_INET_POST_ROUTING) |
81 (1 << NF_INET_LOCAL_OUT) |
82 (1 << NF_INET_LOCAL_IN),
83 .checkentry = netmap_tg_check,
84 .me = THIS_MODULE
85};
86
87static int __init netmap_tg_init(void)
88{
89 return xt_register_target(&netmap_tg_reg);
90}
91
92static void __exit netmap_tg_exit(void)
93{
94 xt_unregister_target(&netmap_tg_reg);
95}
96
97module_init(netmap_tg_init);
98module_exit(netmap_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
new file mode 100644
index 00000000000..18a0656505a
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -0,0 +1,110 @@
1/* Redirect. Simple mapping which alters dst to a local IP address. */
2/* (C) 1999-2001 Paul `Rusty' Russell
3 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10#include <linux/types.h>
11#include <linux/ip.h>
12#include <linux/timer.h>
13#include <linux/module.h>
14#include <linux/netfilter.h>
15#include <linux/netdevice.h>
16#include <linux/if.h>
17#include <linux/inetdevice.h>
18#include <net/protocol.h>
19#include <net/checksum.h>
20#include <linux/netfilter_ipv4.h>
21#include <linux/netfilter/x_tables.h>
22#include <net/netfilter/nf_nat_rule.h>
23
24MODULE_LICENSE("GPL");
25MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
26MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
27
28/* FIXME: Take multiple ranges --RR */
29static int redirect_tg_check(const struct xt_tgchk_param *par)
30{
31 const struct nf_nat_multi_range_compat *mr = par->targinfo;
32
33 if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
34 pr_debug("bad MAP_IPS.\n");
35 return -EINVAL;
36 }
37 if (mr->rangesize != 1) {
38 pr_debug("bad rangesize %u.\n", mr->rangesize);
39 return -EINVAL;
40 }
41 return 0;
42}
43
44static unsigned int
45redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
46{
47 struct nf_conn *ct;
48 enum ip_conntrack_info ctinfo;
49 __be32 newdst;
50 const struct nf_nat_multi_range_compat *mr = par->targinfo;
51 struct nf_nat_range newrange;
52
53 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
54 par->hooknum == NF_INET_LOCAL_OUT);
55
56 ct = nf_ct_get(skb, &ctinfo);
57 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
58
59 /* Local packets: make them go to loopback */
60 if (par->hooknum == NF_INET_LOCAL_OUT)
61 newdst = htonl(0x7F000001);
62 else {
63 struct in_device *indev;
64 struct in_ifaddr *ifa;
65
66 newdst = 0;
67
68 rcu_read_lock();
69 indev = __in_dev_get_rcu(skb->dev);
70 if (indev && (ifa = indev->ifa_list))
71 newdst = ifa->ifa_local;
72 rcu_read_unlock();
73
74 if (!newdst)
75 return NF_DROP;
76 }
77
78 /* Transfer from original range. */
79 newrange = ((struct nf_nat_range)
80 { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
81 newdst, newdst,
82 mr->range[0].min, mr->range[0].max });
83
84 /* Hand modified range to generic setup. */
85 return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_DST);
86}
87
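/* For illustration: used from PREROUTING, the target rewrites the destination
 * to the primary address of the interface the packet arrived on (e.g. a packet
 * reaching eth0 configured as 192.0.2.1 is redirected to 192.0.2.1); used from
 * LOCAL_OUT it always rewrites to 127.0.0.1. If the incoming interface has no
 * address configured, the packet is dropped. Addresses are example values only. */
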
88static struct xt_target redirect_tg_reg __read_mostly = {
89 .name = "REDIRECT",
90 .family = NFPROTO_IPV4,
91 .target = redirect_tg,
92 .targetsize = sizeof(struct nf_nat_multi_range_compat),
93 .table = "nat",
94 .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
95 .checkentry = redirect_tg_check,
96 .me = THIS_MODULE,
97};
98
99static int __init redirect_tg_init(void)
100{
101 return xt_register_target(&redirect_tg_reg);
102}
103
104static void __exit redirect_tg_exit(void)
105{
106 xt_unregister_target(&redirect_tg_reg);
107}
108
109module_init(redirect_tg_init);
110module_exit(redirect_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
new file mode 100644
index 00000000000..2b57e52c746
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -0,0 +1,127 @@
1/* IP tables module for matching the value of the IPv4 and TCP ECN bits
2 *
3 * (C) 2002 by Harald Welte <laforge@gnumonks.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10#include <linux/in.h>
11#include <linux/ip.h>
12#include <net/ip.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/tcp.h>
16
17#include <linux/netfilter/x_tables.h>
18#include <linux/netfilter_ipv4/ip_tables.h>
19#include <linux/netfilter_ipv4/ipt_ecn.h>
20
21MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
22MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4");
23MODULE_LICENSE("GPL");
24
25static inline bool match_ip(const struct sk_buff *skb,
26 const struct ipt_ecn_info *einfo)
27{
28 return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^
29 !!(einfo->invert & IPT_ECN_OP_MATCH_IP);
30}
31
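/* For reference: IPT_ECN_IP_MASK selects the two low-order bits of the
 * TOS byte, i.e. the ECN field of RFC 3168, whose codepoints are
 * 0 = Not-ECT, 1 = ECT(1), 2 = ECT(0) and 3 = CE. As a worked example,
 * a TOS byte of 0x2a carries DSCP 0x0a and an ECN field of 2, so it
 * matches a rule asking for ECT(0). */
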
32static inline bool match_tcp(const struct sk_buff *skb,
33 const struct ipt_ecn_info *einfo,
34 bool *hotdrop)
35{
36 struct tcphdr _tcph;
37 const struct tcphdr *th;
38
39 /* In practice, TCP match does this, so can't fail. But let's
40 * be good citizens.
41 */
42 th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
43 if (th == NULL) {
44 *hotdrop = false;
45 return false;
46 }
47
48 if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
49 if (einfo->invert & IPT_ECN_OP_MATCH_ECE) {
50 if (th->ece == 1)
51 return false;
52 } else {
53 if (th->ece == 0)
54 return false;
55 }
56 }
57
58 if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
59 if (einfo->invert & IPT_ECN_OP_MATCH_CWR) {
60 if (th->cwr == 1)
61 return false;
62 } else {
63 if (th->cwr == 0)
64 return false;
65 }
66 }
67
68 return true;
69}
70
71static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
72{
73 const struct ipt_ecn_info *info = par->matchinfo;
74
75 if (info->operation & IPT_ECN_OP_MATCH_IP)
76 if (!match_ip(skb, info))
77 return false;
78
79 if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
80 if (!match_tcp(skb, info, &par->hotdrop))
81 return false;
82 }
83
84 return true;
85}
86
87static int ecn_mt_check(const struct xt_mtchk_param *par)
88{
89 const struct ipt_ecn_info *info = par->matchinfo;
90 const struct ipt_ip *ip = par->entryinfo;
91
92 if (info->operation & IPT_ECN_OP_MATCH_MASK)
93 return -EINVAL;
94
95 if (info->invert & IPT_ECN_OP_MATCH_MASK)
96 return -EINVAL;
97
98 if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) &&
99 (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
100 pr_info("cannot match TCP bits in rule for non-tcp packets\n");
101 return -EINVAL;
102 }
103
104 return 0;
105}
106
107static struct xt_match ecn_mt_reg __read_mostly = {
108 .name = "ecn",
109 .family = NFPROTO_IPV4,
110 .match = ecn_mt,
111 .matchsize = sizeof(struct ipt_ecn_info),
112 .checkentry = ecn_mt_check,
113 .me = THIS_MODULE,
114};
115
116static int __init ecn_mt_init(void)
117{
118 return xt_register_match(&ecn_mt_reg);
119}
120
121static void __exit ecn_mt_exit(void)
122{
123 xt_unregister_match(&ecn_mt_reg);
124}
125
126module_init(ecn_mt_init);
127module_exit(ecn_mt_exit);
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
new file mode 100644
index 00000000000..703f366fd23
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -0,0 +1,85 @@
1/* Amanda extension for TCP NAT alteration.
2 * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
3 * based on a copy of HW's ip_nat_irc.c as well as other modules
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 */
10
11#include <linux/kernel.h>
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/udp.h>
15
16#include <net/netfilter/nf_nat_helper.h>
17#include <net/netfilter/nf_nat_rule.h>
18#include <net/netfilter/nf_conntrack_helper.h>
19#include <net/netfilter/nf_conntrack_expect.h>
20#include <linux/netfilter/nf_conntrack_amanda.h>
21
22MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
23MODULE_DESCRIPTION("Amanda NAT helper");
24MODULE_LICENSE("GPL");
25MODULE_ALIAS("ip_nat_amanda");
26
27static unsigned int help(struct sk_buff *skb,
28 enum ip_conntrack_info ctinfo,
29 unsigned int matchoff,
30 unsigned int matchlen,
31 struct nf_conntrack_expect *exp)
32{
33 char buffer[sizeof("65535")];
34 u_int16_t port;
35 unsigned int ret;
36
37 /* Connection comes from client. */
38 exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
39 exp->dir = IP_CT_DIR_ORIGINAL;
40
41 /* When you see the packet, we need to NAT it the same as
42 * this one (ie. same IP: it will be TCP and master is UDP). */
43 exp->expectfn = nf_nat_follow_master;
44
45 /* Try to get same port: if not, try to change it. */
46 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
47 int res;
48
49 exp->tuple.dst.u.tcp.port = htons(port);
50 res = nf_ct_expect_related(exp);
51 if (res == 0)
52 break;
53 else if (res != -EBUSY) {
54 port = 0;
55 break;
56 }
57 }
58
59 if (port == 0)
60 return NF_DROP;
61
62 sprintf(buffer, "%u", port);
63 ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo,
64 matchoff, matchlen,
65 buffer, strlen(buffer));
66 if (ret != NF_ACCEPT)
67 nf_ct_unexpect_related(exp);
68 return ret;
69}
70
71static void __exit nf_nat_amanda_fini(void)
72{
73 rcu_assign_pointer(nf_nat_amanda_hook, NULL);
74 synchronize_rcu();
75}
76
77static int __init nf_nat_amanda_init(void)
78{
79 BUG_ON(nf_nat_amanda_hook != NULL);
80 rcu_assign_pointer(nf_nat_amanda_hook, help);
81 return 0;
82}
83
84module_init(nf_nat_amanda_init);
85module_exit(nf_nat_amanda_fini);
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
new file mode 100644
index 00000000000..3346de5d94d
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -0,0 +1,779 @@
1/* NAT for netfilter; shared with compatibility layer. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/types.h>
13#include <linux/timer.h>
14#include <linux/skbuff.h>
15#include <linux/gfp.h>
16#include <net/checksum.h>
17#include <net/icmp.h>
18#include <net/ip.h>
19#include <net/tcp.h> /* For tcp_prot in getorigdst */
20#include <linux/icmp.h>
21#include <linux/udp.h>
22#include <linux/jhash.h>
23
24#include <linux/netfilter_ipv4.h>
25#include <net/netfilter/nf_conntrack.h>
26#include <net/netfilter/nf_conntrack_core.h>
27#include <net/netfilter/nf_nat.h>
28#include <net/netfilter/nf_nat_protocol.h>
29#include <net/netfilter/nf_nat_core.h>
30#include <net/netfilter/nf_nat_helper.h>
31#include <net/netfilter/nf_conntrack_helper.h>
32#include <net/netfilter/nf_conntrack_l3proto.h>
33#include <net/netfilter/nf_conntrack_l4proto.h>
34#include <net/netfilter/nf_conntrack_zones.h>
35
36static DEFINE_SPINLOCK(nf_nat_lock);
37
38static struct nf_conntrack_l3proto *l3proto __read_mostly;
39
40#define MAX_IP_NAT_PROTO 256
41static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO]
42 __read_mostly;
43
44static inline const struct nf_nat_protocol *
45__nf_nat_proto_find(u_int8_t protonum)
46{
47 return rcu_dereference(nf_nat_protos[protonum]);
48}
49
50/* We keep an extra hash for each conntrack, for fast searching. */
51static inline unsigned int
52hash_by_src(const struct net *net, u16 zone,
53 const struct nf_conntrack_tuple *tuple)
54{
55 unsigned int hash;
56
57 /* Original src, to ensure we map it consistently if poss. */
58 hash = jhash_3words((__force u32)tuple->src.u3.ip,
59 (__force u32)tuple->src.u.all ^ zone,
60 tuple->dst.protonum, 0);
61 return ((u64)hash * net->ipv4.nat_htable_size) >> 32;
62}
63
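/* Note on the final scaling step: multiplying the 32-bit hash by the
 * table size and keeping the upper 32 bits of the 64-bit product maps
 * the hash roughly uniformly onto [0, nat_htable_size) without a
 * modulo. For example, with 4096 buckets a hash of 0x80000000 selects
 * bucket ((u64)0x80000000 * 4096) >> 32 == 2048. */
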
64/* Is this tuple already taken? (not by us) */
65int
66nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
67 const struct nf_conn *ignored_conntrack)
68{
69 /* Conntrack doesn't keep track of outgoing tuples; only
70 incoming ones. NAT means they don't have a fixed mapping,
71 so we invert the tuple and look for the incoming reply.
72
73 We could keep a separate hash if this proves too slow. */
74 struct nf_conntrack_tuple reply;
75
76 nf_ct_invert_tuplepr(&reply, tuple);
77 return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
78}
79EXPORT_SYMBOL(nf_nat_used_tuple);
80
81/* If we source map this tuple so reply looks like reply_tuple, will
82 * that meet the constraints of range. */
83static int
84in_range(const struct nf_conntrack_tuple *tuple,
85 const struct nf_nat_range *range)
86{
87 const struct nf_nat_protocol *proto;
88 int ret = 0;
89
90 /* If we are supposed to map IPs, then we must be in the
91 range specified, otherwise let this drag us onto a new src IP. */
92 if (range->flags & IP_NAT_RANGE_MAP_IPS) {
93 if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) ||
94 ntohl(tuple->src.u3.ip) > ntohl(range->max_ip))
95 return 0;
96 }
97
98 rcu_read_lock();
99 proto = __nf_nat_proto_find(tuple->dst.protonum);
100 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
101 proto->in_range(tuple, IP_NAT_MANIP_SRC,
102 &range->min, &range->max))
103 ret = 1;
104 rcu_read_unlock();
105
106 return ret;
107}
108
109static inline int
110same_src(const struct nf_conn *ct,
111 const struct nf_conntrack_tuple *tuple)
112{
113 const struct nf_conntrack_tuple *t;
114
115 t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
116 return (t->dst.protonum == tuple->dst.protonum &&
117 t->src.u3.ip == tuple->src.u3.ip &&
118 t->src.u.all == tuple->src.u.all);
119}
120
121/* Only called for SRC manip */
122static int
123find_appropriate_src(struct net *net, u16 zone,
124 const struct nf_conntrack_tuple *tuple,
125 struct nf_conntrack_tuple *result,
126 const struct nf_nat_range *range)
127{
128 unsigned int h = hash_by_src(net, zone, tuple);
129 const struct nf_conn_nat *nat;
130 const struct nf_conn *ct;
131 const struct hlist_node *n;
132
133 rcu_read_lock();
134 hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) {
135 ct = nat->ct;
136 if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) {
137 /* Copy source part from reply tuple. */
138 nf_ct_invert_tuplepr(result,
139 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
140 result->dst = tuple->dst;
141
142 if (in_range(result, range)) {
143 rcu_read_unlock();
144 return 1;
145 }
146 }
147 }
148 rcu_read_unlock();
149 return 0;
150}
151
152/* For [FUTURE] fragmentation handling, we want the least-used
153 src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
154 if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
155 1-65535, we don't do pro-rata allocation based on ports; we choose
156 the ip with the lowest src-ip/dst-ip/proto usage.
157*/
158static void
159find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
160 const struct nf_nat_range *range,
161 const struct nf_conn *ct,
162 enum nf_nat_manip_type maniptype)
163{
164 __be32 *var_ipp;
165 /* Host order */
166 u_int32_t minip, maxip, j;
167
168 /* No IP mapping? Do nothing. */
169 if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
170 return;
171
172 if (maniptype == IP_NAT_MANIP_SRC)
173 var_ipp = &tuple->src.u3.ip;
174 else
175 var_ipp = &tuple->dst.u3.ip;
176
177 /* Fast path: only one choice. */
178 if (range->min_ip == range->max_ip) {
179 *var_ipp = range->min_ip;
180 return;
181 }
182
183 /* Hashing source and destination IPs gives a fairly even
184 * spread in practice (if there are a small number of IPs
185 * involved, there usually aren't that many connections
186 * anyway). The consistency means that servers see the same
187 * client coming from the same IP (some Internet Banking sites
188 * like this), even across reboots. */
189 minip = ntohl(range->min_ip);
190 maxip = ntohl(range->max_ip);
191 j = jhash_2words((__force u32)tuple->src.u3.ip,
192 range->flags & IP_NAT_RANGE_PERSISTENT ?
193 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0);
194 j = ((u64)j * (maxip - minip + 1)) >> 32;
195 *var_ipp = htonl(minip + j);
196}
197
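/* Worked example of the selection above (addresses and hash value are
 * illustrative): with a range of 10.0.0.1 - 10.0.0.254 (254 addresses)
 * and IP_NAT_RANGE_PERSISTENT set, j depends only on the client's
 * source address, so a hash of 0x40000000 always yields offset
 * ((u64)0x40000000 * 254) >> 32 == 63 and hence 10.0.0.64 for that
 * client. Without the PERSISTENT flag the destination address (xored
 * with the zone) is mixed in too, so the same client may be mapped to
 * different pool addresses when talking to different servers. */
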
198/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
199 * we change the source to map into the range. For NF_INET_PRE_ROUTING
200 * and NF_INET_LOCAL_OUT, we change the destination to map into the
201 * range. It might not be possible to get a unique tuple, but we try.
202 * At worst (or if we race), we will end up with a final duplicate in
203 * __ip_conntrack_confirm and drop the packet. */
204static void
205get_unique_tuple(struct nf_conntrack_tuple *tuple,
206 const struct nf_conntrack_tuple *orig_tuple,
207 const struct nf_nat_range *range,
208 struct nf_conn *ct,
209 enum nf_nat_manip_type maniptype)
210{
211 struct net *net = nf_ct_net(ct);
212 const struct nf_nat_protocol *proto;
213 u16 zone = nf_ct_zone(ct);
214
215 /* 1) If this srcip/proto/src-proto-part is currently mapped,
216 and that same mapping gives a unique tuple within the given
217 range, use that.
218
219 This is only required for source (ie. NAT/masq) mappings.
220 So far, we don't do local source mappings, so multiple
221 manips are not an issue. */
222 if (maniptype == IP_NAT_MANIP_SRC &&
223 !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
224 /* try the original tuple first */
225 if (in_range(orig_tuple, range)) {
226 if (!nf_nat_used_tuple(orig_tuple, ct)) {
227 *tuple = *orig_tuple;
228 return;
229 }
230 } else if (find_appropriate_src(net, zone, orig_tuple, tuple,
231 range)) {
232 pr_debug("get_unique_tuple: Found current src map\n");
233 if (!nf_nat_used_tuple(tuple, ct))
234 return;
235 }
236 }
237
238 /* 2) Select the least-used IP/proto combination in the given
239 range. */
240 *tuple = *orig_tuple;
241 find_best_ips_proto(zone, tuple, range, ct, maniptype);
242
243 /* 3) The per-protocol part of the manip is made to map into
244 the range to make a unique tuple. */
245
246 rcu_read_lock();
247 proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
248
249 /* Only bother mapping if it's not already in range and unique */
250 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
251 if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) {
252 if (proto->in_range(tuple, maniptype, &range->min,
253 &range->max) &&
254 (range->min.all == range->max.all ||
255 !nf_nat_used_tuple(tuple, ct)))
256 goto out;
257 } else if (!nf_nat_used_tuple(tuple, ct)) {
258 goto out;
259 }
260 }
261
262 /* Last chance: get protocol to try to obtain unique tuple. */
263 proto->unique_tuple(tuple, range, maniptype, ct);
264out:
265 rcu_read_unlock();
266}
267
268unsigned int
269nf_nat_setup_info(struct nf_conn *ct,
270 const struct nf_nat_range *range,
271 enum nf_nat_manip_type maniptype)
272{
273 struct net *net = nf_ct_net(ct);
274 struct nf_conntrack_tuple curr_tuple, new_tuple;
275 struct nf_conn_nat *nat;
276
277 /* nat helper or nfctnetlink also setup binding */
278 nat = nfct_nat(ct);
279 if (!nat) {
280 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
281 if (nat == NULL) {
282 pr_debug("failed to add NAT extension\n");
283 return NF_ACCEPT;
284 }
285 }
286
287 NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC ||
288 maniptype == IP_NAT_MANIP_DST);
289 BUG_ON(nf_nat_initialized(ct, maniptype));
290
291 /* What we've got will look like inverse of reply. Normally
292 this is what is in the conntrack, except for prior
293 manipulations (future optimization: if num_manips == 0,
294 orig_tp =
295 conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
296 nf_ct_invert_tuplepr(&curr_tuple,
297 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
298
299 get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
300
301 if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
302 struct nf_conntrack_tuple reply;
303
304 /* Alter conntrack table so will recognize replies. */
305 nf_ct_invert_tuplepr(&reply, &new_tuple);
306 nf_conntrack_alter_reply(ct, &reply);
307
308 /* Non-atomic: we own this at the moment. */
309 if (maniptype == IP_NAT_MANIP_SRC)
310 ct->status |= IPS_SRC_NAT;
311 else
312 ct->status |= IPS_DST_NAT;
313 }
314
315 if (maniptype == IP_NAT_MANIP_SRC) {
316 unsigned int srchash;
317
318 srchash = hash_by_src(net, nf_ct_zone(ct),
319 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
320 spin_lock_bh(&nf_nat_lock);
321 /* nf_conntrack_alter_reply might re-allocate the extension area */
322 nat = nfct_nat(ct);
323 nat->ct = ct;
324 hlist_add_head_rcu(&nat->bysource,
325 &net->ipv4.nat_bysource[srchash]);
326 spin_unlock_bh(&nf_nat_lock);
327 }
328
329 /* It's done. */
330 if (maniptype == IP_NAT_MANIP_DST)
331 ct->status |= IPS_DST_NAT_DONE;
332 else
333 ct->status |= IPS_SRC_NAT_DONE;
334
335 return NF_ACCEPT;
336}
337EXPORT_SYMBOL(nf_nat_setup_info);
338
339/* Returns true if succeeded. */
340static bool
341manip_pkt(u_int16_t proto,
342 struct sk_buff *skb,
343 unsigned int iphdroff,
344 const struct nf_conntrack_tuple *target,
345 enum nf_nat_manip_type maniptype)
346{
347 struct iphdr *iph;
348 const struct nf_nat_protocol *p;
349
350 if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
351 return false;
352
353 iph = (void *)skb->data + iphdroff;
354
355 /* Manipulate the protocol part. */
356
357 /* rcu_read_lock()ed by nf_hook_slow */
358 p = __nf_nat_proto_find(proto);
359 if (!p->manip_pkt(skb, iphdroff, target, maniptype))
360 return false;
361
362 iph = (void *)skb->data + iphdroff;
363
364 if (maniptype == IP_NAT_MANIP_SRC) {
365 csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
366 iph->saddr = target->src.u3.ip;
367 } else {
368 csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
369 iph->daddr = target->dst.u3.ip;
370 }
371 return true;
372}
373
374/* Do packet manipulations according to nf_nat_setup_info. */
375unsigned int nf_nat_packet(struct nf_conn *ct,
376 enum ip_conntrack_info ctinfo,
377 unsigned int hooknum,
378 struct sk_buff *skb)
379{
380 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
381 unsigned long statusbit;
382 enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
383
384 if (mtype == IP_NAT_MANIP_SRC)
385 statusbit = IPS_SRC_NAT;
386 else
387 statusbit = IPS_DST_NAT;
388
389 /* Invert if this is reply dir. */
390 if (dir == IP_CT_DIR_REPLY)
391 statusbit ^= IPS_NAT_MASK;
392
393 /* Non-atomic: these bits don't change. */
394 if (ct->status & statusbit) {
395 struct nf_conntrack_tuple target;
396
397 /* We are aiming to look like inverse of other direction. */
398 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
399
400 if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype))
401 return NF_DROP;
402 }
403 return NF_ACCEPT;
404}
405EXPORT_SYMBOL_GPL(nf_nat_packet);
406
407/* Dir is direction ICMP is coming from (opposite to packet it contains) */
408int nf_nat_icmp_reply_translation(struct nf_conn *ct,
409 enum ip_conntrack_info ctinfo,
410 unsigned int hooknum,
411 struct sk_buff *skb)
412{
413 struct {
414 struct icmphdr icmp;
415 struct iphdr ip;
416 } *inside;
417 const struct nf_conntrack_l4proto *l4proto;
418 struct nf_conntrack_tuple inner, target;
419 int hdrlen = ip_hdrlen(skb);
420 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
421 unsigned long statusbit;
422 enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
423
424 if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
425 return 0;
426
427 inside = (void *)skb->data + hdrlen;
428
429 /* We're actually going to mangle it beyond trivial checksum
430 adjustment, so make sure the current checksum is correct. */
431 if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
432 return 0;
433
434 /* Must be RELATED */
435 NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED ||
436 skb->nfctinfo == IP_CT_RELATED_REPLY);
437
438 /* Redirects on non-null nats must be dropped, else they'll
439 start talking to each other without our translation, and be
440 confused... --RR */
441 if (inside->icmp.type == ICMP_REDIRECT) {
442 /* If NAT isn't finished, assume it and drop. */
443 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
444 return 0;
445
446 if (ct->status & IPS_NAT_MASK)
447 return 0;
448 }
449
450 if (manip == IP_NAT_MANIP_SRC)
451 statusbit = IPS_SRC_NAT;
452 else
453 statusbit = IPS_DST_NAT;
454
455 /* Invert if this is reply dir. */
456 if (dir == IP_CT_DIR_REPLY)
457 statusbit ^= IPS_NAT_MASK;
458
459 if (!(ct->status & statusbit))
460 return 1;
461
462 pr_debug("icmp_reply_translation: translating error %p manip %u "
463 "dir %s\n", skb, manip,
464 dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
465
466 /* rcu_read_lock()ed by nf_hook_slow */
467 l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
468
469 if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr),
470 (hdrlen +
471 sizeof(struct icmphdr) + inside->ip.ihl * 4),
472 (u_int16_t)AF_INET, inside->ip.protocol,
473 &inner, l3proto, l4proto))
474 return 0;
475
476 /* Change inner back to look like incoming packet. We do the
477 opposite manip on this hook to normal, because it might not
478 pass all hooks (locally-generated ICMP). Consider incoming
479 packet: PREROUTING (DST manip), routing produces ICMP, goes
480 through POSTROUTING (which must correct the DST manip). */
481 if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp),
482 &ct->tuplehash[!dir].tuple, !manip))
483 return 0;
484
485 if (skb->ip_summed != CHECKSUM_PARTIAL) {
486 /* Reload "inside" here, since manip_pkt may have moved skb data. */
487 inside = (void *)skb->data + hdrlen;
488 inside->icmp.checksum = 0;
489 inside->icmp.checksum =
490 csum_fold(skb_checksum(skb, hdrlen,
491 skb->len - hdrlen, 0));
492 }
493
494 /* Change outer to look like the reply to an incoming packet
495 * (proto 0 means don't invert per-proto part). */
496 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
497 if (!manip_pkt(0, skb, 0, &target, manip))
498 return 0;
499
500 return 1;
501}
502EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
503
504/* Protocol registration. */
505int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
506{
507 int ret = 0;
508
509 spin_lock_bh(&nf_nat_lock);
510 if (rcu_dereference_protected(
511 nf_nat_protos[proto->protonum],
512 lockdep_is_held(&nf_nat_lock)
513 ) != &nf_nat_unknown_protocol) {
514 ret = -EBUSY;
515 goto out;
516 }
517 rcu_assign_pointer(nf_nat_protos[proto->protonum], proto);
518 out:
519 spin_unlock_bh(&nf_nat_lock);
520 return ret;
521}
522EXPORT_SYMBOL(nf_nat_protocol_register);
523
524/* No one stores the protocol anywhere; simply delete it. */
525void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
526{
527 spin_lock_bh(&nf_nat_lock);
528 rcu_assign_pointer(nf_nat_protos[proto->protonum],
529 &nf_nat_unknown_protocol);
530 spin_unlock_bh(&nf_nat_lock);
531 synchronize_rcu();
532}
533EXPORT_SYMBOL(nf_nat_protocol_unregister);
534
535/* No one is using the conntrack by the time this is called. */
536static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
537{
538 struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
539
540 if (nat == NULL || nat->ct == NULL)
541 return;
542
543 NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE);
544
545 spin_lock_bh(&nf_nat_lock);
546 hlist_del_rcu(&nat->bysource);
547 spin_unlock_bh(&nf_nat_lock);
548}
549
550static void nf_nat_move_storage(void *new, void *old)
551{
552 struct nf_conn_nat *new_nat = new;
553 struct nf_conn_nat *old_nat = old;
554 struct nf_conn *ct = old_nat->ct;
555
556 if (!ct || !(ct->status & IPS_SRC_NAT_DONE))
557 return;
558
559 spin_lock_bh(&nf_nat_lock);
560 hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
561 spin_unlock_bh(&nf_nat_lock);
562}
563
564static struct nf_ct_ext_type nat_extend __read_mostly = {
565 .len = sizeof(struct nf_conn_nat),
566 .align = __alignof__(struct nf_conn_nat),
567 .destroy = nf_nat_cleanup_conntrack,
568 .move = nf_nat_move_storage,
569 .id = NF_CT_EXT_NAT,
570 .flags = NF_CT_EXT_F_PREALLOC,
571};
572
573#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
574
575#include <linux/netfilter/nfnetlink.h>
576#include <linux/netfilter/nfnetlink_conntrack.h>
577
578static const struct nf_nat_protocol *
579nf_nat_proto_find_get(u_int8_t protonum)
580{
581 const struct nf_nat_protocol *p;
582
583 rcu_read_lock();
584 p = __nf_nat_proto_find(protonum);
585 if (!try_module_get(p->me))
586 p = &nf_nat_unknown_protocol;
587 rcu_read_unlock();
588
589 return p;
590}
591
592static void
593nf_nat_proto_put(const struct nf_nat_protocol *p)
594{
595 module_put(p->me);
596}
597
598static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
599 [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
600 [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
601};
602
603static int nfnetlink_parse_nat_proto(struct nlattr *attr,
604 const struct nf_conn *ct,
605 struct nf_nat_range *range)
606{
607 struct nlattr *tb[CTA_PROTONAT_MAX+1];
608 const struct nf_nat_protocol *npt;
609 int err;
610
611 err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
612 if (err < 0)
613 return err;
614
615 npt = nf_nat_proto_find_get(nf_ct_protonum(ct));
616 if (npt->nlattr_to_range)
617 err = npt->nlattr_to_range(tb, range);
618 nf_nat_proto_put(npt);
619 return err;
620}
621
622static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
623 [CTA_NAT_MINIP] = { .type = NLA_U32 },
624 [CTA_NAT_MAXIP] = { .type = NLA_U32 },
625};
626
627static int
628nfnetlink_parse_nat(const struct nlattr *nat,
629 const struct nf_conn *ct, struct nf_nat_range *range)
630{
631 struct nlattr *tb[CTA_NAT_MAX+1];
632 int err;
633
634 memset(range, 0, sizeof(*range));
635
636 err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
637 if (err < 0)
638 return err;
639
640 if (tb[CTA_NAT_MINIP])
641 range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
642
643 if (!tb[CTA_NAT_MAXIP])
644 range->max_ip = range->min_ip;
645 else
646 range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
647
648 if (range->min_ip)
649 range->flags |= IP_NAT_RANGE_MAP_IPS;
650
651 if (!tb[CTA_NAT_PROTO])
652 return 0;
653
654 err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
655 if (err < 0)
656 return err;
657
658 return 0;
659}
660
661static int
662nfnetlink_parse_nat_setup(struct nf_conn *ct,
663 enum nf_nat_manip_type manip,
664 const struct nlattr *attr)
665{
666 struct nf_nat_range range;
667
668 if (nfnetlink_parse_nat(attr, ct, &range) < 0)
669 return -EINVAL;
670 if (nf_nat_initialized(ct, manip))
671 return -EEXIST;
672
673 return nf_nat_setup_info(ct, &range, manip);
674}
675#else
676static int
677nfnetlink_parse_nat_setup(struct nf_conn *ct,
678 enum nf_nat_manip_type manip,
679 const struct nlattr *attr)
680{
681 return -EOPNOTSUPP;
682}
683#endif
684
685static int __net_init nf_nat_net_init(struct net *net)
686{
687 /* Leave them the same for the moment. */
688 net->ipv4.nat_htable_size = net->ct.htable_size;
689 net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
690 if (!net->ipv4.nat_bysource)
691 return -ENOMEM;
692 return 0;
693}
694
695/* Clear NAT section of all conntracks, in case we're loaded again. */
696static int clean_nat(struct nf_conn *i, void *data)
697{
698 struct nf_conn_nat *nat = nfct_nat(i);
699
700 if (!nat)
701 return 0;
702 memset(nat, 0, sizeof(*nat));
703 i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
704 return 0;
705}
706
707static void __net_exit nf_nat_net_exit(struct net *net)
708{
709 nf_ct_iterate_cleanup(net, &clean_nat, NULL);
710 synchronize_rcu();
711 nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
712}
713
714static struct pernet_operations nf_nat_net_ops = {
715 .init = nf_nat_net_init,
716 .exit = nf_nat_net_exit,
717};
718
719static int __init nf_nat_init(void)
720{
721 size_t i;
722 int ret;
723
724 need_ipv4_conntrack();
725
726 ret = nf_ct_extend_register(&nat_extend);
727 if (ret < 0) {
728 printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
729 return ret;
730 }
731
732 ret = register_pernet_subsys(&nf_nat_net_ops);
733 if (ret < 0)
734 goto cleanup_extend;
735
736 /* Sew in builtin protocols. */
737 spin_lock_bh(&nf_nat_lock);
738 for (i = 0; i < MAX_IP_NAT_PROTO; i++)
739 rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol);
740 rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
741 rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
742 rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
743 spin_unlock_bh(&nf_nat_lock);
744
745 /* Initialize fake conntrack so that NAT will skip it */
746 nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
747
748 l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
749
750 BUG_ON(nf_nat_seq_adjust_hook != NULL);
751 rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
752 BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
753 rcu_assign_pointer(nfnetlink_parse_nat_setup_hook,
754 nfnetlink_parse_nat_setup);
755 BUG_ON(nf_ct_nat_offset != NULL);
756 rcu_assign_pointer(nf_ct_nat_offset, nf_nat_get_offset);
757 return 0;
758
759 cleanup_extend:
760 nf_ct_extend_unregister(&nat_extend);
761 return ret;
762}
763
764static void __exit nf_nat_cleanup(void)
765{
766 unregister_pernet_subsys(&nf_nat_net_ops);
767 nf_ct_l3proto_put(l3proto);
768 nf_ct_extend_unregister(&nat_extend);
769 rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL);
770 rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL);
771 rcu_assign_pointer(nf_ct_nat_offset, NULL);
772 synchronize_net();
773}
774
775MODULE_LICENSE("GPL");
776MODULE_ALIAS("nf-nat-ipv4");
777
778module_init(nf_nat_init);
779module_exit(nf_nat_cleanup);
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
new file mode 100644
index 00000000000..dc73abb3fe2
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -0,0 +1,137 @@
1/* FTP extension for TCP NAT alteration. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/moduleparam.h>
13#include <linux/ip.h>
14#include <linux/tcp.h>
15#include <linux/netfilter_ipv4.h>
16#include <net/netfilter/nf_nat.h>
17#include <net/netfilter/nf_nat_helper.h>
18#include <net/netfilter/nf_nat_rule.h>
19#include <net/netfilter/nf_conntrack_helper.h>
20#include <net/netfilter/nf_conntrack_expect.h>
21#include <linux/netfilter/nf_conntrack_ftp.h>
22
23MODULE_LICENSE("GPL");
24MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
25MODULE_DESCRIPTION("ftp NAT helper");
26MODULE_ALIAS("ip_nat_ftp");
27
28/* FIXME: Time out? --RR */
29
30static int nf_nat_ftp_fmt_cmd(enum nf_ct_ftp_type type,
31 char *buffer, size_t buflen,
32 __be32 addr, u16 port)
33{
34 switch (type) {
35 case NF_CT_FTP_PORT:
36 case NF_CT_FTP_PASV:
37 return snprintf(buffer, buflen, "%u,%u,%u,%u,%u,%u",
38 ((unsigned char *)&addr)[0],
39 ((unsigned char *)&addr)[1],
40 ((unsigned char *)&addr)[2],
41 ((unsigned char *)&addr)[3],
42 port >> 8,
43 port & 0xFF);
44 case NF_CT_FTP_EPRT:
45 return snprintf(buffer, buflen, "|1|%pI4|%u|", &addr, port);
46 case NF_CT_FTP_EPSV:
47 return snprintf(buffer, buflen, "|||%u|", port);
48 }
49
50 return 0;
51}
52
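/* Example of the strings built above, assuming a NATed address of
 * 192.0.2.3 and a data port of 5001 (values chosen for illustration):
 *
 *	PORT/PASV:  "192,0,2,3,19,137"        (19 * 256 + 137 == 5001)
 *	EPRT:       "|1|192.0.2.3|5001|"
 *	EPSV:       "|||5001|"
 *
 * The longest possible result, "|1|255.255.255.255|65535|", is what
 * sizes the buffer in nf_nat_ftp() below. */
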
53/* So, this packet has hit the connection tracking matching code.
54 Mangle it, and change the expectation to match the new version. */
55static unsigned int nf_nat_ftp(struct sk_buff *skb,
56 enum ip_conntrack_info ctinfo,
57 enum nf_ct_ftp_type type,
58 unsigned int matchoff,
59 unsigned int matchlen,
60 struct nf_conntrack_expect *exp)
61{
62 __be32 newip;
63 u_int16_t port;
64 int dir = CTINFO2DIR(ctinfo);
65 struct nf_conn *ct = exp->master;
66 char buffer[sizeof("|1|255.255.255.255|65535|")];
67 unsigned int buflen;
68
69 pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
70
71 /* Connection will come from wherever this packet goes, hence !dir */
72 newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
73 exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
74 exp->dir = !dir;
75
76 /* When you see the packet, we need to NAT it the same as
77 * this one. */
78 exp->expectfn = nf_nat_follow_master;
79
80 /* Try to get same port: if not, try to change it. */
81 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
82 int ret;
83
84 exp->tuple.dst.u.tcp.port = htons(port);
85 ret = nf_ct_expect_related(exp);
86 if (ret == 0)
87 break;
88 else if (ret != -EBUSY) {
89 port = 0;
90 break;
91 }
92 }
93
94 if (port == 0)
95 return NF_DROP;
96
97 buflen = nf_nat_ftp_fmt_cmd(type, buffer, sizeof(buffer), newip, port);
98 if (!buflen)
99 goto out;
100
101 pr_debug("calling nf_nat_mangle_tcp_packet\n");
102
103 if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
104 matchlen, buffer, buflen))
105 goto out;
106
107 return NF_ACCEPT;
108
109out:
110 nf_ct_unexpect_related(exp);
111 return NF_DROP;
112}
113
114static void __exit nf_nat_ftp_fini(void)
115{
116 rcu_assign_pointer(nf_nat_ftp_hook, NULL);
117 synchronize_rcu();
118}
119
120static int __init nf_nat_ftp_init(void)
121{
122 BUG_ON(nf_nat_ftp_hook != NULL);
123 rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp);
124 return 0;
125}
126
127/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
128static int warn_set(const char *val, struct kernel_param *kp)
129{
130 printk(KERN_INFO KBUILD_MODNAME
131 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
132 return 0;
133}
134module_param_call(ports, warn_set, NULL, NULL, 0);
135
136module_init(nf_nat_ftp_init);
137module_exit(nf_nat_ftp_fini);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
new file mode 100644
index 00000000000..ebc5f8894f9
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -0,0 +1,451 @@
1/* ip_nat_helper.c - generic support functions for NAT helpers
2 *
3 * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
4 * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#include <linux/module.h>
11#include <linux/gfp.h>
12#include <linux/kmod.h>
13#include <linux/types.h>
14#include <linux/timer.h>
15#include <linux/skbuff.h>
16#include <linux/tcp.h>
17#include <linux/udp.h>
18#include <net/checksum.h>
19#include <net/tcp.h>
20#include <net/route.h>
21
22#include <linux/netfilter_ipv4.h>
23#include <net/netfilter/nf_conntrack.h>
24#include <net/netfilter/nf_conntrack_helper.h>
25#include <net/netfilter/nf_conntrack_ecache.h>
26#include <net/netfilter/nf_conntrack_expect.h>
27#include <net/netfilter/nf_nat.h>
28#include <net/netfilter/nf_nat_protocol.h>
29#include <net/netfilter/nf_nat_core.h>
30#include <net/netfilter/nf_nat_helper.h>
31
32#define DUMP_OFFSET(x) \
33 pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
34 x->offset_before, x->offset_after, x->correction_pos);
35
36static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
37
38/* Setup TCP sequence correction given this change at this sequence */
39static inline void
40adjust_tcp_sequence(u32 seq,
41 int sizediff,
42 struct nf_conn *ct,
43 enum ip_conntrack_info ctinfo)
44{
45 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
46 struct nf_conn_nat *nat = nfct_nat(ct);
47 struct nf_nat_seq *this_way = &nat->seq[dir];
48
49 pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n",
50 seq, sizediff);
51
52 pr_debug("adjust_tcp_sequence: Seq_offset before: ");
53 DUMP_OFFSET(this_way);
54
55 spin_lock_bh(&nf_nat_seqofs_lock);
56
57 /* SYN adjust. If it's uninitialized, or this is after last
58 * correction, record it: we don't handle more than one
59 * adjustment in the window, but do deal with the common case of a
60 * retransmit */
61 if (this_way->offset_before == this_way->offset_after ||
62 before(this_way->correction_pos, seq)) {
63 this_way->correction_pos = seq;
64 this_way->offset_before = this_way->offset_after;
65 this_way->offset_after += sizediff;
66 }
67 spin_unlock_bh(&nf_nat_seqofs_lock);
68
69 pr_debug("adjust_tcp_sequence: Seq_offset after: ");
70 DUMP_OFFSET(this_way);
71}
72
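/* Bookkeeping example (sizes are illustrative): if a helper rewrite
 * grows a packet by 3 bytes at TCP sequence S, the code above records
 * correction_pos = S, keeps the previous cumulative offset in
 * offset_before and bumps offset_after by 3. nf_nat_seq_adjust() below
 * then adds offset_after to the sequence numbers of segments past S,
 * applies only offset_before to retransmissions from before S, and
 * subtracts the peer's offsets from incoming ACKs and SACK blocks so
 * both endpoints keep seeing a consistent byte stream. */
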
73/* Get the offset value, for conntrack */
74s16 nf_nat_get_offset(const struct nf_conn *ct,
75 enum ip_conntrack_dir dir,
76 u32 seq)
77{
78 struct nf_conn_nat *nat = nfct_nat(ct);
79 struct nf_nat_seq *this_way;
80 s16 offset;
81
82 if (!nat)
83 return 0;
84
85 this_way = &nat->seq[dir];
86 spin_lock_bh(&nf_nat_seqofs_lock);
87 offset = after(seq, this_way->correction_pos)
88 ? this_way->offset_after : this_way->offset_before;
89 spin_unlock_bh(&nf_nat_seqofs_lock);
90
91 return offset;
92}
93EXPORT_SYMBOL_GPL(nf_nat_get_offset);
94
95/* Frobs data inside this packet, which is linear. */
96static void mangle_contents(struct sk_buff *skb,
97 unsigned int dataoff,
98 unsigned int match_offset,
99 unsigned int match_len,
100 const char *rep_buffer,
101 unsigned int rep_len)
102{
103 unsigned char *data;
104
105 BUG_ON(skb_is_nonlinear(skb));
106 data = skb_network_header(skb) + dataoff;
107
108 /* move post-replacement */
109 memmove(data + match_offset + rep_len,
110 data + match_offset + match_len,
111 skb->tail - (skb->network_header + dataoff +
112 match_offset + match_len));
113
114 /* insert data from buffer */
115 memcpy(data + match_offset, rep_buffer, rep_len);
116
117 /* update skb info */
118 if (rep_len > match_len) {
119 pr_debug("nf_nat_mangle_packet: Extending packet by "
120 "%u from %u bytes\n", rep_len - match_len, skb->len);
121 skb_put(skb, rep_len - match_len);
122 } else {
123 pr_debug("nf_nat_mangle_packet: Shrinking packet from "
124 "%u from %u bytes\n", match_len - rep_len, skb->len);
125 __skb_trim(skb, skb->len + rep_len - match_len);
126 }
127
128 /* fix IP hdr checksum information */
129 ip_hdr(skb)->tot_len = htons(skb->len);
130 ip_send_check(ip_hdr(skb));
131}
132
133/* Unusual, but possible case. */
134static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
135{
136 if (skb->len + extra > 65535)
137 return 0;
138
139 if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC))
140 return 0;
141
142 return 1;
143}
144
145void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
146 __be32 seq, s16 off)
147{
148 if (!off)
149 return;
150 set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
151 adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo);
152 nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
153}
154EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
155
156static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data,
157 int datalen, __sum16 *check, int oldlen)
158{
159 struct rtable *rt = skb_rtable(skb);
160
161 if (skb->ip_summed != CHECKSUM_PARTIAL) {
162 if (!(rt->rt_flags & RTCF_LOCAL) &&
163 (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) {
164 skb->ip_summed = CHECKSUM_PARTIAL;
165 skb->csum_start = skb_headroom(skb) +
166 skb_network_offset(skb) +
167 iph->ihl * 4;
168 skb->csum_offset = (void *)check - data;
169 *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
170 datalen, iph->protocol, 0);
171 } else {
172 *check = 0;
173 *check = csum_tcpudp_magic(iph->saddr, iph->daddr,
174 datalen, iph->protocol,
175 csum_partial(data, datalen,
176 0));
177 if (iph->protocol == IPPROTO_UDP && !*check)
178 *check = CSUM_MANGLED_0;
179 }
180 } else
181 inet_proto_csum_replace2(check, skb,
182 htons(oldlen), htons(datalen), 1);
183}
184
185/* Generic function for mangling variable-length address changes inside
186 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
187 * command in FTP).
188 *
189 * Takes care of all the nasty sequence number changes, checksumming,
190 * skb enlargement, ...
191 *
192 */
193int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
194 struct nf_conn *ct,
195 enum ip_conntrack_info ctinfo,
196 unsigned int match_offset,
197 unsigned int match_len,
198 const char *rep_buffer,
199 unsigned int rep_len, bool adjust)
200{
201 struct iphdr *iph;
202 struct tcphdr *tcph;
203 int oldlen, datalen;
204
205 if (!skb_make_writable(skb, skb->len))
206 return 0;
207
208 if (rep_len > match_len &&
209 rep_len - match_len > skb_tailroom(skb) &&
210 !enlarge_skb(skb, rep_len - match_len))
211 return 0;
212
213 SKB_LINEAR_ASSERT(skb);
214
215 iph = ip_hdr(skb);
216 tcph = (void *)iph + iph->ihl*4;
217
218 oldlen = skb->len - iph->ihl*4;
219 mangle_contents(skb, iph->ihl*4 + tcph->doff*4,
220 match_offset, match_len, rep_buffer, rep_len);
221
222 datalen = skb->len - iph->ihl*4;
223 nf_nat_csum(skb, iph, tcph, datalen, &tcph->check, oldlen);
224
225 if (adjust && rep_len != match_len)
226 nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq,
227 (int)rep_len - (int)match_len);
228
229 return 1;
230}
231EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet);
232
233/* Generic function for mangling variable-length address changes inside
234 * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
235 * command in the Amanda protocol)
236 *
237 * Takes care of all the nasty sequence number changes, checksumming,
238 * skb enlargement, ...
239 *
240 * XXX - This function could be merged with nf_nat_mangle_tcp_packet which
241 * should be fairly easy to do.
242 */
243int
244nf_nat_mangle_udp_packet(struct sk_buff *skb,
245 struct nf_conn *ct,
246 enum ip_conntrack_info ctinfo,
247 unsigned int match_offset,
248 unsigned int match_len,
249 const char *rep_buffer,
250 unsigned int rep_len)
251{
252 struct iphdr *iph;
253 struct udphdr *udph;
254 int datalen, oldlen;
255
256 /* UDP helpers might accidentally mangle the wrong packet */
257 iph = ip_hdr(skb);
258 if (skb->len < iph->ihl*4 + sizeof(*udph) +
259 match_offset + match_len)
260 return 0;
261
262 if (!skb_make_writable(skb, skb->len))
263 return 0;
264
265 if (rep_len > match_len &&
266 rep_len - match_len > skb_tailroom(skb) &&
267 !enlarge_skb(skb, rep_len - match_len))
268 return 0;
269
270 iph = ip_hdr(skb);
271 udph = (void *)iph + iph->ihl*4;
272
273 oldlen = skb->len - iph->ihl*4;
274 mangle_contents(skb, iph->ihl*4 + sizeof(*udph),
275 match_offset, match_len, rep_buffer, rep_len);
276
277 /* update the length of the UDP packet */
278 datalen = skb->len - iph->ihl*4;
279 udph->len = htons(datalen);
280
281 /* fix udp checksum if udp checksum was previously calculated */
282 if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
283 return 1;
284
285 nf_nat_csum(skb, iph, udph, datalen, &udph->check, oldlen);
286
287 return 1;
288}
289EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
290
291/* Adjust one found SACK option including checksum correction */
292static void
293sack_adjust(struct sk_buff *skb,
294 struct tcphdr *tcph,
295 unsigned int sackoff,
296 unsigned int sackend,
297 struct nf_nat_seq *natseq)
298{
299 while (sackoff < sackend) {
300 struct tcp_sack_block_wire *sack;
301 __be32 new_start_seq, new_end_seq;
302
303 sack = (void *)skb->data + sackoff;
304 if (after(ntohl(sack->start_seq) - natseq->offset_before,
305 natseq->correction_pos))
306 new_start_seq = htonl(ntohl(sack->start_seq)
307 - natseq->offset_after);
308 else
309 new_start_seq = htonl(ntohl(sack->start_seq)
310 - natseq->offset_before);
311
312 if (after(ntohl(sack->end_seq) - natseq->offset_before,
313 natseq->correction_pos))
314 new_end_seq = htonl(ntohl(sack->end_seq)
315 - natseq->offset_after);
316 else
317 new_end_seq = htonl(ntohl(sack->end_seq)
318 - natseq->offset_before);
319
320 pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
321 ntohl(sack->start_seq), new_start_seq,
322 ntohl(sack->end_seq), new_end_seq);
323
324 inet_proto_csum_replace4(&tcph->check, skb,
325 sack->start_seq, new_start_seq, 0);
326 inet_proto_csum_replace4(&tcph->check, skb,
327 sack->end_seq, new_end_seq, 0);
328 sack->start_seq = new_start_seq;
329 sack->end_seq = new_end_seq;
330 sackoff += sizeof(*sack);
331 }
332}
333
334/* TCP SACK sequence number adjustment */
335static inline unsigned int
336nf_nat_sack_adjust(struct sk_buff *skb,
337 struct tcphdr *tcph,
338 struct nf_conn *ct,
339 enum ip_conntrack_info ctinfo)
340{
341 unsigned int dir, optoff, optend;
342 struct nf_conn_nat *nat = nfct_nat(ct);
343
344 optoff = ip_hdrlen(skb) + sizeof(struct tcphdr);
345 optend = ip_hdrlen(skb) + tcph->doff * 4;
346
347 if (!skb_make_writable(skb, optend))
348 return 0;
349
350 dir = CTINFO2DIR(ctinfo);
351
352 while (optoff < optend) {
353 /* Usually: option, length. */
354 unsigned char *op = skb->data + optoff;
355
356 switch (op[0]) {
357 case TCPOPT_EOL:
358 return 1;
359 case TCPOPT_NOP:
360 optoff++;
361 continue;
362 default:
363 /* no partial options */
364 if (optoff + 1 == optend ||
365 optoff + op[1] > optend ||
366 op[1] < 2)
367 return 0;
368 if (op[0] == TCPOPT_SACK &&
369 op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
370 ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
371 sack_adjust(skb, tcph, optoff+2,
372 optoff+op[1], &nat->seq[!dir]);
373 optoff += op[1];
374 }
375 }
376 return 1;
377}
378
379/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
380int
381nf_nat_seq_adjust(struct sk_buff *skb,
382 struct nf_conn *ct,
383 enum ip_conntrack_info ctinfo)
384{
385 struct tcphdr *tcph;
386 int dir;
387 __be32 newseq, newack;
388 s16 seqoff, ackoff;
389 struct nf_conn_nat *nat = nfct_nat(ct);
390 struct nf_nat_seq *this_way, *other_way;
391
392 dir = CTINFO2DIR(ctinfo);
393
394 this_way = &nat->seq[dir];
395 other_way = &nat->seq[!dir];
396
397 if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
398 return 0;
399
400 tcph = (void *)skb->data + ip_hdrlen(skb);
401 if (after(ntohl(tcph->seq), this_way->correction_pos))
402 seqoff = this_way->offset_after;
403 else
404 seqoff = this_way->offset_before;
405
406 if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
407 other_way->correction_pos))
408 ackoff = other_way->offset_after;
409 else
410 ackoff = other_way->offset_before;
411
412 newseq = htonl(ntohl(tcph->seq) + seqoff);
413 newack = htonl(ntohl(tcph->ack_seq) - ackoff);
414
415 inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
416 inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
417
418 pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
419 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
420 ntohl(newack));
421
422 tcph->seq = newseq;
423 tcph->ack_seq = newack;
424
425 return nf_nat_sack_adjust(skb, tcph, ct, ctinfo);
426}
427
428/* Setup NAT on this expected conntrack so it follows master. */
429/* If we fail to get a free NAT slot, we'll get dropped on confirm */
430void nf_nat_follow_master(struct nf_conn *ct,
431 struct nf_conntrack_expect *exp)
432{
433 struct nf_nat_range range;
434
435 /* This must be a fresh one. */
436 BUG_ON(ct->status & IPS_NAT_DONE_MASK);
437
438 /* Change src to where master sends to */
439 range.flags = IP_NAT_RANGE_MAP_IPS;
440 range.min_ip = range.max_ip
441 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
442 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
443
444 /* For DST manip, map port here to where it's expected. */
445 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
446 range.min = range.max = exp->saved_proto;
447 range.min_ip = range.max_ip
448 = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
449 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
450}
451EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
new file mode 100644
index 00000000000..535e1a80235
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -0,0 +1,99 @@
1/* IRC extension for TCP NAT alteration.
2 *
3 * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
4 * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
5 * based on a copy of RR's ip_nat_ftp.c
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/module.h>
14#include <linux/moduleparam.h>
15#include <linux/tcp.h>
16#include <linux/kernel.h>
17
18#include <net/netfilter/nf_nat.h>
19#include <net/netfilter/nf_nat_helper.h>
20#include <net/netfilter/nf_nat_rule.h>
21#include <net/netfilter/nf_conntrack_helper.h>
22#include <net/netfilter/nf_conntrack_expect.h>
23#include <linux/netfilter/nf_conntrack_irc.h>
24
25MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
26MODULE_DESCRIPTION("IRC (DCC) NAT helper");
27MODULE_LICENSE("GPL");
28MODULE_ALIAS("ip_nat_irc");
29
30static unsigned int help(struct sk_buff *skb,
31 enum ip_conntrack_info ctinfo,
32 unsigned int matchoff,
33 unsigned int matchlen,
34 struct nf_conntrack_expect *exp)
35{
36 char buffer[sizeof("4294967295 65535")];
37 u_int32_t ip;
38 u_int16_t port;
39 unsigned int ret;
40
41 /* Reply comes from server. */
42 exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
43 exp->dir = IP_CT_DIR_REPLY;
44 exp->expectfn = nf_nat_follow_master;
45
46 /* Try to get same port: if not, try to change it. */
47 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
48 int ret;
49
50 exp->tuple.dst.u.tcp.port = htons(port);
51 ret = nf_ct_expect_related(exp);
52 if (ret == 0)
53 break;
54 else if (ret != -EBUSY) {
55 port = 0;
56 break;
57 }
58 }
59
60 if (port == 0)
61 return NF_DROP;
62
63 ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip);
64 sprintf(buffer, "%u %u", ip, port);
65 pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n",
66 buffer, &ip, port);
67
68 ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo,
69 matchoff, matchlen, buffer,
70 strlen(buffer));
71 if (ret != NF_ACCEPT)
72 nf_ct_unexpect_related(exp);
73 return ret;
74}
75
76static void __exit nf_nat_irc_fini(void)
77{
78 rcu_assign_pointer(nf_nat_irc_hook, NULL);
79 synchronize_rcu();
80}
81
82static int __init nf_nat_irc_init(void)
83{
84 BUG_ON(nf_nat_irc_hook != NULL);
85 rcu_assign_pointer(nf_nat_irc_hook, help);
86 return 0;
87}
88
89/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
90static int warn_set(const char *val, struct kernel_param *kp)
91{
92 printk(KERN_INFO KBUILD_MODNAME
93 ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
94 return 0;
95}
96module_param_call(ports, warn_set, NULL, NULL, 0);
97
98module_init(nf_nat_irc_init);
99module_exit(nf_nat_irc_fini);
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
new file mode 100644
index 00000000000..f52d41ea069
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -0,0 +1,125 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2008 Patrick McHardy <kaber@trash.net>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/types.h>
11#include <linux/random.h>
12#include <linux/ip.h>
13
14#include <linux/netfilter.h>
15#include <net/secure_seq.h>
16#include <net/netfilter/nf_nat.h>
17#include <net/netfilter/nf_nat_core.h>
18#include <net/netfilter/nf_nat_rule.h>
19#include <net/netfilter/nf_nat_protocol.h>
20
21bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
22 enum nf_nat_manip_type maniptype,
23 const union nf_conntrack_man_proto *min,
24 const union nf_conntrack_man_proto *max)
25{
26 __be16 port;
27
28 if (maniptype == IP_NAT_MANIP_SRC)
29 port = tuple->src.u.all;
30 else
31 port = tuple->dst.u.all;
32
33 return ntohs(port) >= ntohs(min->all) &&
34 ntohs(port) <= ntohs(max->all);
35}
36EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
37
38void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
39 const struct nf_nat_range *range,
40 enum nf_nat_manip_type maniptype,
41 const struct nf_conn *ct,
42 u_int16_t *rover)
43{
44 unsigned int range_size, min, i;
45 __be16 *portptr;
46 u_int16_t off;
47
48 if (maniptype == IP_NAT_MANIP_SRC)
49 portptr = &tuple->src.u.all;
50 else
51 portptr = &tuple->dst.u.all;
52
53 /* If no range specified... */
54 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
55 /* If it's dst rewrite, can't change port */
56 if (maniptype == IP_NAT_MANIP_DST)
57 return;
58
59 if (ntohs(*portptr) < 1024) {
60 /* Loose convention: >> 512 is credential passing */
61 if (ntohs(*portptr) < 512) {
62 min = 1;
63 range_size = 511 - min + 1;
64 } else {
65 min = 600;
66 range_size = 1023 - min + 1;
67 }
68 } else {
69 min = 1024;
70 range_size = 65535 - 1024 + 1;
71 }
72 } else {
73 min = ntohs(range->min.all);
74 range_size = ntohs(range->max.all) - min + 1;
75 }
76
77 if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
78 off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip,
79 maniptype == IP_NAT_MANIP_SRC
80 ? tuple->dst.u.all
81 : tuple->src.u.all);
82 else
83 off = *rover;
84
85 for (i = 0; ; ++off) {
86 *portptr = htons(min + off % range_size);
87 if (++i != range_size && nf_nat_used_tuple(tuple, ct))
88 continue;
89 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
90 *rover = off;
91 return;
92 }
93 return;
94}
95EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
96
97#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
98int nf_nat_proto_range_to_nlattr(struct sk_buff *skb,
99 const struct nf_nat_range *range)
100{
101 NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.all);
102 NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.all);
103 return 0;
104
105nla_put_failure:
106 return -1;
107}
108EXPORT_SYMBOL_GPL(nf_nat_proto_range_to_nlattr);
109
110int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
111 struct nf_nat_range *range)
112{
113 if (tb[CTA_PROTONAT_PORT_MIN]) {
114 range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
115 range->max.all = range->min.tcp.port;
116 range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
117 }
118 if (tb[CTA_PROTONAT_PORT_MAX]) {
119 range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
120 range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
121 }
122 return 0;
123}
124EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range);
125#endif
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
new file mode 100644
index 00000000000..570faf2667b
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c
@@ -0,0 +1,108 @@
1/*
2 * DCCP NAT protocol helper
3 *
4 * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 */
11
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/skbuff.h>
16#include <linux/ip.h>
17#include <linux/dccp.h>
18
19#include <net/netfilter/nf_conntrack.h>
20#include <net/netfilter/nf_nat.h>
21#include <net/netfilter/nf_nat_protocol.h>
22
23static u_int16_t dccp_port_rover;
24
25static void
26dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
27 const struct nf_nat_range *range,
28 enum nf_nat_manip_type maniptype,
29 const struct nf_conn *ct)
30{
31 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
32 &dccp_port_rover);
33}
34
35static bool
36dccp_manip_pkt(struct sk_buff *skb,
37 unsigned int iphdroff,
38 const struct nf_conntrack_tuple *tuple,
39 enum nf_nat_manip_type maniptype)
40{
41 const struct iphdr *iph = (const void *)(skb->data + iphdroff);
42 struct dccp_hdr *hdr;
43 unsigned int hdroff = iphdroff + iph->ihl * 4;
44 __be32 oldip, newip;
45 __be16 *portptr, oldport, newport;
46 int hdrsize = 8; /* DCCP connection tracking guarantees this much */
47
48 if (skb->len >= hdroff + sizeof(struct dccp_hdr))
49 hdrsize = sizeof(struct dccp_hdr);
50
51 if (!skb_make_writable(skb, hdroff + hdrsize))
52 return false;
53
54 iph = (struct iphdr *)(skb->data + iphdroff);
55 hdr = (struct dccp_hdr *)(skb->data + hdroff);
56
57 if (maniptype == IP_NAT_MANIP_SRC) {
58 oldip = iph->saddr;
59 newip = tuple->src.u3.ip;
60 newport = tuple->src.u.dccp.port;
61 portptr = &hdr->dccph_sport;
62 } else {
63 oldip = iph->daddr;
64 newip = tuple->dst.u3.ip;
65 newport = tuple->dst.u.dccp.port;
66 portptr = &hdr->dccph_dport;
67 }
68
69 oldport = *portptr;
70 *portptr = newport;
71
72 if (hdrsize < sizeof(*hdr))
73 return true;
74
75 inet_proto_csum_replace4(&hdr->dccph_checksum, skb, oldip, newip, 1);
76 inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
77 0);
78 return true;
79}
80
81static const struct nf_nat_protocol nf_nat_protocol_dccp = {
82 .protonum = IPPROTO_DCCP,
83 .me = THIS_MODULE,
84 .manip_pkt = dccp_manip_pkt,
85 .in_range = nf_nat_proto_in_range,
86 .unique_tuple = dccp_unique_tuple,
87#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
88 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
89 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
90#endif
91};
92
93static int __init nf_nat_proto_dccp_init(void)
94{
95 return nf_nat_protocol_register(&nf_nat_protocol_dccp);
96}
97
98static void __exit nf_nat_proto_dccp_fini(void)
99{
100 nf_nat_protocol_unregister(&nf_nat_protocol_dccp);
101}
102
103module_init(nf_nat_proto_dccp_init);
104module_exit(nf_nat_proto_dccp_fini);
105
106MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
107MODULE_DESCRIPTION("DCCP NAT protocol helper");
108MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
new file mode 100644
index 00000000000..756331d4266
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -0,0 +1,97 @@
1/*
2 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/init.h>
11#include <linux/ip.h>
12#include <linux/sctp.h>
13#include <net/sctp/checksum.h>
14
15#include <net/netfilter/nf_nat_protocol.h>
16
17static u_int16_t nf_sctp_port_rover;
18
19static void
20sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
21 const struct nf_nat_range *range,
22 enum nf_nat_manip_type maniptype,
23 const struct nf_conn *ct)
24{
25 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
26 &nf_sctp_port_rover);
27}
28
29static bool
30sctp_manip_pkt(struct sk_buff *skb,
31 unsigned int iphdroff,
32 const struct nf_conntrack_tuple *tuple,
33 enum nf_nat_manip_type maniptype)
34{
35 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
36 struct sk_buff *frag;
37 sctp_sctphdr_t *hdr;
38 unsigned int hdroff = iphdroff + iph->ihl*4;
39 __be32 oldip, newip;
40 __be32 crc32;
41
42 if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
43 return false;
44
45 iph = (struct iphdr *)(skb->data + iphdroff);
46 hdr = (struct sctphdr *)(skb->data + hdroff);
47
48 if (maniptype == IP_NAT_MANIP_SRC) {
49 /* Get rid of src ip and src pt */
50 oldip = iph->saddr;
51 newip = tuple->src.u3.ip;
52 hdr->source = tuple->src.u.sctp.port;
53 } else {
54 /* Get rid of dst ip and dst pt */
55 oldip = iph->daddr;
56 newip = tuple->dst.u3.ip;
57 hdr->dest = tuple->dst.u.sctp.port;
58 }
59
60 crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff);
61 skb_walk_frags(skb, frag)
62 crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag),
63 crc32);
64 crc32 = sctp_end_cksum(crc32);
65 hdr->checksum = crc32;
66
67 return true;
68}
69
70static const struct nf_nat_protocol nf_nat_protocol_sctp = {
71 .protonum = IPPROTO_SCTP,
72 .me = THIS_MODULE,
73 .manip_pkt = sctp_manip_pkt,
74 .in_range = nf_nat_proto_in_range,
75 .unique_tuple = sctp_unique_tuple,
76#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
77 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
78 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
79#endif
80};
81
82static int __init nf_nat_proto_sctp_init(void)
83{
84 return nf_nat_protocol_register(&nf_nat_protocol_sctp);
85}
86
87static void __exit nf_nat_proto_sctp_exit(void)
88{
89 nf_nat_protocol_unregister(&nf_nat_protocol_sctp);
90}
91
92module_init(nf_nat_proto_sctp_init);
93module_exit(nf_nat_proto_sctp_exit);
94
95MODULE_LICENSE("GPL");
96MODULE_DESCRIPTION("SCTP NAT protocol helper");
97MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
new file mode 100644
index 00000000000..aa460a595d5
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -0,0 +1,92 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/init.h>
11#include <linux/ip.h>
12#include <linux/tcp.h>
13
14#include <linux/netfilter.h>
15#include <linux/netfilter/nfnetlink_conntrack.h>
16#include <net/netfilter/nf_nat.h>
17#include <net/netfilter/nf_nat_rule.h>
18#include <net/netfilter/nf_nat_protocol.h>
19#include <net/netfilter/nf_nat_core.h>
20
21static u_int16_t tcp_port_rover;
22
23static void
24tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
25 const struct nf_nat_range *range,
26 enum nf_nat_manip_type maniptype,
27 const struct nf_conn *ct)
28{
29 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover);
30}
31
32static bool
33tcp_manip_pkt(struct sk_buff *skb,
34 unsigned int iphdroff,
35 const struct nf_conntrack_tuple *tuple,
36 enum nf_nat_manip_type maniptype)
37{
38 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
39 struct tcphdr *hdr;
40 unsigned int hdroff = iphdroff + iph->ihl*4;
41 __be32 oldip, newip;
42 __be16 *portptr, newport, oldport;
43 int hdrsize = 8; /* TCP connection tracking guarantees this much */
44
45 /* this could be a inner header returned in icmp packet; in such
46 cases we cannot update the checksum field since it is outside of
47 the 8 bytes of transport layer headers we are guaranteed */
48 if (skb->len >= hdroff + sizeof(struct tcphdr))
49 hdrsize = sizeof(struct tcphdr);
50
51 if (!skb_make_writable(skb, hdroff + hdrsize))
52 return false;
53
54 iph = (struct iphdr *)(skb->data + iphdroff);
55 hdr = (struct tcphdr *)(skb->data + hdroff);
56
57 if (maniptype == IP_NAT_MANIP_SRC) {
58 /* Get rid of src ip and src pt */
59 oldip = iph->saddr;
60 newip = tuple->src.u3.ip;
61 newport = tuple->src.u.tcp.port;
62 portptr = &hdr->source;
63 } else {
64 /* Get rid of dst ip and dst pt */
65 oldip = iph->daddr;
66 newip = tuple->dst.u3.ip;
67 newport = tuple->dst.u.tcp.port;
68 portptr = &hdr->dest;
69 }
70
71 oldport = *portptr;
72 *portptr = newport;
73
74 if (hdrsize < sizeof(*hdr))
75 return true;
76
77 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
78 inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
79 return true;
80}
81
82const struct nf_nat_protocol nf_nat_protocol_tcp = {
83 .protonum = IPPROTO_TCP,
84 .me = THIS_MODULE,
85 .manip_pkt = tcp_manip_pkt,
86 .in_range = nf_nat_proto_in_range,
87 .unique_tuple = tcp_unique_tuple,
88#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
89 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
90 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
91#endif
92};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
new file mode 100644
index 00000000000..dfe65c7e292
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -0,0 +1,83 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/init.h>
11#include <linux/ip.h>
12#include <linux/udp.h>
13
14#include <linux/netfilter.h>
15#include <net/netfilter/nf_nat.h>
16#include <net/netfilter/nf_nat_core.h>
17#include <net/netfilter/nf_nat_rule.h>
18#include <net/netfilter/nf_nat_protocol.h>
19
20static u_int16_t udp_port_rover;
21
22static void
23udp_unique_tuple(struct nf_conntrack_tuple *tuple,
24 const struct nf_nat_range *range,
25 enum nf_nat_manip_type maniptype,
26 const struct nf_conn *ct)
27{
28 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover);
29}
30
31static bool
32udp_manip_pkt(struct sk_buff *skb,
33 unsigned int iphdroff,
34 const struct nf_conntrack_tuple *tuple,
35 enum nf_nat_manip_type maniptype)
36{
37 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
38 struct udphdr *hdr;
39 unsigned int hdroff = iphdroff + iph->ihl*4;
40 __be32 oldip, newip;
41 __be16 *portptr, newport;
42
43 if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
44 return false;
45
46 iph = (struct iphdr *)(skb->data + iphdroff);
47 hdr = (struct udphdr *)(skb->data + hdroff);
48
49 if (maniptype == IP_NAT_MANIP_SRC) {
50 /* Get rid of src ip and src pt */
51 oldip = iph->saddr;
52 newip = tuple->src.u3.ip;
53 newport = tuple->src.u.udp.port;
54 portptr = &hdr->source;
55 } else {
56 /* Get rid of dst ip and dst pt */
57 oldip = iph->daddr;
58 newip = tuple->dst.u3.ip;
59 newport = tuple->dst.u.udp.port;
60 portptr = &hdr->dest;
61 }
62 if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
63 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
64 inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
65 0);
66 if (!hdr->check)
67 hdr->check = CSUM_MANGLED_0;
68 }
69 *portptr = newport;
70 return true;
71}
72
73const struct nf_nat_protocol nf_nat_protocol_udp = {
74 .protonum = IPPROTO_UDP,
75 .me = THIS_MODULE,
76 .manip_pkt = udp_manip_pkt,
77 .in_range = nf_nat_proto_in_range,
78 .unique_tuple = udp_unique_tuple,
79#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
80 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
81 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
82#endif
83};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
new file mode 100644
index 00000000000..3cc8c8af39e
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c
@@ -0,0 +1,99 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2008 Patrick McHardy <kaber@trash.net>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/types.h>
11#include <linux/init.h>
12#include <linux/ip.h>
13#include <linux/udp.h>
14
15#include <linux/netfilter.h>
16#include <net/netfilter/nf_nat.h>
17#include <net/netfilter/nf_nat_protocol.h>
18
19static u_int16_t udplite_port_rover;
20
21static void
22udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
23 const struct nf_nat_range *range,
24 enum nf_nat_manip_type maniptype,
25 const struct nf_conn *ct)
26{
27 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
28 &udplite_port_rover);
29}
30
31static bool
32udplite_manip_pkt(struct sk_buff *skb,
33 unsigned int iphdroff,
34 const struct nf_conntrack_tuple *tuple,
35 enum nf_nat_manip_type maniptype)
36{
37 const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
38 struct udphdr *hdr;
39 unsigned int hdroff = iphdroff + iph->ihl*4;
40 __be32 oldip, newip;
41 __be16 *portptr, newport;
42
43 if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
44 return false;
45
46 iph = (struct iphdr *)(skb->data + iphdroff);
47 hdr = (struct udphdr *)(skb->data + hdroff);
48
49 if (maniptype == IP_NAT_MANIP_SRC) {
50 /* Get rid of src ip and src pt */
51 oldip = iph->saddr;
52 newip = tuple->src.u3.ip;
53 newport = tuple->src.u.udp.port;
54 portptr = &hdr->source;
55 } else {
56 /* Get rid of dst ip and dst pt */
57 oldip = iph->daddr;
58 newip = tuple->dst.u3.ip;
59 newport = tuple->dst.u.udp.port;
60 portptr = &hdr->dest;
61 }
62
63 inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
64 inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0);
65 if (!hdr->check)
66 hdr->check = CSUM_MANGLED_0;
67
68 *portptr = newport;
69 return true;
70}
71
72static const struct nf_nat_protocol nf_nat_protocol_udplite = {
73 .protonum = IPPROTO_UDPLITE,
74 .me = THIS_MODULE,
75 .manip_pkt = udplite_manip_pkt,
76 .in_range = nf_nat_proto_in_range,
77 .unique_tuple = udplite_unique_tuple,
78#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
79 .range_to_nlattr = nf_nat_proto_range_to_nlattr,
80 .nlattr_to_range = nf_nat_proto_nlattr_to_range,
81#endif
82};
83
84static int __init nf_nat_proto_udplite_init(void)
85{
86 return nf_nat_protocol_register(&nf_nat_protocol_udplite);
87}
88
89static void __exit nf_nat_proto_udplite_fini(void)
90{
91 nf_nat_protocol_unregister(&nf_nat_protocol_udplite);
92}
93
94module_init(nf_nat_proto_udplite_init);
95module_exit(nf_nat_proto_udplite_fini);
96
97MODULE_LICENSE("GPL");
98MODULE_DESCRIPTION("UDP-Lite NAT protocol helper");
99MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
new file mode 100644
index 00000000000..a50f2bc1c73
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -0,0 +1,53 @@
1/* The "unknown" protocol. This is what is used for protocols we
2 * don't understand. It's returned by ip_ct_find_proto().
3 */
4
5/* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/types.h>
14#include <linux/init.h>
15
16#include <linux/netfilter.h>
17#include <net/netfilter/nf_nat.h>
18#include <net/netfilter/nf_nat_rule.h>
19#include <net/netfilter/nf_nat_protocol.h>
20
21static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
22 enum nf_nat_manip_type manip_type,
23 const union nf_conntrack_man_proto *min,
24 const union nf_conntrack_man_proto *max)
25{
26 return true;
27}
28
29static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
30 const struct nf_nat_range *range,
31 enum nf_nat_manip_type maniptype,
32 const struct nf_conn *ct)
33{
34 /* Sorry: we can't help you; if it's not unique, we can't frob
35 anything. */
36 return;
37}
38
39static bool
40unknown_manip_pkt(struct sk_buff *skb,
41 unsigned int iphdroff,
42 const struct nf_conntrack_tuple *tuple,
43 enum nf_nat_manip_type maniptype)
44{
45 return true;
46}
47
48const struct nf_nat_protocol nf_nat_unknown_protocol = {
49 /* .me isn't set: getting a ref to this cannot fail. */
50 .manip_pkt = unknown_manip_pkt,
51 .in_range = unknown_in_range,
52 .unique_tuple = unknown_unique_tuple,
53};
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
new file mode 100644
index 00000000000..733c9abc1cb
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -0,0 +1,214 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9/* Everything about the rules for NAT. */
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11#include <linux/types.h>
12#include <linux/ip.h>
13#include <linux/netfilter.h>
14#include <linux/netfilter_ipv4.h>
15#include <linux/module.h>
16#include <linux/kmod.h>
17#include <linux/skbuff.h>
18#include <linux/proc_fs.h>
19#include <linux/slab.h>
20#include <net/checksum.h>
21#include <net/route.h>
22#include <linux/bitops.h>
23
24#include <linux/netfilter_ipv4/ip_tables.h>
25#include <net/netfilter/nf_nat.h>
26#include <net/netfilter/nf_nat_core.h>
27#include <net/netfilter/nf_nat_rule.h>
28
29#define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
30 (1 << NF_INET_POST_ROUTING) | \
31 (1 << NF_INET_LOCAL_OUT) | \
32 (1 << NF_INET_LOCAL_IN))
33
34static const struct xt_table nat_table = {
35 .name = "nat",
36 .valid_hooks = NAT_VALID_HOOKS,
37 .me = THIS_MODULE,
38 .af = NFPROTO_IPV4,
39};
40
41/* Source NAT */
42static unsigned int
43ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
44{
45 struct nf_conn *ct;
46 enum ip_conntrack_info ctinfo;
47 const struct nf_nat_multi_range_compat *mr = par->targinfo;
48
49 NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING ||
50 par->hooknum == NF_INET_LOCAL_IN);
51
52 ct = nf_ct_get(skb, &ctinfo);
53
54 /* Connection must be valid and new. */
55 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
56 ctinfo == IP_CT_RELATED_REPLY));
57 NF_CT_ASSERT(par->out != NULL);
58
59 return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC);
60}
61
62static unsigned int
63ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
64{
65 struct nf_conn *ct;
66 enum ip_conntrack_info ctinfo;
67 const struct nf_nat_multi_range_compat *mr = par->targinfo;
68
69 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
70 par->hooknum == NF_INET_LOCAL_OUT);
71
72 ct = nf_ct_get(skb, &ctinfo);
73
74 /* Connection must be valid and new. */
75 NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
76
77 return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
78}
79
80static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
81{
82 const struct nf_nat_multi_range_compat *mr = par->targinfo;
83
84 /* Must be a valid range */
85 if (mr->rangesize != 1) {
86 pr_info("SNAT: multiple ranges no longer supported\n");
87 return -EINVAL;
88 }
89 return 0;
90}
91
92static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
93{
94 const struct nf_nat_multi_range_compat *mr = par->targinfo;
95
96 /* Must be a valid range */
97 if (mr->rangesize != 1) {
98 pr_info("DNAT: multiple ranges no longer supported\n");
99 return -EINVAL;
100 }
101 return 0;
102}
103
104static unsigned int
105alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
106{
107 /* Force range to this IP; let proto decide mapping for
108 per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
109 */
110 struct nf_nat_range range;
111
112 range.flags = 0;
113 pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
114 HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ?
115 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
116 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
117
118 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
119}
120
121int nf_nat_rule_find(struct sk_buff *skb,
122 unsigned int hooknum,
123 const struct net_device *in,
124 const struct net_device *out,
125 struct nf_conn *ct)
126{
127 struct net *net = nf_ct_net(ct);
128 int ret;
129
130 ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
131
132 if (ret == NF_ACCEPT) {
133 if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
134 /* NUL mapping */
135 ret = alloc_null_binding(ct, hooknum);
136 }
137 return ret;
138}
139
140static struct xt_target ipt_snat_reg __read_mostly = {
141 .name = "SNAT",
142 .target = ipt_snat_target,
143 .targetsize = sizeof(struct nf_nat_multi_range_compat),
144 .table = "nat",
145 .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN),
146 .checkentry = ipt_snat_checkentry,
147 .family = AF_INET,
148};
149
150static struct xt_target ipt_dnat_reg __read_mostly = {
151 .name = "DNAT",
152 .target = ipt_dnat_target,
153 .targetsize = sizeof(struct nf_nat_multi_range_compat),
154 .table = "nat",
155 .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
156 .checkentry = ipt_dnat_checkentry,
157 .family = AF_INET,
158};
159
160static int __net_init nf_nat_rule_net_init(struct net *net)
161{
162 struct ipt_replace *repl;
163
164 repl = ipt_alloc_initial_table(&nat_table);
165 if (repl == NULL)
166 return -ENOMEM;
167 net->ipv4.nat_table = ipt_register_table(net, &nat_table, repl);
168 kfree(repl);
169 if (IS_ERR(net->ipv4.nat_table))
170 return PTR_ERR(net->ipv4.nat_table);
171 return 0;
172}
173
174static void __net_exit nf_nat_rule_net_exit(struct net *net)
175{
176 ipt_unregister_table(net, net->ipv4.nat_table);
177}
178
179static struct pernet_operations nf_nat_rule_net_ops = {
180 .init = nf_nat_rule_net_init,
181 .exit = nf_nat_rule_net_exit,
182};
183
184int __init nf_nat_rule_init(void)
185{
186 int ret;
187
188 ret = register_pernet_subsys(&nf_nat_rule_net_ops);
189 if (ret != 0)
190 goto out;
191 ret = xt_register_target(&ipt_snat_reg);
192 if (ret != 0)
193 goto unregister_table;
194
195 ret = xt_register_target(&ipt_dnat_reg);
196 if (ret != 0)
197 goto unregister_snat;
198
199 return ret;
200
201 unregister_snat:
202 xt_unregister_target(&ipt_snat_reg);
203 unregister_table:
204 unregister_pernet_subsys(&nf_nat_rule_net_ops);
205 out:
206 return ret;
207}
208
209void nf_nat_rule_cleanup(void)
210{
211 xt_unregister_target(&ipt_dnat_reg);
212 xt_unregister_target(&ipt_snat_reg);
213 unregister_pernet_subsys(&nf_nat_rule_net_ops);
214}
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
new file mode 100644
index 00000000000..e40cf7816fd
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -0,0 +1,561 @@
1/* SIP extension for NAT alteration.
2 *
3 * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
4 * based on RR's ip_nat_ftp.c and other modules.
5 * (C) 2007 United Security Providers
6 * (C) 2007, 2008 Patrick McHardy <kaber@trash.net>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/ip.h>
16#include <net/ip.h>
17#include <linux/udp.h>
18#include <linux/tcp.h>
19
20#include <net/netfilter/nf_nat.h>
21#include <net/netfilter/nf_nat_helper.h>
22#include <net/netfilter/nf_nat_rule.h>
23#include <net/netfilter/nf_conntrack_helper.h>
24#include <net/netfilter/nf_conntrack_expect.h>
25#include <linux/netfilter/nf_conntrack_sip.h>
26
27MODULE_LICENSE("GPL");
28MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
29MODULE_DESCRIPTION("SIP NAT helper");
30MODULE_ALIAS("ip_nat_sip");
31
32
33static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff,
34 const char **dptr, unsigned int *datalen,
35 unsigned int matchoff, unsigned int matchlen,
36 const char *buffer, unsigned int buflen)
37{
38 enum ip_conntrack_info ctinfo;
39 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
40 struct tcphdr *th;
41 unsigned int baseoff;
42
43 if (nf_ct_protonum(ct) == IPPROTO_TCP) {
44 th = (struct tcphdr *)(skb->data + ip_hdrlen(skb));
45 baseoff = ip_hdrlen(skb) + th->doff * 4;
46 matchoff += dataoff - baseoff;
47
48 if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
49 matchoff, matchlen,
50 buffer, buflen, false))
51 return 0;
52 } else {
53 baseoff = ip_hdrlen(skb) + sizeof(struct udphdr);
54 matchoff += dataoff - baseoff;
55
56 if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
57 matchoff, matchlen,
58 buffer, buflen))
59 return 0;
60 }
61
62 /* Reload data pointer and adjust datalen value */
63 *dptr = skb->data + dataoff;
64 *datalen += buflen - matchlen;
65 return 1;
66}
67
68static int map_addr(struct sk_buff *skb, unsigned int dataoff,
69 const char **dptr, unsigned int *datalen,
70 unsigned int matchoff, unsigned int matchlen,
71 union nf_inet_addr *addr, __be16 port)
72{
73 enum ip_conntrack_info ctinfo;
74 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
75 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
76 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
77 unsigned int buflen;
78 __be32 newaddr;
79 __be16 newport;
80
81 if (ct->tuplehash[dir].tuple.src.u3.ip == addr->ip &&
82 ct->tuplehash[dir].tuple.src.u.udp.port == port) {
83 newaddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
84 newport = ct->tuplehash[!dir].tuple.dst.u.udp.port;
85 } else if (ct->tuplehash[dir].tuple.dst.u3.ip == addr->ip &&
86 ct->tuplehash[dir].tuple.dst.u.udp.port == port) {
87 newaddr = ct->tuplehash[!dir].tuple.src.u3.ip;
88 newport = ct->tuplehash[!dir].tuple.src.u.udp.port;
89 } else
90 return 1;
91
92 if (newaddr == addr->ip && newport == port)
93 return 1;
94
95 buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport));
96
97 return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
98 buffer, buflen);
99}
100
101static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff,
102 const char **dptr, unsigned int *datalen,
103 enum sip_header_types type)
104{
105 enum ip_conntrack_info ctinfo;
106 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
107 unsigned int matchlen, matchoff;
108 union nf_inet_addr addr;
109 __be16 port;
110
111 if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL,
112 &matchoff, &matchlen, &addr, &port) <= 0)
113 return 1;
114 return map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
115 &addr, port);
116}
117
118static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
119 const char **dptr, unsigned int *datalen)
120{
121 enum ip_conntrack_info ctinfo;
122 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
123 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
124 unsigned int coff, matchoff, matchlen;
125 enum sip_header_types hdr;
126 union nf_inet_addr addr;
127 __be16 port;
128 int request, in_header;
129
130 /* Basic rules: requests and responses. */
131 if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) {
132 if (ct_sip_parse_request(ct, *dptr, *datalen,
133 &matchoff, &matchlen,
134 &addr, &port) > 0 &&
135 !map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
136 &addr, port))
137 return NF_DROP;
138 request = 1;
139 } else
140 request = 0;
141
142 if (nf_ct_protonum(ct) == IPPROTO_TCP)
143 hdr = SIP_HDR_VIA_TCP;
144 else
145 hdr = SIP_HDR_VIA_UDP;
146
147 /* Translate topmost Via header and parameters */
148 if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
149 hdr, NULL, &matchoff, &matchlen,
150 &addr, &port) > 0) {
151 unsigned int matchend, poff, plen, buflen, n;
152 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
153
154 /* We're only interested in headers related to this
155 * connection */
156 if (request) {
157 if (addr.ip != ct->tuplehash[dir].tuple.src.u3.ip ||
158 port != ct->tuplehash[dir].tuple.src.u.udp.port)
159 goto next;
160 } else {
161 if (addr.ip != ct->tuplehash[dir].tuple.dst.u3.ip ||
162 port != ct->tuplehash[dir].tuple.dst.u.udp.port)
163 goto next;
164 }
165
166 if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
167 &addr, port))
168 return NF_DROP;
169
170 matchend = matchoff + matchlen;
171
172 /* The maddr= parameter (RFC 3261) specifies where to send
173 * the reply. */
174 if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
175 "maddr=", &poff, &plen,
176 &addr) > 0 &&
177 addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
178 addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
179 buflen = sprintf(buffer, "%pI4",
180 &ct->tuplehash[!dir].tuple.dst.u3.ip);
181 if (!mangle_packet(skb, dataoff, dptr, datalen,
182 poff, plen, buffer, buflen))
183 return NF_DROP;
184 }
185
186 /* The received= parameter (RFC 3261) contains the address
187 * from which the server received the request. */
188 if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
189 "received=", &poff, &plen,
190 &addr) > 0 &&
191 addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
192 addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
193 buflen = sprintf(buffer, "%pI4",
194 &ct->tuplehash[!dir].tuple.src.u3.ip);
195 if (!mangle_packet(skb, dataoff, dptr, datalen,
196 poff, plen, buffer, buflen))
197 return NF_DROP;
198 }
199
200 /* The rport= parameter (RFC 3581) contains the port number
201 * from which the server received the request. */
202 if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen,
203 "rport=", &poff, &plen,
204 &n) > 0 &&
205 htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port &&
206 htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) {
207 __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port;
208 buflen = sprintf(buffer, "%u", ntohs(p));
209 if (!mangle_packet(skb, dataoff, dptr, datalen,
210 poff, plen, buffer, buflen))
211 return NF_DROP;
212 }
213 }
214
215next:
216 /* Translate Contact headers */
217 coff = 0;
218 in_header = 0;
219 while (ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen,
220 SIP_HDR_CONTACT, &in_header,
221 &matchoff, &matchlen,
222 &addr, &port) > 0) {
223 if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
224 &addr, port))
225 return NF_DROP;
226 }
227
228 if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) ||
229 !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO))
230 return NF_DROP;
231
232 return NF_ACCEPT;
233}
234
235static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off)
236{
237 enum ip_conntrack_info ctinfo;
238 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
239 const struct tcphdr *th;
240
241 if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0)
242 return;
243
244 th = (struct tcphdr *)(skb->data + ip_hdrlen(skb));
245 nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
246}
247
248/* Handles expected signalling connections and media streams */
249static void ip_nat_sip_expected(struct nf_conn *ct,
250 struct nf_conntrack_expect *exp)
251{
252 struct nf_nat_range range;
253
254 /* This must be a fresh one. */
255 BUG_ON(ct->status & IPS_NAT_DONE_MASK);
256
257 /* For DST manip, map port here to where it's expected. */
258 range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
259 range.min = range.max = exp->saved_proto;
260 range.min_ip = range.max_ip = exp->saved_ip;
261 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
262
263 /* Change src to where master sends to, but only if the connection
264 * actually came from the same source. */
265 if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip ==
266 ct->master->tuplehash[exp->dir].tuple.src.u3.ip) {
267 range.flags = IP_NAT_RANGE_MAP_IPS;
268 range.min_ip = range.max_ip
269 = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
270 nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
271 }
272}
273
274static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff,
275 const char **dptr, unsigned int *datalen,
276 struct nf_conntrack_expect *exp,
277 unsigned int matchoff,
278 unsigned int matchlen)
279{
280 enum ip_conntrack_info ctinfo;
281 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
282 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
283 __be32 newip;
284 u_int16_t port;
285 char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
286 unsigned buflen;
287
288 /* Connection will come from reply */
289 if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip)
290 newip = exp->tuple.dst.u3.ip;
291 else
292 newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
293
294 /* If the signalling port matches the connection's source port in the
295 * original direction, try to use the destination port in the opposite
296 * direction. */
297 if (exp->tuple.dst.u.udp.port ==
298 ct->tuplehash[dir].tuple.src.u.udp.port)
299 port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port);
300 else
301 port = ntohs(exp->tuple.dst.u.udp.port);
302
303 exp->saved_ip = exp->tuple.dst.u3.ip;
304 exp->tuple.dst.u3.ip = newip;
305 exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
306 exp->dir = !dir;
307 exp->expectfn = ip_nat_sip_expected;
308
309 for (; port != 0; port++) {
310 int ret;
311
312 exp->tuple.dst.u.udp.port = htons(port);
313 ret = nf_ct_expect_related(exp);
314 if (ret == 0)
315 break;
316 else if (ret != -EBUSY) {
317 port = 0;
318 break;
319 }
320 }
321
322 if (port == 0)
323 return NF_DROP;
324
325 if (exp->tuple.dst.u3.ip != exp->saved_ip ||
326 exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) {
327 buflen = sprintf(buffer, "%pI4:%u", &newip, port);
328 if (!mangle_packet(skb, dataoff, dptr, datalen,
329 matchoff, matchlen, buffer, buflen))
330 goto err;
331 }
332 return NF_ACCEPT;
333
334err:
335 nf_ct_unexpect_related(exp);
336 return NF_DROP;
337}
338
339static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff,
340 const char **dptr, unsigned int *datalen)
341{
342 enum ip_conntrack_info ctinfo;
343 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
344 unsigned int matchoff, matchlen;
345 char buffer[sizeof("65536")];
346 int buflen, c_len;
347
348 /* Get actual SDP length */
349 if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
350 SDP_HDR_VERSION, SDP_HDR_UNSPEC,
351 &matchoff, &matchlen) <= 0)
352 return 0;
353 c_len = *datalen - matchoff + strlen("v=");
354
355 /* Now, update SDP length */
356 if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CONTENT_LENGTH,
357 &matchoff, &matchlen) <= 0)
358 return 0;
359
360 buflen = sprintf(buffer, "%u", c_len);
361 return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
362 buffer, buflen);
363}
364
365static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff,
366 const char **dptr, unsigned int *datalen,
367 unsigned int sdpoff,
368 enum sdp_header_types type,
369 enum sdp_header_types term,
370 char *buffer, int buflen)
371{
372 enum ip_conntrack_info ctinfo;
373 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
374 unsigned int matchlen, matchoff;
375
376 if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term,
377 &matchoff, &matchlen) <= 0)
378 return -ENOENT;
379 return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
380 buffer, buflen) ? 0 : -EINVAL;
381}
382
383static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int dataoff,
384 const char **dptr, unsigned int *datalen,
385 unsigned int sdpoff,
386 enum sdp_header_types type,
387 enum sdp_header_types term,
388 const union nf_inet_addr *addr)
389{
390 char buffer[sizeof("nnn.nnn.nnn.nnn")];
391 unsigned int buflen;
392
393 buflen = sprintf(buffer, "%pI4", &addr->ip);
394 if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, type, term,
395 buffer, buflen))
396 return 0;
397
398 return mangle_content_len(skb, dataoff, dptr, datalen);
399}
400
401static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff,
402 const char **dptr, unsigned int *datalen,
403 unsigned int matchoff,
404 unsigned int matchlen,
405 u_int16_t port)
406{
407 char buffer[sizeof("nnnnn")];
408 unsigned int buflen;
409
410 buflen = sprintf(buffer, "%u", port);
411 if (!mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
412 buffer, buflen))
413 return 0;
414
415 return mangle_content_len(skb, dataoff, dptr, datalen);
416}
417
418static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff,
419 const char **dptr, unsigned int *datalen,
420 unsigned int sdpoff,
421 const union nf_inet_addr *addr)
422{
423 char buffer[sizeof("nnn.nnn.nnn.nnn")];
424 unsigned int buflen;
425
426 /* Mangle session description owner and contact addresses */
427 buflen = sprintf(buffer, "%pI4", &addr->ip);
428 if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff,
429 SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA,
430 buffer, buflen))
431 return 0;
432
433 switch (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff,
434 SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA,
435 buffer, buflen)) {
436 case 0:
437 /*
438 * RFC 2327:
439 *
440 * Session description
441 *
442 * c=* (connection information - not required if included in all media)
443 */
444 case -ENOENT:
445 break;
446 default:
447 return 0;
448 }
449
450 return mangle_content_len(skb, dataoff, dptr, datalen);
451}
452
453/* So, this packet has hit the connection tracking matching code.
454 Mangle it, and change the expectation to match the new version. */
455static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
456 const char **dptr, unsigned int *datalen,
457 struct nf_conntrack_expect *rtp_exp,
458 struct nf_conntrack_expect *rtcp_exp,
459 unsigned int mediaoff,
460 unsigned int medialen,
461 union nf_inet_addr *rtp_addr)
462{
463 enum ip_conntrack_info ctinfo;
464 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
465 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
466 u_int16_t port;
467
468 /* Connection will come from reply */
469 if (ct->tuplehash[dir].tuple.src.u3.ip ==
470 ct->tuplehash[!dir].tuple.dst.u3.ip)
471 rtp_addr->ip = rtp_exp->tuple.dst.u3.ip;
472 else
473 rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
474
475 rtp_exp->saved_ip = rtp_exp->tuple.dst.u3.ip;
476 rtp_exp->tuple.dst.u3.ip = rtp_addr->ip;
477 rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
478 rtp_exp->dir = !dir;
479 rtp_exp->expectfn = ip_nat_sip_expected;
480
481 rtcp_exp->saved_ip = rtcp_exp->tuple.dst.u3.ip;
482 rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip;
483 rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
484 rtcp_exp->dir = !dir;
485 rtcp_exp->expectfn = ip_nat_sip_expected;
486
487 /* Try to get same pair of ports: if not, try to change them. */
488 for (port = ntohs(rtp_exp->tuple.dst.u.udp.port);
489 port != 0; port += 2) {
490 int ret;
491
492 rtp_exp->tuple.dst.u.udp.port = htons(port);
493 ret = nf_ct_expect_related(rtp_exp);
494 if (ret == -EBUSY)
495 continue;
496 else if (ret < 0) {
497 port = 0;
498 break;
499 }
500 rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
501 ret = nf_ct_expect_related(rtcp_exp);
502 if (ret == 0)
503 break;
504 else if (ret != -EBUSY) {
505 nf_ct_unexpect_related(rtp_exp);
506 port = 0;
507 break;
508 }
509 }
510
511 if (port == 0)
512 goto err1;
513
514 /* Update media port. */
515 if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port &&
516 !ip_nat_sdp_port(skb, dataoff, dptr, datalen,
517 mediaoff, medialen, port))
518 goto err2;
519
520 return NF_ACCEPT;
521
522err2:
523 nf_ct_unexpect_related(rtp_exp);
524 nf_ct_unexpect_related(rtcp_exp);
525err1:
526 return NF_DROP;
527}
528
529static void __exit nf_nat_sip_fini(void)
530{
531 rcu_assign_pointer(nf_nat_sip_hook, NULL);
532 rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, NULL);
533 rcu_assign_pointer(nf_nat_sip_expect_hook, NULL);
534 rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL);
535 rcu_assign_pointer(nf_nat_sdp_port_hook, NULL);
536 rcu_assign_pointer(nf_nat_sdp_session_hook, NULL);
537 rcu_assign_pointer(nf_nat_sdp_media_hook, NULL);
538 synchronize_rcu();
539}
540
541static int __init nf_nat_sip_init(void)
542{
543 BUG_ON(nf_nat_sip_hook != NULL);
544 BUG_ON(nf_nat_sip_seq_adjust_hook != NULL);
545 BUG_ON(nf_nat_sip_expect_hook != NULL);
546 BUG_ON(nf_nat_sdp_addr_hook != NULL);
547 BUG_ON(nf_nat_sdp_port_hook != NULL);
548 BUG_ON(nf_nat_sdp_session_hook != NULL);
549 BUG_ON(nf_nat_sdp_media_hook != NULL);
550 rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip);
551 rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust);
552 rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect);
553 rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr);
554 rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port);
555 rcu_assign_pointer(nf_nat_sdp_session_hook, ip_nat_sdp_session);
556 rcu_assign_pointer(nf_nat_sdp_media_hook, ip_nat_sdp_media);
557 return 0;
558}
559
560module_init(nf_nat_sip_init);
561module_exit(nf_nat_sip_fini);
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
new file mode 100644
index 00000000000..a6e606e8482
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -0,0 +1,326 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/types.h>
9#include <linux/icmp.h>
10#include <linux/gfp.h>
11#include <linux/ip.h>
12#include <linux/netfilter.h>
13#include <linux/netfilter_ipv4.h>
14#include <linux/module.h>
15#include <linux/skbuff.h>
16#include <linux/proc_fs.h>
17#include <net/ip.h>
18#include <net/checksum.h>
19#include <linux/spinlock.h>
20
21#include <net/netfilter/nf_conntrack.h>
22#include <net/netfilter/nf_conntrack_core.h>
23#include <net/netfilter/nf_conntrack_extend.h>
24#include <net/netfilter/nf_nat.h>
25#include <net/netfilter/nf_nat_rule.h>
26#include <net/netfilter/nf_nat_protocol.h>
27#include <net/netfilter/nf_nat_core.h>
28#include <net/netfilter/nf_nat_helper.h>
29#include <linux/netfilter_ipv4/ip_tables.h>
30
31#ifdef CONFIG_XFRM
32static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
33{
34 struct flowi4 *fl4 = &fl->u.ip4;
35 const struct nf_conn *ct;
36 const struct nf_conntrack_tuple *t;
37 enum ip_conntrack_info ctinfo;
38 enum ip_conntrack_dir dir;
39 unsigned long statusbit;
40
41 ct = nf_ct_get(skb, &ctinfo);
42 if (ct == NULL)
43 return;
44 dir = CTINFO2DIR(ctinfo);
45 t = &ct->tuplehash[dir].tuple;
46
47 if (dir == IP_CT_DIR_ORIGINAL)
48 statusbit = IPS_DST_NAT;
49 else
50 statusbit = IPS_SRC_NAT;
51
52 if (ct->status & statusbit) {
53 fl4->daddr = t->dst.u3.ip;
54 if (t->dst.protonum == IPPROTO_TCP ||
55 t->dst.protonum == IPPROTO_UDP ||
56 t->dst.protonum == IPPROTO_UDPLITE ||
57 t->dst.protonum == IPPROTO_DCCP ||
58 t->dst.protonum == IPPROTO_SCTP)
59 fl4->fl4_dport = t->dst.u.tcp.port;
60 }
61
62 statusbit ^= IPS_NAT_MASK;
63
64 if (ct->status & statusbit) {
65 fl4->saddr = t->src.u3.ip;
66 if (t->dst.protonum == IPPROTO_TCP ||
67 t->dst.protonum == IPPROTO_UDP ||
68 t->dst.protonum == IPPROTO_UDPLITE ||
69 t->dst.protonum == IPPROTO_DCCP ||
70 t->dst.protonum == IPPROTO_SCTP)
71 fl4->fl4_sport = t->src.u.tcp.port;
72 }
73}
74#endif
75
76static unsigned int
77nf_nat_fn(unsigned int hooknum,
78 struct sk_buff *skb,
79 const struct net_device *in,
80 const struct net_device *out,
81 int (*okfn)(struct sk_buff *))
82{
83 struct nf_conn *ct;
84 enum ip_conntrack_info ctinfo;
85 struct nf_conn_nat *nat;
86 /* maniptype == SRC for postrouting. */
87 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
88
89 /* We never see fragments: conntrack defrags on pre-routing
90 and local-out, and nf_nat_out protects post-routing. */
91 NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
92
93 ct = nf_ct_get(skb, &ctinfo);
94 /* Can't track? It's not due to stress, or conntrack would
95 have dropped it. Hence it's the user's responsibility to
96 packet filter it out, or implement conntrack/NAT for that
97 protocol. 8) --RR */
98 if (!ct)
99 return NF_ACCEPT;
100
101 /* Don't try to NAT if this packet is not conntracked */
102 if (nf_ct_is_untracked(ct))
103 return NF_ACCEPT;
104
105 nat = nfct_nat(ct);
106 if (!nat) {
107 /* NAT module was loaded late. */
108 if (nf_ct_is_confirmed(ct))
109 return NF_ACCEPT;
110 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
111 if (nat == NULL) {
112 pr_debug("failed to add NAT extension\n");
113 return NF_ACCEPT;
114 }
115 }
116
117 switch (ctinfo) {
118 case IP_CT_RELATED:
119 case IP_CT_RELATED_REPLY:
120 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
121 if (!nf_nat_icmp_reply_translation(ct, ctinfo,
122 hooknum, skb))
123 return NF_DROP;
124 else
125 return NF_ACCEPT;
126 }
127 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
128 case IP_CT_NEW:
129
130 /* Seen it before? This can happen for loopback, retrans,
131 or local packets.. */
132 if (!nf_nat_initialized(ct, maniptype)) {
133 unsigned int ret;
134
135 ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
136 if (ret != NF_ACCEPT)
137 return ret;
138 } else
139 pr_debug("Already setup manip %s for ct %p\n",
140 maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
141 ct);
142 break;
143
144 default:
145 /* ESTABLISHED */
146 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
147 ctinfo == IP_CT_ESTABLISHED_REPLY);
148 }
149
150 return nf_nat_packet(ct, ctinfo, hooknum, skb);
151}
152
153static unsigned int
154nf_nat_in(unsigned int hooknum,
155 struct sk_buff *skb,
156 const struct net_device *in,
157 const struct net_device *out,
158 int (*okfn)(struct sk_buff *))
159{
160 unsigned int ret;
161 __be32 daddr = ip_hdr(skb)->daddr;
162
163 ret = nf_nat_fn(hooknum, skb, in, out, okfn);
164 if (ret != NF_DROP && ret != NF_STOLEN &&
165 daddr != ip_hdr(skb)->daddr)
166 skb_dst_drop(skb);
167
168 return ret;
169}
170
171static unsigned int
172nf_nat_out(unsigned int hooknum,
173 struct sk_buff *skb,
174 const struct net_device *in,
175 const struct net_device *out,
176 int (*okfn)(struct sk_buff *))
177{
178#ifdef CONFIG_XFRM
179 const struct nf_conn *ct;
180 enum ip_conntrack_info ctinfo;
181#endif
182 unsigned int ret;
183
184 /* root is playing with raw sockets. */
185 if (skb->len < sizeof(struct iphdr) ||
186 ip_hdrlen(skb) < sizeof(struct iphdr))
187 return NF_ACCEPT;
188
189 ret = nf_nat_fn(hooknum, skb, in, out, okfn);
190#ifdef CONFIG_XFRM
191 if (ret != NF_DROP && ret != NF_STOLEN &&
192 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
193 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
194
195 if ((ct->tuplehash[dir].tuple.src.u3.ip !=
196 ct->tuplehash[!dir].tuple.dst.u3.ip) ||
197 (ct->tuplehash[dir].tuple.src.u.all !=
198 ct->tuplehash[!dir].tuple.dst.u.all)
199 )
200 return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP;
201 }
202#endif
203 return ret;
204}
205
206static unsigned int
207nf_nat_local_fn(unsigned int hooknum,
208 struct sk_buff *skb,
209 const struct net_device *in,
210 const struct net_device *out,
211 int (*okfn)(struct sk_buff *))
212{
213 const struct nf_conn *ct;
214 enum ip_conntrack_info ctinfo;
215 unsigned int ret;
216
217 /* root is playing with raw sockets. */
218 if (skb->len < sizeof(struct iphdr) ||
219 ip_hdrlen(skb) < sizeof(struct iphdr))
220 return NF_ACCEPT;
221
222 ret = nf_nat_fn(hooknum, skb, in, out, okfn);
223 if (ret != NF_DROP && ret != NF_STOLEN &&
224 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
225 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
226
227 if (ct->tuplehash[dir].tuple.dst.u3.ip !=
228 ct->tuplehash[!dir].tuple.src.u3.ip) {
229 if (ip_route_me_harder(skb, RTN_UNSPEC))
230 ret = NF_DROP;
231 }
232#ifdef CONFIG_XFRM
233 else if (ct->tuplehash[dir].tuple.dst.u.all !=
234 ct->tuplehash[!dir].tuple.src.u.all)
235 if (ip_xfrm_me_harder(skb))
236 ret = NF_DROP;
237#endif
238 }
239 return ret;
240}
241
242/* We must be after connection tracking and before packet filtering. */
243
244static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
245 /* Before packet filtering, change destination */
246 {
247 .hook = nf_nat_in,
248 .owner = THIS_MODULE,
249 .pf = NFPROTO_IPV4,
250 .hooknum = NF_INET_PRE_ROUTING,
251 .priority = NF_IP_PRI_NAT_DST,
252 },
253 /* After packet filtering, change source */
254 {
255 .hook = nf_nat_out,
256 .owner = THIS_MODULE,
257 .pf = NFPROTO_IPV4,
258 .hooknum = NF_INET_POST_ROUTING,
259 .priority = NF_IP_PRI_NAT_SRC,
260 },
261 /* Before packet filtering, change destination */
262 {
263 .hook = nf_nat_local_fn,
264 .owner = THIS_MODULE,
265 .pf = NFPROTO_IPV4,
266 .hooknum = NF_INET_LOCAL_OUT,
267 .priority = NF_IP_PRI_NAT_DST,
268 },
269 /* After packet filtering, change source */
270 {
271 .hook = nf_nat_fn,
272 .owner = THIS_MODULE,
273 .pf = NFPROTO_IPV4,
274 .hooknum = NF_INET_LOCAL_IN,
275 .priority = NF_IP_PRI_NAT_SRC,
276 },
277};
278
279static int __init nf_nat_standalone_init(void)
280{
281 int ret = 0;
282
283 need_ipv4_conntrack();
284
285#ifdef CONFIG_XFRM
286 BUG_ON(ip_nat_decode_session != NULL);
287 rcu_assign_pointer(ip_nat_decode_session, nat_decode_session);
288#endif
289 ret = nf_nat_rule_init();
290 if (ret < 0) {
291 pr_err("nf_nat_init: can't setup rules.\n");
292 goto cleanup_decode_session;
293 }
294 ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
295 if (ret < 0) {
296 pr_err("nf_nat_init: can't register hooks.\n");
297 goto cleanup_rule_init;
298 }
299 return ret;
300
301 cleanup_rule_init:
302 nf_nat_rule_cleanup();
303 cleanup_decode_session:
304#ifdef CONFIG_XFRM
305 rcu_assign_pointer(ip_nat_decode_session, NULL);
306 synchronize_net();
307#endif
308 return ret;
309}
310
311static void __exit nf_nat_standalone_fini(void)
312{
313 nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
314 nf_nat_rule_cleanup();
315#ifdef CONFIG_XFRM
316 rcu_assign_pointer(ip_nat_decode_session, NULL);
317 synchronize_net();
318#endif
319 /* Conntrack caches are unregistered in nf_conntrack_cleanup */
320}
321
322module_init(nf_nat_standalone_init);
323module_exit(nf_nat_standalone_fini);
324
325MODULE_LICENSE("GPL");
326MODULE_ALIAS("ip_nat");
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
new file mode 100644
index 00000000000..7274a43c7a1
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -0,0 +1,51 @@
1/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8#include <linux/module.h>
9#include <linux/udp.h>
10
11#include <net/netfilter/nf_nat_helper.h>
12#include <net/netfilter/nf_nat_rule.h>
13#include <net/netfilter/nf_conntrack_helper.h>
14#include <net/netfilter/nf_conntrack_expect.h>
15#include <linux/netfilter/nf_conntrack_tftp.h>
16
17MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
18MODULE_DESCRIPTION("TFTP NAT helper");
19MODULE_LICENSE("GPL");
20MODULE_ALIAS("ip_nat_tftp");
21
22static unsigned int help(struct sk_buff *skb,
23 enum ip_conntrack_info ctinfo,
24 struct nf_conntrack_expect *exp)
25{
26 const struct nf_conn *ct = exp->master;
27
28 exp->saved_proto.udp.port
29 = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
30 exp->dir = IP_CT_DIR_REPLY;
31 exp->expectfn = nf_nat_follow_master;
32 if (nf_ct_expect_related(exp) != 0)
33 return NF_DROP;
34 return NF_ACCEPT;
35}
36
37static void __exit nf_nat_tftp_fini(void)
38{
39 rcu_assign_pointer(nf_nat_tftp_hook, NULL);
40 synchronize_rcu();
41}
42
43static int __init nf_nat_tftp_init(void)
44{
45 BUG_ON(nf_nat_tftp_hook != NULL);
46 rcu_assign_pointer(nf_nat_tftp_hook, help);
47 return 0;
48}
49
50module_init(nf_nat_tftp_init);
51module_exit(nf_nat_tftp_fini);
diff --git a/net/ipv4/sysfs_net_ipv4.c b/net/ipv4/sysfs_net_ipv4.c
new file mode 100644
index 00000000000..0cbbf10026a
--- /dev/null
+++ b/net/ipv4/sysfs_net_ipv4.c
@@ -0,0 +1,88 @@
1/*
2 * net/ipv4/sysfs_net_ipv4.c
3 *
4 * sysfs-based networking knobs (so we can, unlike with sysctl, control perms)
5 *
6 * Copyright (C) 2008 Google, Inc.
7 *
8 * Robert Love <rlove@google.com>
9 *
10 * This software is licensed under the terms of the GNU General Public
11 * License version 2, as published by the Free Software Foundation, and
12 * may be copied, distributed, and modified under those terms.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 */
19
20#include <linux/kobject.h>
21#include <linux/string.h>
22#include <linux/sysfs.h>
23#include <linux/init.h>
24#include <net/tcp.h>
25
26#define CREATE_IPV4_FILE(_name, _var) \
27static ssize_t _name##_show(struct kobject *kobj, \
28 struct kobj_attribute *attr, char *buf) \
29{ \
30 return sprintf(buf, "%d\n", _var); \
31} \
32static ssize_t _name##_store(struct kobject *kobj, \
33 struct kobj_attribute *attr, \
34 const char *buf, size_t count) \
35{ \
36 int val, ret; \
37 ret = sscanf(buf, "%d", &val); \
38 if (ret != 1) \
39 return -EINVAL; \
40 if (val < 0) \
41 return -EINVAL; \
42 _var = val; \
43 return count; \
44} \
45static struct kobj_attribute _name##_attr = \
46 __ATTR(_name, 0644, _name##_show, _name##_store)
47
48CREATE_IPV4_FILE(tcp_wmem_min, sysctl_tcp_wmem[0]);
49CREATE_IPV4_FILE(tcp_wmem_def, sysctl_tcp_wmem[1]);
50CREATE_IPV4_FILE(tcp_wmem_max, sysctl_tcp_wmem[2]);
51
52CREATE_IPV4_FILE(tcp_rmem_min, sysctl_tcp_rmem[0]);
53CREATE_IPV4_FILE(tcp_rmem_def, sysctl_tcp_rmem[1]);
54CREATE_IPV4_FILE(tcp_rmem_max, sysctl_tcp_rmem[2]);
55
56static struct attribute *ipv4_attrs[] = {
57 &tcp_wmem_min_attr.attr,
58 &tcp_wmem_def_attr.attr,
59 &tcp_wmem_max_attr.attr,
60 &tcp_rmem_min_attr.attr,
61 &tcp_rmem_def_attr.attr,
62 &tcp_rmem_max_attr.attr,
63 NULL
64};
65
66static struct attribute_group ipv4_attr_group = {
67 .attrs = ipv4_attrs,
68};
69
70static __init int sysfs_ipv4_init(void)
71{
72 struct kobject *ipv4_kobject;
73 int ret;
74
75 ipv4_kobject = kobject_create_and_add("ipv4", kernel_kobj);
76 if (!ipv4_kobject)
77 return -ENOMEM;
78
79 ret = sysfs_create_group(ipv4_kobject, &ipv4_attr_group);
80 if (ret) {
81 kobject_put(ipv4_kobject);
82 return ret;
83 }
84
85 return 0;
86}
87
88subsys_initcall(sysfs_ipv4_init);
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
new file mode 100644
index 00000000000..e63c3972a73
--- /dev/null
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -0,0 +1,638 @@
1/*
2 * This is a module which is used for queueing IPv6 packets and
3 * communicating with userspace via netlink.
4 *
5 * (C) 2001 Fernando Anton, this code is GPL.
6 * IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
7 * Universidad Carlos III de Madrid - Leganes (Madrid) - Spain
8 * Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain
9 * email: fanton@it.uc3m.es
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15#include <linux/module.h>
16#include <linux/skbuff.h>
17#include <linux/init.h>
18#include <linux/ipv6.h>
19#include <linux/notifier.h>
20#include <linux/netdevice.h>
21#include <linux/netfilter.h>
22#include <linux/netlink.h>
23#include <linux/spinlock.h>
24#include <linux/sysctl.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/mutex.h>
28#include <linux/slab.h>
29#include <net/net_namespace.h>
30#include <net/sock.h>
31#include <net/ipv6.h>
32#include <net/ip6_route.h>
33#include <net/netfilter/nf_queue.h>
34#include <linux/netfilter_ipv4/ip_queue.h>
35#include <linux/netfilter_ipv4/ip_tables.h>
36#include <linux/netfilter_ipv6/ip6_tables.h>
37
38#define IPQ_QMAX_DEFAULT 1024
39#define IPQ_PROC_FS_NAME "ip6_queue"
40#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
41
42typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
43
44static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
45static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
46static DEFINE_SPINLOCK(queue_lock);
47static int peer_pid __read_mostly;
48static unsigned int copy_range __read_mostly;
49static unsigned int queue_total;
50static unsigned int queue_dropped = 0;
51static unsigned int queue_user_dropped = 0;
52static struct sock *ipqnl __read_mostly;
53static LIST_HEAD(queue_list);
54static DEFINE_MUTEX(ipqnl_mutex);
55
56static inline void
57__ipq_enqueue_entry(struct nf_queue_entry *entry)
58{
59 list_add_tail(&entry->list, &queue_list);
60 queue_total++;
61}
62
63static inline int
64__ipq_set_mode(unsigned char mode, unsigned int range)
65{
66 int status = 0;
67
68 switch(mode) {
69 case IPQ_COPY_NONE:
70 case IPQ_COPY_META:
71 copy_mode = mode;
72 copy_range = 0;
73 break;
74
75 case IPQ_COPY_PACKET:
76 if (range > 0xFFFF)
77 range = 0xFFFF;
78 copy_range = range;
79 copy_mode = mode;
80 break;
81
82 default:
83 status = -EINVAL;
84
85 }
86 return status;
87}
88
89static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
90
91static inline void
92__ipq_reset(void)
93{
94 peer_pid = 0;
95 net_disable_timestamp();
96 __ipq_set_mode(IPQ_COPY_NONE, 0);
97 __ipq_flush(NULL, 0);
98}
99
100static struct nf_queue_entry *
101ipq_find_dequeue_entry(unsigned long id)
102{
103 struct nf_queue_entry *entry = NULL, *i;
104
105 spin_lock_bh(&queue_lock);
106
107 list_for_each_entry(i, &queue_list, list) {
108 if ((unsigned long)i == id) {
109 entry = i;
110 break;
111 }
112 }
113
114 if (entry) {
115 list_del(&entry->list);
116 queue_total--;
117 }
118
119 spin_unlock_bh(&queue_lock);
120 return entry;
121}
122
123static void
124__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
125{
126 struct nf_queue_entry *entry, *next;
127
128 list_for_each_entry_safe(entry, next, &queue_list, list) {
129 if (!cmpfn || cmpfn(entry, data)) {
130 list_del(&entry->list);
131 queue_total--;
132 nf_reinject(entry, NF_DROP);
133 }
134 }
135}
136
137static void
138ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
139{
140 spin_lock_bh(&queue_lock);
141 __ipq_flush(cmpfn, data);
142 spin_unlock_bh(&queue_lock);
143}
144
145static struct sk_buff *
146ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
147{
148 sk_buff_data_t old_tail;
149 size_t size = 0;
150 size_t data_len = 0;
151 struct sk_buff *skb;
152 struct ipq_packet_msg *pmsg;
153 struct nlmsghdr *nlh;
154 struct timeval tv;
155
156 switch (ACCESS_ONCE(copy_mode)) {
157 case IPQ_COPY_META:
158 case IPQ_COPY_NONE:
159 size = NLMSG_SPACE(sizeof(*pmsg));
160 break;
161
162 case IPQ_COPY_PACKET:
163 if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
164 (*errp = skb_checksum_help(entry->skb)))
165 return NULL;
166
167 data_len = ACCESS_ONCE(copy_range);
168 if (data_len == 0 || data_len > entry->skb->len)
169 data_len = entry->skb->len;
170
171 size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
172 break;
173
174 default:
175 *errp = -EINVAL;
176 return NULL;
177 }
178
179 skb = alloc_skb(size, GFP_ATOMIC);
180 if (!skb)
181 goto nlmsg_failure;
182
183 old_tail = skb->tail;
184 nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
185 pmsg = NLMSG_DATA(nlh);
186 memset(pmsg, 0, sizeof(*pmsg));
187
188 pmsg->packet_id = (unsigned long )entry;
189 pmsg->data_len = data_len;
190 tv = ktime_to_timeval(entry->skb->tstamp);
191 pmsg->timestamp_sec = tv.tv_sec;
192 pmsg->timestamp_usec = tv.tv_usec;
193 pmsg->mark = entry->skb->mark;
194 pmsg->hook = entry->hook;
195 pmsg->hw_protocol = entry->skb->protocol;
196
197 if (entry->indev)
198 strcpy(pmsg->indev_name, entry->indev->name);
199 else
200 pmsg->indev_name[0] = '\0';
201
202 if (entry->outdev)
203 strcpy(pmsg->outdev_name, entry->outdev->name);
204 else
205 pmsg->outdev_name[0] = '\0';
206
207 if (entry->indev && entry->skb->dev &&
208 entry->skb->mac_header != entry->skb->network_header) {
209 pmsg->hw_type = entry->skb->dev->type;
210 pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr);
211 }
212
213 if (data_len)
214 if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
215 BUG();
216
217 nlh->nlmsg_len = skb->tail - old_tail;
218 return skb;
219
220nlmsg_failure:
221 kfree_skb(skb);
222 *errp = -EINVAL;
223 printk(KERN_ERR "ip6_queue: error creating packet message\n");
224 return NULL;
225}
226
227static int
228ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
229{
230 int status = -EINVAL;
231 struct sk_buff *nskb;
232
233 if (copy_mode == IPQ_COPY_NONE)
234 return -EAGAIN;
235
236 nskb = ipq_build_packet_message(entry, &status);
237 if (nskb == NULL)
238 return status;
239
240 spin_lock_bh(&queue_lock);
241
242 if (!peer_pid)
243 goto err_out_free_nskb;
244
245 if (queue_total >= queue_maxlen) {
246 queue_dropped++;
247 status = -ENOSPC;
248 if (net_ratelimit())
249 printk (KERN_WARNING "ip6_queue: fill at %d entries, "
250 "dropping packet(s). Dropped: %d\n", queue_total,
251 queue_dropped);
252 goto err_out_free_nskb;
253 }
254
255 /* netlink_unicast will either free the nskb or attach it to a socket */
256 status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
257 if (status < 0) {
258 queue_user_dropped++;
259 goto err_out_unlock;
260 }
261
262 __ipq_enqueue_entry(entry);
263
264 spin_unlock_bh(&queue_lock);
265 return status;
266
267err_out_free_nskb:
268 kfree_skb(nskb);
269
270err_out_unlock:
271 spin_unlock_bh(&queue_lock);
272 return status;
273}
274
275static int
276ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
277{
278 int diff;
279 struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
280 struct sk_buff *nskb;
281
282 if (v->data_len < sizeof(*user_iph))
283 return 0;
284 diff = v->data_len - e->skb->len;
285 if (diff < 0) {
286 if (pskb_trim(e->skb, v->data_len))
287 return -ENOMEM;
288 } else if (diff > 0) {
289 if (v->data_len > 0xFFFF)
290 return -EINVAL;
291 if (diff > skb_tailroom(e->skb)) {
292 nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
293 diff, GFP_ATOMIC);
294 if (!nskb) {
295 printk(KERN_WARNING "ip6_queue: OOM "
296 "in mangle, dropping packet\n");
297 return -ENOMEM;
298 }
299 kfree_skb(e->skb);
300 e->skb = nskb;
301 }
302 skb_put(e->skb, diff);
303 }
304 if (!skb_make_writable(e->skb, v->data_len))
305 return -ENOMEM;
306 skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
307 e->skb->ip_summed = CHECKSUM_NONE;
308
309 return 0;
310}
311
312static int
313ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
314{
315 struct nf_queue_entry *entry;
316
317 if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)
318 return -EINVAL;
319
320 entry = ipq_find_dequeue_entry(vmsg->id);
321 if (entry == NULL)
322 return -ENOENT;
323 else {
324 int verdict = vmsg->value;
325
326 if (vmsg->data_len && vmsg->data_len == len)
327 if (ipq_mangle_ipv6(vmsg, entry) < 0)
328 verdict = NF_DROP;
329
330 nf_reinject(entry, verdict);
331 return 0;
332 }
333}
334
335static int
336ipq_set_mode(unsigned char mode, unsigned int range)
337{
338 int status;
339
340 spin_lock_bh(&queue_lock);
341 status = __ipq_set_mode(mode, range);
342 spin_unlock_bh(&queue_lock);
343 return status;
344}
345
346static int
347ipq_receive_peer(struct ipq_peer_msg *pmsg,
348 unsigned char type, unsigned int len)
349{
350 int status = 0;
351
352 if (len < sizeof(*pmsg))
353 return -EINVAL;
354
355 switch (type) {
356 case IPQM_MODE:
357 status = ipq_set_mode(pmsg->msg.mode.value,
358 pmsg->msg.mode.range);
359 break;
360
361 case IPQM_VERDICT:
362 status = ipq_set_verdict(&pmsg->msg.verdict,
363 len - sizeof(*pmsg));
364 break;
365 default:
366 status = -EINVAL;
367 }
368 return status;
369}
370
371static int
372dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
373{
374 if (entry->indev)
375 if (entry->indev->ifindex == ifindex)
376 return 1;
377
378 if (entry->outdev)
379 if (entry->outdev->ifindex == ifindex)
380 return 1;
381#ifdef CONFIG_BRIDGE_NETFILTER
382 if (entry->skb->nf_bridge) {
383 if (entry->skb->nf_bridge->physindev &&
384 entry->skb->nf_bridge->physindev->ifindex == ifindex)
385 return 1;
386 if (entry->skb->nf_bridge->physoutdev &&
387 entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
388 return 1;
389 }
390#endif
391 return 0;
392}
393
394static void
395ipq_dev_drop(int ifindex)
396{
397 ipq_flush(dev_cmp, ifindex);
398}
399
400#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
401
402static inline void
403__ipq_rcv_skb(struct sk_buff *skb)
404{
405 int status, type, pid, flags;
406 unsigned int nlmsglen, skblen;
407 struct nlmsghdr *nlh;
408
409 skblen = skb->len;
410 if (skblen < sizeof(*nlh))
411 return;
412
413 nlh = nlmsg_hdr(skb);
414 nlmsglen = nlh->nlmsg_len;
415 if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
416 return;
417
418 pid = nlh->nlmsg_pid;
419 flags = nlh->nlmsg_flags;
420
421 if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
422 RCV_SKB_FAIL(-EINVAL);
423
424 if (flags & MSG_TRUNC)
425 RCV_SKB_FAIL(-ECOMM);
426
427 type = nlh->nlmsg_type;
428 if (type < NLMSG_NOOP || type >= IPQM_MAX)
429 RCV_SKB_FAIL(-EINVAL);
430
431 if (type <= IPQM_BASE)
432 return;
433
434 if (security_netlink_recv(skb, CAP_NET_ADMIN))
435 RCV_SKB_FAIL(-EPERM);
436
437 spin_lock_bh(&queue_lock);
438
439 if (peer_pid) {
440 if (peer_pid != pid) {
441 spin_unlock_bh(&queue_lock);
442 RCV_SKB_FAIL(-EBUSY);
443 }
444 } else {
445 net_enable_timestamp();
446 peer_pid = pid;
447 }
448
449 spin_unlock_bh(&queue_lock);
450
451 status = ipq_receive_peer(NLMSG_DATA(nlh), type,
452 nlmsglen - NLMSG_LENGTH(0));
453 if (status < 0)
454 RCV_SKB_FAIL(status);
455
456 if (flags & NLM_F_ACK)
457 netlink_ack(skb, nlh, 0);
458}
459
460static void
461ipq_rcv_skb(struct sk_buff *skb)
462{
463 mutex_lock(&ipqnl_mutex);
464 __ipq_rcv_skb(skb);
465 mutex_unlock(&ipqnl_mutex);
466}
467
468static int
469ipq_rcv_dev_event(struct notifier_block *this,
470 unsigned long event, void *ptr)
471{
472 struct net_device *dev = ptr;
473
474 if (!net_eq(dev_net(dev), &init_net))
475 return NOTIFY_DONE;
476
477 /* Drop any packets associated with the downed device */
478 if (event == NETDEV_DOWN)
479 ipq_dev_drop(dev->ifindex);
480 return NOTIFY_DONE;
481}
482
483static struct notifier_block ipq_dev_notifier = {
484 .notifier_call = ipq_rcv_dev_event,
485};
486
487static int
488ipq_rcv_nl_event(struct notifier_block *this,
489 unsigned long event, void *ptr)
490{
491 struct netlink_notify *n = ptr;
492
493 if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) {
494 spin_lock_bh(&queue_lock);
495 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
496 __ipq_reset();
497 spin_unlock_bh(&queue_lock);
498 }
499 return NOTIFY_DONE;
500}
501
502static struct notifier_block ipq_nl_notifier = {
503 .notifier_call = ipq_rcv_nl_event,
504};
505
506#ifdef CONFIG_SYSCTL
507static struct ctl_table_header *ipq_sysctl_header;
508
509static ctl_table ipq_table[] = {
510 {
511 .procname = NET_IPQ_QMAX_NAME,
512 .data = &queue_maxlen,
513 .maxlen = sizeof(queue_maxlen),
514 .mode = 0644,
515 .proc_handler = proc_dointvec
516 },
517 { }
518};
519#endif
520
521#ifdef CONFIG_PROC_FS
522static int ip6_queue_show(struct seq_file *m, void *v)
523{
524 spin_lock_bh(&queue_lock);
525
526 seq_printf(m,
527 "Peer PID : %d\n"
528 "Copy mode : %hu\n"
529 "Copy range : %u\n"
530 "Queue length : %u\n"
531 "Queue max. length : %u\n"
532 "Queue dropped : %u\n"
533 "Netfilter dropped : %u\n",
534 peer_pid,
535 copy_mode,
536 copy_range,
537 queue_total,
538 queue_maxlen,
539 queue_dropped,
540 queue_user_dropped);
541
542 spin_unlock_bh(&queue_lock);
543 return 0;
544}
545
546static int ip6_queue_open(struct inode *inode, struct file *file)
547{
548 return single_open(file, ip6_queue_show, NULL);
549}
550
551static const struct file_operations ip6_queue_proc_fops = {
552 .open = ip6_queue_open,
553 .read = seq_read,
554 .llseek = seq_lseek,
555 .release = single_release,
556 .owner = THIS_MODULE,
557};
558#endif
559
560static const struct nf_queue_handler nfqh = {
561 .name = "ip6_queue",
562 .outfn = &ipq_enqueue_packet,
563};
564
565static int __init ip6_queue_init(void)
566{
567 int status = -ENOMEM;
568 struct proc_dir_entry *proc __maybe_unused;
569
570 netlink_register_notifier(&ipq_nl_notifier);
571 ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0,
572 ipq_rcv_skb, NULL, THIS_MODULE);
573 if (ipqnl == NULL) {
574 printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
575 goto cleanup_netlink_notifier;
576 }
577
578#ifdef CONFIG_PROC_FS
579 proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
580 &ip6_queue_proc_fops);
581 if (!proc) {
582 printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
583 goto cleanup_ipqnl;
584 }
585#endif
586 register_netdevice_notifier(&ipq_dev_notifier);
587#ifdef CONFIG_SYSCTL
588 ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table);
589#endif
590 status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh);
591 if (status < 0) {
592 printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
593 goto cleanup_sysctl;
594 }
595 return status;
596
597cleanup_sysctl:
598#ifdef CONFIG_SYSCTL
599 unregister_sysctl_table(ipq_sysctl_header);
600#endif
601 unregister_netdevice_notifier(&ipq_dev_notifier);
602 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
603
604cleanup_ipqnl: __maybe_unused
605 netlink_kernel_release(ipqnl);
606 mutex_lock(&ipqnl_mutex);
607 mutex_unlock(&ipqnl_mutex);
608
609cleanup_netlink_notifier:
610 netlink_unregister_notifier(&ipq_nl_notifier);
611 return status;
612}
613
614static void __exit ip6_queue_fini(void)
615{
616 nf_unregister_queue_handlers(&nfqh);
617
618 ipq_flush(NULL, 0);
619
620#ifdef CONFIG_SYSCTL
621 unregister_sysctl_table(ipq_sysctl_header);
622#endif
623 unregister_netdevice_notifier(&ipq_dev_notifier);
624 proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
625
626 netlink_kernel_release(ipqnl);
627 mutex_lock(&ipqnl_mutex);
628 mutex_unlock(&ipqnl_mutex);
629
630 netlink_unregister_notifier(&ipq_nl_notifier);
631}
632
633MODULE_DESCRIPTION("IPv6 packet queue handler");
634MODULE_LICENSE("GPL");
635MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_IP6_FW);
636
637module_init(ip6_queue_init);
638module_exit(ip6_queue_fini);
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
new file mode 100644
index 00000000000..e6af8d72f26
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -0,0 +1,527 @@
1/*
2 * This is a module which is used for logging packets.
3 */
4
5/* (C) 2001 Jan Rekorajski <baggins@pld.org.pl>
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/if_arp.h>
16#include <linux/ip.h>
17#include <linux/spinlock.h>
18#include <linux/icmpv6.h>
19#include <net/udp.h>
20#include <net/tcp.h>
21#include <net/ipv6.h>
22#include <linux/netfilter.h>
23#include <linux/netfilter/x_tables.h>
24#include <linux/netfilter_ipv6/ip6_tables.h>
25#include <net/netfilter/nf_log.h>
26#include <net/netfilter/xt_log.h>
27
28MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
29MODULE_DESCRIPTION("Xtables: IPv6 packet logging to syslog");
30MODULE_LICENSE("GPL");
31
32struct in_device;
33#include <net/route.h>
34#include <linux/netfilter_ipv6/ip6t_LOG.h>
35
36/* One level of recursion won't kill us */
37static void dump_packet(struct sbuff *m,
38 const struct nf_loginfo *info,
39 const struct sk_buff *skb, unsigned int ip6hoff,
40 int recurse)
41{
42 u_int8_t currenthdr;
43 int fragment;
44 struct ipv6hdr _ip6h;
45 const struct ipv6hdr *ih;
46 unsigned int ptr;
47 unsigned int hdrlen = 0;
48 unsigned int logflags;
49
50 if (info->type == NF_LOG_TYPE_LOG)
51 logflags = info->u.log.logflags;
52 else
53 logflags = NF_LOG_MASK;
54
55 ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
56 if (ih == NULL) {
57 sb_add(m, "TRUNCATED");
58 return;
59 }
60
61 /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
62 sb_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr);
63
64 /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
65 sb_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
66 ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
67 (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20,
68 ih->hop_limit,
69 (ntohl(*(__be32 *)ih) & 0x000fffff));
70
71 fragment = 0;
72 ptr = ip6hoff + sizeof(struct ipv6hdr);
73 currenthdr = ih->nexthdr;
74 while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) {
75 struct ipv6_opt_hdr _hdr;
76 const struct ipv6_opt_hdr *hp;
77
78 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
79 if (hp == NULL) {
80 sb_add(m, "TRUNCATED");
81 return;
82 }
83
84 /* Max length: 48 "OPT (...) " */
85 if (logflags & IP6T_LOG_IPOPT)
86 sb_add(m, "OPT ( ");
87
88 switch (currenthdr) {
89 case IPPROTO_FRAGMENT: {
90 struct frag_hdr _fhdr;
91 const struct frag_hdr *fh;
92
93 sb_add(m, "FRAG:");
94 fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
95 &_fhdr);
96 if (fh == NULL) {
97 sb_add(m, "TRUNCATED ");
98 return;
99 }
100
101 /* Max length: 6 "65535 " */
102 sb_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8);
103
104 /* Max length: 11 "INCOMPLETE " */
105 if (fh->frag_off & htons(0x0001))
106 sb_add(m, "INCOMPLETE ");
107
108 sb_add(m, "ID:%08x ", ntohl(fh->identification));
109
110 if (ntohs(fh->frag_off) & 0xFFF8)
111 fragment = 1;
112
113 hdrlen = 8;
114
115 break;
116 }
117 case IPPROTO_DSTOPTS:
118 case IPPROTO_ROUTING:
119 case IPPROTO_HOPOPTS:
120 if (fragment) {
121 if (logflags & IP6T_LOG_IPOPT)
122 sb_add(m, ")");
123 return;
124 }
125 hdrlen = ipv6_optlen(hp);
126 break;
127 /* Max Length */
128 case IPPROTO_AH:
129 if (logflags & IP6T_LOG_IPOPT) {
130 struct ip_auth_hdr _ahdr;
131 const struct ip_auth_hdr *ah;
132
133 /* Max length: 3 "AH " */
134 sb_add(m, "AH ");
135
136 if (fragment) {
137 sb_add(m, ")");
138 return;
139 }
140
141 ah = skb_header_pointer(skb, ptr, sizeof(_ahdr),
142 &_ahdr);
143 if (ah == NULL) {
144 /*
145 * Max length: 26 "INCOMPLETE [65535
146 * bytes] )"
147 */
148 sb_add(m, "INCOMPLETE [%u bytes] )",
149 skb->len - ptr);
150 return;
151 }
152
153 /* Length: 15 "SPI=0xF1234567 */
154 sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
155
156 }
157
158 hdrlen = (hp->hdrlen+2)<<2;
159 break;
160 case IPPROTO_ESP:
161 if (logflags & IP6T_LOG_IPOPT) {
162 struct ip_esp_hdr _esph;
163 const struct ip_esp_hdr *eh;
164
165 /* Max length: 4 "ESP " */
166 sb_add(m, "ESP ");
167
168 if (fragment) {
169 sb_add(m, ")");
170 return;
171 }
172
173 /*
174 * Max length: 26 "INCOMPLETE [65535 bytes] )"
175 */
176 eh = skb_header_pointer(skb, ptr, sizeof(_esph),
177 &_esph);
178 if (eh == NULL) {
179 sb_add(m, "INCOMPLETE [%u bytes] )",
180 skb->len - ptr);
181 return;
182 }
183
184 /* Length: 16 "SPI=0xF1234567 )" */
185 sb_add(m, "SPI=0x%x )", ntohl(eh->spi) );
186
187 }
188 return;
189 default:
190 /* Max length: 20 "Unknown Ext Hdr 255" */
191 sb_add(m, "Unknown Ext Hdr %u", currenthdr);
192 return;
193 }
194 if (logflags & IP6T_LOG_IPOPT)
195 sb_add(m, ") ");
196
197 currenthdr = hp->nexthdr;
198 ptr += hdrlen;
199 }
200
201 switch (currenthdr) {
202 case IPPROTO_TCP: {
203 struct tcphdr _tcph;
204 const struct tcphdr *th;
205
206 /* Max length: 10 "PROTO=TCP " */
207 sb_add(m, "PROTO=TCP ");
208
209 if (fragment)
210 break;
211
212 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
213 th = skb_header_pointer(skb, ptr, sizeof(_tcph), &_tcph);
214 if (th == NULL) {
215 sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
216 return;
217 }
218
219 /* Max length: 20 "SPT=65535 DPT=65535 " */
220 sb_add(m, "SPT=%u DPT=%u ",
221 ntohs(th->source), ntohs(th->dest));
222 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
223 if (logflags & IP6T_LOG_TCPSEQ)
224 sb_add(m, "SEQ=%u ACK=%u ",
225 ntohl(th->seq), ntohl(th->ack_seq));
226 /* Max length: 13 "WINDOW=65535 " */
227 sb_add(m, "WINDOW=%u ", ntohs(th->window));
228 /* Max length: 9 "RES=0x3C " */
229 sb_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
230 /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
231 if (th->cwr)
232 sb_add(m, "CWR ");
233 if (th->ece)
234 sb_add(m, "ECE ");
235 if (th->urg)
236 sb_add(m, "URG ");
237 if (th->ack)
238 sb_add(m, "ACK ");
239 if (th->psh)
240 sb_add(m, "PSH ");
241 if (th->rst)
242 sb_add(m, "RST ");
243 if (th->syn)
244 sb_add(m, "SYN ");
245 if (th->fin)
246 sb_add(m, "FIN ");
247 /* Max length: 11 "URGP=65535 " */
248 sb_add(m, "URGP=%u ", ntohs(th->urg_ptr));
249
250 if ((logflags & IP6T_LOG_TCPOPT) &&
251 th->doff * 4 > sizeof(struct tcphdr)) {
252 u_int8_t _opt[60 - sizeof(struct tcphdr)];
253 const u_int8_t *op;
254 unsigned int i;
255 unsigned int optsize = th->doff * 4
256 - sizeof(struct tcphdr);
257
258 op = skb_header_pointer(skb,
259 ptr + sizeof(struct tcphdr),
260 optsize, _opt);
261 if (op == NULL) {
262 sb_add(m, "OPT (TRUNCATED)");
263 return;
264 }
265
266 /* Max length: 127 "OPT (" 15*4*2chars ") " */
267 sb_add(m, "OPT (");
268			for (i = 0; i < optsize; i++)
269 sb_add(m, "%02X", op[i]);
270 sb_add(m, ") ");
271 }
272 break;
273 }
274 case IPPROTO_UDP:
275 case IPPROTO_UDPLITE: {
276 struct udphdr _udph;
277 const struct udphdr *uh;
278
279 if (currenthdr == IPPROTO_UDP)
280 /* Max length: 10 "PROTO=UDP " */
281 sb_add(m, "PROTO=UDP " );
282 else /* Max length: 14 "PROTO=UDPLITE " */
283 sb_add(m, "PROTO=UDPLITE ");
284
285 if (fragment)
286 break;
287
288 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
289 uh = skb_header_pointer(skb, ptr, sizeof(_udph), &_udph);
290 if (uh == NULL) {
291 sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
292 return;
293 }
294
295 /* Max length: 20 "SPT=65535 DPT=65535 " */
296 sb_add(m, "SPT=%u DPT=%u LEN=%u ",
297 ntohs(uh->source), ntohs(uh->dest),
298 ntohs(uh->len));
299 break;
300 }
301 case IPPROTO_ICMPV6: {
302 struct icmp6hdr _icmp6h;
303 const struct icmp6hdr *ic;
304
305 /* Max length: 13 "PROTO=ICMPv6 " */
306 sb_add(m, "PROTO=ICMPv6 ");
307
308 if (fragment)
309 break;
310
311 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
312 ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
313 if (ic == NULL) {
314 sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
315 return;
316 }
317
318 /* Max length: 18 "TYPE=255 CODE=255 " */
319 sb_add(m, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code);
320
321 switch (ic->icmp6_type) {
322 case ICMPV6_ECHO_REQUEST:
323 case ICMPV6_ECHO_REPLY:
324 /* Max length: 19 "ID=65535 SEQ=65535 " */
325 sb_add(m, "ID=%u SEQ=%u ",
326 ntohs(ic->icmp6_identifier),
327 ntohs(ic->icmp6_sequence));
328 break;
329 case ICMPV6_MGM_QUERY:
330 case ICMPV6_MGM_REPORT:
331 case ICMPV6_MGM_REDUCTION:
332 break;
333
334 case ICMPV6_PARAMPROB:
335 /* Max length: 17 "POINTER=ffffffff " */
336 sb_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer));
337 /* Fall through */
338 case ICMPV6_DEST_UNREACH:
339 case ICMPV6_PKT_TOOBIG:
340 case ICMPV6_TIME_EXCEED:
341 /* Max length: 3+maxlen */
342 if (recurse) {
343 sb_add(m, "[");
344 dump_packet(m, info, skb,
345 ptr + sizeof(_icmp6h), 0);
346 sb_add(m, "] ");
347 }
348
349 /* Max length: 10 "MTU=65535 " */
350 if (ic->icmp6_type == ICMPV6_PKT_TOOBIG)
351 sb_add(m, "MTU=%u ", ntohl(ic->icmp6_mtu));
352 }
353 break;
354 }
355 /* Max length: 10 "PROTO=255 " */
356 default:
357 sb_add(m, "PROTO=%u ", currenthdr);
358 }
359
360 /* Max length: 15 "UID=4294967295 " */
361 if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) {
362 read_lock_bh(&skb->sk->sk_callback_lock);
363 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
364 sb_add(m, "UID=%u GID=%u ",
365 skb->sk->sk_socket->file->f_cred->fsuid,
366 skb->sk->sk_socket->file->f_cred->fsgid);
367 read_unlock_bh(&skb->sk->sk_callback_lock);
368 }
369
370 /* Max length: 16 "MARK=0xFFFFFFFF " */
371 if (!recurse && skb->mark)
372 sb_add(m, "MARK=0x%x ", skb->mark);
373}
374
375static void dump_mac_header(struct sbuff *m,
376 const struct nf_loginfo *info,
377 const struct sk_buff *skb)
378{
379 struct net_device *dev = skb->dev;
380 unsigned int logflags = 0;
381
382 if (info->type == NF_LOG_TYPE_LOG)
383 logflags = info->u.log.logflags;
384
385 if (!(logflags & IP6T_LOG_MACDECODE))
386 goto fallback;
387
388 switch (dev->type) {
389 case ARPHRD_ETHER:
390 sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
391 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
392 ntohs(eth_hdr(skb)->h_proto));
393 return;
394 default:
395 break;
396 }
397
398fallback:
399 sb_add(m, "MAC=");
400 if (dev->hard_header_len &&
401 skb->mac_header != skb->network_header) {
402 const unsigned char *p = skb_mac_header(skb);
403 unsigned int len = dev->hard_header_len;
404 unsigned int i;
405
406 if (dev->type == ARPHRD_SIT &&
407 (p -= ETH_HLEN) < skb->head)
408 p = NULL;
409
410 if (p != NULL) {
411 sb_add(m, "%02x", *p++);
412 for (i = 1; i < len; i++)
413 sb_add(m, ":%02x", *p++);
414 }
415 sb_add(m, " ");
416
417 if (dev->type == ARPHRD_SIT) {
418 const struct iphdr *iph =
419 (struct iphdr *)skb_mac_header(skb);
420 sb_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr);
421 }
422 } else
423 sb_add(m, " ");
424}
425
426static struct nf_loginfo default_loginfo = {
427 .type = NF_LOG_TYPE_LOG,
428 .u = {
429 .log = {
430 .level = 5,
431 .logflags = NF_LOG_MASK,
432 },
433 },
434};
435
436static void
437ip6t_log_packet(u_int8_t pf,
438 unsigned int hooknum,
439 const struct sk_buff *skb,
440 const struct net_device *in,
441 const struct net_device *out,
442 const struct nf_loginfo *loginfo,
443 const char *prefix)
444{
445 struct sbuff *m = sb_open();
446
447 if (!loginfo)
448 loginfo = &default_loginfo;
449
450 sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
451 prefix,
452 in ? in->name : "",
453 out ? out->name : "");
454
455 if (in != NULL)
456 dump_mac_header(m, loginfo, skb);
457
458 dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
459
460 sb_close(m);
461}
462
463static unsigned int
464log_tg6(struct sk_buff *skb, const struct xt_action_param *par)
465{
466 const struct ip6t_log_info *loginfo = par->targinfo;
467 struct nf_loginfo li;
468
469 li.type = NF_LOG_TYPE_LOG;
470 li.u.log.level = loginfo->level;
471 li.u.log.logflags = loginfo->logflags;
472
473 ip6t_log_packet(NFPROTO_IPV6, par->hooknum, skb, par->in, par->out,
474 &li, loginfo->prefix);
475 return XT_CONTINUE;
476}
477
478
479static int log_tg6_check(const struct xt_tgchk_param *par)
480{
481 const struct ip6t_log_info *loginfo = par->targinfo;
482
483 if (loginfo->level >= 8) {
484 pr_debug("level %u >= 8\n", loginfo->level);
485 return -EINVAL;
486 }
487 if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
488 pr_debug("prefix not null-terminated\n");
489 return -EINVAL;
490 }
491 return 0;
492}
493
494static struct xt_target log_tg6_reg __read_mostly = {
495 .name = "LOG",
496 .family = NFPROTO_IPV6,
497 .target = log_tg6,
498 .targetsize = sizeof(struct ip6t_log_info),
499 .checkentry = log_tg6_check,
500 .me = THIS_MODULE,
501};
502
503static struct nf_logger ip6t_logger __read_mostly = {
504 .name = "ip6t_LOG",
505 .logfn = &ip6t_log_packet,
506 .me = THIS_MODULE,
507};
508
509static int __init log_tg6_init(void)
510{
511 int ret;
512
513 ret = xt_register_target(&log_tg6_reg);
514 if (ret < 0)
515 return ret;
516 nf_log_register(NFPROTO_IPV6, &ip6t_logger);
517 return 0;
518}
519
520static void __exit log_tg6_exit(void)
521{
522 nf_log_unregister(&ip6t_logger);
523 xt_unregister_target(&log_tg6_reg);
524}
525
526module_init(log_tg6_init);
527module_exit(log_tg6_exit);
diff --git a/net/mac80211/driver-trace.c b/net/mac80211/driver-trace.c
new file mode 100644
index 00000000000..8ed8711b1a6
--- /dev/null
+++ b/net/mac80211/driver-trace.c
@@ -0,0 +1,9 @@
1/* bug in tracepoint.h, it should include this */
2#include <linux/module.h>
3
4/* sparse isn't too happy with all macros... */
5#ifndef __CHECKER__
6#include "driver-ops.h"
7#define CREATE_TRACE_POINTS
8#include "driver-trace.h"
9#endif
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
new file mode 100644
index 00000000000..f47b00dc7af
--- /dev/null
+++ b/net/mac80211/driver-trace.h
@@ -0,0 +1,1492 @@
1#if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
2#define __MAC80211_DRIVER_TRACE
3
4#include <linux/tracepoint.h>
5#include <net/mac80211.h>
6#include "ieee80211_i.h"
7
8#if !defined(CONFIG_MAC80211_DRIVER_API_TRACER) || defined(__CHECKER__)
9#undef TRACE_EVENT
10#define TRACE_EVENT(name, proto, ...) \
11static inline void trace_ ## name(proto) {}
12#undef DECLARE_EVENT_CLASS
13#define DECLARE_EVENT_CLASS(...)
14#undef DEFINE_EVENT
15#define DEFINE_EVENT(evt_class, name, proto, ...) \
16static inline void trace_ ## name(proto) {}
17#endif
18
19#undef TRACE_SYSTEM
20#define TRACE_SYSTEM mac80211
21
22#define MAXNAME 32
23#define LOCAL_ENTRY __array(char, wiphy_name, 32)
24#define LOCAL_ASSIGN strlcpy(__entry->wiphy_name, wiphy_name(local->hw.wiphy), MAXNAME)
25#define LOCAL_PR_FMT "%s"
26#define LOCAL_PR_ARG __entry->wiphy_name
27
28#define STA_ENTRY __array(char, sta_addr, ETH_ALEN)
29#define STA_ASSIGN (sta ? memcpy(__entry->sta_addr, sta->addr, ETH_ALEN) : memset(__entry->sta_addr, 0, ETH_ALEN))
30#define STA_PR_FMT " sta:%pM"
31#define STA_PR_ARG __entry->sta_addr
32
33#define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \
34 __field(bool, p2p) \
35 __string(vif_name, sdata->dev ? sdata->dev->name : "<nodev>")
36#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \
37 __entry->p2p = sdata->vif.p2p; \
38 __assign_str(vif_name, sdata->dev ? sdata->dev->name : "<nodev>")
39#define VIF_PR_FMT " vif:%s(%d%s)"
40#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : ""
41
42/*
43 * Tracing for driver callbacks.
44 */
45
46DECLARE_EVENT_CLASS(local_only_evt,
47 TP_PROTO(struct ieee80211_local *local),
48 TP_ARGS(local),
49 TP_STRUCT__entry(
50 LOCAL_ENTRY
51 ),
52 TP_fast_assign(
53 LOCAL_ASSIGN;
54 ),
55 TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG)
56);
57
58DECLARE_EVENT_CLASS(local_sdata_addr_evt,
59 TP_PROTO(struct ieee80211_local *local,
60 struct ieee80211_sub_if_data *sdata),
61 TP_ARGS(local, sdata),
62
63 TP_STRUCT__entry(
64 LOCAL_ENTRY
65 VIF_ENTRY
66 __array(char, addr, 6)
67 ),
68
69 TP_fast_assign(
70 LOCAL_ASSIGN;
71 VIF_ASSIGN;
72 memcpy(__entry->addr, sdata->vif.addr, 6);
73 ),
74
75 TP_printk(
76 LOCAL_PR_FMT VIF_PR_FMT " addr:%pM",
77 LOCAL_PR_ARG, VIF_PR_ARG, __entry->addr
78 )
79);
80
81DECLARE_EVENT_CLASS(local_u32_evt,
82 TP_PROTO(struct ieee80211_local *local, u32 value),
83 TP_ARGS(local, value),
84
85 TP_STRUCT__entry(
86 LOCAL_ENTRY
87 __field(u32, value)
88 ),
89
90 TP_fast_assign(
91 LOCAL_ASSIGN;
92 __entry->value = value;
93 ),
94
95 TP_printk(
96 LOCAL_PR_FMT " value:%d",
97 LOCAL_PR_ARG, __entry->value
98 )
99);
100
101DECLARE_EVENT_CLASS(local_sdata_evt,
102 TP_PROTO(struct ieee80211_local *local,
103 struct ieee80211_sub_if_data *sdata),
104 TP_ARGS(local, sdata),
105
106 TP_STRUCT__entry(
107 LOCAL_ENTRY
108 VIF_ENTRY
109 ),
110
111 TP_fast_assign(
112 LOCAL_ASSIGN;
113 VIF_ASSIGN;
114 ),
115
116 TP_printk(
117 LOCAL_PR_FMT VIF_PR_FMT,
118 LOCAL_PR_ARG, VIF_PR_ARG
119 )
120);
121
122DEFINE_EVENT(local_only_evt, drv_return_void,
123 TP_PROTO(struct ieee80211_local *local),
124 TP_ARGS(local)
125);
126
127TRACE_EVENT(drv_return_int,
128 TP_PROTO(struct ieee80211_local *local, int ret),
129 TP_ARGS(local, ret),
130 TP_STRUCT__entry(
131 LOCAL_ENTRY
132 __field(int, ret)
133 ),
134 TP_fast_assign(
135 LOCAL_ASSIGN;
136 __entry->ret = ret;
137 ),
138 TP_printk(LOCAL_PR_FMT " - %d", LOCAL_PR_ARG, __entry->ret)
139);
140
141TRACE_EVENT(drv_return_bool,
142 TP_PROTO(struct ieee80211_local *local, bool ret),
143 TP_ARGS(local, ret),
144 TP_STRUCT__entry(
145 LOCAL_ENTRY
146 __field(bool, ret)
147 ),
148 TP_fast_assign(
149 LOCAL_ASSIGN;
150 __entry->ret = ret;
151 ),
152 TP_printk(LOCAL_PR_FMT " - %s", LOCAL_PR_ARG, (__entry->ret) ?
153 "true" : "false")
154);
155
156TRACE_EVENT(drv_return_u64,
157 TP_PROTO(struct ieee80211_local *local, u64 ret),
158 TP_ARGS(local, ret),
159 TP_STRUCT__entry(
160 LOCAL_ENTRY
161 __field(u64, ret)
162 ),
163 TP_fast_assign(
164 LOCAL_ASSIGN;
165 __entry->ret = ret;
166 ),
167 TP_printk(LOCAL_PR_FMT " - %llu", LOCAL_PR_ARG, __entry->ret)
168);
169
170DEFINE_EVENT(local_only_evt, drv_start,
171 TP_PROTO(struct ieee80211_local *local),
172 TP_ARGS(local)
173);
174
175DEFINE_EVENT(local_only_evt, drv_suspend,
176 TP_PROTO(struct ieee80211_local *local),
177 TP_ARGS(local)
178);
179
180DEFINE_EVENT(local_only_evt, drv_resume,
181 TP_PROTO(struct ieee80211_local *local),
182 TP_ARGS(local)
183);
184
185DEFINE_EVENT(local_only_evt, drv_stop,
186 TP_PROTO(struct ieee80211_local *local),
187 TP_ARGS(local)
188);
189
190DEFINE_EVENT(local_sdata_addr_evt, drv_add_interface,
191 TP_PROTO(struct ieee80211_local *local,
192 struct ieee80211_sub_if_data *sdata),
193 TP_ARGS(local, sdata)
194);
195
196TRACE_EVENT(drv_change_interface,
197 TP_PROTO(struct ieee80211_local *local,
198 struct ieee80211_sub_if_data *sdata,
199 enum nl80211_iftype type, bool p2p),
200
201 TP_ARGS(local, sdata, type, p2p),
202
203 TP_STRUCT__entry(
204 LOCAL_ENTRY
205 VIF_ENTRY
206 __field(u32, new_type)
207 __field(bool, new_p2p)
208 ),
209
210 TP_fast_assign(
211 LOCAL_ASSIGN;
212 VIF_ASSIGN;
213 __entry->new_type = type;
214 __entry->new_p2p = p2p;
215 ),
216
217 TP_printk(
218 LOCAL_PR_FMT VIF_PR_FMT " new type:%d%s",
219 LOCAL_PR_ARG, VIF_PR_ARG, __entry->new_type,
220 __entry->new_p2p ? "/p2p" : ""
221 )
222);
223
224DEFINE_EVENT(local_sdata_addr_evt, drv_remove_interface,
225 TP_PROTO(struct ieee80211_local *local,
226 struct ieee80211_sub_if_data *sdata),
227 TP_ARGS(local, sdata)
228);
229
230TRACE_EVENT(drv_config,
231 TP_PROTO(struct ieee80211_local *local,
232 u32 changed),
233
234 TP_ARGS(local, changed),
235
236 TP_STRUCT__entry(
237 LOCAL_ENTRY
238 __field(u32, changed)
239 __field(u32, flags)
240 __field(int, power_level)
241 __field(int, dynamic_ps_timeout)
242 __field(int, max_sleep_period)
243 __field(u16, listen_interval)
244 __field(u8, long_frame_max_tx_count)
245 __field(u8, short_frame_max_tx_count)
246 __field(int, center_freq)
247 __field(int, channel_type)
248 __field(int, smps)
249 ),
250
251 TP_fast_assign(
252 LOCAL_ASSIGN;
253 __entry->changed = changed;
254 __entry->flags = local->hw.conf.flags;
255 __entry->power_level = local->hw.conf.power_level;
256 __entry->dynamic_ps_timeout = local->hw.conf.dynamic_ps_timeout;
257 __entry->max_sleep_period = local->hw.conf.max_sleep_period;
258 __entry->listen_interval = local->hw.conf.listen_interval;
259 __entry->long_frame_max_tx_count = local->hw.conf.long_frame_max_tx_count;
260 __entry->short_frame_max_tx_count = local->hw.conf.short_frame_max_tx_count;
261 __entry->center_freq = local->hw.conf.channel->center_freq;
262 __entry->channel_type = local->hw.conf.channel_type;
263 __entry->smps = local->hw.conf.smps_mode;
264 ),
265
266 TP_printk(
267 LOCAL_PR_FMT " ch:%#x freq:%d",
268 LOCAL_PR_ARG, __entry->changed, __entry->center_freq
269 )
270);
271
272TRACE_EVENT(drv_bss_info_changed,
273 TP_PROTO(struct ieee80211_local *local,
274 struct ieee80211_sub_if_data *sdata,
275 struct ieee80211_bss_conf *info,
276 u32 changed),
277
278 TP_ARGS(local, sdata, info, changed),
279
280 TP_STRUCT__entry(
281 LOCAL_ENTRY
282 VIF_ENTRY
283 __field(bool, assoc)
284 __field(u16, aid)
285 __field(bool, cts)
286 __field(bool, shortpre)
287 __field(bool, shortslot)
288 __field(u8, dtimper)
289 __field(u16, bcnint)
290 __field(u16, assoc_cap)
291 __field(u64, timestamp)
292 __field(u32, basic_rates)
293 __field(u32, changed)
294 __field(bool, enable_beacon)
295 __field(u16, ht_operation_mode)
296 ),
297
298 TP_fast_assign(
299 LOCAL_ASSIGN;
300 VIF_ASSIGN;
301 __entry->changed = changed;
302 __entry->aid = info->aid;
303 __entry->assoc = info->assoc;
304 __entry->shortpre = info->use_short_preamble;
305 __entry->cts = info->use_cts_prot;
306 __entry->shortslot = info->use_short_slot;
307 __entry->dtimper = info->dtim_period;
308 __entry->bcnint = info->beacon_int;
309 __entry->assoc_cap = info->assoc_capability;
310 __entry->timestamp = info->timestamp;
311 __entry->basic_rates = info->basic_rates;
312 __entry->enable_beacon = info->enable_beacon;
313 __entry->ht_operation_mode = info->ht_operation_mode;
314 ),
315
316 TP_printk(
317 LOCAL_PR_FMT VIF_PR_FMT " changed:%#x",
318 LOCAL_PR_ARG, VIF_PR_ARG, __entry->changed
319 )
320);
321
322DECLARE_EVENT_CLASS(tx_sync_evt,
323 TP_PROTO(struct ieee80211_local *local,
324 struct ieee80211_sub_if_data *sdata,
325 const u8 *bssid,
326 enum ieee80211_tx_sync_type type),
327 TP_ARGS(local, sdata, bssid, type),
328
329 TP_STRUCT__entry(
330 LOCAL_ENTRY
331 VIF_ENTRY
332 __array(char, bssid, ETH_ALEN)
333 __field(u32, sync_type)
334 ),
335
336 TP_fast_assign(
337 LOCAL_ASSIGN;
338 VIF_ASSIGN;
339 memcpy(__entry->bssid, bssid, ETH_ALEN);
340 __entry->sync_type = type;
341 ),
342
343 TP_printk(
344 LOCAL_PR_FMT VIF_PR_FMT " bssid:%pM type:%d",
345 LOCAL_PR_ARG, VIF_PR_ARG, __entry->bssid, __entry->sync_type
346 )
347);
348
349DEFINE_EVENT(tx_sync_evt, drv_tx_sync,
350 TP_PROTO(struct ieee80211_local *local,
351 struct ieee80211_sub_if_data *sdata,
352 const u8 *bssid,
353 enum ieee80211_tx_sync_type type),
354 TP_ARGS(local, sdata, bssid, type)
355);
356
357DEFINE_EVENT(tx_sync_evt, drv_finish_tx_sync,
358 TP_PROTO(struct ieee80211_local *local,
359 struct ieee80211_sub_if_data *sdata,
360 const u8 *bssid,
361 enum ieee80211_tx_sync_type type),
362 TP_ARGS(local, sdata, bssid, type)
363);
364
365TRACE_EVENT(drv_prepare_multicast,
366 TP_PROTO(struct ieee80211_local *local, int mc_count),
367
368 TP_ARGS(local, mc_count),
369
370 TP_STRUCT__entry(
371 LOCAL_ENTRY
372 __field(int, mc_count)
373 ),
374
375 TP_fast_assign(
376 LOCAL_ASSIGN;
377 __entry->mc_count = mc_count;
378 ),
379
380 TP_printk(
381 LOCAL_PR_FMT " prepare mc (%d)",
382 LOCAL_PR_ARG, __entry->mc_count
383 )
384);
385
386TRACE_EVENT(drv_configure_filter,
387 TP_PROTO(struct ieee80211_local *local,
388 unsigned int changed_flags,
389 unsigned int *total_flags,
390 u64 multicast),
391
392 TP_ARGS(local, changed_flags, total_flags, multicast),
393
394 TP_STRUCT__entry(
395 LOCAL_ENTRY
396 __field(unsigned int, changed)
397 __field(unsigned int, total)
398 __field(u64, multicast)
399 ),
400
401 TP_fast_assign(
402 LOCAL_ASSIGN;
403 __entry->changed = changed_flags;
404 __entry->total = *total_flags;
405 __entry->multicast = multicast;
406 ),
407
408 TP_printk(
409 LOCAL_PR_FMT " changed:%#x total:%#x",
410 LOCAL_PR_ARG, __entry->changed, __entry->total
411 )
412);
413
414TRACE_EVENT(drv_set_tim,
415 TP_PROTO(struct ieee80211_local *local,
416 struct ieee80211_sta *sta, bool set),
417
418 TP_ARGS(local, sta, set),
419
420 TP_STRUCT__entry(
421 LOCAL_ENTRY
422 STA_ENTRY
423 __field(bool, set)
424 ),
425
426 TP_fast_assign(
427 LOCAL_ASSIGN;
428 STA_ASSIGN;
429 __entry->set = set;
430 ),
431
432 TP_printk(
433 LOCAL_PR_FMT STA_PR_FMT " set:%d",
434		LOCAL_PR_ARG, STA_PR_ARG, __entry->set
435 )
436);
437
438TRACE_EVENT(drv_set_key,
439 TP_PROTO(struct ieee80211_local *local,
440 enum set_key_cmd cmd, struct ieee80211_sub_if_data *sdata,
441 struct ieee80211_sta *sta,
442 struct ieee80211_key_conf *key),
443
444 TP_ARGS(local, cmd, sdata, sta, key),
445
446 TP_STRUCT__entry(
447 LOCAL_ENTRY
448 VIF_ENTRY
449 STA_ENTRY
450 __field(u32, cipher)
451 __field(u8, hw_key_idx)
452 __field(u8, flags)
453 __field(s8, keyidx)
454 ),
455
456 TP_fast_assign(
457 LOCAL_ASSIGN;
458 VIF_ASSIGN;
459 STA_ASSIGN;
460 __entry->cipher = key->cipher;
461 __entry->flags = key->flags;
462 __entry->keyidx = key->keyidx;
463 __entry->hw_key_idx = key->hw_key_idx;
464 ),
465
466 TP_printk(
467 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT,
468 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG
469 )
470);
471
472TRACE_EVENT(drv_update_tkip_key,
473 TP_PROTO(struct ieee80211_local *local,
474 struct ieee80211_sub_if_data *sdata,
475 struct ieee80211_key_conf *conf,
476 struct ieee80211_sta *sta, u32 iv32),
477
478 TP_ARGS(local, sdata, conf, sta, iv32),
479
480 TP_STRUCT__entry(
481 LOCAL_ENTRY
482 VIF_ENTRY
483 STA_ENTRY
484 __field(u32, iv32)
485 ),
486
487 TP_fast_assign(
488 LOCAL_ASSIGN;
489 VIF_ASSIGN;
490 STA_ASSIGN;
491 __entry->iv32 = iv32;
492 ),
493
494 TP_printk(
495 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " iv32:%#x",
496		LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->iv32
497 )
498);
499
500DEFINE_EVENT(local_sdata_evt, drv_hw_scan,
501 TP_PROTO(struct ieee80211_local *local,
502 struct ieee80211_sub_if_data *sdata),
503 TP_ARGS(local, sdata)
504);
505
506DEFINE_EVENT(local_sdata_evt, drv_cancel_hw_scan,
507 TP_PROTO(struct ieee80211_local *local,
508 struct ieee80211_sub_if_data *sdata),
509 TP_ARGS(local, sdata)
510);
511
512DEFINE_EVENT(local_sdata_evt, drv_sched_scan_start,
513 TP_PROTO(struct ieee80211_local *local,
514 struct ieee80211_sub_if_data *sdata),
515 TP_ARGS(local, sdata)
516);
517
518DEFINE_EVENT(local_sdata_evt, drv_sched_scan_stop,
519 TP_PROTO(struct ieee80211_local *local,
520 struct ieee80211_sub_if_data *sdata),
521 TP_ARGS(local, sdata)
522);
523
524DEFINE_EVENT(local_only_evt, drv_sw_scan_start,
525 TP_PROTO(struct ieee80211_local *local),
526 TP_ARGS(local)
527);
528
529DEFINE_EVENT(local_only_evt, drv_sw_scan_complete,
530 TP_PROTO(struct ieee80211_local *local),
531 TP_ARGS(local)
532);
533
534TRACE_EVENT(drv_get_stats,
535 TP_PROTO(struct ieee80211_local *local,
536 struct ieee80211_low_level_stats *stats,
537 int ret),
538
539 TP_ARGS(local, stats, ret),
540
541 TP_STRUCT__entry(
542 LOCAL_ENTRY
543 __field(int, ret)
544 __field(unsigned int, ackfail)
545 __field(unsigned int, rtsfail)
546 __field(unsigned int, fcserr)
547 __field(unsigned int, rtssucc)
548 ),
549
550 TP_fast_assign(
551 LOCAL_ASSIGN;
552 __entry->ret = ret;
553 __entry->ackfail = stats->dot11ACKFailureCount;
554 __entry->rtsfail = stats->dot11RTSFailureCount;
555 __entry->fcserr = stats->dot11FCSErrorCount;
556 __entry->rtssucc = stats->dot11RTSSuccessCount;
557 ),
558
559 TP_printk(
560 LOCAL_PR_FMT " ret:%d",
561 LOCAL_PR_ARG, __entry->ret
562 )
563);
564
565TRACE_EVENT(drv_get_tkip_seq,
566 TP_PROTO(struct ieee80211_local *local,
567 u8 hw_key_idx, u32 *iv32, u16 *iv16),
568
569 TP_ARGS(local, hw_key_idx, iv32, iv16),
570
571 TP_STRUCT__entry(
572 LOCAL_ENTRY
573 __field(u8, hw_key_idx)
574 __field(u32, iv32)
575 __field(u16, iv16)
576 ),
577
578 TP_fast_assign(
579 LOCAL_ASSIGN;
580 __entry->hw_key_idx = hw_key_idx;
581 __entry->iv32 = *iv32;
582 __entry->iv16 = *iv16;
583 ),
584
585 TP_printk(
586 LOCAL_PR_FMT, LOCAL_PR_ARG
587 )
588);
589
590DEFINE_EVENT(local_u32_evt, drv_set_frag_threshold,
591 TP_PROTO(struct ieee80211_local *local, u32 value),
592 TP_ARGS(local, value)
593);
594
595DEFINE_EVENT(local_u32_evt, drv_set_rts_threshold,
596 TP_PROTO(struct ieee80211_local *local, u32 value),
597 TP_ARGS(local, value)
598);
599
600TRACE_EVENT(drv_set_coverage_class,
601 TP_PROTO(struct ieee80211_local *local, u8 value),
602
603 TP_ARGS(local, value),
604
605 TP_STRUCT__entry(
606 LOCAL_ENTRY
607 __field(u8, value)
608 ),
609
610 TP_fast_assign(
611 LOCAL_ASSIGN;
612 __entry->value = value;
613 ),
614
615 TP_printk(
616 LOCAL_PR_FMT " value:%d",
617 LOCAL_PR_ARG, __entry->value
618 )
619);
620
621TRACE_EVENT(drv_sta_notify,
622 TP_PROTO(struct ieee80211_local *local,
623 struct ieee80211_sub_if_data *sdata,
624 enum sta_notify_cmd cmd,
625 struct ieee80211_sta *sta),
626
627 TP_ARGS(local, sdata, cmd, sta),
628
629 TP_STRUCT__entry(
630 LOCAL_ENTRY
631 VIF_ENTRY
632 STA_ENTRY
633 __field(u32, cmd)
634 ),
635
636 TP_fast_assign(
637 LOCAL_ASSIGN;
638 VIF_ASSIGN;
639 STA_ASSIGN;
640 __entry->cmd = cmd;
641 ),
642
643 TP_printk(
644 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " cmd:%d",
645 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->cmd
646 )
647);
648
649TRACE_EVENT(drv_sta_add,
650 TP_PROTO(struct ieee80211_local *local,
651 struct ieee80211_sub_if_data *sdata,
652 struct ieee80211_sta *sta),
653
654 TP_ARGS(local, sdata, sta),
655
656 TP_STRUCT__entry(
657 LOCAL_ENTRY
658 VIF_ENTRY
659 STA_ENTRY
660 ),
661
662 TP_fast_assign(
663 LOCAL_ASSIGN;
664 VIF_ASSIGN;
665 STA_ASSIGN;
666 ),
667
668 TP_printk(
669 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT,
670 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG
671 )
672);
673
674TRACE_EVENT(drv_sta_remove,
675 TP_PROTO(struct ieee80211_local *local,
676 struct ieee80211_sub_if_data *sdata,
677 struct ieee80211_sta *sta),
678
679 TP_ARGS(local, sdata, sta),
680
681 TP_STRUCT__entry(
682 LOCAL_ENTRY
683 VIF_ENTRY
684 STA_ENTRY
685 ),
686
687 TP_fast_assign(
688 LOCAL_ASSIGN;
689 VIF_ASSIGN;
690 STA_ASSIGN;
691 ),
692
693 TP_printk(
694 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT,
695 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG
696 )
697);
698
699TRACE_EVENT(drv_conf_tx,
700 TP_PROTO(struct ieee80211_local *local, u16 queue,
701 const struct ieee80211_tx_queue_params *params),
702
703 TP_ARGS(local, queue, params),
704
705 TP_STRUCT__entry(
706 LOCAL_ENTRY
707 __field(u16, queue)
708 __field(u16, txop)
709 __field(u16, cw_min)
710 __field(u16, cw_max)
711 __field(u8, aifs)
712 ),
713
714 TP_fast_assign(
715 LOCAL_ASSIGN;
716 __entry->queue = queue;
717 __entry->txop = params->txop;
718 __entry->cw_max = params->cw_max;
719 __entry->cw_min = params->cw_min;
720 __entry->aifs = params->aifs;
721 ),
722
723 TP_printk(
724 LOCAL_PR_FMT " queue:%d",
725 LOCAL_PR_ARG, __entry->queue
726 )
727);
728
729DEFINE_EVENT(local_only_evt, drv_get_tsf,
730 TP_PROTO(struct ieee80211_local *local),
731 TP_ARGS(local)
732);
733
734TRACE_EVENT(drv_set_tsf,
735 TP_PROTO(struct ieee80211_local *local, u64 tsf),
736
737 TP_ARGS(local, tsf),
738
739 TP_STRUCT__entry(
740 LOCAL_ENTRY
741 __field(u64, tsf)
742 ),
743
744 TP_fast_assign(
745 LOCAL_ASSIGN;
746 __entry->tsf = tsf;
747 ),
748
749 TP_printk(
750 LOCAL_PR_FMT " tsf:%llu",
751 LOCAL_PR_ARG, (unsigned long long)__entry->tsf
752 )
753);
754
755DEFINE_EVENT(local_only_evt, drv_reset_tsf,
756 TP_PROTO(struct ieee80211_local *local),
757 TP_ARGS(local)
758);
759
760DEFINE_EVENT(local_only_evt, drv_tx_last_beacon,
761 TP_PROTO(struct ieee80211_local *local),
762 TP_ARGS(local)
763);
764
765TRACE_EVENT(drv_ampdu_action,
766 TP_PROTO(struct ieee80211_local *local,
767 struct ieee80211_sub_if_data *sdata,
768 enum ieee80211_ampdu_mlme_action action,
769 struct ieee80211_sta *sta, u16 tid,
770 u16 *ssn, u8 buf_size),
771
772 TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size),
773
774 TP_STRUCT__entry(
775 LOCAL_ENTRY
776 STA_ENTRY
777 __field(u32, action)
778 __field(u16, tid)
779 __field(u16, ssn)
780 __field(u8, buf_size)
781 VIF_ENTRY
782 ),
783
784 TP_fast_assign(
785 LOCAL_ASSIGN;
786 VIF_ASSIGN;
787 STA_ASSIGN;
788 __entry->action = action;
789 __entry->tid = tid;
790 __entry->ssn = ssn ? *ssn : 0;
791 __entry->buf_size = buf_size;
792 ),
793
794 TP_printk(
795 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d",
796 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action,
797 __entry->tid, __entry->buf_size
798 )
799);
800
801TRACE_EVENT(drv_get_survey,
802 TP_PROTO(struct ieee80211_local *local, int idx,
803 struct survey_info *survey),
804
805 TP_ARGS(local, idx, survey),
806
807 TP_STRUCT__entry(
808 LOCAL_ENTRY
809 __field(int, idx)
810 ),
811
812 TP_fast_assign(
813 LOCAL_ASSIGN;
814 __entry->idx = idx;
815 ),
816
817 TP_printk(
818 LOCAL_PR_FMT " idx:%d",
819 LOCAL_PR_ARG, __entry->idx
820 )
821);
822
823TRACE_EVENT(drv_flush,
824 TP_PROTO(struct ieee80211_local *local, bool drop),
825
826 TP_ARGS(local, drop),
827
828 TP_STRUCT__entry(
829 LOCAL_ENTRY
830 __field(bool, drop)
831 ),
832
833 TP_fast_assign(
834 LOCAL_ASSIGN;
835 __entry->drop = drop;
836 ),
837
838 TP_printk(
839 LOCAL_PR_FMT " drop:%d",
840 LOCAL_PR_ARG, __entry->drop
841 )
842);
843
844TRACE_EVENT(drv_channel_switch,
845 TP_PROTO(struct ieee80211_local *local,
846 struct ieee80211_channel_switch *ch_switch),
847
848 TP_ARGS(local, ch_switch),
849
850 TP_STRUCT__entry(
851 LOCAL_ENTRY
852 __field(u64, timestamp)
853 __field(bool, block_tx)
854 __field(u16, freq)
855 __field(u8, count)
856 ),
857
858 TP_fast_assign(
859 LOCAL_ASSIGN;
860 __entry->timestamp = ch_switch->timestamp;
861 __entry->block_tx = ch_switch->block_tx;
862 __entry->freq = ch_switch->channel->center_freq;
863 __entry->count = ch_switch->count;
864 ),
865
866 TP_printk(
867 LOCAL_PR_FMT " new freq:%u count:%d",
868 LOCAL_PR_ARG, __entry->freq, __entry->count
869 )
870);
871
872TRACE_EVENT(drv_set_antenna,
873 TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret),
874
875 TP_ARGS(local, tx_ant, rx_ant, ret),
876
877 TP_STRUCT__entry(
878 LOCAL_ENTRY
879 __field(u32, tx_ant)
880 __field(u32, rx_ant)
881 __field(int, ret)
882 ),
883
884 TP_fast_assign(
885 LOCAL_ASSIGN;
886 __entry->tx_ant = tx_ant;
887 __entry->rx_ant = rx_ant;
888 __entry->ret = ret;
889 ),
890
891 TP_printk(
892 LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d",
893 LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret
894 )
895);
896
897TRACE_EVENT(drv_get_antenna,
898 TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret),
899
900 TP_ARGS(local, tx_ant, rx_ant, ret),
901
902 TP_STRUCT__entry(
903 LOCAL_ENTRY
904 __field(u32, tx_ant)
905 __field(u32, rx_ant)
906 __field(int, ret)
907 ),
908
909 TP_fast_assign(
910 LOCAL_ASSIGN;
911 __entry->tx_ant = tx_ant;
912 __entry->rx_ant = rx_ant;
913 __entry->ret = ret;
914 ),
915
916 TP_printk(
917 LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d",
918 LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret
919 )
920);
921
922TRACE_EVENT(drv_remain_on_channel,
923 TP_PROTO(struct ieee80211_local *local, struct ieee80211_channel *chan,
924 enum nl80211_channel_type chantype, unsigned int duration),
925
926 TP_ARGS(local, chan, chantype, duration),
927
928 TP_STRUCT__entry(
929 LOCAL_ENTRY
930 __field(int, center_freq)
931 __field(int, channel_type)
932 __field(unsigned int, duration)
933 ),
934
935 TP_fast_assign(
936 LOCAL_ASSIGN;
937 __entry->center_freq = chan->center_freq;
938 __entry->channel_type = chantype;
939 __entry->duration = duration;
940 ),
941
942 TP_printk(
943 LOCAL_PR_FMT " freq:%dMHz duration:%dms",
944 LOCAL_PR_ARG, __entry->center_freq, __entry->duration
945 )
946);
947
948DEFINE_EVENT(local_only_evt, drv_cancel_remain_on_channel,
949 TP_PROTO(struct ieee80211_local *local),
950 TP_ARGS(local)
951);
952
953TRACE_EVENT(drv_offchannel_tx,
954 TP_PROTO(struct ieee80211_local *local, struct sk_buff *skb,
955 struct ieee80211_channel *chan,
956 enum nl80211_channel_type channel_type,
957 unsigned int wait),
958
959 TP_ARGS(local, skb, chan, channel_type, wait),
960
961 TP_STRUCT__entry(
962 LOCAL_ENTRY
963 __field(int, center_freq)
964 __field(int, channel_type)
965 __field(unsigned int, wait)
966 ),
967
968 TP_fast_assign(
969 LOCAL_ASSIGN;
970 __entry->center_freq = chan->center_freq;
971 __entry->channel_type = channel_type;
972 __entry->wait = wait;
973 ),
974
975 TP_printk(
976 LOCAL_PR_FMT " freq:%dMHz, wait:%dms",
977 LOCAL_PR_ARG, __entry->center_freq, __entry->wait
978 )
979);
980
981TRACE_EVENT(drv_set_ringparam,
982 TP_PROTO(struct ieee80211_local *local, u32 tx, u32 rx),
983
984 TP_ARGS(local, tx, rx),
985
986 TP_STRUCT__entry(
987 LOCAL_ENTRY
988 __field(u32, tx)
989 __field(u32, rx)
990 ),
991
992 TP_fast_assign(
993 LOCAL_ASSIGN;
994 __entry->tx = tx;
995 __entry->rx = rx;
996 ),
997
998 TP_printk(
 999		LOCAL_PR_FMT " tx:%d rx:%d",
1000 LOCAL_PR_ARG, __entry->tx, __entry->rx
1001 )
1002);
1003
1004TRACE_EVENT(drv_get_ringparam,
1005 TP_PROTO(struct ieee80211_local *local, u32 *tx, u32 *tx_max,
1006 u32 *rx, u32 *rx_max),
1007
1008 TP_ARGS(local, tx, tx_max, rx, rx_max),
1009
1010 TP_STRUCT__entry(
1011 LOCAL_ENTRY
1012 __field(u32, tx)
1013 __field(u32, tx_max)
1014 __field(u32, rx)
1015 __field(u32, rx_max)
1016 ),
1017
1018 TP_fast_assign(
1019 LOCAL_ASSIGN;
1020 __entry->tx = *tx;
1021 __entry->tx_max = *tx_max;
1022 __entry->rx = *rx;
1023 __entry->rx_max = *rx_max;
1024 ),
1025
1026 TP_printk(
1027		LOCAL_PR_FMT " tx:%d tx_max:%d rx:%d rx_max:%d",
1028 LOCAL_PR_ARG,
1029 __entry->tx, __entry->tx_max, __entry->rx, __entry->rx_max
1030 )
1031);
1032
1033DEFINE_EVENT(local_only_evt, drv_tx_frames_pending,
1034 TP_PROTO(struct ieee80211_local *local),
1035 TP_ARGS(local)
1036);
1037
1038DEFINE_EVENT(local_only_evt, drv_offchannel_tx_cancel_wait,
1039 TP_PROTO(struct ieee80211_local *local),
1040 TP_ARGS(local)
1041);
1042
1043TRACE_EVENT(drv_set_bitrate_mask,
1044 TP_PROTO(struct ieee80211_local *local,
1045 struct ieee80211_sub_if_data *sdata,
1046 const struct cfg80211_bitrate_mask *mask),
1047
1048 TP_ARGS(local, sdata, mask),
1049
1050 TP_STRUCT__entry(
1051 LOCAL_ENTRY
1052 VIF_ENTRY
1053 __field(u32, legacy_2g)
1054 __field(u32, legacy_5g)
1055 ),
1056
1057 TP_fast_assign(
1058 LOCAL_ASSIGN;
1059 VIF_ASSIGN;
1060 __entry->legacy_2g = mask->control[IEEE80211_BAND_2GHZ].legacy;
1061 __entry->legacy_5g = mask->control[IEEE80211_BAND_5GHZ].legacy;
1062 ),
1063
1064 TP_printk(
1065 LOCAL_PR_FMT VIF_PR_FMT " 2G Mask:0x%x 5G Mask:0x%x",
1066 LOCAL_PR_ARG, VIF_PR_ARG, __entry->legacy_2g, __entry->legacy_5g
1067 )
1068);
1069
1070TRACE_EVENT(drv_set_rekey_data,
1071 TP_PROTO(struct ieee80211_local *local,
1072 struct ieee80211_sub_if_data *sdata,
1073 struct cfg80211_gtk_rekey_data *data),
1074
1075 TP_ARGS(local, sdata, data),
1076
1077 TP_STRUCT__entry(
1078 LOCAL_ENTRY
1079 VIF_ENTRY
1080 __array(u8, kek, NL80211_KEK_LEN)
1081 __array(u8, kck, NL80211_KCK_LEN)
1082 __array(u8, replay_ctr, NL80211_REPLAY_CTR_LEN)
1083 ),
1084
1085 TP_fast_assign(
1086 LOCAL_ASSIGN;
1087 VIF_ASSIGN;
1088 memcpy(__entry->kek, data->kek, NL80211_KEK_LEN);
1089 memcpy(__entry->kck, data->kck, NL80211_KCK_LEN);
1090 memcpy(__entry->replay_ctr, data->replay_ctr,
1091 NL80211_REPLAY_CTR_LEN);
1092 ),
1093
1094 TP_printk(LOCAL_PR_FMT VIF_PR_FMT,
1095 LOCAL_PR_ARG, VIF_PR_ARG)
1096);
1097
1098TRACE_EVENT(drv_rssi_callback,
1099 TP_PROTO(struct ieee80211_local *local,
1100 enum ieee80211_rssi_event rssi_event),
1101
1102 TP_ARGS(local, rssi_event),
1103
1104 TP_STRUCT__entry(
1105 LOCAL_ENTRY
1106 __field(u32, rssi_event)
1107 ),
1108
1109 TP_fast_assign(
1110 LOCAL_ASSIGN;
1111 __entry->rssi_event = rssi_event;
1112 ),
1113
1114 TP_printk(
1115 LOCAL_PR_FMT " rssi_event:%d",
1116 LOCAL_PR_ARG, __entry->rssi_event
1117 )
1118);
1119
1120/*
1121 * Tracing for API calls that drivers call.
1122 */
1123
1124TRACE_EVENT(api_start_tx_ba_session,
1125 TP_PROTO(struct ieee80211_sta *sta, u16 tid),
1126
1127 TP_ARGS(sta, tid),
1128
1129 TP_STRUCT__entry(
1130 STA_ENTRY
1131 __field(u16, tid)
1132 ),
1133
1134 TP_fast_assign(
1135 STA_ASSIGN;
1136 __entry->tid = tid;
1137 ),
1138
1139 TP_printk(
1140 STA_PR_FMT " tid:%d",
1141 STA_PR_ARG, __entry->tid
1142 )
1143);
1144
1145TRACE_EVENT(api_start_tx_ba_cb,
1146 TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid),
1147
1148 TP_ARGS(sdata, ra, tid),
1149
1150 TP_STRUCT__entry(
1151 VIF_ENTRY
1152 __array(u8, ra, ETH_ALEN)
1153 __field(u16, tid)
1154 ),
1155
1156 TP_fast_assign(
1157 VIF_ASSIGN;
1158 memcpy(__entry->ra, ra, ETH_ALEN);
1159 __entry->tid = tid;
1160 ),
1161
1162 TP_printk(
1163 VIF_PR_FMT " ra:%pM tid:%d",
1164 VIF_PR_ARG, __entry->ra, __entry->tid
1165 )
1166);
1167
1168TRACE_EVENT(api_stop_tx_ba_session,
1169 TP_PROTO(struct ieee80211_sta *sta, u16 tid),
1170
1171 TP_ARGS(sta, tid),
1172
1173 TP_STRUCT__entry(
1174 STA_ENTRY
1175 __field(u16, tid)
1176 ),
1177
1178 TP_fast_assign(
1179 STA_ASSIGN;
1180 __entry->tid = tid;
1181 ),
1182
1183 TP_printk(
1184 STA_PR_FMT " tid:%d",
1185 STA_PR_ARG, __entry->tid
1186 )
1187);
1188
1189TRACE_EVENT(api_stop_tx_ba_cb,
1190 TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid),
1191
1192 TP_ARGS(sdata, ra, tid),
1193
1194 TP_STRUCT__entry(
1195 VIF_ENTRY
1196 __array(u8, ra, ETH_ALEN)
1197 __field(u16, tid)
1198 ),
1199
1200 TP_fast_assign(
1201 VIF_ASSIGN;
1202 memcpy(__entry->ra, ra, ETH_ALEN);
1203 __entry->tid = tid;
1204 ),
1205
1206 TP_printk(
1207 VIF_PR_FMT " ra:%pM tid:%d",
1208 VIF_PR_ARG, __entry->ra, __entry->tid
1209 )
1210);
1211
1212DEFINE_EVENT(local_only_evt, api_restart_hw,
1213 TP_PROTO(struct ieee80211_local *local),
1214 TP_ARGS(local)
1215);
1216
1217TRACE_EVENT(api_beacon_loss,
1218 TP_PROTO(struct ieee80211_sub_if_data *sdata),
1219
1220 TP_ARGS(sdata),
1221
1222 TP_STRUCT__entry(
1223 VIF_ENTRY
1224 ),
1225
1226 TP_fast_assign(
1227 VIF_ASSIGN;
1228 ),
1229
1230 TP_printk(
1231 VIF_PR_FMT,
1232 VIF_PR_ARG
1233 )
1234);
1235
1236TRACE_EVENT(api_connection_loss,
1237 TP_PROTO(struct ieee80211_sub_if_data *sdata),
1238
1239 TP_ARGS(sdata),
1240
1241 TP_STRUCT__entry(
1242 VIF_ENTRY
1243 ),
1244
1245 TP_fast_assign(
1246 VIF_ASSIGN;
1247 ),
1248
1249 TP_printk(
1250 VIF_PR_FMT,
1251 VIF_PR_ARG
1252 )
1253);
1254
1255TRACE_EVENT(api_cqm_rssi_notify,
1256 TP_PROTO(struct ieee80211_sub_if_data *sdata,
1257 enum nl80211_cqm_rssi_threshold_event rssi_event),
1258
1259 TP_ARGS(sdata, rssi_event),
1260
1261 TP_STRUCT__entry(
1262 VIF_ENTRY
1263 __field(u32, rssi_event)
1264 ),
1265
1266 TP_fast_assign(
1267 VIF_ASSIGN;
1268 __entry->rssi_event = rssi_event;
1269 ),
1270
1271 TP_printk(
1272 VIF_PR_FMT " event:%d",
1273 VIF_PR_ARG, __entry->rssi_event
1274 )
1275);
1276
1277TRACE_EVENT(api_scan_completed,
1278 TP_PROTO(struct ieee80211_local *local, bool aborted),
1279
1280 TP_ARGS(local, aborted),
1281
1282 TP_STRUCT__entry(
1283 LOCAL_ENTRY
1284 __field(bool, aborted)
1285 ),
1286
1287 TP_fast_assign(
1288 LOCAL_ASSIGN;
1289 __entry->aborted = aborted;
1290 ),
1291
1292 TP_printk(
1293 LOCAL_PR_FMT " aborted:%d",
1294 LOCAL_PR_ARG, __entry->aborted
1295 )
1296);
1297
1298TRACE_EVENT(api_sched_scan_results,
1299 TP_PROTO(struct ieee80211_local *local),
1300
1301 TP_ARGS(local),
1302
1303 TP_STRUCT__entry(
1304 LOCAL_ENTRY
1305 ),
1306
1307 TP_fast_assign(
1308 LOCAL_ASSIGN;
1309 ),
1310
1311 TP_printk(
1312 LOCAL_PR_FMT, LOCAL_PR_ARG
1313 )
1314);
1315
1316TRACE_EVENT(api_sched_scan_stopped,
1317 TP_PROTO(struct ieee80211_local *local),
1318
1319 TP_ARGS(local),
1320
1321 TP_STRUCT__entry(
1322 LOCAL_ENTRY
1323 ),
1324
1325 TP_fast_assign(
1326 LOCAL_ASSIGN;
1327 ),
1328
1329 TP_printk(
1330 LOCAL_PR_FMT, LOCAL_PR_ARG
1331 )
1332);
1333
1334TRACE_EVENT(api_sta_block_awake,
1335 TP_PROTO(struct ieee80211_local *local,
1336 struct ieee80211_sta *sta, bool block),
1337
1338 TP_ARGS(local, sta, block),
1339
1340 TP_STRUCT__entry(
1341 LOCAL_ENTRY
1342 STA_ENTRY
1343 __field(bool, block)
1344 ),
1345
1346 TP_fast_assign(
1347 LOCAL_ASSIGN;
1348 STA_ASSIGN;
1349 __entry->block = block;
1350 ),
1351
1352 TP_printk(
1353 LOCAL_PR_FMT STA_PR_FMT " block:%d",
1354 LOCAL_PR_ARG, STA_PR_FMT, __entry->block
1355 )
1356);
1357
1358TRACE_EVENT(api_chswitch_done,
1359 TP_PROTO(struct ieee80211_sub_if_data *sdata, bool success),
1360
1361 TP_ARGS(sdata, success),
1362
1363 TP_STRUCT__entry(
1364 VIF_ENTRY
1365 __field(bool, success)
1366 ),
1367
1368 TP_fast_assign(
1369 VIF_ASSIGN;
1370 __entry->success = success;
1371 ),
1372
1373 TP_printk(
1374 VIF_PR_FMT " success=%d",
1375 VIF_PR_ARG, __entry->success
1376 )
1377);
1378
1379DEFINE_EVENT(local_only_evt, api_ready_on_channel,
1380 TP_PROTO(struct ieee80211_local *local),
1381 TP_ARGS(local)
1382);
1383
1384DEFINE_EVENT(local_only_evt, api_remain_on_channel_expired,
1385 TP_PROTO(struct ieee80211_local *local),
1386 TP_ARGS(local)
1387);
1388
1389TRACE_EVENT(api_gtk_rekey_notify,
1390 TP_PROTO(struct ieee80211_sub_if_data *sdata,
1391 const u8 *bssid, const u8 *replay_ctr),
1392
1393 TP_ARGS(sdata, bssid, replay_ctr),
1394
1395 TP_STRUCT__entry(
1396 VIF_ENTRY
1397 __array(u8, bssid, ETH_ALEN)
1398 __array(u8, replay_ctr, NL80211_REPLAY_CTR_LEN)
1399 ),
1400
1401 TP_fast_assign(
1402 VIF_ASSIGN;
1403 memcpy(__entry->bssid, bssid, ETH_ALEN);
1404 memcpy(__entry->replay_ctr, replay_ctr, NL80211_REPLAY_CTR_LEN);
1405 ),
1406
1407 TP_printk(VIF_PR_FMT, VIF_PR_ARG)
1408);
1409
1410TRACE_EVENT(api_enable_rssi_reports,
1411 TP_PROTO(struct ieee80211_sub_if_data *sdata,
1412 int rssi_min_thold, int rssi_max_thold),
1413
1414 TP_ARGS(sdata, rssi_min_thold, rssi_max_thold),
1415
1416 TP_STRUCT__entry(
1417 VIF_ENTRY
1418 __field(int, rssi_min_thold)
1419 __field(int, rssi_max_thold)
1420 ),
1421
1422 TP_fast_assign(
1423 VIF_ASSIGN;
1424 __entry->rssi_min_thold = rssi_min_thold;
1425 __entry->rssi_max_thold = rssi_max_thold;
1426 ),
1427
1428 TP_printk(
1429		VIF_PR_FMT " rssi_min_thold=%d, rssi_max_thold=%d",
1430 VIF_PR_ARG, __entry->rssi_min_thold, __entry->rssi_max_thold
1431 )
1432);
1433
1434/*
1435 * Tracing for internal functions
1436 * (which may also be called in response to driver calls)
1437 */
1438
1439TRACE_EVENT(wake_queue,
1440 TP_PROTO(struct ieee80211_local *local, u16 queue,
1441 enum queue_stop_reason reason),
1442
1443 TP_ARGS(local, queue, reason),
1444
1445 TP_STRUCT__entry(
1446 LOCAL_ENTRY
1447 __field(u16, queue)
1448 __field(u32, reason)
1449 ),
1450
1451 TP_fast_assign(
1452 LOCAL_ASSIGN;
1453 __entry->queue = queue;
1454 __entry->reason = reason;
1455 ),
1456
1457 TP_printk(
1458 LOCAL_PR_FMT " queue:%d, reason:%d",
1459 LOCAL_PR_ARG, __entry->queue, __entry->reason
1460 )
1461);
1462
1463TRACE_EVENT(stop_queue,
1464 TP_PROTO(struct ieee80211_local *local, u16 queue,
1465 enum queue_stop_reason reason),
1466
1467 TP_ARGS(local, queue, reason),
1468
1469 TP_STRUCT__entry(
1470 LOCAL_ENTRY
1471 __field(u16, queue)
1472 __field(u32, reason)
1473 ),
1474
1475 TP_fast_assign(
1476 LOCAL_ASSIGN;
1477 __entry->queue = queue;
1478 __entry->reason = reason;
1479 ),
1480
1481 TP_printk(
1482 LOCAL_PR_FMT " queue:%d, reason:%d",
1483 LOCAL_PR_ARG, __entry->queue, __entry->reason
1484 )
1485);
1486#endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
1487
1488#undef TRACE_INCLUDE_PATH
1489#define TRACE_INCLUDE_PATH .
1490#undef TRACE_INCLUDE_FILE
1491#define TRACE_INCLUDE_FILE driver-trace
1492#include <trace/define_trace.h>
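(A minimal illustration, not taken from the patch: each TRACE_EVENT(name, ...)
defined above generates a trace_name() inline that the rest of mac80211 calls.
The driver-ops wrappers are assumed to look roughly like the sketch below; the
exact wrapper bodies live in driver-ops.h and may differ.)

	static inline void drv_set_tsf(struct ieee80211_local *local, u64 tsf)
	{
		trace_drv_set_tsf(local, tsf);		/* record the call for ftrace */
		if (local->ops->set_tsf)
			local->ops->set_tsf(&local->hw, tsf);
	}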
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
new file mode 100644
index 00000000000..7737f204d3f
--- /dev/null
+++ b/net/mac80211/work.c
@@ -0,0 +1,1302 @@
1/*
2 * mac80211 work implementation
3 *
4 * Copyright 2003-2008, Jouni Malinen <j@w1.fi>
5 * Copyright 2004, Instant802 Networks, Inc.
6 * Copyright 2005, Devicescape Software, Inc.
7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
9 * Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15
16#include <linux/delay.h>
17#include <linux/if_ether.h>
18#include <linux/skbuff.h>
19#include <linux/if_arp.h>
20#include <linux/etherdevice.h>
21#include <linux/crc32.h>
22#include <linux/slab.h>
23#include <net/mac80211.h>
24#include <asm/unaligned.h>
25
26#include "ieee80211_i.h"
27#include "rate.h"
28#include "driver-ops.h"
29
30#define IEEE80211_AUTH_TIMEOUT (HZ / 5)
31#define IEEE80211_AUTH_MAX_TRIES 3
32#define IEEE80211_ASSOC_TIMEOUT (HZ / 5)
33#define IEEE80211_ASSOC_MAX_TRIES 3
34
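/*
 * Result of handling a work item or a queued frame: WORK_ACT_MISMATCH means
 * the frame does not belong to this work item, WORK_ACT_NONE leaves the item
 * pending, WORK_ACT_TIMEOUT removes the item and completes it with a NULL
 * frame, and WORK_ACT_DONE completes it with the frame that finished it.
 */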
35enum work_action {
36 WORK_ACT_MISMATCH,
37 WORK_ACT_NONE,
38 WORK_ACT_TIMEOUT,
39 WORK_ACT_DONE,
40};
41
42
43/* utils */
44static inline void ASSERT_WORK_MTX(struct ieee80211_local *local)
45{
46 lockdep_assert_held(&local->mtx);
47}
48
49/*
50 * We can have multiple work items (and connection probing)
51 * scheduling this timer, but we need to take care to only
52 * reschedule it when it should fire _earlier_ than it was
53 * asked for before, or if it's not pending right now. This
54 * function ensures that. Note that it then is required to
55 * run this function for all timeouts after the first one
56 * has happened -- the work that runs from this timer will
57 * do that.
58 */
59static void run_again(struct ieee80211_local *local,
60 unsigned long timeout)
61{
62 ASSERT_WORK_MTX(local);
63
64 if (!timer_pending(&local->work_timer) ||
65 time_before(timeout, local->work_timer.expires))
66 mod_timer(&local->work_timer, timeout);
67}
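/*
 * (The work loop below invokes run_again(local, wk->timeout) for every
 *  pending item, so the timer always ends up armed for the earliest
 *  outstanding timeout.)
 */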
68
69void free_work(struct ieee80211_work *wk)
70{
71 kfree_rcu(wk, rcu_head);
72}
73
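/*
 * Worked example for the rate matching below: a supported-rates IE octet of
 * 0x96 (basic-rate bit set) masks to 0x16 = 22 units of 500 kb/s, and
 * 22 * 5 = 110 in the 100 kb/s units used by sband->bitrates[].bitrate,
 * i.e. 11 Mb/s; if the hardware advertises a matching 11 Mb/s entry, that
 * entry's bit is set in *rates and counted.
 */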
74static int ieee80211_compatible_rates(const u8 *supp_rates, int supp_rates_len,
75 struct ieee80211_supported_band *sband,
76 u32 *rates)
77{
78 int i, j, count;
79 *rates = 0;
80 count = 0;
81 for (i = 0; i < supp_rates_len; i++) {
82 int rate = (supp_rates[i] & 0x7F) * 5;
83
84 for (j = 0; j < sband->n_bitrates; j++)
85 if (sband->bitrates[j].bitrate == rate) {
86 *rates |= BIT(j);
87 count++;
88 break;
89 }
90 }
91
92 return count;
93}
94
95/* frame sending functions */
96
97static void ieee80211_add_ht_ie(struct sk_buff *skb, const u8 *ht_info_ie,
98 struct ieee80211_supported_band *sband,
99 struct ieee80211_channel *channel,
100 enum ieee80211_smps_mode smps)
101{
102 struct ieee80211_ht_info *ht_info;
103 u8 *pos;
104 u32 flags = channel->flags;
105 u16 cap = sband->ht_cap.cap;
106 __le16 tmp;
107
108 if (!sband->ht_cap.ht_supported)
109 return;
110
111 if (!ht_info_ie)
112 return;
113
114 if (ht_info_ie[1] < sizeof(struct ieee80211_ht_info))
115 return;
116
117 ht_info = (struct ieee80211_ht_info *)(ht_info_ie + 2);
118
119 /* determine capability flags */
120
121 switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) {
122 case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
123 if (flags & IEEE80211_CHAN_NO_HT40PLUS) {
124 cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
125 cap &= ~IEEE80211_HT_CAP_SGI_40;
126 }
127 break;
128 case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
129 if (flags & IEEE80211_CHAN_NO_HT40MINUS) {
130 cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
131 cap &= ~IEEE80211_HT_CAP_SGI_40;
132 }
133 break;
134 }
135
136 /* set SM PS mode properly */
137 cap &= ~IEEE80211_HT_CAP_SM_PS;
138 switch (smps) {
139 case IEEE80211_SMPS_AUTOMATIC:
140 case IEEE80211_SMPS_NUM_MODES:
 141		WARN_ON(1);	/* fall through */
142 case IEEE80211_SMPS_OFF:
143 cap |= WLAN_HT_CAP_SM_PS_DISABLED <<
144 IEEE80211_HT_CAP_SM_PS_SHIFT;
145 break;
146 case IEEE80211_SMPS_STATIC:
147 cap |= WLAN_HT_CAP_SM_PS_STATIC <<
148 IEEE80211_HT_CAP_SM_PS_SHIFT;
149 break;
150 case IEEE80211_SMPS_DYNAMIC:
151 cap |= WLAN_HT_CAP_SM_PS_DYNAMIC <<
152 IEEE80211_HT_CAP_SM_PS_SHIFT;
153 break;
154 }
155
156 /* reserve and fill IE */
157
158 pos = skb_put(skb, sizeof(struct ieee80211_ht_cap) + 2);
159 *pos++ = WLAN_EID_HT_CAPABILITY;
160 *pos++ = sizeof(struct ieee80211_ht_cap);
161 memset(pos, 0, sizeof(struct ieee80211_ht_cap));
162
163 /* capability flags */
164 tmp = cpu_to_le16(cap);
165 memcpy(pos, &tmp, sizeof(u16));
166 pos += sizeof(u16);
167
168 /* AMPDU parameters */
169 *pos++ = sband->ht_cap.ampdu_factor |
170 (sband->ht_cap.ampdu_density <<
171 IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT);
172
173 /* MCS set */
174 memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs));
175 pos += sizeof(sband->ht_cap.mcs);
176
177 /* extended capabilities */
178 pos += sizeof(__le16);
179
180 /* BF capabilities */
181 pos += sizeof(__le32);
182
183 /* antenna selection */
184 pos += sizeof(u8);
185}
186
187static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
188 struct ieee80211_work *wk)
189{
190 struct ieee80211_local *local = sdata->local;
191 struct sk_buff *skb;
192 struct ieee80211_mgmt *mgmt;
193 u8 *pos, qos_info;
194 size_t offset = 0, noffset;
195 int i, count, rates_len, supp_rates_len;
196 u16 capab;
197 struct ieee80211_supported_band *sband;
198 u32 rates = 0;
199
200 sband = local->hw.wiphy->bands[wk->chan->band];
201
202 if (wk->assoc.supp_rates_len) {
203 /*
204 * Get all rates supported by the device and the AP as
205 * some APs don't like getting a superset of their rates
206 * in the association request (e.g. D-Link DAP 1353 in
207 * b-only mode)...
208 */
209 rates_len = ieee80211_compatible_rates(wk->assoc.supp_rates,
210 wk->assoc.supp_rates_len,
211 sband, &rates);
212 } else {
213 /*
 214		 * In case the AP did not provide any supported rates information
215 * before association, we send information element(s) with
216 * all rates that we support.
217 */
218 rates = ~0;
219 rates_len = sband->n_bitrates;
220 }
221
222 skb = alloc_skb(local->hw.extra_tx_headroom +
223 sizeof(*mgmt) + /* bit too much but doesn't matter */
224 2 + wk->assoc.ssid_len + /* SSID */
225 4 + rates_len + /* (extended) rates */
226 4 + /* power capability */
227 2 + 2 * sband->n_channels + /* supported channels */
228 2 + sizeof(struct ieee80211_ht_cap) + /* HT */
229 wk->ie_len + /* extra IEs */
230 9, /* WMM */
231 GFP_KERNEL);
232 if (!skb) {
233 printk(KERN_DEBUG "%s: failed to allocate buffer for assoc "
234 "frame\n", sdata->name);
235 return;
236 }
237 skb_reserve(skb, local->hw.extra_tx_headroom);
238
239 capab = WLAN_CAPABILITY_ESS;
240
241 if (sband->band == IEEE80211_BAND_2GHZ) {
242 if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
243 capab |= WLAN_CAPABILITY_SHORT_SLOT_TIME;
244 if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE))
245 capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
246 }
247
248 if (wk->assoc.capability & WLAN_CAPABILITY_PRIVACY)
249 capab |= WLAN_CAPABILITY_PRIVACY;
250
251 if ((wk->assoc.capability & WLAN_CAPABILITY_SPECTRUM_MGMT) &&
252 (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT))
253 capab |= WLAN_CAPABILITY_SPECTRUM_MGMT;
254
255 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
256 memset(mgmt, 0, 24);
257 memcpy(mgmt->da, wk->filter_ta, ETH_ALEN);
258 memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
259 memcpy(mgmt->bssid, wk->filter_ta, ETH_ALEN);
260
261 if (!is_zero_ether_addr(wk->assoc.prev_bssid)) {
262 skb_put(skb, 10);
263 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
264 IEEE80211_STYPE_REASSOC_REQ);
265 mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab);
266 mgmt->u.reassoc_req.listen_interval =
267 cpu_to_le16(local->hw.conf.listen_interval);
268 memcpy(mgmt->u.reassoc_req.current_ap, wk->assoc.prev_bssid,
269 ETH_ALEN);
270 } else {
271 skb_put(skb, 4);
272 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
273 IEEE80211_STYPE_ASSOC_REQ);
274 mgmt->u.assoc_req.capab_info = cpu_to_le16(capab);
275 mgmt->u.assoc_req.listen_interval =
276 cpu_to_le16(local->hw.conf.listen_interval);
277 }
278
279 /* SSID */
280 pos = skb_put(skb, 2 + wk->assoc.ssid_len);
281 *pos++ = WLAN_EID_SSID;
282 *pos++ = wk->assoc.ssid_len;
283 memcpy(pos, wk->assoc.ssid, wk->assoc.ssid_len);
284
 285	/* add all rates marked above; the first eight go into the Supported
	 * Rates IE, the rest into the Extended Supported Rates IE below */
286 supp_rates_len = rates_len;
287 if (supp_rates_len > 8)
288 supp_rates_len = 8;
289
290 pos = skb_put(skb, supp_rates_len + 2);
291 *pos++ = WLAN_EID_SUPP_RATES;
292 *pos++ = supp_rates_len;
293
294 count = 0;
295 for (i = 0; i < sband->n_bitrates; i++) {
296 if (BIT(i) & rates) {
297 int rate = sband->bitrates[i].bitrate;
298 *pos++ = (u8) (rate / 5);
299 if (++count == 8)
300 break;
301 }
302 }
303
304 if (rates_len > count) {
305 pos = skb_put(skb, rates_len - count + 2);
306 *pos++ = WLAN_EID_EXT_SUPP_RATES;
307 *pos++ = rates_len - count;
308
309 for (i++; i < sband->n_bitrates; i++) {
310 if (BIT(i) & rates) {
311 int rate = sband->bitrates[i].bitrate;
312 *pos++ = (u8) (rate / 5);
313 }
314 }
315 }
316
317 if (capab & WLAN_CAPABILITY_SPECTRUM_MGMT) {
318 /* 1. power capabilities */
319 pos = skb_put(skb, 4);
320 *pos++ = WLAN_EID_PWR_CAPABILITY;
321 *pos++ = 2;
322 *pos++ = 0; /* min tx power */
323 *pos++ = wk->chan->max_power; /* max tx power */
324
325 /* 2. supported channels */
326 /* TODO: get this in reg domain format */
327 pos = skb_put(skb, 2 * sband->n_channels + 2);
328 *pos++ = WLAN_EID_SUPPORTED_CHANNELS;
329 *pos++ = 2 * sband->n_channels;
330 for (i = 0; i < sband->n_channels; i++) {
331 *pos++ = ieee80211_frequency_to_channel(
332 sband->channels[i].center_freq);
 333			*pos++ = 1; /* one channel in the subband */
334 }
335 }
336
337 /* if present, add any custom IEs that go before HT */
338 if (wk->ie_len && wk->ie) {
339 static const u8 before_ht[] = {
340 WLAN_EID_SSID,
341 WLAN_EID_SUPP_RATES,
342 WLAN_EID_EXT_SUPP_RATES,
343 WLAN_EID_PWR_CAPABILITY,
344 WLAN_EID_SUPPORTED_CHANNELS,
345 WLAN_EID_RSN,
346 WLAN_EID_QOS_CAPA,
347 WLAN_EID_RRM_ENABLED_CAPABILITIES,
348 WLAN_EID_MOBILITY_DOMAIN,
349 WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
350 };
351 noffset = ieee80211_ie_split(wk->ie, wk->ie_len,
352 before_ht, ARRAY_SIZE(before_ht),
353 offset);
354 pos = skb_put(skb, noffset - offset);
355 memcpy(pos, wk->ie + offset, noffset - offset);
356 offset = noffset;
357 }
358
359 if (wk->assoc.use_11n && wk->assoc.wmm_used &&
360 local->hw.queues >= 4)
361 ieee80211_add_ht_ie(skb, wk->assoc.ht_information_ie,
362 sband, wk->chan, wk->assoc.smps);
363
364 /* if present, add any custom non-vendor IEs that go after HT */
365 if (wk->ie_len && wk->ie) {
366 noffset = ieee80211_ie_split_vendor(wk->ie, wk->ie_len,
367 offset);
368 pos = skb_put(skb, noffset - offset);
369 memcpy(pos, wk->ie + offset, noffset - offset);
370 offset = noffset;
371 }
372
373 if (wk->assoc.wmm_used && local->hw.queues >= 4) {
374 if (wk->assoc.uapsd_used) {
375 qos_info = local->uapsd_queues;
376 qos_info |= (local->uapsd_max_sp_len <<
377 IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT);
378 } else {
379 qos_info = 0;
380 }
381
382 pos = skb_put(skb, 9);
383 *pos++ = WLAN_EID_VENDOR_SPECIFIC;
384 *pos++ = 7; /* len */
385 *pos++ = 0x00; /* Microsoft OUI 00:50:F2 */
386 *pos++ = 0x50;
387 *pos++ = 0xf2;
388 *pos++ = 2; /* WME */
389 *pos++ = 0; /* WME info */
390 *pos++ = 1; /* WME ver */
391 *pos++ = qos_info;
392 }
393
394 /* add any remaining custom (i.e. vendor specific here) IEs */
395 if (wk->ie_len && wk->ie) {
396 noffset = wk->ie_len;
397 pos = skb_put(skb, noffset - offset);
398 memcpy(pos, wk->ie + offset, noffset - offset);
399 }
400
401 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
402 ieee80211_tx_skb(sdata, skb);
403}
404
405static void ieee80211_remove_auth_bss(struct ieee80211_local *local,
406 struct ieee80211_work *wk)
407{
408 struct cfg80211_bss *cbss;
409 u16 capa_val = WLAN_CAPABILITY_ESS;
410
411 if (wk->probe_auth.privacy)
412 capa_val |= WLAN_CAPABILITY_PRIVACY;
413
414 cbss = cfg80211_get_bss(local->hw.wiphy, wk->chan, wk->filter_ta,
415 wk->probe_auth.ssid, wk->probe_auth.ssid_len,
416 WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_PRIVACY,
417 capa_val);
418 if (!cbss)
419 return;
420
421 cfg80211_unlink_bss(local->hw.wiphy, cbss);
422 cfg80211_put_bss(cbss);
423}
424
425static enum work_action __must_check
426ieee80211_direct_probe(struct ieee80211_work *wk)
427{
428 struct ieee80211_sub_if_data *sdata = wk->sdata;
429 struct ieee80211_local *local = sdata->local;
430
431 if (!wk->probe_auth.synced) {
432 int ret = drv_tx_sync(local, sdata, wk->filter_ta,
433 IEEE80211_TX_SYNC_AUTH);
434 if (ret)
435 return WORK_ACT_TIMEOUT;
436 }
437 wk->probe_auth.synced = true;
438
439 wk->probe_auth.tries++;
440 if (wk->probe_auth.tries > IEEE80211_AUTH_MAX_TRIES) {
441 printk(KERN_DEBUG "%s: direct probe to %pM timed out\n",
442 sdata->name, wk->filter_ta);
443
444 /*
 445		 * Most likely the AP is not in range, so remove the
446 * bss struct for that AP.
447 */
448 ieee80211_remove_auth_bss(local, wk);
449
450 return WORK_ACT_TIMEOUT;
451 }
452
453 printk(KERN_DEBUG "%s: direct probe to %pM (try %d/%i)\n",
454 sdata->name, wk->filter_ta, wk->probe_auth.tries,
455 IEEE80211_AUTH_MAX_TRIES);
456
457 /*
 458	 * The direct probe is sent to the broadcast address, as some APs
 459	 * will not answer a directed probe request while unassociated.
460 */
461 ieee80211_send_probe_req(sdata, NULL, wk->probe_auth.ssid,
462 wk->probe_auth.ssid_len, NULL, 0,
463 (u32) -1, true);
464
465 wk->timeout = jiffies + IEEE80211_AUTH_TIMEOUT;
466 run_again(local, wk->timeout);
467
468 return WORK_ACT_NONE;
469}
470
471
472static enum work_action __must_check
473ieee80211_authenticate(struct ieee80211_work *wk)
474{
475 struct ieee80211_sub_if_data *sdata = wk->sdata;
476 struct ieee80211_local *local = sdata->local;
477
478 if (!wk->probe_auth.synced) {
479 int ret = drv_tx_sync(local, sdata, wk->filter_ta,
480 IEEE80211_TX_SYNC_AUTH);
481 if (ret)
482 return WORK_ACT_TIMEOUT;
483 }
484 wk->probe_auth.synced = true;
485
486 wk->probe_auth.tries++;
487 if (wk->probe_auth.tries > IEEE80211_AUTH_MAX_TRIES) {
488 printk(KERN_DEBUG "%s: authentication with %pM"
489 " timed out\n", sdata->name, wk->filter_ta);
490
491 /*
 492		 * Most likely the AP is not in range, so remove the
493 * bss struct for that AP.
494 */
495 ieee80211_remove_auth_bss(local, wk);
496
497 return WORK_ACT_TIMEOUT;
498 }
499
500 printk(KERN_DEBUG "%s: authenticate with %pM (try %d)\n",
501 sdata->name, wk->filter_ta, wk->probe_auth.tries);
502
503 ieee80211_send_auth(sdata, 1, wk->probe_auth.algorithm, wk->ie,
504 wk->ie_len, wk->filter_ta, NULL, 0, 0);
505 wk->probe_auth.transaction = 2;
506
507 wk->timeout = jiffies + IEEE80211_AUTH_TIMEOUT;
508 run_again(local, wk->timeout);
509
510 return WORK_ACT_NONE;
511}
512
513static enum work_action __must_check
514ieee80211_associate(struct ieee80211_work *wk)
515{
516 struct ieee80211_sub_if_data *sdata = wk->sdata;
517 struct ieee80211_local *local = sdata->local;
518
519 if (!wk->assoc.synced) {
520 int ret = drv_tx_sync(local, sdata, wk->filter_ta,
521 IEEE80211_TX_SYNC_ASSOC);
522 if (ret)
523 return WORK_ACT_TIMEOUT;
524 }
525 wk->assoc.synced = true;
526
527 wk->assoc.tries++;
528 if (wk->assoc.tries > IEEE80211_ASSOC_MAX_TRIES) {
529 printk(KERN_DEBUG "%s: association with %pM"
530 " timed out\n",
531 sdata->name, wk->filter_ta);
532
533 /*
 534		 * Most likely the AP is not in range, so remove the
535 * bss struct for that AP.
536 */
537 if (wk->assoc.bss)
538 cfg80211_unlink_bss(local->hw.wiphy, wk->assoc.bss);
539
540 return WORK_ACT_TIMEOUT;
541 }
542
543 printk(KERN_DEBUG "%s: associate with %pM (try %d)\n",
544 sdata->name, wk->filter_ta, wk->assoc.tries);
545 ieee80211_send_assoc(sdata, wk);
546
547 wk->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT;
548 run_again(local, wk->timeout);
549
550 return WORK_ACT_NONE;
551}
552
553static enum work_action __must_check
554ieee80211_remain_on_channel_timeout(struct ieee80211_work *wk)
555{
556 /*
557 * First time we run, do nothing -- the generic code will
558 * have switched to the right channel etc.
559 */
560 if (!wk->started) {
561 wk->timeout = jiffies + msecs_to_jiffies(wk->remain.duration);
562
563 cfg80211_ready_on_channel(wk->sdata->dev, (unsigned long) wk,
564 wk->chan, wk->chan_type,
565 wk->remain.duration, GFP_KERNEL);
566
567 return WORK_ACT_NONE;
568 }
569
570 return WORK_ACT_TIMEOUT;
571}
572
573static enum work_action __must_check
574ieee80211_offchannel_tx(struct ieee80211_work *wk)
575{
576 if (!wk->started) {
577 wk->timeout = jiffies + msecs_to_jiffies(wk->offchan_tx.wait);
578
579 /*
580 * After this, offchan_tx.frame remains but now is no
581 * longer a valid pointer -- we still need it as the
582 * cookie for canceling this work/status matching.
583 */
584 ieee80211_tx_skb(wk->sdata, wk->offchan_tx.frame);
585
586 return WORK_ACT_NONE;
587 }
588
589 return WORK_ACT_TIMEOUT;
590}
591
592static enum work_action __must_check
593ieee80211_assoc_beacon_wait(struct ieee80211_work *wk)
594{
595 if (wk->started)
596 return WORK_ACT_TIMEOUT;
597
598 /*
599 * Wait up to one beacon interval ...
600 * should this be more if we miss one?
601 */
602 printk(KERN_DEBUG "%s: waiting for beacon from %pM\n",
603 wk->sdata->name, wk->filter_ta);
604 wk->timeout = TU_TO_EXP_TIME(wk->assoc.bss->beacon_interval);
605 return WORK_ACT_NONE;
606}
607
608static void ieee80211_auth_challenge(struct ieee80211_work *wk,
609 struct ieee80211_mgmt *mgmt,
610 size_t len)
611{
612 struct ieee80211_sub_if_data *sdata = wk->sdata;
613 u8 *pos;
614 struct ieee802_11_elems elems;
615
616 pos = mgmt->u.auth.variable;
617 ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
618 if (!elems.challenge)
619 return;
620 ieee80211_send_auth(sdata, 3, wk->probe_auth.algorithm,
621 elems.challenge - 2, elems.challenge_len + 2,
622 wk->filter_ta, wk->probe_auth.key,
623 wk->probe_auth.key_len, wk->probe_auth.key_idx);
624 wk->probe_auth.transaction = 4;
625}
626
627static enum work_action __must_check
628ieee80211_rx_mgmt_auth(struct ieee80211_work *wk,
629 struct ieee80211_mgmt *mgmt, size_t len)
630{
631 u16 auth_alg, auth_transaction, status_code;
632
633 if (wk->type != IEEE80211_WORK_AUTH)
634 return WORK_ACT_MISMATCH;
635
636 if (len < 24 + 6)
637 return WORK_ACT_NONE;
638
639 auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg);
640 auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction);
641 status_code = le16_to_cpu(mgmt->u.auth.status_code);
642
643 if (auth_alg != wk->probe_auth.algorithm ||
644 auth_transaction != wk->probe_auth.transaction)
645 return WORK_ACT_NONE;
646
647 if (status_code != WLAN_STATUS_SUCCESS) {
648 printk(KERN_DEBUG "%s: %pM denied authentication (status %d)\n",
649 wk->sdata->name, mgmt->sa, status_code);
650 return WORK_ACT_DONE;
651 }
652
653 switch (wk->probe_auth.algorithm) {
654 case WLAN_AUTH_OPEN:
655 case WLAN_AUTH_LEAP:
656 case WLAN_AUTH_FT:
657 break;
658 case WLAN_AUTH_SHARED_KEY:
659 if (wk->probe_auth.transaction != 4) {
660 ieee80211_auth_challenge(wk, mgmt, len);
661 /* need another frame */
662 return WORK_ACT_NONE;
663 }
664 break;
665 default:
666 WARN_ON(1);
667 return WORK_ACT_NONE;
668 }
669
670 printk(KERN_DEBUG "%s: authenticated\n", wk->sdata->name);
671 return WORK_ACT_DONE;
672}
673
674static enum work_action __must_check
675ieee80211_rx_mgmt_assoc_resp(struct ieee80211_work *wk,
676 struct ieee80211_mgmt *mgmt, size_t len,
677 bool reassoc)
678{
679 struct ieee80211_sub_if_data *sdata = wk->sdata;
680 struct ieee80211_local *local = sdata->local;
681 u16 capab_info, status_code, aid;
682 struct ieee802_11_elems elems;
683 u8 *pos;
684
685 if (wk->type != IEEE80211_WORK_ASSOC)
686 return WORK_ACT_MISMATCH;
687
688 /*
689 * AssocResp and ReassocResp have identical structure, so process both
690 * of them in this function.
691 */
692
693 if (len < 24 + 6)
694 return WORK_ACT_NONE;
695
696 capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
697 status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code);
698 aid = le16_to_cpu(mgmt->u.assoc_resp.aid);
699
700 printk(KERN_DEBUG "%s: RX %sssocResp from %pM (capab=0x%x "
701 "status=%d aid=%d)\n",
702 sdata->name, reassoc ? "Rea" : "A", mgmt->sa,
703 capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14))));
704
705 pos = mgmt->u.assoc_resp.variable;
706 ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
707
708 if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY &&
709 elems.timeout_int && elems.timeout_int_len == 5 &&
710 elems.timeout_int[0] == WLAN_TIMEOUT_ASSOC_COMEBACK) {
711 u32 tu, ms;
712 tu = get_unaligned_le32(elems.timeout_int + 1);
 713		ms = tu * 1024 / 1000; /* 1 TU = 1024 usec */
714 printk(KERN_DEBUG "%s: %pM rejected association temporarily; "
715 "comeback duration %u TU (%u ms)\n",
716 sdata->name, mgmt->sa, tu, ms);
717 wk->timeout = jiffies + msecs_to_jiffies(ms);
718 if (ms > IEEE80211_ASSOC_TIMEOUT)
719 run_again(local, wk->timeout);
720 return WORK_ACT_NONE;
721 }
722
723 if (status_code != WLAN_STATUS_SUCCESS)
724 printk(KERN_DEBUG "%s: %pM denied association (code=%d)\n",
725 sdata->name, mgmt->sa, status_code);
726 else
727 printk(KERN_DEBUG "%s: associated\n", sdata->name);
728
729 return WORK_ACT_DONE;
730}
731
732static enum work_action __must_check
733ieee80211_rx_mgmt_probe_resp(struct ieee80211_work *wk,
734 struct ieee80211_mgmt *mgmt, size_t len,
735 struct ieee80211_rx_status *rx_status)
736{
737 struct ieee80211_sub_if_data *sdata = wk->sdata;
738 struct ieee80211_local *local = sdata->local;
739 size_t baselen;
740
741 ASSERT_WORK_MTX(local);
742
743 if (wk->type != IEEE80211_WORK_DIRECT_PROBE)
744 return WORK_ACT_MISMATCH;
745
746 if (len < 24 + 12)
747 return WORK_ACT_NONE;
748
749 baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt;
750 if (baselen > len)
751 return WORK_ACT_NONE;
752
753 printk(KERN_DEBUG "%s: direct probe responded\n", sdata->name);
754 return WORK_ACT_DONE;
755}
756
757static enum work_action __must_check
758ieee80211_rx_mgmt_beacon(struct ieee80211_work *wk,
759 struct ieee80211_mgmt *mgmt, size_t len)
760{
761 struct ieee80211_sub_if_data *sdata = wk->sdata;
762 struct ieee80211_local *local = sdata->local;
763
764 ASSERT_WORK_MTX(local);
765
766 if (wk->type != IEEE80211_WORK_ASSOC_BEACON_WAIT)
767 return WORK_ACT_MISMATCH;
768
769 if (len < 24 + 12)
770 return WORK_ACT_NONE;
771
772 printk(KERN_DEBUG "%s: beacon received\n", sdata->name);
773 return WORK_ACT_DONE;
774}
775
776static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
777 struct sk_buff *skb)
778{
779 struct ieee80211_rx_status *rx_status;
780 struct ieee80211_mgmt *mgmt;
781 struct ieee80211_work *wk;
782 enum work_action rma = WORK_ACT_NONE;
783 u16 fc;
784
785 rx_status = (struct ieee80211_rx_status *) skb->cb;
786 mgmt = (struct ieee80211_mgmt *) skb->data;
787 fc = le16_to_cpu(mgmt->frame_control);
788
789 mutex_lock(&local->mtx);
790
791 list_for_each_entry(wk, &local->work_list, list) {
792 const u8 *bssid = NULL;
793
794 switch (wk->type) {
795 case IEEE80211_WORK_DIRECT_PROBE:
796 case IEEE80211_WORK_AUTH:
797 case IEEE80211_WORK_ASSOC:
798 case IEEE80211_WORK_ASSOC_BEACON_WAIT:
799 bssid = wk->filter_ta;
800 break;
801 default:
802 continue;
803 }
804
805 /*
806 * Before queuing, we already verified mgmt->sa,
807 * so this is needed just for matching.
808 */
809 if (compare_ether_addr(bssid, mgmt->bssid))
810 continue;
811
812 switch (fc & IEEE80211_FCTL_STYPE) {
813 case IEEE80211_STYPE_BEACON:
814 rma = ieee80211_rx_mgmt_beacon(wk, mgmt, skb->len);
815 break;
816 case IEEE80211_STYPE_PROBE_RESP:
817 rma = ieee80211_rx_mgmt_probe_resp(wk, mgmt, skb->len,
818 rx_status);
819 break;
820 case IEEE80211_STYPE_AUTH:
821 rma = ieee80211_rx_mgmt_auth(wk, mgmt, skb->len);
822 break;
823 case IEEE80211_STYPE_ASSOC_RESP:
824 rma = ieee80211_rx_mgmt_assoc_resp(wk, mgmt,
825 skb->len, false);
826 break;
827 case IEEE80211_STYPE_REASSOC_RESP:
828 rma = ieee80211_rx_mgmt_assoc_resp(wk, mgmt,
829 skb->len, true);
830 break;
831 default:
832 WARN_ON(1);
833 rma = WORK_ACT_NONE;
834 }
835
836 /*
837 * We've either received an unexpected frame, or we have
838 * multiple work items and need to match the frame to the
839 * right one.
840 */
841 if (rma == WORK_ACT_MISMATCH)
842 continue;
843
844 /*
845 * We've processed this frame for that work, so it can't
846 * belong to another work struct.
847 * NB: this is also required for correctness for 'rma'!
848 */
849 break;
850 }
851
852 switch (rma) {
853 case WORK_ACT_MISMATCH:
854 /* ignore this unmatched frame */
855 break;
856 case WORK_ACT_NONE:
857 break;
858 case WORK_ACT_DONE:
859 list_del_rcu(&wk->list);
860 break;
861 default:
862 WARN(1, "unexpected: %d", rma);
863 }
864
865 mutex_unlock(&local->mtx);
866
867 if (rma != WORK_ACT_DONE)
868 goto out;
869
870 switch (wk->done(wk, skb)) {
871 case WORK_DONE_DESTROY:
872 free_work(wk);
873 break;
874 case WORK_DONE_REQUEUE:
875 synchronize_rcu();
876 wk->started = false; /* restart */
877 mutex_lock(&local->mtx);
878 list_add_tail(&wk->list, &local->work_list);
879 mutex_unlock(&local->mtx);
880 }
881
882 out:
883 kfree_skb(skb);
884}
885
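/*
 * Channel-type compatibility for concurrent work items: NO_HT work runs on
 * any operating channel type, HT20 work needs an HT operating channel
 * (HT20 or either HT40 variant), and HT40+/HT40- work only coexists with an
 * identical operating type, since the secondary channel offset must match.
 * For example, HT20 work on an HT40+ operating channel coexists, while
 * HT40- work on HT40+ does not.
 */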
886static bool ieee80211_work_ct_coexists(enum nl80211_channel_type wk_ct,
887 enum nl80211_channel_type oper_ct)
888{
889 switch (wk_ct) {
890 case NL80211_CHAN_NO_HT:
891 return true;
892 case NL80211_CHAN_HT20:
893 if (oper_ct != NL80211_CHAN_NO_HT)
894 return true;
895 return false;
896 case NL80211_CHAN_HT40MINUS:
897 case NL80211_CHAN_HT40PLUS:
898 return (wk_ct == oper_ct);
899 }
900 WARN_ON(1); /* shouldn't get here */
901 return false;
902}
903
904static enum nl80211_channel_type
905ieee80211_calc_ct(enum nl80211_channel_type wk_ct,
906 enum nl80211_channel_type oper_ct)
907{
908 switch (wk_ct) {
909 case NL80211_CHAN_NO_HT:
910 return oper_ct;
911 case NL80211_CHAN_HT20:
912 if (oper_ct != NL80211_CHAN_NO_HT)
913 return oper_ct;
914 return wk_ct;
915 case NL80211_CHAN_HT40MINUS:
916 case NL80211_CHAN_HT40PLUS:
917 return wk_ct;
918 }
919 WARN_ON(1); /* shouldn't get here */
920 return wk_ct;
921}
922
923
924static void ieee80211_work_timer(unsigned long data)
925{
926 struct ieee80211_local *local = (void *) data;
927
928 if (local->quiescing)
929 return;
930
931 ieee80211_queue_work(&local->hw, &local->work_work);
932}
933
934static void ieee80211_work_work(struct work_struct *work)
935{
936 struct ieee80211_local *local =
937 container_of(work, struct ieee80211_local, work_work);
938 struct sk_buff *skb;
939 struct ieee80211_work *wk, *tmp;
940 LIST_HEAD(free_work);
941 enum work_action rma;
942 bool remain_off_channel = false;
943
944 if (local->scanning)
945 return;
946
947 /*
948 * ieee80211_queue_work() should have picked up most cases,
949 * here we'll pick the rest.
950 */
951 if (WARN(local->suspended, "work scheduled while going to suspend\n"))
952 return;
953
954 /* first process frames to avoid timing out while a frame is pending */
955 while ((skb = skb_dequeue(&local->work_skb_queue)))
956 ieee80211_work_rx_queued_mgmt(local, skb);
957
958 mutex_lock(&local->mtx);
959
960 ieee80211_recalc_idle(local);
961
962 list_for_each_entry_safe(wk, tmp, &local->work_list, list) {
963 bool started = wk->started;
964
965 /* mark work as started if it's on the current off-channel */
966 if (!started && local->tmp_channel &&
967 wk->chan == local->tmp_channel &&
968 wk->chan_type == local->tmp_channel_type) {
969 started = true;
970 wk->timeout = jiffies;
971 }
972
973 if (!started && !local->tmp_channel) {
974 bool on_oper_chan;
975 bool tmp_chan_changed = false;
976 bool on_oper_chan2;
977 enum nl80211_channel_type wk_ct;
978 on_oper_chan = ieee80211_cfg_on_oper_channel(local);
979
980 /* Work with existing channel type if possible. */
981 wk_ct = wk->chan_type;
982 if (wk->chan == local->hw.conf.channel)
983 wk_ct = ieee80211_calc_ct(wk->chan_type,
984 local->hw.conf.channel_type);
985
986 if (local->tmp_channel)
987 if ((local->tmp_channel != wk->chan) ||
988 (local->tmp_channel_type != wk_ct))
989 tmp_chan_changed = true;
990
991 local->tmp_channel = wk->chan;
992 local->tmp_channel_type = wk_ct;
993 /*
994 * Leave the station vifs in awake mode if they
995 * happen to be on the same channel as
996 * the requested channel.
997 */
998 on_oper_chan2 = ieee80211_cfg_on_oper_channel(local);
999 if (on_oper_chan != on_oper_chan2) {
1000 if (on_oper_chan2) {
1001 /* going off oper channel, PS too */
1002 ieee80211_offchannel_stop_vifs(local,
1003 true);
1004 ieee80211_hw_config(local, 0);
1005 } else {
1006 /* going on channel, but leave PS
1007 * off-channel. */
1008 ieee80211_hw_config(local, 0);
1009 ieee80211_offchannel_return(local,
1010 true,
1011 false);
1012 }
1013 } else if (tmp_chan_changed)
1014 /* Still off-channel, but on some other
1015 * channel, so update hardware.
1016 * PS should already be off-channel.
1017 */
1018 ieee80211_hw_config(local, 0);
1019
1020 started = true;
1021 wk->timeout = jiffies;
1022 }
1023
1024 /* don't try to work with items that aren't started */
1025 if (!started)
1026 continue;
1027
1028 if (time_is_after_jiffies(wk->timeout)) {
1029 /*
1030 * This work item isn't supposed to be worked on
1031 * right now, but take care to adjust the timer
1032 * properly.
1033 */
1034 run_again(local, wk->timeout);
1035 continue;
1036 }
1037
1038 switch (wk->type) {
1039 default:
1040 WARN_ON(1);
1041 /* nothing */
1042 rma = WORK_ACT_NONE;
1043 break;
1044 case IEEE80211_WORK_ABORT:
1045 rma = WORK_ACT_TIMEOUT;
1046 break;
1047 case IEEE80211_WORK_DIRECT_PROBE:
1048 rma = ieee80211_direct_probe(wk);
1049 break;
1050 case IEEE80211_WORK_AUTH:
1051 rma = ieee80211_authenticate(wk);
1052 break;
1053 case IEEE80211_WORK_ASSOC:
1054 rma = ieee80211_associate(wk);
1055 break;
1056 case IEEE80211_WORK_REMAIN_ON_CHANNEL:
1057 rma = ieee80211_remain_on_channel_timeout(wk);
1058 break;
1059 case IEEE80211_WORK_OFFCHANNEL_TX:
1060 rma = ieee80211_offchannel_tx(wk);
1061 break;
1062 case IEEE80211_WORK_ASSOC_BEACON_WAIT:
1063 rma = ieee80211_assoc_beacon_wait(wk);
1064 break;
1065 }
1066
1067 wk->started = started;
1068
1069 switch (rma) {
1070 case WORK_ACT_NONE:
1071 /* might have changed the timeout */
1072 run_again(local, wk->timeout);
1073 break;
1074 case WORK_ACT_TIMEOUT:
1075 list_del_rcu(&wk->list);
1076 synchronize_rcu();
1077 list_add(&wk->list, &free_work);
1078 break;
1079 default:
1080 WARN(1, "unexpected: %d", rma);
1081 }
1082 }
1083
1084 list_for_each_entry(wk, &local->work_list, list) {
1085 if (!wk->started)
1086 continue;
1087 if (wk->chan != local->tmp_channel)
1088 continue;
1089 if (!ieee80211_work_ct_coexists(wk->chan_type,
1090 local->tmp_channel_type))
1091 continue;
1092 remain_off_channel = true;
1093 }
1094
1095 if (!remain_off_channel && local->tmp_channel) {
1096 local->tmp_channel = NULL;
1097		/* If tmp_channel wasn't the operating channel, then
1098		 * we need to go back on-channel.
1099		 * NOTE: If we can ever be here while scanning,
1100 * or if the hw_config() channel config logic changes,
1101 * then we may need to do a more thorough check to see if
1102 * we still need to do a hardware config. Currently,
1103 * we cannot be here while scanning, however.
1104 */
1105 if (!ieee80211_cfg_on_oper_channel(local))
1106 ieee80211_hw_config(local, 0);
1107
1108 /* At the least, we need to disable offchannel_ps,
1109 * so just go ahead and run the entire offchannel
1110 * return logic here. We *could* skip enabling
1111 * beaconing if we were already on-oper-channel
1112 * as a future optimization.
1113 */
1114 ieee80211_offchannel_return(local, true, true);
1115
1116 /* give connection some time to breathe */
1117 run_again(local, jiffies + HZ/2);
1118 }
1119
1120 if (list_empty(&local->work_list) && local->scan_req &&
1121 !local->scanning)
1122 ieee80211_queue_delayed_work(&local->hw,
1123 &local->scan_work,
1124 round_jiffies_relative(0));
1125
1126 ieee80211_recalc_idle(local);
1127
1128 mutex_unlock(&local->mtx);
1129
1130 list_for_each_entry_safe(wk, tmp, &free_work, list) {
1131 wk->done(wk, NULL);
1132 list_del(&wk->list);
1133 kfree(wk);
1134 }
1135}
1136
1137void ieee80211_add_work(struct ieee80211_work *wk)
1138{
1139 struct ieee80211_local *local;
1140
1141 if (WARN_ON(!wk->chan))
1142 return;
1143
1144 if (WARN_ON(!wk->sdata))
1145 return;
1146
1147 if (WARN_ON(!wk->done))
1148 return;
1149
1150 if (WARN_ON(!ieee80211_sdata_running(wk->sdata)))
1151 return;
1152
1153 wk->started = false;
1154
1155 local = wk->sdata->local;
1156 mutex_lock(&local->mtx);
1157 list_add_tail(&wk->list, &local->work_list);
1158 mutex_unlock(&local->mtx);
1159
1160 ieee80211_queue_work(&local->hw, &local->work_work);
1161}
1162
1163void ieee80211_work_init(struct ieee80211_local *local)
1164{
1165 INIT_LIST_HEAD(&local->work_list);
1166 setup_timer(&local->work_timer, ieee80211_work_timer,
1167 (unsigned long)local);
1168 INIT_WORK(&local->work_work, ieee80211_work_work);
1169 skb_queue_head_init(&local->work_skb_queue);
1170}
1171
1172void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
1173{
1174 struct ieee80211_local *local = sdata->local;
1175 struct ieee80211_work *wk;
1176 bool cleanup = false;
1177
1178 mutex_lock(&local->mtx);
1179 list_for_each_entry(wk, &local->work_list, list) {
1180 if (wk->sdata != sdata)
1181 continue;
1182 cleanup = true;
1183 wk->type = IEEE80211_WORK_ABORT;
1184 wk->started = true;
1185 wk->timeout = jiffies;
1186 }
1187 mutex_unlock(&local->mtx);
1188
1189 /* run cleanups etc. */
1190 if (cleanup)
1191 ieee80211_work_work(&local->work_work);
1192
1193 mutex_lock(&local->mtx);
1194 list_for_each_entry(wk, &local->work_list, list) {
1195 if (wk->sdata != sdata)
1196 continue;
1197 WARN_ON(1);
1198 break;
1199 }
1200 mutex_unlock(&local->mtx);
1201}
1202
1203ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata,
1204 struct sk_buff *skb)
1205{
1206 struct ieee80211_local *local = sdata->local;
1207 struct ieee80211_mgmt *mgmt;
1208 struct ieee80211_work *wk;
1209 u16 fc;
1210
1211 if (skb->len < 24)
1212 return RX_DROP_MONITOR;
1213
1214 mgmt = (struct ieee80211_mgmt *) skb->data;
1215 fc = le16_to_cpu(mgmt->frame_control);
1216
1217 list_for_each_entry_rcu(wk, &local->work_list, list) {
1218 if (sdata != wk->sdata)
1219 continue;
1220 if (compare_ether_addr(wk->filter_ta, mgmt->sa))
1221 continue;
1222 if (compare_ether_addr(wk->filter_ta, mgmt->bssid))
1223 continue;
1224
1225 switch (fc & IEEE80211_FCTL_STYPE) {
1226 case IEEE80211_STYPE_AUTH:
1227 case IEEE80211_STYPE_PROBE_RESP:
1228 case IEEE80211_STYPE_ASSOC_RESP:
1229 case IEEE80211_STYPE_REASSOC_RESP:
1230 case IEEE80211_STYPE_BEACON:
1231 skb_queue_tail(&local->work_skb_queue, skb);
1232 ieee80211_queue_work(&local->hw, &local->work_work);
1233 return RX_QUEUED;
1234 }
1235 }
1236
1237 return RX_CONTINUE;
1238}
1239
1240static enum work_done_result ieee80211_remain_done(struct ieee80211_work *wk,
1241 struct sk_buff *skb)
1242{
1243 /*
1244 * We are done serving the remain-on-channel command.
1245 */
1246 cfg80211_remain_on_channel_expired(wk->sdata->dev, (unsigned long) wk,
1247 wk->chan, wk->chan_type,
1248 GFP_KERNEL);
1249
1250 return WORK_DONE_DESTROY;
1251}
1252
1253int ieee80211_wk_remain_on_channel(struct ieee80211_sub_if_data *sdata,
1254 struct ieee80211_channel *chan,
1255 enum nl80211_channel_type channel_type,
1256 unsigned int duration, u64 *cookie)
1257{
1258 struct ieee80211_work *wk;
1259
1260 wk = kzalloc(sizeof(*wk), GFP_KERNEL);
1261 if (!wk)
1262 return -ENOMEM;
1263
1264 wk->type = IEEE80211_WORK_REMAIN_ON_CHANNEL;
1265 wk->chan = chan;
1266 wk->chan_type = channel_type;
1267 wk->sdata = sdata;
1268 wk->done = ieee80211_remain_done;
1269
1270 wk->remain.duration = duration;
1271
1272 *cookie = (unsigned long) wk;
1273
1274 ieee80211_add_work(wk);
1275
1276 return 0;
1277}
1278
1279int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata,
1280 u64 cookie)
1281{
1282 struct ieee80211_local *local = sdata->local;
1283 struct ieee80211_work *wk, *tmp;
1284 bool found = false;
1285
1286 mutex_lock(&local->mtx);
1287 list_for_each_entry_safe(wk, tmp, &local->work_list, list) {
1288 if ((unsigned long) wk == cookie) {
1289 wk->timeout = jiffies;
1290 found = true;
1291 break;
1292 }
1293 }
1294 mutex_unlock(&local->mtx);
1295
1296 if (!found)
1297 return -ENOENT;
1298
1299 ieee80211_queue_work(&local->hw, &local->work_work);
1300
1301 return 0;
1302}
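A minimal usage sketch (assumed caller, not part of the patch) for the
remain-on-channel helpers above: the request hands back a cookie, which is
simply the work item's address, and cancellation forces that item's timeout
so the work loop tears it down. The wrapper function name and the 100 ms
duration are illustrative only.

	static int example_listen_then_cancel(struct ieee80211_sub_if_data *sdata,
					      struct ieee80211_channel *chan)
	{
		u64 cookie;
		int ret;

		/* go off-channel for roughly 100 ms */
		ret = ieee80211_wk_remain_on_channel(sdata, chan,
						     NL80211_CHAN_NO_HT,
						     100, &cookie);
		if (ret)
			return ret;

		/* ... later, abort the pending off-channel period ... */
		return ieee80211_wk_cancel_remain_on_channel(sdata, cookie);
	}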
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
new file mode 100644
index 00000000000..a80b0cb03f1
--- /dev/null
+++ b/net/netfilter/nfnetlink_queue.c
@@ -0,0 +1,1028 @@
1/*
2 * This is a module which is used for queueing packets and communicating with
3 * userspace via nfnetlink.
4 *
5 * (C) 2005 by Harald Welte <laforge@netfilter.org>
6 * (C) 2007 by Patrick McHardy <kaber@trash.net>
7 *
8 * Based on the old ipv4-only ip_queue.c:
9 * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
10 * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as
14 * published by the Free Software Foundation.
15 *
16 */
17#include <linux/module.h>
18#include <linux/skbuff.h>
19#include <linux/init.h>
20#include <linux/spinlock.h>
21#include <linux/slab.h>
22#include <linux/notifier.h>
23#include <linux/netdevice.h>
24#include <linux/netfilter.h>
25#include <linux/proc_fs.h>
26#include <linux/netfilter_ipv4.h>
27#include <linux/netfilter_ipv6.h>
28#include <linux/netfilter/nfnetlink.h>
29#include <linux/netfilter/nfnetlink_queue.h>
30#include <linux/list.h>
31#include <net/sock.h>
32#include <net/netfilter/nf_queue.h>
33
34#include <linux/atomic.h>
35
36#ifdef CONFIG_BRIDGE_NETFILTER
37#include "../bridge/br_private.h"
38#endif
39
40#define NFQNL_QMAX_DEFAULT 1024
41
42struct nfqnl_instance {
43 struct hlist_node hlist; /* global list of queues */
44 struct rcu_head rcu;
45
46 int peer_pid;
47 unsigned int queue_maxlen;
48 unsigned int copy_range;
49 unsigned int queue_dropped;
50 unsigned int queue_user_dropped;
51
52
53 u_int16_t queue_num; /* number of this queue */
54 u_int8_t copy_mode;
55/*
 56 * The following fields are dirtied for each queued packet;
 57 * keep them in the same cache line if possible.
58 */
59 spinlock_t lock;
60 unsigned int queue_total;
61 unsigned int id_sequence; /* 'sequence' of pkt ids */
62 struct list_head queue_list; /* packets in queue */
63};
64
65typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
66
67static DEFINE_SPINLOCK(instances_lock);
68
69#define INSTANCE_BUCKETS 16
70static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly;
71
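/*
 * Fold the 16-bit queue number into one of the INSTANCE_BUCKETS hash chains.
 * For example, queue_num 0x0102: (0x0102 >> 8) | 0x0102 = 0x0103 = 259, and
 * 259 % 16 selects bucket 3.
 */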
72static inline u_int8_t instance_hashfn(u_int16_t queue_num)
73{
74 return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS;
75}
76
77static struct nfqnl_instance *
78instance_lookup(u_int16_t queue_num)
79{
80 struct hlist_head *head;
81 struct hlist_node *pos;
82 struct nfqnl_instance *inst;
83
84 head = &instance_table[instance_hashfn(queue_num)];
85 hlist_for_each_entry_rcu(inst, pos, head, hlist) {
86 if (inst->queue_num == queue_num)
87 return inst;
88 }
89 return NULL;
90}
91
92static struct nfqnl_instance *
93instance_create(u_int16_t queue_num, int pid)
94{
95 struct nfqnl_instance *inst;
96 unsigned int h;
97 int err;
98
99 spin_lock(&instances_lock);
100 if (instance_lookup(queue_num)) {
101 err = -EEXIST;
102 goto out_unlock;
103 }
104
105 inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
106 if (!inst) {
107 err = -ENOMEM;
108 goto out_unlock;
109 }
110
111 inst->queue_num = queue_num;
112 inst->peer_pid = pid;
113 inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
114 inst->copy_range = 0xfffff;
115 inst->copy_mode = NFQNL_COPY_NONE;
116 spin_lock_init(&inst->lock);
117 INIT_LIST_HEAD(&inst->queue_list);
118
119 if (!try_module_get(THIS_MODULE)) {
120 err = -EAGAIN;
121 goto out_free;
122 }
123
124 h = instance_hashfn(queue_num);
125 hlist_add_head_rcu(&inst->hlist, &instance_table[h]);
126
127 spin_unlock(&instances_lock);
128
129 return inst;
130
131out_free:
132 kfree(inst);
133out_unlock:
134 spin_unlock(&instances_lock);
135 return ERR_PTR(err);
136}
137
138static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
139 unsigned long data);
140
141static void
142instance_destroy_rcu(struct rcu_head *head)
143{
144 struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
145 rcu);
146
147 nfqnl_flush(inst, NULL, 0);
148 kfree(inst);
149 module_put(THIS_MODULE);
150}
151
152static void
153__instance_destroy(struct nfqnl_instance *inst)
154{
155 hlist_del_rcu(&inst->hlist);
156 call_rcu(&inst->rcu, instance_destroy_rcu);
157}
158
159static void
160instance_destroy(struct nfqnl_instance *inst)
161{
162 spin_lock(&instances_lock);
163 __instance_destroy(inst);
164 spin_unlock(&instances_lock);
165}
166
167static inline void
168__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
169{
170 list_add_tail(&entry->list, &queue->queue_list);
171 queue->queue_total++;
172}
173
174static void
175__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
176{
177 list_del(&entry->list);
178 queue->queue_total--;
179}
180
181static struct nf_queue_entry *
182find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
183{
184 struct nf_queue_entry *entry = NULL, *i;
185
186 spin_lock_bh(&queue->lock);
187
188 list_for_each_entry(i, &queue->queue_list, list) {
189 if (i->id == id) {
190 entry = i;
191 break;
192 }
193 }
194
195 if (entry)
196 __dequeue_entry(queue, entry);
197
198 spin_unlock_bh(&queue->lock);
199
200 return entry;
201}
202
203static void
204nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
205{
206 struct nf_queue_entry *entry, *next;
207
208 spin_lock_bh(&queue->lock);
209 list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
210 if (!cmpfn || cmpfn(entry, data)) {
211 list_del(&entry->list);
212 queue->queue_total--;
213 nf_reinject(entry, NF_DROP);
214 }
215 }
216 spin_unlock_bh(&queue->lock);
217}
218
219static struct sk_buff *
220nfqnl_build_packet_message(struct nfqnl_instance *queue,
221 struct nf_queue_entry *entry,
222 __be32 **packet_id_ptr)
223{
224 sk_buff_data_t old_tail;
225 size_t size;
226 size_t data_len = 0;
227 struct sk_buff *skb;
228 struct nlattr *nla;
229 struct nfqnl_msg_packet_hdr *pmsg;
230 struct nlmsghdr *nlh;
231 struct nfgenmsg *nfmsg;
232 struct sk_buff *entskb = entry->skb;
233 struct net_device *indev;
234 struct net_device *outdev;
235
236 size = NLMSG_SPACE(sizeof(struct nfgenmsg))
237 + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
238 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
239 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
240#ifdef CONFIG_BRIDGE_NETFILTER
241 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
242 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
243#endif
244 + nla_total_size(sizeof(u_int32_t)) /* mark */
245 + nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
246 + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
247
248 outdev = entry->outdev;
249
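	/*
	 * copy_mode decides how much of the packet reaches userspace: NONE
	 * and META carry only metadata, while PACKET appends up to
	 * copy_range bytes of payload (after resolving a pending partial
	 * checksum on the skb).
	 */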
250 switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
251 case NFQNL_COPY_META:
252 case NFQNL_COPY_NONE:
253 break;
254
255 case NFQNL_COPY_PACKET:
256 if (entskb->ip_summed == CHECKSUM_PARTIAL &&
257 skb_checksum_help(entskb))
258 return NULL;
259
260 data_len = ACCESS_ONCE(queue->copy_range);
261 if (data_len == 0 || data_len > entskb->len)
262 data_len = entskb->len;
263
264 size += nla_total_size(data_len);
265 break;
266 }
267
268
269 skb = alloc_skb(size, GFP_ATOMIC);
270 if (!skb)
271 goto nlmsg_failure;
272
273 old_tail = skb->tail;
274 nlh = NLMSG_PUT(skb, 0, 0,
275 NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
276 sizeof(struct nfgenmsg));
277 nfmsg = NLMSG_DATA(nlh);
278 nfmsg->nfgen_family = entry->pf;
279 nfmsg->version = NFNETLINK_V0;
280 nfmsg->res_id = htons(queue->queue_num);
281
282 nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg));
283 pmsg = nla_data(nla);
284 pmsg->hw_protocol = entskb->protocol;
285 pmsg->hook = entry->hook;
286 *packet_id_ptr = &pmsg->packet_id;
287
288 indev = entry->indev;
289 if (indev) {
290#ifndef CONFIG_BRIDGE_NETFILTER
291 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex));
292#else
293 if (entry->pf == PF_BRIDGE) {
294 /* Case 1: indev is physical input device, we need to
295 * look for bridge group (when called from
296 * netfilter_bridge) */
297 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
298 htonl(indev->ifindex));
299 /* this is the bridge group "brX" */
300 /* rcu_read_lock()ed by __nf_queue */
301 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
302 htonl(br_port_get_rcu(indev)->br->dev->ifindex));
303 } else {
304 /* Case 2: indev is bridge group, we need to look for
305 * physical device (when called from ipv4) */
306 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
307 htonl(indev->ifindex));
308 if (entskb->nf_bridge && entskb->nf_bridge->physindev)
309 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
310 htonl(entskb->nf_bridge->physindev->ifindex));
311 }
312#endif
313 }
314
315 if (outdev) {
316#ifndef CONFIG_BRIDGE_NETFILTER
317 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex));
318#else
319 if (entry->pf == PF_BRIDGE) {
320 /* Case 1: outdev is physical output device, we need to
321 * look for bridge group (when called from
322 * netfilter_bridge) */
323 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
324 htonl(outdev->ifindex));
325 /* this is the bridge group "brX" */
326 /* rcu_read_lock()ed by __nf_queue */
327 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
328 htonl(br_port_get_rcu(outdev)->br->dev->ifindex));
329 } else {
330 /* Case 2: outdev is bridge group, we need to look for
331 * physical output device (when called from ipv4) */
332 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
333 htonl(outdev->ifindex));
334 if (entskb->nf_bridge && entskb->nf_bridge->physoutdev)
335 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
336 htonl(entskb->nf_bridge->physoutdev->ifindex));
337 }
338#endif
339 }
340
341 if (entskb->mark)
342 NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark));
343
344 if (indev && entskb->dev &&
345 entskb->mac_header != entskb->network_header) {
346 struct nfqnl_msg_packet_hw phw;
347 int len = dev_parse_header(entskb, phw.hw_addr);
348 if (len) {
349 phw.hw_addrlen = htons(len);
350 NLA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
351 }
352 }
353
354 if (entskb->tstamp.tv64) {
355 struct nfqnl_msg_packet_timestamp ts;
356 struct timeval tv = ktime_to_timeval(entskb->tstamp);
357 ts.sec = cpu_to_be64(tv.tv_sec);
358 ts.usec = cpu_to_be64(tv.tv_usec);
359
360 NLA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts);
361 }
362
363 if (data_len) {
364 struct nlattr *nla;
365 int sz = nla_attr_size(data_len);
366
367 if (skb_tailroom(skb) < nla_total_size(data_len)) {
368 printk(KERN_WARNING "nf_queue: no tailroom!\n");
369 goto nlmsg_failure;
370 }
371
372 nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len));
373 nla->nla_type = NFQA_PAYLOAD;
374 nla->nla_len = sz;
375
376 if (skb_copy_bits(entskb, 0, nla_data(nla), data_len))
377 BUG();
378 }
379
380 nlh->nlmsg_len = skb->tail - old_tail;
381 return skb;
382
383nlmsg_failure:
384nla_put_failure:
385 if (skb)
386 kfree_skb(skb);
387 if (net_ratelimit())
388 printk(KERN_ERR "nf_queue: error creating packet message\n");
389 return NULL;
390}
391
392static int
393nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
394{
395 struct sk_buff *nskb;
396 struct nfqnl_instance *queue;
397 int err = -ENOBUFS;
398 __be32 *packet_id_ptr;
399
400 /* rcu_read_lock()ed by nf_hook_slow() */
401 queue = instance_lookup(queuenum);
402 if (!queue) {
403 err = -ESRCH;
404 goto err_out;
405 }
406
407 if (queue->copy_mode == NFQNL_COPY_NONE) {
408 err = -EINVAL;
409 goto err_out;
410 }
411
412 nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr);
413 if (nskb == NULL) {
414 err = -ENOMEM;
415 goto err_out;
416 }
417 spin_lock_bh(&queue->lock);
418
419 if (!queue->peer_pid) {
420 err = -EINVAL;
421 goto err_out_free_nskb;
422 }
423 if (queue->queue_total >= queue->queue_maxlen) {
424 queue->queue_dropped++;
425 if (net_ratelimit())
426 printk(KERN_WARNING "nf_queue: full at %d entries, "
427 				"dropping packet(s).\n",
428 queue->queue_total);
429 goto err_out_free_nskb;
430 }
431 entry->id = ++queue->id_sequence;
432 *packet_id_ptr = htonl(entry->id);
433
434 /* nfnetlink_unicast will either free the nskb or add it to a socket */
435 err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT);
436 if (err < 0) {
437 queue->queue_user_dropped++;
438 goto err_out_unlock;
439 }
440
441 __enqueue_entry(queue, entry);
442
443 spin_unlock_bh(&queue->lock);
444 return 0;
445
446err_out_free_nskb:
447 kfree_skb(nskb);
448err_out_unlock:
449 spin_unlock_bh(&queue->lock);
450err_out:
451 return err;
452}
453
454static int
455nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e)
456{
457 struct sk_buff *nskb;
458 int diff;
459
460 diff = data_len - e->skb->len;
461 if (diff < 0) {
462 if (pskb_trim(e->skb, data_len))
463 return -ENOMEM;
464 } else if (diff > 0) {
465 if (data_len > 0xFFFF)
466 return -EINVAL;
467 if (diff > skb_tailroom(e->skb)) {
468 nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
469 diff, GFP_ATOMIC);
470 if (!nskb) {
471 printk(KERN_WARNING "nf_queue: OOM "
472 "in mangle, dropping packet\n");
473 return -ENOMEM;
474 }
475 kfree_skb(e->skb);
476 e->skb = nskb;
477 }
478 skb_put(e->skb, diff);
479 }
480 if (!skb_make_writable(e->skb, data_len))
481 return -ENOMEM;
482 skb_copy_to_linear_data(e->skb, data, data_len);
483 e->skb->ip_summed = CHECKSUM_NONE;
484 return 0;
485}
486
487static int
488nfqnl_set_mode(struct nfqnl_instance *queue,
489 unsigned char mode, unsigned int range)
490{
491 int status = 0;
492
493 spin_lock_bh(&queue->lock);
494 switch (mode) {
495 case NFQNL_COPY_NONE:
496 case NFQNL_COPY_META:
497 queue->copy_mode = mode;
498 queue->copy_range = 0;
499 break;
500
501 case NFQNL_COPY_PACKET:
502 queue->copy_mode = mode;
503 /* we're using struct nlattr which has 16bit nla_len */
504 if (range > 0xffff)
505 queue->copy_range = 0xffff;
506 else
507 queue->copy_range = range;
508 break;
509
510 default:
511 status = -EINVAL;
512
513 }
514 spin_unlock_bh(&queue->lock);
515
516 return status;
517}
518
519static int
520dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
521{
522 if (entry->indev)
523 if (entry->indev->ifindex == ifindex)
524 return 1;
525 if (entry->outdev)
526 if (entry->outdev->ifindex == ifindex)
527 return 1;
528#ifdef CONFIG_BRIDGE_NETFILTER
529 if (entry->skb->nf_bridge) {
530 if (entry->skb->nf_bridge->physindev &&
531 entry->skb->nf_bridge->physindev->ifindex == ifindex)
532 return 1;
533 if (entry->skb->nf_bridge->physoutdev &&
534 entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
535 return 1;
536 }
537#endif
538 return 0;
539}
540
541/* drop all packets with either indev or outdev == ifindex from all queue
542 * instances */
543static void
544nfqnl_dev_drop(int ifindex)
545{
546 int i;
547
548 rcu_read_lock();
549
550 for (i = 0; i < INSTANCE_BUCKETS; i++) {
551 struct hlist_node *tmp;
552 struct nfqnl_instance *inst;
553 struct hlist_head *head = &instance_table[i];
554
555 hlist_for_each_entry_rcu(inst, tmp, head, hlist)
556 nfqnl_flush(inst, dev_cmp, ifindex);
557 }
558
559 rcu_read_unlock();
560}
561
562#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
563
564static int
565nfqnl_rcv_dev_event(struct notifier_block *this,
566 unsigned long event, void *ptr)
567{
568 struct net_device *dev = ptr;
569
570 if (!net_eq(dev_net(dev), &init_net))
571 return NOTIFY_DONE;
572
573 /* Drop any packets associated with the downed device */
574 if (event == NETDEV_DOWN)
575 nfqnl_dev_drop(dev->ifindex);
576 return NOTIFY_DONE;
577}
578
579static struct notifier_block nfqnl_dev_notifier = {
580 .notifier_call = nfqnl_rcv_dev_event,
581};
582
583static int
584nfqnl_rcv_nl_event(struct notifier_block *this,
585 unsigned long event, void *ptr)
586{
587 struct netlink_notify *n = ptr;
588
589 if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
590 int i;
591
592 /* destroy all instances for this pid */
593 spin_lock(&instances_lock);
594 for (i = 0; i < INSTANCE_BUCKETS; i++) {
595 struct hlist_node *tmp, *t2;
596 struct nfqnl_instance *inst;
597 struct hlist_head *head = &instance_table[i];
598
599 hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
600 if ((n->net == &init_net) &&
601 (n->pid == inst->peer_pid))
602 __instance_destroy(inst);
603 }
604 }
605 spin_unlock(&instances_lock);
606 }
607 return NOTIFY_DONE;
608}
609
610static struct notifier_block nfqnl_rtnl_notifier = {
611 .notifier_call = nfqnl_rcv_nl_event,
612};
613
614static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
615 [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
616 [NFQA_MARK] = { .type = NLA_U32 },
617 [NFQA_PAYLOAD] = { .type = NLA_UNSPEC },
618};
619
620static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
621 [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
622 [NFQA_MARK] = { .type = NLA_U32 },
623};
624
625static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid)
626{
627 struct nfqnl_instance *queue;
628
629 queue = instance_lookup(queue_num);
630 if (!queue)
631 return ERR_PTR(-ENODEV);
632
633 if (queue->peer_pid != nlpid)
634 return ERR_PTR(-EPERM);
635
636 return queue;
637}
638
639static struct nfqnl_msg_verdict_hdr*
640verdicthdr_get(const struct nlattr * const nfqa[])
641{
642 struct nfqnl_msg_verdict_hdr *vhdr;
643 unsigned int verdict;
644
645 if (!nfqa[NFQA_VERDICT_HDR])
646 return NULL;
647
648 vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
649 verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK;
650 if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN)
651 return NULL;
652 return vhdr;
653}
654
655static int nfq_id_after(unsigned int id, unsigned int max)
656{
657 return (int)(id - max) > 0;
658}
659
660static int
661nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
662 const struct nlmsghdr *nlh,
663 const struct nlattr * const nfqa[])
664{
665 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
666 struct nf_queue_entry *entry, *tmp;
667 unsigned int verdict, maxid;
668 struct nfqnl_msg_verdict_hdr *vhdr;
669 struct nfqnl_instance *queue;
670 LIST_HEAD(batch_list);
671 u16 queue_num = ntohs(nfmsg->res_id);
672
673 queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid);
674 if (IS_ERR(queue))
675 return PTR_ERR(queue);
676
677 vhdr = verdicthdr_get(nfqa);
678 if (!vhdr)
679 return -EINVAL;
680
681 verdict = ntohl(vhdr->verdict);
682 maxid = ntohl(vhdr->id);
683
684 spin_lock_bh(&queue->lock);
685
686 list_for_each_entry_safe(entry, tmp, &queue->queue_list, list) {
687 if (nfq_id_after(entry->id, maxid))
688 break;
689 __dequeue_entry(queue, entry);
690 list_add_tail(&entry->list, &batch_list);
691 }
692
693 spin_unlock_bh(&queue->lock);
694
695 if (list_empty(&batch_list))
696 return -ENOENT;
697
698 list_for_each_entry_safe(entry, tmp, &batch_list, list) {
699 if (nfqa[NFQA_MARK])
700 entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
701 nf_reinject(entry, verdict);
702 }
703 return 0;
704}
705
706static int
707nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
708 const struct nlmsghdr *nlh,
709 const struct nlattr * const nfqa[])
710{
711 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
712 u_int16_t queue_num = ntohs(nfmsg->res_id);
713
714 struct nfqnl_msg_verdict_hdr *vhdr;
715 struct nfqnl_instance *queue;
716 unsigned int verdict;
717 struct nf_queue_entry *entry;
718
719 queue = instance_lookup(queue_num);
720 	if (!queue)
721 		queue = verdict_instance_lookup(queue_num,
722 						NETLINK_CB(skb).pid);
723 if (IS_ERR(queue))
724 return PTR_ERR(queue);
725
726 vhdr = verdicthdr_get(nfqa);
727 if (!vhdr)
728 return -EINVAL;
729
730 verdict = ntohl(vhdr->verdict);
731
732 entry = find_dequeue_entry(queue, ntohl(vhdr->id));
733 if (entry == NULL)
734 return -ENOENT;
735
736 if (nfqa[NFQA_PAYLOAD]) {
737 if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]),
738 nla_len(nfqa[NFQA_PAYLOAD]), entry) < 0)
739 verdict = NF_DROP;
740 }
741
742 if (nfqa[NFQA_MARK])
743 entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
744
745 nf_reinject(entry, verdict);
746 return 0;
747}
748
749static int
750nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
751 const struct nlmsghdr *nlh,
752 const struct nlattr * const nfqa[])
753{
754 return -ENOTSUPP;
755}
756
757static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
758 [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) },
759 [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) },
760};
761
762static const struct nf_queue_handler nfqh = {
763 .name = "nf_queue",
764 .outfn = &nfqnl_enqueue_packet,
765};
766
767static int
768nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
769 const struct nlmsghdr *nlh,
770 const struct nlattr * const nfqa[])
771{
772 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
773 u_int16_t queue_num = ntohs(nfmsg->res_id);
774 struct nfqnl_instance *queue;
775 struct nfqnl_msg_config_cmd *cmd = NULL;
776 int ret = 0;
777
778 if (nfqa[NFQA_CFG_CMD]) {
779 cmd = nla_data(nfqa[NFQA_CFG_CMD]);
780
781 /* Commands without queue context - might sleep */
782 switch (cmd->command) {
783 case NFQNL_CFG_CMD_PF_BIND:
784 return nf_register_queue_handler(ntohs(cmd->pf),
785 &nfqh);
786 case NFQNL_CFG_CMD_PF_UNBIND:
787 return nf_unregister_queue_handler(ntohs(cmd->pf),
788 &nfqh);
789 }
790 }
791
792 rcu_read_lock();
793 queue = instance_lookup(queue_num);
794 if (queue && queue->peer_pid != NETLINK_CB(skb).pid) {
795 ret = -EPERM;
796 goto err_out_unlock;
797 }
798
799 if (cmd != NULL) {
800 switch (cmd->command) {
801 case NFQNL_CFG_CMD_BIND:
802 if (queue) {
803 ret = -EBUSY;
804 goto err_out_unlock;
805 }
806 queue = instance_create(queue_num, NETLINK_CB(skb).pid);
807 if (IS_ERR(queue)) {
808 ret = PTR_ERR(queue);
809 goto err_out_unlock;
810 }
811 break;
812 case NFQNL_CFG_CMD_UNBIND:
813 if (!queue) {
814 ret = -ENODEV;
815 goto err_out_unlock;
816 }
817 instance_destroy(queue);
818 break;
819 case NFQNL_CFG_CMD_PF_BIND:
820 case NFQNL_CFG_CMD_PF_UNBIND:
821 break;
822 default:
823 ret = -ENOTSUPP;
824 break;
825 }
826 }
827
828 if (nfqa[NFQA_CFG_PARAMS]) {
829 struct nfqnl_msg_config_params *params;
830
831 if (!queue) {
832 ret = -ENODEV;
833 goto err_out_unlock;
834 }
835 params = nla_data(nfqa[NFQA_CFG_PARAMS]);
836 nfqnl_set_mode(queue, params->copy_mode,
837 ntohl(params->copy_range));
838 }
839
840 if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) {
841 __be32 *queue_maxlen;
842
843 if (!queue) {
844 ret = -ENODEV;
845 goto err_out_unlock;
846 }
847 queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
848 spin_lock_bh(&queue->lock);
849 queue->queue_maxlen = ntohl(*queue_maxlen);
850 spin_unlock_bh(&queue->lock);
851 }
852
853err_out_unlock:
854 rcu_read_unlock();
855 return ret;
856}
857
858static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
859 [NFQNL_MSG_PACKET] = { .call_rcu = nfqnl_recv_unsupp,
860 .attr_count = NFQA_MAX, },
861 [NFQNL_MSG_VERDICT] = { .call_rcu = nfqnl_recv_verdict,
862 .attr_count = NFQA_MAX,
863 .policy = nfqa_verdict_policy },
864 [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config,
865 .attr_count = NFQA_CFG_MAX,
866 .policy = nfqa_cfg_policy },
867 [NFQNL_MSG_VERDICT_BATCH]={ .call_rcu = nfqnl_recv_verdict_batch,
868 .attr_count = NFQA_MAX,
869 .policy = nfqa_verdict_batch_policy },
870};
871
872static const struct nfnetlink_subsystem nfqnl_subsys = {
873 .name = "nf_queue",
874 .subsys_id = NFNL_SUBSYS_QUEUE,
875 .cb_count = NFQNL_MSG_MAX,
876 .cb = nfqnl_cb,
877};
878
879#ifdef CONFIG_PROC_FS
880struct iter_state {
881 unsigned int bucket;
882};
883
884static struct hlist_node *get_first(struct seq_file *seq)
885{
886 struct iter_state *st = seq->private;
887
888 if (!st)
889 return NULL;
890
891 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
892 if (!hlist_empty(&instance_table[st->bucket]))
893 return instance_table[st->bucket].first;
894 }
895 return NULL;
896}
897
898static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
899{
900 struct iter_state *st = seq->private;
901
902 h = h->next;
903 while (!h) {
904 if (++st->bucket >= INSTANCE_BUCKETS)
905 return NULL;
906
907 h = instance_table[st->bucket].first;
908 }
909 return h;
910}
911
912static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
913{
914 struct hlist_node *head;
915 head = get_first(seq);
916
917 if (head)
918 while (pos && (head = get_next(seq, head)))
919 pos--;
920 return pos ? NULL : head;
921}
922
923static void *seq_start(struct seq_file *seq, loff_t *pos)
924 __acquires(instances_lock)
925{
926 spin_lock(&instances_lock);
927 return get_idx(seq, *pos);
928}
929
930static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
931{
932 (*pos)++;
933 return get_next(s, v);
934}
935
936static void seq_stop(struct seq_file *s, void *v)
937 __releases(instances_lock)
938{
939 spin_unlock(&instances_lock);
940}
941
942static int seq_show(struct seq_file *s, void *v)
943{
944 const struct nfqnl_instance *inst = v;
945
946 return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n",
947 inst->queue_num,
948 inst->peer_pid, inst->queue_total,
949 inst->copy_mode, inst->copy_range,
950 inst->queue_dropped, inst->queue_user_dropped,
951 inst->id_sequence, 1);
952}
953
954static const struct seq_operations nfqnl_seq_ops = {
955 .start = seq_start,
956 .next = seq_next,
957 .stop = seq_stop,
958 .show = seq_show,
959};
960
961static int nfqnl_open(struct inode *inode, struct file *file)
962{
963 return seq_open_private(file, &nfqnl_seq_ops,
964 sizeof(struct iter_state));
965}
966
967static const struct file_operations nfqnl_file_ops = {
968 .owner = THIS_MODULE,
969 .open = nfqnl_open,
970 .read = seq_read,
971 .llseek = seq_lseek,
972 .release = seq_release_private,
973};
974
975#endif /* PROC_FS */
976
977static int __init nfnetlink_queue_init(void)
978{
979 int i, status = -ENOMEM;
980
981 for (i = 0; i < INSTANCE_BUCKETS; i++)
982 INIT_HLIST_HEAD(&instance_table[i]);
983
984 netlink_register_notifier(&nfqnl_rtnl_notifier);
985 status = nfnetlink_subsys_register(&nfqnl_subsys);
986 if (status < 0) {
987 printk(KERN_ERR "nf_queue: failed to create netlink socket\n");
988 goto cleanup_netlink_notifier;
989 }
990
991#ifdef CONFIG_PROC_FS
992 if (!proc_create("nfnetlink_queue", 0440,
993 proc_net_netfilter, &nfqnl_file_ops))
994 goto cleanup_subsys;
995#endif
996
997 register_netdevice_notifier(&nfqnl_dev_notifier);
998 return status;
999
1000#ifdef CONFIG_PROC_FS
1001cleanup_subsys:
1002 nfnetlink_subsys_unregister(&nfqnl_subsys);
1003#endif
1004cleanup_netlink_notifier:
1005 netlink_unregister_notifier(&nfqnl_rtnl_notifier);
1006 return status;
1007}
1008
1009static void __exit nfnetlink_queue_fini(void)
1010{
1011 nf_unregister_queue_handlers(&nfqh);
1012 unregister_netdevice_notifier(&nfqnl_dev_notifier);
1013#ifdef CONFIG_PROC_FS
1014 remove_proc_entry("nfnetlink_queue", proc_net_netfilter);
1015#endif
1016 nfnetlink_subsys_unregister(&nfqnl_subsys);
1017 netlink_unregister_notifier(&nfqnl_rtnl_notifier);
1018
1019 rcu_barrier(); /* Wait for completion of call_rcu()'s */
1020}
1021
1022MODULE_DESCRIPTION("netfilter packet queue handler");
1023MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
1024MODULE_LICENSE("GPL");
1025MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE);
1026
1027module_init(nfnetlink_queue_init);
1028module_exit(nfnetlink_queue_fini);
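
Packets reach this queue handler via an NFQUEUE rule, e.g. iptables -A INPUT -j NFQUEUE --queue-num 0, and verdicts come back through the NFQNL_MSG_VERDICT path implemented by nfqnl_recv_verdict() above. A minimal userspace sketch of that round trip, assuming libnetfilter_queue is available; the queue number 0 and the blanket NF_ACCEPT verdict are illustrative choices, not part of this module:

#include <stdint.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/netfilter.h>		/* NF_ACCEPT */
#include <libnetfilter_queue/libnetfilter_queue.h>

/* Called once per queued packet; the returned verdict is what ends up
 * in nfqnl_recv_verdict() above. */
static int cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
	      struct nfq_data *nfa, void *data)
{
	struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa);
	uint32_t id = ph ? ntohl(ph->packet_id) : 0;

	return nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL);
}

int main(void)
{
	struct nfq_handle *h = nfq_open();
	struct nfq_q_handle *qh;
	char buf[4096];
	int fd, rv;

	if (!h)
		exit(1);
	/* Bind to AF_INET and create queue 0 in NFQNL_COPY_PACKET mode,
	 * exercising the NFQNL_CFG_CMD_BIND / NFQA_CFG_PARAMS config path. */
	nfq_unbind_pf(h, AF_INET);
	nfq_bind_pf(h, AF_INET);
	qh = nfq_create_queue(h, 0, &cb, NULL);
	if (!qh)
		exit(1);
	nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff);

	fd = nfq_fd(h);
	while ((rv = recv(fd, buf, sizeof(buf), 0)) >= 0)
		nfq_handle_packet(h, buf, rv);	/* dispatches to cb() */

	nfq_destroy_queue(qh);
	nfq_close(h);
	return 0;
}
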
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
new file mode 100644
index 00000000000..9d782181b6c
--- /dev/null
+++ b/net/netfilter/xt_NOTRACK.c
@@ -0,0 +1,53 @@
1/* This is a module which is used for setting up fake conntracks
2 * on packets so that they are not seen by the conntrack/NAT code.
3 */
4#include <linux/module.h>
5#include <linux/skbuff.h>
6
7#include <linux/netfilter/x_tables.h>
8#include <net/netfilter/nf_conntrack.h>
9
10MODULE_DESCRIPTION("Xtables: Disabling connection tracking for packets");
11MODULE_LICENSE("GPL");
12MODULE_ALIAS("ipt_NOTRACK");
13MODULE_ALIAS("ip6t_NOTRACK");
14
15static unsigned int
16notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
17{
18 /* Previously seen (loopback)? Ignore. */
19 if (skb->nfct != NULL)
20 return XT_CONTINUE;
21
22 /* Attach fake conntrack entry.
23 	   If there is a real ct entry corresponding to this packet,
24 	   it'll hang around till timing out. We don't deal with it
25 for performance reasons. JK */
26 skb->nfct = &nf_ct_untracked_get()->ct_general;
27 skb->nfctinfo = IP_CT_NEW;
28 nf_conntrack_get(skb->nfct);
29
30 return XT_CONTINUE;
31}
32
33static struct xt_target notrack_tg_reg __read_mostly = {
34 .name = "NOTRACK",
35 .revision = 0,
36 .family = NFPROTO_UNSPEC,
37 .target = notrack_tg,
38 .table = "raw",
39 .me = THIS_MODULE,
40};
41
42static int __init notrack_tg_init(void)
43{
44 return xt_register_target(&notrack_tg_reg);
45}
46
47static void __exit notrack_tg_exit(void)
48{
49 xt_unregister_target(&notrack_tg_reg);
50}
51
52module_init(notrack_tg_init);
53module_exit(notrack_tg_exit);
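
As the .table = "raw" restriction above implies, the NOTRACK target can only be used from the raw table, i.e. before connection tracking has seen the packet. A typical illustrative rule that exempts DNS queries from conntrack would be: iptables -t raw -A PREROUTING -p udp --dport 53 -j NOTRACK (the protocol, port and chain here are examples, not requirements of the target).
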
diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c
new file mode 100644
index 00000000000..08086d680c2
--- /dev/null
+++ b/net/netfilter/xt_qtaguid.c
@@ -0,0 +1,2785 @@
1/*
2 * Kernel iptables module to track stats for packets based on user tags.
3 *
4 * (C) 2011 Google, Inc
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11/*
12 * There are run-time debug flags enabled via the debug_mask module param, or
13 * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14 */
15#define DEBUG
16
17#include <linux/file.h>
18#include <linux/inetdevice.h>
19#include <linux/module.h>
20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_qtaguid.h>
22#include <linux/skbuff.h>
23#include <linux/workqueue.h>
24#include <net/addrconf.h>
25#include <net/sock.h>
26#include <net/tcp.h>
27#include <net/udp.h>
28
29#include <linux/netfilter/xt_socket.h>
30#include "xt_qtaguid_internal.h"
31#include "xt_qtaguid_print.h"
32
33/*
34 * We only use the xt_socket funcs within a similar context to avoid unexpected
35 * return values.
36 */
37#define XT_SOCKET_SUPPORTED_HOOKS \
38 ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
39
40
41static const char *module_procdirname = "xt_qtaguid";
42static struct proc_dir_entry *xt_qtaguid_procdir;
43
44static unsigned int proc_iface_perms = S_IRUGO;
45module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
46
47static struct proc_dir_entry *xt_qtaguid_stats_file;
48static unsigned int proc_stats_perms = S_IRUGO;
49module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
50
51static struct proc_dir_entry *xt_qtaguid_ctrl_file;
52#ifdef CONFIG_ANDROID_PARANOID_NETWORK
53static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
54#else
55static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR;
56#endif
57module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
58
59#ifdef CONFIG_ANDROID_PARANOID_NETWORK
60#include <linux/android_aid.h>
61static gid_t proc_stats_readall_gid = AID_NET_BW_STATS;
62static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT;
63#else
64/* 0 means, don't limit anybody */
65static gid_t proc_stats_readall_gid;
66static gid_t proc_ctrl_write_gid;
67#endif
68module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,
69 S_IRUGO | S_IWUSR);
70module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,
71 S_IRUGO | S_IWUSR);
72
73/*
74 * Limit the number of active tags (via socket tags) for a given UID.
75 * Multiple processes could share the UID.
76 */
77static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
78module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
79
80/*
81 * After the kernel has initialized this module, it is still possible
82 * to make it passive.
83 * Setting passive to Y:
84 * - the iface stats handling will not act on notifications.
85 * - iptables matches will never match.
86 * - ctrl commands silently succeed.
87 * - stats are always empty.
88 * This is mostly useful when a bug is suspected.
89 */
90static bool module_passive;
91module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
92
93/*
94 * Control how qtaguid data is tracked per proc/uid.
95 * Setting tag_tracking_passive to Y:
96 * - don't create proc specific structs to track tags
97 * - don't check whether active tag stats exceed limits.
98 * - don't clean up socket tags on process exits.
99 * This is mostly useful when a bug is suspected.
100 */
101static bool qtu_proc_handling_passive;
102module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
103 S_IRUGO | S_IWUSR);
104
105#define QTU_DEV_NAME "xt_qtaguid"
106
107uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
108module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
109
110/*---------------------------------------------------------------------------*/
111static const char *iface_stat_procdirname = "iface_stat";
112static struct proc_dir_entry *iface_stat_procdir;
113static const char *iface_stat_all_procfilename = "iface_stat_all";
114static struct proc_dir_entry *iface_stat_all_procfile;
115
116/*
117 * Ordering of locks:
118 * outer locks:
119 * iface_stat_list_lock
120 * sock_tag_list_lock
121 * inner locks:
122 * uid_tag_data_tree_lock
123 * tag_counter_set_list_lock
124 * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
125 * is acquired.
126 *
127 * Call tree with all lock holders as of 2011-09-25:
128 *
129 * iface_stat_all_proc_read()
130 * iface_stat_list_lock
131 * (struct iface_stat)
132 *
133 * qtaguid_ctrl_proc_read()
134 * sock_tag_list_lock
135 * (sock_tag_tree)
136 * (struct proc_qtu_data->sock_tag_list)
137 * prdebug_full_state()
138 * sock_tag_list_lock
139 * (sock_tag_tree)
140 * uid_tag_data_tree_lock
141 * (uid_tag_data_tree)
142 * (proc_qtu_data_tree)
143 * iface_stat_list_lock
144 *
145 * qtaguid_stats_proc_read()
146 * iface_stat_list_lock
147 * struct iface_stat->tag_stat_list_lock
148 *
149 * qtudev_open()
150 * uid_tag_data_tree_lock
151 *
152 * qtudev_release()
153 * sock_tag_data_list_lock
154 * uid_tag_data_tree_lock
155 * prdebug_full_state()
156 * sock_tag_list_lock
157 * uid_tag_data_tree_lock
158 * iface_stat_list_lock
159 *
160 * iface_netdev_event_handler()
161 * iface_stat_create()
162 * iface_stat_list_lock
163 * iface_stat_update()
164 * iface_stat_list_lock
165 *
166 * iface_inetaddr_event_handler()
167 * iface_stat_create()
168 * iface_stat_list_lock
169 * iface_stat_update()
170 * iface_stat_list_lock
171 *
172 * iface_inet6addr_event_handler()
173 * iface_stat_create_ipv6()
174 * iface_stat_list_lock
175 * iface_stat_update()
176 * iface_stat_list_lock
177 *
178 * qtaguid_mt()
179 * account_for_uid()
180 * if_tag_stat_update()
181 * get_sock_stat()
182 * sock_tag_list_lock
183 * struct iface_stat->tag_stat_list_lock
184 * tag_stat_update()
185 * get_active_counter_set()
186 * tag_counter_set_list_lock
187 * tag_stat_update()
188 * get_active_counter_set()
189 * tag_counter_set_list_lock
190 *
191 *
192 * qtaguid_ctrl_parse()
193 * ctrl_cmd_delete()
194 * sock_tag_list_lock
195 * tag_counter_set_list_lock
196 * iface_stat_list_lock
197 * struct iface_stat->tag_stat_list_lock
198 * uid_tag_data_tree_lock
199 * ctrl_cmd_counter_set()
200 * tag_counter_set_list_lock
201 * ctrl_cmd_tag()
202 * sock_tag_list_lock
203 * (sock_tag_tree)
204 * get_tag_ref()
205 * uid_tag_data_tree_lock
206 * (uid_tag_data_tree)
207 * uid_tag_data_tree_lock
208 * (proc_qtu_data_tree)
209 * ctrl_cmd_untag()
210 * sock_tag_list_lock
211 * uid_tag_data_tree_lock
212 *
213 */
214static LIST_HEAD(iface_stat_list);
215static DEFINE_SPINLOCK(iface_stat_list_lock);
216
217static struct rb_root sock_tag_tree = RB_ROOT;
218static DEFINE_SPINLOCK(sock_tag_list_lock);
219
220static struct rb_root tag_counter_set_tree = RB_ROOT;
221static DEFINE_SPINLOCK(tag_counter_set_list_lock);
222
223static struct rb_root uid_tag_data_tree = RB_ROOT;
224static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
225
226static struct rb_root proc_qtu_data_tree = RB_ROOT;
227/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
228
229static struct qtaguid_event_counts qtu_events;
230/*----------------------------------------------*/
231static bool can_manipulate_uids(void)
232{
233 /* root pwnd */
234 return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid)
235 || in_egroup_p(proc_ctrl_write_gid);
236}
237
238static bool can_impersonate_uid(uid_t uid)
239{
240 return uid == current_fsuid() || can_manipulate_uids();
241}
242
243static bool can_read_other_uid_stats(uid_t uid)
244{
245 /* root pwnd */
246 return unlikely(!current_fsuid()) || uid == current_fsuid()
247 || unlikely(!proc_stats_readall_gid)
248 || in_egroup_p(proc_stats_readall_gid);
249}
250
251static inline void dc_add_byte_packets(struct data_counters *counters, int set,
252 enum ifs_tx_rx direction,
253 enum ifs_proto ifs_proto,
254 int bytes,
255 int packets)
256{
257 counters->bpc[set][direction][ifs_proto].bytes += bytes;
258 counters->bpc[set][direction][ifs_proto].packets += packets;
259}
260
261static inline uint64_t dc_sum_bytes(struct data_counters *counters,
262 int set,
263 enum ifs_tx_rx direction)
264{
265 return counters->bpc[set][direction][IFS_TCP].bytes
266 + counters->bpc[set][direction][IFS_UDP].bytes
267 + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
268}
269
270static inline uint64_t dc_sum_packets(struct data_counters *counters,
271 int set,
272 enum ifs_tx_rx direction)
273{
274 return counters->bpc[set][direction][IFS_TCP].packets
275 + counters->bpc[set][direction][IFS_UDP].packets
276 + counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
277}
278
279static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
280{
281 struct rb_node *node = root->rb_node;
282
283 while (node) {
284 struct tag_node *data = rb_entry(node, struct tag_node, node);
285 int result;
286 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
287 " node=%p data=%p\n", tag, node, data);
288 result = tag_compare(tag, data->tag);
289 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
290 " data.tag=0x%llx (uid=%u) res=%d\n",
291 tag, data->tag, get_uid_from_tag(data->tag), result);
292 if (result < 0)
293 node = node->rb_left;
294 else if (result > 0)
295 node = node->rb_right;
296 else
297 return data;
298 }
299 return NULL;
300}
301
302static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
303{
304 struct rb_node **new = &(root->rb_node), *parent = NULL;
305
306 /* Figure out where to put new node */
307 while (*new) {
308 struct tag_node *this = rb_entry(*new, struct tag_node,
309 node);
310 int result = tag_compare(data->tag, this->tag);
311 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
312 " (uid=%u)\n", __func__,
313 this->tag,
314 get_uid_from_tag(this->tag));
315 parent = *new;
316 if (result < 0)
317 new = &((*new)->rb_left);
318 else if (result > 0)
319 new = &((*new)->rb_right);
320 else
321 BUG();
322 }
323
324 /* Add new node and rebalance tree. */
325 rb_link_node(&data->node, parent, new);
326 rb_insert_color(&data->node, root);
327}
328
329static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
330{
331 tag_node_tree_insert(&data->tn, root);
332}
333
334static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
335{
336 struct tag_node *node = tag_node_tree_search(root, tag);
337 if (!node)
338 return NULL;
339 return rb_entry(&node->node, struct tag_stat, tn.node);
340}
341
342static void tag_counter_set_tree_insert(struct tag_counter_set *data,
343 struct rb_root *root)
344{
345 tag_node_tree_insert(&data->tn, root);
346}
347
348static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
349 tag_t tag)
350{
351 struct tag_node *node = tag_node_tree_search(root, tag);
352 if (!node)
353 return NULL;
354 return rb_entry(&node->node, struct tag_counter_set, tn.node);
355
356}
357
358static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
359{
360 tag_node_tree_insert(&data->tn, root);
361}
362
363static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
364{
365 struct tag_node *node = tag_node_tree_search(root, tag);
366 if (!node)
367 return NULL;
368 return rb_entry(&node->node, struct tag_ref, tn.node);
369}
370
371static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
372 const struct sock *sk)
373{
374 struct rb_node *node = root->rb_node;
375
376 while (node) {
377 struct sock_tag *data = rb_entry(node, struct sock_tag,
378 sock_node);
379 if (sk < data->sk)
380 node = node->rb_left;
381 else if (sk > data->sk)
382 node = node->rb_right;
383 else
384 return data;
385 }
386 return NULL;
387}
388
389static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
390{
391 struct rb_node **new = &(root->rb_node), *parent = NULL;
392
393 /* Figure out where to put new node */
394 while (*new) {
395 struct sock_tag *this = rb_entry(*new, struct sock_tag,
396 sock_node);
397 parent = *new;
398 if (data->sk < this->sk)
399 new = &((*new)->rb_left);
400 else if (data->sk > this->sk)
401 new = &((*new)->rb_right);
402 else
403 BUG();
404 }
405
406 /* Add new node and rebalance tree. */
407 rb_link_node(&data->sock_node, parent, new);
408 rb_insert_color(&data->sock_node, root);
409}
410
411static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
412{
413 struct rb_node *node;
414 struct sock_tag *st_entry;
415
416 node = rb_first(st_to_free_tree);
417 while (node) {
418 st_entry = rb_entry(node, struct sock_tag, sock_node);
419 node = rb_next(node);
420 CT_DEBUG("qtaguid: %s(): "
421 "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
422 st_entry->sk,
423 st_entry->tag,
424 get_uid_from_tag(st_entry->tag));
425 rb_erase(&st_entry->sock_node, st_to_free_tree);
426 sockfd_put(st_entry->socket);
427 kfree(st_entry);
428 }
429}
430
431static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
432 const pid_t pid)
433{
434 struct rb_node *node = root->rb_node;
435
436 while (node) {
437 struct proc_qtu_data *data = rb_entry(node,
438 struct proc_qtu_data,
439 node);
440 if (pid < data->pid)
441 node = node->rb_left;
442 else if (pid > data->pid)
443 node = node->rb_right;
444 else
445 return data;
446 }
447 return NULL;
448}
449
450static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
451 struct rb_root *root)
452{
453 struct rb_node **new = &(root->rb_node), *parent = NULL;
454
455 /* Figure out where to put new node */
456 while (*new) {
457 struct proc_qtu_data *this = rb_entry(*new,
458 struct proc_qtu_data,
459 node);
460 parent = *new;
461 if (data->pid < this->pid)
462 new = &((*new)->rb_left);
463 else if (data->pid > this->pid)
464 new = &((*new)->rb_right);
465 else
466 BUG();
467 }
468
469 /* Add new node and rebalance tree. */
470 rb_link_node(&data->node, parent, new);
471 rb_insert_color(&data->node, root);
472}
473
474static void uid_tag_data_tree_insert(struct uid_tag_data *data,
475 struct rb_root *root)
476{
477 struct rb_node **new = &(root->rb_node), *parent = NULL;
478
479 /* Figure out where to put new node */
480 while (*new) {
481 struct uid_tag_data *this = rb_entry(*new,
482 struct uid_tag_data,
483 node);
484 parent = *new;
485 if (data->uid < this->uid)
486 new = &((*new)->rb_left);
487 else if (data->uid > this->uid)
488 new = &((*new)->rb_right);
489 else
490 BUG();
491 }
492
493 /* Add new node and rebalance tree. */
494 rb_link_node(&data->node, parent, new);
495 rb_insert_color(&data->node, root);
496}
497
498static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
499 uid_t uid)
500{
501 struct rb_node *node = root->rb_node;
502
503 while (node) {
504 struct uid_tag_data *data = rb_entry(node,
505 struct uid_tag_data,
506 node);
507 if (uid < data->uid)
508 node = node->rb_left;
509 else if (uid > data->uid)
510 node = node->rb_right;
511 else
512 return data;
513 }
514 return NULL;
515}
516
517/*
518 * Allocates a new uid_tag_data struct if needed.
519 * Returns a pointer to the found or allocated uid_tag_data.
520 * Returns a PTR_ERR on failures, and lock is not held.
521 * If found_res is not NULL:
522 * sets *found_res to true if an existing entry was found (nothing allocated),
523 * sets *found_res to false if a new entry had to be allocated.
524 */
525struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
526{
527 struct uid_tag_data *utd_entry;
528
529 /* Look for top level uid_tag_data for the UID */
530 utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
531 DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
532
533 if (found_res)
534 *found_res = utd_entry;
535 if (utd_entry)
536 return utd_entry;
537
538 utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
539 if (!utd_entry) {
540 pr_err("qtaguid: get_uid_data(%u): "
541 "tag data alloc failed\n", uid);
542 return ERR_PTR(-ENOMEM);
543 }
544
545 utd_entry->uid = uid;
546 utd_entry->tag_ref_tree = RB_ROOT;
547 uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
548 DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
549 return utd_entry;
550}
551
552/* Never returns NULL. Either PTR_ERR or a valid ptr. */
553static struct tag_ref *new_tag_ref(tag_t new_tag,
554 struct uid_tag_data *utd_entry)
555{
556 struct tag_ref *tr_entry;
557 int res;
558
559 if (utd_entry->num_active_tags + 1 > max_sock_tags) {
560 pr_info("qtaguid: new_tag_ref(0x%llx): "
561 "tag ref alloc quota exceeded. max=%d\n",
562 new_tag, max_sock_tags);
563 res = -EMFILE;
564 goto err_res;
565
566 }
567
568 tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
569 if (!tr_entry) {
570 pr_err("qtaguid: new_tag_ref(0x%llx): "
571 "tag ref alloc failed\n",
572 new_tag);
573 res = -ENOMEM;
574 goto err_res;
575 }
576 tr_entry->tn.tag = new_tag;
577 /* tr_entry->num_sock_tags handled by caller */
578 utd_entry->num_active_tags++;
579 tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
580 DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
581 " inserted new tag ref %p\n",
582 new_tag, tr_entry);
583 return tr_entry;
584
585err_res:
586 return ERR_PTR(res);
587}
588
589static struct tag_ref *lookup_tag_ref(tag_t full_tag,
590 struct uid_tag_data **utd_res)
591{
592 struct uid_tag_data *utd_entry;
593 struct tag_ref *tr_entry;
594 bool found_utd;
595 uid_t uid = get_uid_from_tag(full_tag);
596
597 DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
598 full_tag, uid);
599
600 utd_entry = get_uid_data(uid, &found_utd);
601 if (IS_ERR_OR_NULL(utd_entry)) {
602 if (utd_res)
603 *utd_res = utd_entry;
604 return NULL;
605 }
606
607 tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
608 if (utd_res)
609 *utd_res = utd_entry;
610 DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
611 full_tag, utd_entry, tr_entry);
612 return tr_entry;
613}
614
615/* Never returns NULL. Either PTR_ERR or a valid ptr. */
616static struct tag_ref *get_tag_ref(tag_t full_tag,
617 struct uid_tag_data **utd_res)
618{
619 struct uid_tag_data *utd_entry;
620 struct tag_ref *tr_entry;
621
622 DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
623 full_tag);
624 spin_lock_bh(&uid_tag_data_tree_lock);
625 tr_entry = lookup_tag_ref(full_tag, &utd_entry);
626 BUG_ON(IS_ERR_OR_NULL(utd_entry));
627 if (!tr_entry)
628 tr_entry = new_tag_ref(full_tag, utd_entry);
629
630 spin_unlock_bh(&uid_tag_data_tree_lock);
631 if (utd_res)
632 *utd_res = utd_entry;
633 DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
634 full_tag, utd_entry, tr_entry);
635 return tr_entry;
636}
637
638/* Checks and maybe frees the UID Tag Data entry */
639static void put_utd_entry(struct uid_tag_data *utd_entry)
640{
641 /* Are we done with the UID tag data entry? */
642 if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
643 !utd_entry->num_pqd) {
644 DR_DEBUG("qtaguid: %s(): "
645 "erase utd_entry=%p uid=%u "
646 "by pid=%u tgid=%u uid=%u\n", __func__,
647 utd_entry, utd_entry->uid,
648 current->pid, current->tgid, current_fsuid());
649 BUG_ON(utd_entry->num_active_tags);
650 rb_erase(&utd_entry->node, &uid_tag_data_tree);
651 kfree(utd_entry);
652 } else {
653 DR_DEBUG("qtaguid: %s(): "
654 "utd_entry=%p still has %d tags %d proc_qtu_data\n",
655 __func__, utd_entry, utd_entry->num_active_tags,
656 utd_entry->num_pqd);
657 BUG_ON(!(utd_entry->num_active_tags ||
658 utd_entry->num_pqd));
659 }
660}
661
662/*
663 * If no sock_tags are using this tag_ref,
664 * decrements refcount of utd_entry, removes tr_entry
665 * from utd_entry->tag_ref_tree and frees.
666 */
667static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
668 struct uid_tag_data *utd_entry)
669{
670 DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
671 tr_entry, tr_entry->tn.tag,
672 get_uid_from_tag(tr_entry->tn.tag));
673 if (!tr_entry->num_sock_tags) {
674 BUG_ON(!utd_entry->num_active_tags);
675 utd_entry->num_active_tags--;
676 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
677 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
678 kfree(tr_entry);
679 }
680}
681
682static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
683{
684 struct rb_node *node;
685 struct tag_ref *tr_entry;
686 tag_t acct_tag;
687
688 DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
689 full_tag, get_uid_from_tag(full_tag));
690 acct_tag = get_atag_from_tag(full_tag);
691 node = rb_first(&utd_entry->tag_ref_tree);
692 while (node) {
693 tr_entry = rb_entry(node, struct tag_ref, tn.node);
694 node = rb_next(node);
695 if (!acct_tag || tr_entry->tn.tag == full_tag)
696 free_tag_ref_from_utd_entry(tr_entry, utd_entry);
697 }
698}
699
700static int read_proc_u64(char *page, char **start, off_t off,
701 int count, int *eof, void *data)
702{
703 int len;
704 uint64_t value;
705 char *p = page;
706 uint64_t *iface_entry = data;
707
708 if (!data)
709 return 0;
710
711 value = *iface_entry;
712 p += sprintf(p, "%llu\n", value);
713 len = (p - page) - off;
714 *eof = (len <= count) ? 1 : 0;
715 *start = page + off;
716 return len;
717}
718
719static int read_proc_bool(char *page, char **start, off_t off,
720 int count, int *eof, void *data)
721{
722 int len;
723 bool value;
724 char *p = page;
725 bool *bool_entry = data;
726
727 if (!data)
728 return 0;
729
730 value = *bool_entry;
731 p += sprintf(p, "%u\n", value);
732 len = (p - page) - off;
733 *eof = (len <= count) ? 1 : 0;
734 *start = page + off;
735 return len;
736}
737
738static int get_active_counter_set(tag_t tag)
739{
740 int active_set = 0;
741 struct tag_counter_set *tcs;
742
743 MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
744 " (uid=%u)\n",
745 tag, get_uid_from_tag(tag));
746 /* For now we only handle UID tags for active sets */
747 tag = get_utag_from_tag(tag);
748 spin_lock_bh(&tag_counter_set_list_lock);
749 tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
750 if (tcs)
751 active_set = tcs->active_set;
752 spin_unlock_bh(&tag_counter_set_list_lock);
753 return active_set;
754}
755
756/*
757 * Find the entry for tracking the specified interface.
758 * Caller must hold iface_stat_list_lock
759 */
760static struct iface_stat *get_iface_entry(const char *ifname)
761{
762 struct iface_stat *iface_entry;
763
764 	/* Find the entry for tracking the specified interface */
765 if (ifname == NULL) {
766 pr_info("qtaguid: iface_stat: get() NULL device name\n");
767 return NULL;
768 }
769
770 /* Iterate over interfaces */
771 list_for_each_entry(iface_entry, &iface_stat_list, list) {
772 if (!strcmp(ifname, iface_entry->ifname))
773 goto done;
774 }
775 iface_entry = NULL;
776done:
777 return iface_entry;
778}
779
780static int iface_stat_all_proc_read(char *page, char **num_items_returned,
781 off_t items_to_skip, int char_count,
782 int *eof, void *data)
783{
784 char *outp = page;
785 int item_index = 0;
786 int len;
787 struct iface_stat *iface_entry;
788 struct rtnl_link_stats64 dev_stats, *stats;
789 struct rtnl_link_stats64 no_dev_stats = {0};
790
791 if (unlikely(module_passive)) {
792 *eof = 1;
793 return 0;
794 }
795
796 CT_DEBUG("qtaguid:proc iface_stat_all "
797 "page=%p *num_items_returned=%p off=%ld "
798 "char_count=%d *eof=%d\n", page, *num_items_returned,
799 items_to_skip, char_count, *eof);
800
801 if (*eof)
802 return 0;
803
804 /*
805 * This lock will prevent iface_stat_update() from changing active,
806 * and in turn prevent an interface from unregistering itself.
807 */
808 spin_lock_bh(&iface_stat_list_lock);
809 list_for_each_entry(iface_entry, &iface_stat_list, list) {
810 if (item_index++ < items_to_skip)
811 continue;
812
813 if (iface_entry->active) {
814 stats = dev_get_stats(iface_entry->net_dev,
815 &dev_stats);
816 } else {
817 stats = &no_dev_stats;
818 }
819 len = snprintf(outp, char_count,
820 "%s %d "
821 "%llu %llu %llu %llu "
822 "%llu %llu %llu %llu\n",
823 iface_entry->ifname,
824 iface_entry->active,
825 iface_entry->totals[IFS_RX].bytes,
826 iface_entry->totals[IFS_RX].packets,
827 iface_entry->totals[IFS_TX].bytes,
828 iface_entry->totals[IFS_TX].packets,
829 stats->rx_bytes, stats->rx_packets,
830 stats->tx_bytes, stats->tx_packets);
831 if (len >= char_count) {
832 spin_unlock_bh(&iface_stat_list_lock);
833 *outp = '\0';
834 return outp - page;
835 }
836 outp += len;
837 char_count -= len;
838 (*num_items_returned)++;
839 }
840 spin_unlock_bh(&iface_stat_list_lock);
841
842 *eof = 1;
843 return outp - page;
844}
845
846static void iface_create_proc_worker(struct work_struct *work)
847{
848 struct proc_dir_entry *proc_entry;
849 struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
850 iface_work);
851 struct iface_stat *new_iface = isw->iface_entry;
852
853 /* iface_entries are not deleted, so safe to manipulate. */
854 proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
855 if (IS_ERR_OR_NULL(proc_entry)) {
856 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
857 kfree(isw);
858 return;
859 }
860
861 new_iface->proc_ptr = proc_entry;
862
863 create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
864 read_proc_u64, &new_iface->totals[IFS_TX].bytes);
865 create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
866 read_proc_u64, &new_iface->totals[IFS_RX].bytes);
867 create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
868 read_proc_u64, &new_iface->totals[IFS_TX].packets);
869 create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
870 read_proc_u64, &new_iface->totals[IFS_RX].packets);
871 create_proc_read_entry("active", proc_iface_perms, proc_entry,
872 read_proc_bool, &new_iface->active);
873
874 IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
875 "entry=%p dev=%s\n", new_iface, new_iface->ifname);
876 kfree(isw);
877}
878
879/*
880 * Will set the entry's active state, and
881 * update the net_dev accordingly also.
882 */
883static void _iface_stat_set_active(struct iface_stat *entry,
884 struct net_device *net_dev,
885 bool activate)
886{
887 if (activate) {
888 entry->net_dev = net_dev;
889 entry->active = true;
890 IF_DEBUG("qtaguid: %s(%s): "
891 "enable tracking. rfcnt=%d\n", __func__,
892 entry->ifname,
893 percpu_read(*net_dev->pcpu_refcnt));
894 } else {
895 entry->active = false;
896 entry->net_dev = NULL;
897 IF_DEBUG("qtaguid: %s(%s): "
898 "disable tracking. rfcnt=%d\n", __func__,
899 entry->ifname,
900 percpu_read(*net_dev->pcpu_refcnt));
901
902 }
903}
904
905/* Caller must hold iface_stat_list_lock */
906static struct iface_stat *iface_alloc(struct net_device *net_dev)
907{
908 struct iface_stat *new_iface;
909 struct iface_stat_work *isw;
910
911 new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
912 if (new_iface == NULL) {
913 pr_err("qtaguid: iface_stat: create(%s): "
914 "iface_stat alloc failed\n", net_dev->name);
915 return NULL;
916 }
917 new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
918 if (new_iface->ifname == NULL) {
919 pr_err("qtaguid: iface_stat: create(%s): "
920 "ifname alloc failed\n", net_dev->name);
921 kfree(new_iface);
922 return NULL;
923 }
924 spin_lock_init(&new_iface->tag_stat_list_lock);
925 new_iface->tag_stat_tree = RB_ROOT;
926 _iface_stat_set_active(new_iface, net_dev, true);
927
928 /*
929 * ipv6 notifier chains are atomic :(
930 * No create_proc_read_entry() for you!
931 */
932 isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
933 if (!isw) {
934 pr_err("qtaguid: iface_stat: create(%s): "
935 "work alloc failed\n", new_iface->ifname);
936 _iface_stat_set_active(new_iface, net_dev, false);
937 kfree(new_iface->ifname);
938 kfree(new_iface);
939 return NULL;
940 }
941 isw->iface_entry = new_iface;
942 INIT_WORK(&isw->iface_work, iface_create_proc_worker);
943 schedule_work(&isw->iface_work);
944 list_add(&new_iface->list, &iface_stat_list);
945 return new_iface;
946}
947
948static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
949 struct iface_stat *iface)
950{
951 struct rtnl_link_stats64 dev_stats, *stats;
952 bool stats_rewound;
953
954 stats = dev_get_stats(net_dev, &dev_stats);
955 /* No empty packets */
956 stats_rewound =
957 (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
958 || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
959
960 IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
961 "bytes rx/tx=%llu/%llu "
962 "active=%d last_known=%d "
963 "stats_rewound=%d\n", __func__,
964 net_dev ? net_dev->name : "?",
965 iface, net_dev,
966 stats->rx_bytes, stats->tx_bytes,
967 iface->active, iface->last_known_valid, stats_rewound);
968
969 if (iface->active && iface->last_known_valid && stats_rewound) {
970 pr_warn_once("qtaguid: iface_stat: %s(%s): "
971 "iface reset its stats unexpectedly\n", __func__,
972 net_dev->name);
973
974 iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes;
975 iface->totals[IFS_TX].packets +=
976 iface->last_known[IFS_TX].packets;
977 iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes;
978 iface->totals[IFS_RX].packets +=
979 iface->last_known[IFS_RX].packets;
980 iface->last_known_valid = false;
981 IF_DEBUG("qtaguid: %s(%s): iface=%p "
982 "used last known bytes rx/tx=%llu/%llu\n", __func__,
983 iface->ifname, iface, iface->last_known[IFS_RX].bytes,
984 iface->last_known[IFS_TX].bytes);
985 }
986}
987
988/*
989 * Create a new entry for tracking the specified interface.
990 * Do nothing if the entry already exists.
991 * Called when an interface is configured with a valid IP address.
992 */
993static void iface_stat_create(struct net_device *net_dev,
994 struct in_ifaddr *ifa)
995{
996 struct in_device *in_dev = NULL;
997 const char *ifname;
998 struct iface_stat *entry;
999 __be32 ipaddr = 0;
1000 struct iface_stat *new_iface;
1001
1002 IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
1003 net_dev ? net_dev->name : "?",
1004 ifa, net_dev);
1005 if (!net_dev) {
1006 pr_err("qtaguid: iface_stat: create(): no net dev\n");
1007 return;
1008 }
1009
1010 ifname = net_dev->name;
1011 if (!ifa) {
1012 in_dev = in_dev_get(net_dev);
1013 if (!in_dev) {
1014 pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
1015 ifname);
1016 return;
1017 }
1018 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
1019 ifname, in_dev);
1020 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1021 IF_DEBUG("qtaguid: iface_stat: create(%s): "
1022 "ifa=%p ifa_label=%s\n",
1023 ifname, ifa,
1024 ifa->ifa_label ? ifa->ifa_label : "(null)");
1025 if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
1026 break;
1027 }
1028 }
1029
1030 if (!ifa) {
1031 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
1032 ifname);
1033 goto done_put;
1034 }
1035 ipaddr = ifa->ifa_local;
1036
1037 spin_lock_bh(&iface_stat_list_lock);
1038 entry = get_iface_entry(ifname);
1039 if (entry != NULL) {
1040 bool activate = !ipv4_is_loopback(ipaddr);
1041 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
1042 ifname, entry);
1043 iface_check_stats_reset_and_adjust(net_dev, entry);
1044 _iface_stat_set_active(entry, net_dev, activate);
1045 IF_DEBUG("qtaguid: %s(%s): "
1046 "tracking now %d on ip=%pI4\n", __func__,
1047 entry->ifname, activate, &ipaddr);
1048 goto done_unlock_put;
1049 } else if (ipv4_is_loopback(ipaddr)) {
1050 IF_DEBUG("qtaguid: iface_stat: create(%s): "
1051 "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr);
1052 goto done_unlock_put;
1053 }
1054
1055 new_iface = iface_alloc(net_dev);
1056 IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1057 "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
1058done_unlock_put:
1059 spin_unlock_bh(&iface_stat_list_lock);
1060done_put:
1061 if (in_dev)
1062 in_dev_put(in_dev);
1063}
1064
1065static void iface_stat_create_ipv6(struct net_device *net_dev,
1066 struct inet6_ifaddr *ifa)
1067{
1068 struct in_device *in_dev;
1069 const char *ifname;
1070 struct iface_stat *entry;
1071 struct iface_stat *new_iface;
1072 int addr_type;
1073
1074 IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1075 ifa, net_dev, net_dev ? net_dev->name : "");
1076 if (!net_dev) {
1077 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1078 return;
1079 }
1080 ifname = net_dev->name;
1081
1082 in_dev = in_dev_get(net_dev);
1083 if (!in_dev) {
1084 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1085 ifname);
1086 return;
1087 }
1088
1089 IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1090 ifname, in_dev);
1091
1092 if (!ifa) {
1093 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1094 ifname);
1095 goto done_put;
1096 }
1097 addr_type = ipv6_addr_type(&ifa->addr);
1098
1099 spin_lock_bh(&iface_stat_list_lock);
1100 entry = get_iface_entry(ifname);
1101 if (entry != NULL) {
1102 bool activate = !(addr_type & IPV6_ADDR_LOOPBACK);
1103 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1104 ifname, entry);
1105 iface_check_stats_reset_and_adjust(net_dev, entry);
1106 _iface_stat_set_active(entry, net_dev, activate);
1107 IF_DEBUG("qtaguid: %s(%s): "
1108 "tracking now %d on ip=%pI6c\n", __func__,
1109 entry->ifname, activate, &ifa->addr);
1110 goto done_unlock_put;
1111 } else if (addr_type & IPV6_ADDR_LOOPBACK) {
1112 IF_DEBUG("qtaguid: %s(%s): "
1113 "ignore loopback dev. ip=%pI6c\n", __func__,
1114 ifname, &ifa->addr);
1115 goto done_unlock_put;
1116 }
1117
1118 new_iface = iface_alloc(net_dev);
1119 IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1120 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
1121
1122done_unlock_put:
1123 spin_unlock_bh(&iface_stat_list_lock);
1124done_put:
1125 in_dev_put(in_dev);
1126}
1127
1128static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1129{
1130 MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1131 return sock_tag_tree_search(&sock_tag_tree, sk);
1132}
1133
1134static struct sock_tag *get_sock_stat(const struct sock *sk)
1135{
1136 struct sock_tag *sock_tag_entry;
1137 MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1138 if (!sk)
1139 return NULL;
1140 spin_lock_bh(&sock_tag_list_lock);
1141 sock_tag_entry = get_sock_stat_nl(sk);
1142 spin_unlock_bh(&sock_tag_list_lock);
1143 return sock_tag_entry;
1144}
1145
1146static void
1147data_counters_update(struct data_counters *dc, int set,
1148 enum ifs_tx_rx direction, int proto, int bytes)
1149{
1150 switch (proto) {
1151 case IPPROTO_TCP:
1152 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1153 break;
1154 case IPPROTO_UDP:
1155 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1156 break;
1157 case IPPROTO_IP:
1158 default:
1159 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1160 1);
1161 break;
1162 }
1163}
1164
1165/*
1166 * Update stats for the specified interface. Do nothing if the entry
1167 * does not exist (when a device was never configured with an IP address).
1168 * Called when a device is being unregistered.
1169 */
1170static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1171{
1172 struct rtnl_link_stats64 dev_stats, *stats;
1173 struct iface_stat *entry;
1174
1175 stats = dev_get_stats(net_dev, &dev_stats);
1176 spin_lock_bh(&iface_stat_list_lock);
1177 entry = get_iface_entry(net_dev->name);
1178 if (entry == NULL) {
1179 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1180 net_dev->name);
1181 spin_unlock_bh(&iface_stat_list_lock);
1182 return;
1183 }
1184
1185 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1186 net_dev->name, entry);
1187 if (!entry->active) {
1188 IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1189 net_dev->name);
1190 spin_unlock_bh(&iface_stat_list_lock);
1191 return;
1192 }
1193
1194 if (stash_only) {
1195 entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1196 entry->last_known[IFS_TX].packets = stats->tx_packets;
1197 entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1198 entry->last_known[IFS_RX].packets = stats->rx_packets;
1199 entry->last_known_valid = true;
1200 IF_DEBUG("qtaguid: %s(%s): "
1201 "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1202 net_dev->name, stats->rx_bytes, stats->tx_bytes);
1203 spin_unlock_bh(&iface_stat_list_lock);
1204 return;
1205 }
1206 entry->totals[IFS_TX].bytes += stats->tx_bytes;
1207 entry->totals[IFS_TX].packets += stats->tx_packets;
1208 entry->totals[IFS_RX].bytes += stats->rx_bytes;
1209 entry->totals[IFS_RX].packets += stats->rx_packets;
1210 /* We don't need the last_known[] anymore */
1211 entry->last_known_valid = false;
1212 _iface_stat_set_active(entry, net_dev, false);
1213 IF_DEBUG("qtaguid: %s(%s): "
1214 "disable tracking. rx/tx=%llu/%llu\n", __func__,
1215 net_dev->name, stats->rx_bytes, stats->tx_bytes);
1216 spin_unlock_bh(&iface_stat_list_lock);
1217}
1218
1219static void tag_stat_update(struct tag_stat *tag_entry,
1220 enum ifs_tx_rx direction, int proto, int bytes)
1221{
1222 int active_set;
1223 active_set = get_active_counter_set(tag_entry->tn.tag);
1224 MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1225 "dir=%d proto=%d bytes=%d)\n",
1226 tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1227 active_set, direction, proto, bytes);
1228 data_counters_update(&tag_entry->counters, active_set, direction,
1229 proto, bytes);
1230 if (tag_entry->parent_counters)
1231 data_counters_update(tag_entry->parent_counters, active_set,
1232 direction, proto, bytes);
1233}
1234
1235/*
1236 * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1237 * the interface.
1238 * iface_entry->tag_stat_list_lock should be held.
1239 */
1240static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1241 tag_t tag)
1242{
1243 struct tag_stat *new_tag_stat_entry = NULL;
1244 IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1245 " (uid=%u)\n", __func__,
1246 iface_entry, tag, get_uid_from_tag(tag));
1247 new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1248 if (!new_tag_stat_entry) {
1249 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1250 goto done;
1251 }
1252 new_tag_stat_entry->tn.tag = tag;
1253 tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1254done:
1255 return new_tag_stat_entry;
1256}
1257
1258static void if_tag_stat_update(const char *ifname, uid_t uid,
1259 const struct sock *sk, enum ifs_tx_rx direction,
1260 int proto, int bytes)
1261{
1262 struct tag_stat *tag_stat_entry;
1263 tag_t tag, acct_tag;
1264 tag_t uid_tag;
1265 struct data_counters *uid_tag_counters;
1266 struct sock_tag *sock_tag_entry;
1267 struct iface_stat *iface_entry;
1268 struct tag_stat *new_tag_stat;
1269 MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1270 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1271 ifname, uid, sk, direction, proto, bytes);
1272
1273
1274 iface_entry = get_iface_entry(ifname);
1275 if (!iface_entry) {
1276 pr_err("qtaguid: iface_stat: stat_update() %s not found\n",
1277 ifname);
1278 return;
1279 }
1280 /* It is ok to process data when an iface_entry is inactive */
1281
1282 MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1283 ifname, iface_entry);
1284
1285 /*
1286 * Look for a tagged sock.
1287	 * It will have an acct_tag and uid_tag.
1288 */
1289 sock_tag_entry = get_sock_stat(sk);
1290 if (sock_tag_entry) {
1291 tag = sock_tag_entry->tag;
1292 acct_tag = get_atag_from_tag(tag);
1293 uid_tag = get_utag_from_tag(tag);
1294 } else {
1295 acct_tag = make_atag_from_value(0);
1296 tag = combine_atag_with_uid(acct_tag, uid);
1297 uid_tag = make_tag_from_uid(uid);
1298 }
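	/*
	 * For example (values purely illustrative): an untagged socket owned
	 * by uid 10003 gets acct_tag = make_atag_from_value(0) = 0, so
	 * tag == uid_tag == make_tag_from_uid(10003), and its traffic is
	 * billed directly against the per-uid {0, uid_tag} entry.
	 */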
1299 MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1300 " looking for tag=0x%llx (uid=%u) in ife=%p\n",
1301 tag, get_uid_from_tag(tag), iface_entry);
1302 /* Loop over tag list under this interface for {acct_tag,uid_tag} */
1303 spin_lock_bh(&iface_entry->tag_stat_list_lock);
1304
1305 tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1306 tag);
1307 if (tag_stat_entry) {
1308 /*
1309 * Updating the {acct_tag, uid_tag} entry handles both stats:
1310 * {0, uid_tag} will also get updated.
1311 */
1312 tag_stat_update(tag_stat_entry, direction, proto, bytes);
1313 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1314 return;
1315 }
1316
1317 /* Loop over tag list under this interface for {0,uid_tag} */
1318 tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1319 uid_tag);
1320 if (!tag_stat_entry) {
1321 /* Here: the base uid_tag did not exist */
1322 /*
1323 * No parent counters. So
1324		 * - No {0, uid_tag} stats and no {acct_tag, uid_tag} stats.
1325 */
1326 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
1327 uid_tag_counters = &new_tag_stat->counters;
1328 } else {
1329 uid_tag_counters = &tag_stat_entry->counters;
1330 }
1331
1332 if (acct_tag) {
1333 new_tag_stat = create_if_tag_stat(iface_entry, tag);
1334 new_tag_stat->parent_counters = uid_tag_counters;
1335 }
1336 tag_stat_update(new_tag_stat, direction, proto, bytes);
1337 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1338}
1339
1340static int iface_netdev_event_handler(struct notifier_block *nb,
1341 unsigned long event, void *ptr) {
1342 struct net_device *dev = ptr;
1343
1344 if (unlikely(module_passive))
1345 return NOTIFY_DONE;
1346
1347 IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1348 "ev=0x%lx/%s netdev=%p->name=%s\n",
1349 event, netdev_evt_str(event), dev, dev ? dev->name : "");
1350
1351 switch (event) {
1352 case NETDEV_UP:
1353 iface_stat_create(dev, NULL);
1354 atomic64_inc(&qtu_events.iface_events);
1355 break;
1356 case NETDEV_DOWN:
1357 case NETDEV_UNREGISTER:
1358 iface_stat_update(dev, event == NETDEV_DOWN);
1359 atomic64_inc(&qtu_events.iface_events);
1360 break;
1361 }
1362 return NOTIFY_DONE;
1363}
1364
1365static int iface_inet6addr_event_handler(struct notifier_block *nb,
1366 unsigned long event, void *ptr)
1367{
1368 struct inet6_ifaddr *ifa = ptr;
1369 struct net_device *dev;
1370
1371 if (unlikely(module_passive))
1372 return NOTIFY_DONE;
1373
1374 IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1375 "ev=0x%lx/%s ifa=%p\n",
1376 event, netdev_evt_str(event), ifa);
1377
1378 switch (event) {
1379 case NETDEV_UP:
1380 BUG_ON(!ifa || !ifa->idev);
1381 dev = (struct net_device *)ifa->idev->dev;
1382 iface_stat_create_ipv6(dev, ifa);
1383 atomic64_inc(&qtu_events.iface_events);
1384 break;
1385 case NETDEV_DOWN:
1386 case NETDEV_UNREGISTER:
1387 BUG_ON(!ifa || !ifa->idev);
1388 dev = (struct net_device *)ifa->idev->dev;
1389 iface_stat_update(dev, event == NETDEV_DOWN);
1390 atomic64_inc(&qtu_events.iface_events);
1391 break;
1392 }
1393 return NOTIFY_DONE;
1394}
1395
1396static int iface_inetaddr_event_handler(struct notifier_block *nb,
1397 unsigned long event, void *ptr)
1398{
1399 struct in_ifaddr *ifa = ptr;
1400 struct net_device *dev;
1401
1402 if (unlikely(module_passive))
1403 return NOTIFY_DONE;
1404
1405 IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1406 "ev=0x%lx/%s ifa=%p\n",
1407 event, netdev_evt_str(event), ifa);
1408
1409 switch (event) {
1410 case NETDEV_UP:
1411 BUG_ON(!ifa || !ifa->ifa_dev);
1412 dev = ifa->ifa_dev->dev;
1413 iface_stat_create(dev, ifa);
1414 atomic64_inc(&qtu_events.iface_events);
1415 break;
1416 case NETDEV_DOWN:
1417 case NETDEV_UNREGISTER:
1418 BUG_ON(!ifa || !ifa->ifa_dev);
1419 dev = ifa->ifa_dev->dev;
1420 iface_stat_update(dev, event == NETDEV_DOWN);
1421 atomic64_inc(&qtu_events.iface_events);
1422 break;
1423 }
1424 return NOTIFY_DONE;
1425}
1426
1427static struct notifier_block iface_netdev_notifier_blk = {
1428 .notifier_call = iface_netdev_event_handler,
1429};
1430
1431static struct notifier_block iface_inetaddr_notifier_blk = {
1432 .notifier_call = iface_inetaddr_event_handler,
1433};
1434
1435static struct notifier_block iface_inet6addr_notifier_blk = {
1436 .notifier_call = iface_inet6addr_event_handler,
1437};
1438
1439static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1440{
1441 int err;
1442
1443 iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1444 if (!iface_stat_procdir) {
1445 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1446 err = -1;
1447 goto err;
1448 }
1449
1450 iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename,
1451 proc_iface_perms,
1452 parent_procdir);
1453 if (!iface_stat_all_procfile) {
1454 pr_err("qtaguid: iface_stat: init "
1455 " failed to create stat_all proc entry\n");
1456 err = -1;
1457 goto err_zap_entry;
1458 }
1459 iface_stat_all_procfile->read_proc = iface_stat_all_proc_read;
1460
1461
1462 err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1463 if (err) {
1464 pr_err("qtaguid: iface_stat: init "
1465 "failed to register dev event handler\n");
1466 goto err_zap_all_stats_entry;
1467 }
1468 err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1469 if (err) {
1470 pr_err("qtaguid: iface_stat: init "
1471 "failed to register ipv4 dev event handler\n");
1472 goto err_unreg_nd;
1473 }
1474
1475 err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1476 if (err) {
1477 pr_err("qtaguid: iface_stat: init "
1478 "failed to register ipv6 dev event handler\n");
1479 goto err_unreg_ip4_addr;
1480 }
1481 return 0;
1482
1483err_unreg_ip4_addr:
1484 unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1485err_unreg_nd:
1486 unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1487err_zap_all_stats_entry:
1488 remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1489err_zap_entry:
1490 remove_proc_entry(iface_stat_procdirname, parent_procdir);
1491err:
1492 return err;
1493}
1494
1495static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1496 struct xt_action_param *par)
1497{
1498 struct sock *sk;
1499 unsigned int hook_mask = (1 << par->hooknum);
1500
1501 MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1502 par->hooknum, par->family);
1503
1504 /*
1505	 * Let's not abuse the xt_socket_get*_sk(), or else it will
1506 * return garbage SKs.
1507 */
1508 if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1509 return NULL;
1510
1511 switch (par->family) {
1512 case NFPROTO_IPV6:
1513 sk = xt_socket_get6_sk(skb, par);
1514 break;
1515 case NFPROTO_IPV4:
1516 sk = xt_socket_get4_sk(skb, par);
1517 break;
1518 default:
1519 return NULL;
1520 }
1521
1522 /*
1523	 * There seem to be issues with the file ptr for TCP_TIME_WAIT SKs.
1524 * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
1525 * Not fixed in 3.0-r3 :(
1526 */
1527 if (sk) {
1528 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1529 "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1530 if (sk->sk_state == TCP_TIME_WAIT) {
1531 xt_socket_put_sk(sk);
1532 sk = NULL;
1533 }
1534 }
1535 return sk;
1536}
1537
1538static void account_for_uid(const struct sk_buff *skb,
1539 const struct sock *alternate_sk, uid_t uid,
1540 struct xt_action_param *par)
1541{
1542 const struct net_device *el_dev;
1543
1544 if (!skb->dev) {
1545 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1546 el_dev = par->in ? : par->out;
1547 } else {
1548 const struct net_device *other_dev;
1549 el_dev = skb->dev;
1550 other_dev = par->in ? : par->out;
1551 if (el_dev != other_dev) {
1552 MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1553 "par->(in/out)=%p %s\n",
1554 par->hooknum, el_dev, el_dev->name, other_dev,
1555 other_dev->name);
1556 }
1557 }
1558
1559 if (unlikely(!el_dev)) {
1560 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1561 } else if (unlikely(!el_dev->name)) {
1562 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1563 } else {
1564 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n",
1565 par->hooknum,
1566 el_dev->name,
1567 el_dev->type);
1568
1569 if_tag_stat_update(el_dev->name, uid,
1570 skb->sk ? skb->sk : alternate_sk,
1571 par->in ? IFS_RX : IFS_TX,
1572 ip_hdr(skb)->protocol, skb->len);
1573 }
1574}
1575
1576static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1577{
1578 const struct xt_qtaguid_match_info *info = par->matchinfo;
1579 const struct file *filp;
1580 bool got_sock = false;
1581 struct sock *sk;
1582 uid_t sock_uid;
1583 bool res;
1584
1585 if (unlikely(module_passive))
1586 return (info->match ^ info->invert) == 0;
1587
1588 MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1589 par->hooknum, skb, par->in, par->out, par->family);
1590
1591 atomic64_inc(&qtu_events.match_calls);
1592 if (skb == NULL) {
1593 res = (info->match ^ info->invert) == 0;
1594 goto ret_res;
1595 }
1596
1597 sk = skb->sk;
1598
1599 if (sk == NULL) {
1600 /*
1601 * A missing sk->sk_socket happens when packets are in-flight
1602 * and the matching socket is already closed and gone.
1603 */
1604 sk = qtaguid_find_sk(skb, par);
1605 /*
1606 * If we got the socket from the find_sk(), we will need to put
1607 * it back, as nf_tproxy_get_sock_v4() got it.
1608 */
1609 got_sock = sk;
1610 if (sk)
1611 atomic64_inc(&qtu_events.match_found_sk_in_ct);
1612 else
1613 atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
1614 } else {
1615 atomic64_inc(&qtu_events.match_found_sk);
1616 }
1617 MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n",
1618 par->hooknum, sk, got_sock, ip_hdr(skb)->protocol);
1619 if (sk != NULL) {
1620 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1621 par->hooknum, sk, sk->sk_socket,
1622 sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1623 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1624 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1625 par->hooknum, filp ? filp->f_cred->fsuid : -1);
1626 }
1627
1628 if (sk == NULL || sk->sk_socket == NULL) {
1629 /*
1630 * Here, the qtaguid_find_sk() using connection tracking
1631 * couldn't find the owner, so for now we just count them
1632 * against the system.
1633 */
1634 /*
1635 * TODO: unhack how to force just accounting.
1636 * For now we only do iface stats when the uid-owner is not
1637 * requested.
1638 */
1639 if (!(info->match & XT_QTAGUID_UID))
1640 account_for_uid(skb, sk, 0, par);
1641 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1642 par->hooknum,
1643 sk ? sk->sk_socket : NULL);
1644 res = (info->match ^ info->invert) == 0;
1645 atomic64_inc(&qtu_events.match_no_sk);
1646 goto put_sock_ret_res;
1647 } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1648 res = false;
1649 goto put_sock_ret_res;
1650 }
1651 filp = sk->sk_socket->file;
1652 if (filp == NULL) {
1653 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
1654 account_for_uid(skb, sk, 0, par);
1655 res = ((info->match ^ info->invert) &
1656 (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
1657 atomic64_inc(&qtu_events.match_no_sk_file);
1658 goto put_sock_ret_res;
1659 }
1660 sock_uid = filp->f_cred->fsuid;
1661 /*
1662 * TODO: unhack how to force just accounting.
1663 * For now we only do iface stats when the uid-owner is not requested
1664 */
1665 if (!(info->match & XT_QTAGUID_UID))
1666 account_for_uid(skb, sk, sock_uid, par);
1667
1668 /*
1669 * The following two tests fail the match when:
1670 * id not in range AND no inverted condition requested
1671 * or id in range AND inverted condition requested
1672 * Thus (!a && b) || (a && !b) == a ^ b
1673 */
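	/*
	 * For example (numbers purely illustrative): with uid_min=10000,
	 * uid_max=10999 and no inverted condition, fsuid=10003 gives
	 * in_range=1 ^ !invert=1 -> 0, so the test below does not fail the
	 * match, while fsuid=0 gives 0 ^ 1 -> 1 and the match fails.
	 * With the inverted condition requested the outcomes flip.
	 */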
1674 if (info->match & XT_QTAGUID_UID)
1675 if ((filp->f_cred->fsuid >= info->uid_min &&
1676 filp->f_cred->fsuid <= info->uid_max) ^
1677 !(info->invert & XT_QTAGUID_UID)) {
1678 MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1679 par->hooknum);
1680 res = false;
1681 goto put_sock_ret_res;
1682 }
1683 if (info->match & XT_QTAGUID_GID)
1684 if ((filp->f_cred->fsgid >= info->gid_min &&
1685 filp->f_cred->fsgid <= info->gid_max) ^
1686 !(info->invert & XT_QTAGUID_GID)) {
1687 MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1688 par->hooknum);
1689 res = false;
1690 goto put_sock_ret_res;
1691 }
1692
1693 MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
1694 res = true;
1695
1696put_sock_ret_res:
1697 if (got_sock)
1698 xt_socket_put_sk(sk);
1699ret_res:
1700 MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1701 return res;
1702}
1703
1704#ifdef DDEBUG
1705/* This function is not in xt_qtaguid_print.c because of locks visibility */
1706static void prdebug_full_state(int indent_level, const char *fmt, ...)
1707{
1708 va_list args;
1709 char *fmt_buff;
1710 char *buff;
1711
1712 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
1713 return;
1714
1715 fmt_buff = kasprintf(GFP_ATOMIC,
1716 "qtaguid: %s(): %s {\n", __func__, fmt);
1717 BUG_ON(!fmt_buff);
1718 va_start(args, fmt);
1719 buff = kvasprintf(GFP_ATOMIC,
1720 fmt_buff, args);
1721 BUG_ON(!buff);
1722 pr_debug("%s", buff);
1723 kfree(fmt_buff);
1724 kfree(buff);
1725 va_end(args);
1726
1727 spin_lock_bh(&sock_tag_list_lock);
1728 prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
1729 spin_unlock_bh(&sock_tag_list_lock);
1730
1731 spin_lock_bh(&sock_tag_list_lock);
1732 spin_lock_bh(&uid_tag_data_tree_lock);
1733 prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
1734 prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
1735 spin_unlock_bh(&uid_tag_data_tree_lock);
1736 spin_unlock_bh(&sock_tag_list_lock);
1737
1738 spin_lock_bh(&iface_stat_list_lock);
1739 prdebug_iface_stat_list(indent_level, &iface_stat_list);
1740 spin_unlock_bh(&iface_stat_list_lock);
1741
1742 pr_debug("qtaguid: %s(): }\n", __func__);
1743}
1744#else
1745static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
1746#endif
1747
1748/*
1749 * Procfs reader to get all active socket tags using style "1)" as described in
1750 * fs/proc/generic.c
1751 */
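/*
 * Each socket tag is printed in the form (values purely illustrative):
 *
 *   sock=ffff880012345678 tag=0x100002713 (uid=10003) pid=1234 f_count=2
 *
 * followed by a single "events: ..." summary line.
 */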
1752static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
1753 off_t items_to_skip, int char_count, int *eof,
1754 void *data)
1755{
1756 char *outp = page;
1757 int len;
1758 uid_t uid;
1759 struct rb_node *node;
1760 struct sock_tag *sock_tag_entry;
1761 int item_index = 0;
1762 int indent_level = 0;
1763 long f_count;
1764
1765 if (unlikely(module_passive)) {
1766 *eof = 1;
1767 return 0;
1768 }
1769
1770 if (*eof)
1771 return 0;
1772
1773 CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
1774 page, items_to_skip, char_count, *eof);
1775
1776 spin_lock_bh(&sock_tag_list_lock);
1777 for (node = rb_first(&sock_tag_tree);
1778 node;
1779 node = rb_next(node)) {
1780 if (item_index++ < items_to_skip)
1781 continue;
1782 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1783 uid = get_uid_from_tag(sock_tag_entry->tag);
1784 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1785 "pid=%u\n",
1786 sock_tag_entry->sk,
1787 sock_tag_entry->tag,
1788 uid,
1789 sock_tag_entry->pid
1790 );
1791 f_count = atomic_long_read(
1792 &sock_tag_entry->socket->file->f_count);
1793 len = snprintf(outp, char_count,
1794 "sock=%p tag=0x%llx (uid=%u) pid=%u "
1795 "f_count=%lu\n",
1796 sock_tag_entry->sk,
1797 sock_tag_entry->tag, uid,
1798 sock_tag_entry->pid, f_count);
1799 if (len >= char_count) {
1800 spin_unlock_bh(&sock_tag_list_lock);
1801 *outp = '\0';
1802 return outp - page;
1803 }
1804 outp += len;
1805 char_count -= len;
1806 (*num_items_returned)++;
1807 }
1808 spin_unlock_bh(&sock_tag_list_lock);
1809
1810 if (item_index++ >= items_to_skip) {
1811 len = snprintf(outp, char_count,
1812 "events: sockets_tagged=%llu "
1813 "sockets_untagged=%llu "
1814 "counter_set_changes=%llu "
1815 "delete_cmds=%llu "
1816 "iface_events=%llu "
1817 "match_calls=%llu "
1818 "match_found_sk=%llu "
1819 "match_found_sk_in_ct=%llu "
1820 "match_found_no_sk_in_ct=%llu "
1821 "match_no_sk=%llu "
1822 "match_no_sk_file=%llu\n",
1823 atomic64_read(&qtu_events.sockets_tagged),
1824 atomic64_read(&qtu_events.sockets_untagged),
1825 atomic64_read(&qtu_events.counter_set_changes),
1826 atomic64_read(&qtu_events.delete_cmds),
1827 atomic64_read(&qtu_events.iface_events),
1828 atomic64_read(&qtu_events.match_calls),
1829 atomic64_read(&qtu_events.match_found_sk),
1830 atomic64_read(&qtu_events.match_found_sk_in_ct),
1831 atomic64_read(
1832 &qtu_events.match_found_no_sk_in_ct),
1833 atomic64_read(&qtu_events.match_no_sk),
1834 atomic64_read(&qtu_events.match_no_sk_file));
1835 if (len >= char_count) {
1836 *outp = '\0';
1837 return outp - page;
1838 }
1839 outp += len;
1840 char_count -= len;
1841 (*num_items_returned)++;
1842 }
1843
1844 /* Count the following as part of the last item_index */
1845 if (item_index > items_to_skip) {
1846 prdebug_full_state(indent_level, "proc ctrl");
1847 }
1848
1849 *eof = 1;
1850 return outp - page;
1851}
1852
1853/*
1854 * Delete socket tags, and stat tags associated with a given
1855 * accounting tag and uid.
1856 */
1857static int ctrl_cmd_delete(const char *input)
1858{
1859 char cmd;
1860 uid_t uid;
1861 uid_t entry_uid;
1862 tag_t acct_tag;
1863 tag_t tag;
1864 int res, argc;
1865 struct iface_stat *iface_entry;
1866 struct rb_node *node;
1867 struct sock_tag *st_entry;
1868 struct rb_root st_to_free_tree = RB_ROOT;
1869 struct tag_stat *ts_entry;
1870 struct tag_counter_set *tcs_entry;
1871 struct tag_ref *tr_entry;
1872 struct uid_tag_data *utd_entry;
1873
1874 argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
1875 CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
1876 "user_tag=0x%llx uid=%u\n", input, argc, cmd,
1877 acct_tag, uid);
1878 if (argc < 2) {
1879 res = -EINVAL;
1880 goto err;
1881 }
1882 if (!valid_atag(acct_tag)) {
1883 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
1884 res = -EINVAL;
1885 goto err;
1886 }
1887 if (argc < 3) {
1888 uid = current_fsuid();
1889 } else if (!can_impersonate_uid(uid)) {
1890 pr_info("qtaguid: ctrl_delete(%s): "
1891 "insufficient priv from pid=%u tgid=%u uid=%u\n",
1892 input, current->pid, current->tgid, current_fsuid());
1893 res = -EPERM;
1894 goto err;
1895 }
1896
1897 tag = combine_atag_with_uid(acct_tag, uid);
1898 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1899 "looking for tag=0x%llx (uid=%u)\n",
1900 input, tag, uid);
1901
1902 /* Delete socket tags */
1903 spin_lock_bh(&sock_tag_list_lock);
1904 node = rb_first(&sock_tag_tree);
1905 while (node) {
1906 st_entry = rb_entry(node, struct sock_tag, sock_node);
1907 entry_uid = get_uid_from_tag(st_entry->tag);
1908 node = rb_next(node);
1909 if (entry_uid != uid)
1910 continue;
1911
1912 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
1913 input, st_entry->tag, entry_uid);
1914
1915 if (!acct_tag || st_entry->tag == tag) {
1916 rb_erase(&st_entry->sock_node, &sock_tag_tree);
1917 /* Can't sockfd_put() within spinlock, do it later. */
1918 sock_tag_tree_insert(st_entry, &st_to_free_tree);
1919 tr_entry = lookup_tag_ref(st_entry->tag, NULL);
1920 BUG_ON(tr_entry->num_sock_tags <= 0);
1921 tr_entry->num_sock_tags--;
1922 /*
1923 * TODO: remove if, and start failing.
1924 * This is a hack to work around the fact that in some
1925 * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
1926 * and are trying to work around apps
1927 * that didn't open the /dev/xt_qtaguid.
1928 */
1929 if (st_entry->list.next && st_entry->list.prev)
1930 list_del(&st_entry->list);
1931 }
1932 }
1933 spin_unlock_bh(&sock_tag_list_lock);
1934
1935 sock_tag_tree_erase(&st_to_free_tree);
1936
1937 /* Delete tag counter-sets */
1938 spin_lock_bh(&tag_counter_set_list_lock);
1939 /* Counter sets are only on the uid tag, not full tag */
1940 tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
1941 if (tcs_entry) {
1942 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1943 "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
1944 input,
1945 tcs_entry->tn.tag,
1946 get_uid_from_tag(tcs_entry->tn.tag),
1947 tcs_entry->active_set);
1948 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
1949 kfree(tcs_entry);
1950 }
1951 spin_unlock_bh(&tag_counter_set_list_lock);
1952
1953 /*
1954 * If acct_tag is 0, then all entries belonging to uid are
1955 * erased.
1956 */
1957 spin_lock_bh(&iface_stat_list_lock);
1958 list_for_each_entry(iface_entry, &iface_stat_list, list) {
1959 spin_lock_bh(&iface_entry->tag_stat_list_lock);
1960 node = rb_first(&iface_entry->tag_stat_tree);
1961 while (node) {
1962 ts_entry = rb_entry(node, struct tag_stat, tn.node);
1963 entry_uid = get_uid_from_tag(ts_entry->tn.tag);
1964 node = rb_next(node);
1965
1966 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1967 "ts tag=0x%llx (uid=%u)\n",
1968 input, ts_entry->tn.tag, entry_uid);
1969
1970 if (entry_uid != uid)
1971 continue;
1972 if (!acct_tag || ts_entry->tn.tag == tag) {
1973 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1974 "erase ts: %s 0x%llx %u\n",
1975 input, iface_entry->ifname,
1976 get_atag_from_tag(ts_entry->tn.tag),
1977 entry_uid);
1978 rb_erase(&ts_entry->tn.node,
1979 &iface_entry->tag_stat_tree);
1980 kfree(ts_entry);
1981 }
1982 }
1983 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1984 }
1985 spin_unlock_bh(&iface_stat_list_lock);
1986
1987 /* Cleanup the uid_tag_data */
1988 spin_lock_bh(&uid_tag_data_tree_lock);
1989 node = rb_first(&uid_tag_data_tree);
1990 while (node) {
1991 utd_entry = rb_entry(node, struct uid_tag_data, node);
1992 entry_uid = utd_entry->uid;
1993 node = rb_next(node);
1994
1995 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1996 "utd uid=%u\n",
1997 input, entry_uid);
1998
1999 if (entry_uid != uid)
2000 continue;
2001 /*
2002 * Go over the tag_refs, and those that don't have
2003 * sock_tags using them are freed.
2004 */
2005 put_tag_ref_tree(tag, utd_entry);
2006 put_utd_entry(utd_entry);
2007 }
2008 spin_unlock_bh(&uid_tag_data_tree_lock);
2009
2010 atomic64_inc(&qtu_events.delete_cmds);
2011 res = 0;
2012
2013err:
2014 return res;
2015}
2016
2017static int ctrl_cmd_counter_set(const char *input)
2018{
2019 char cmd;
2020 uid_t uid = 0;
2021 tag_t tag;
2022 int res, argc;
2023 struct tag_counter_set *tcs;
2024 int counter_set;
2025
2026 argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2027 CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2028 "set=%d uid=%u\n", input, argc, cmd,
2029 counter_set, uid);
2030 if (argc != 3) {
2031 res = -EINVAL;
2032 goto err;
2033 }
2034 if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2035 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2036 input);
2037 res = -EINVAL;
2038 goto err;
2039 }
2040 if (!can_manipulate_uids()) {
2041 pr_info("qtaguid: ctrl_counterset(%s): "
2042 "insufficient priv from pid=%u tgid=%u uid=%u\n",
2043 input, current->pid, current->tgid, current_fsuid());
2044 res = -EPERM;
2045 goto err;
2046 }
2047
2048 tag = make_tag_from_uid(uid);
2049 spin_lock_bh(&tag_counter_set_list_lock);
2050 tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2051 if (!tcs) {
2052 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2053 if (!tcs) {
2054 spin_unlock_bh(&tag_counter_set_list_lock);
2055 pr_err("qtaguid: ctrl_counterset(%s): "
2056 "failed to alloc counter set\n",
2057 input);
2058 res = -ENOMEM;
2059 goto err;
2060 }
2061 tcs->tn.tag = tag;
2062 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2063 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2064 "(uid=%u) set=%d\n",
2065 input, tag, get_uid_from_tag(tag), counter_set);
2066 }
2067 tcs->active_set = counter_set;
2068 spin_unlock_bh(&tag_counter_set_list_lock);
2069 atomic64_inc(&qtu_events.counter_set_changes);
2070 res = 0;
2071
2072err:
2073 return res;
2074}
2075
2076static int ctrl_cmd_tag(const char *input)
2077{
2078 char cmd;
2079 int sock_fd = 0;
2080 uid_t uid = 0;
2081 tag_t acct_tag = make_atag_from_value(0);
2082 tag_t full_tag;
2083 struct socket *el_socket;
2084 int res, argc;
2085 struct sock_tag *sock_tag_entry;
2086 struct tag_ref *tag_ref_entry;
2087 struct uid_tag_data *uid_tag_data_entry;
2088 struct proc_qtu_data *pqd_entry;
2089
2090 /* Unassigned args will get defaulted later. */
2091 argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
2092 CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2093 "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2094 acct_tag, uid);
2095 if (argc < 2) {
2096 res = -EINVAL;
2097 goto err;
2098 }
2099 el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
2100 if (!el_socket) {
2101 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2102 " sock_fd=%d err=%d\n", input, sock_fd, res);
2103 goto err;
2104 }
2105 CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2106 input, atomic_long_read(&el_socket->file->f_count),
2107 el_socket->sk);
2108 if (argc < 3) {
2109 acct_tag = make_atag_from_value(0);
2110 } else if (!valid_atag(acct_tag)) {
2111 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2112 res = -EINVAL;
2113 goto err_put;
2114 }
2115 CT_DEBUG("qtaguid: ctrl_tag(%s): "
2116 "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2117 "in_group=%d in_egroup=%d\n",
2118 input, current->pid, current->tgid, current_uid(),
2119 current_euid(), current_fsuid(),
2120 in_group_p(proc_ctrl_write_gid),
2121 in_egroup_p(proc_ctrl_write_gid));
2122 if (argc < 4) {
2123 uid = current_fsuid();
2124 } else if (!can_impersonate_uid(uid)) {
2125 pr_info("qtaguid: ctrl_tag(%s): "
2126 "insufficient priv from pid=%u tgid=%u uid=%u\n",
2127 input, current->pid, current->tgid, current_fsuid());
2128 res = -EPERM;
2129 goto err_put;
2130 }
2131 full_tag = combine_atag_with_uid(acct_tag, uid);
2132
2133 spin_lock_bh(&sock_tag_list_lock);
2134 sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2135 tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2136 if (IS_ERR(tag_ref_entry)) {
2137 res = PTR_ERR(tag_ref_entry);
2138 spin_unlock_bh(&sock_tag_list_lock);
2139 goto err_put;
2140 }
2141 tag_ref_entry->num_sock_tags++;
2142 if (sock_tag_entry) {
2143 struct tag_ref *prev_tag_ref_entry;
2144
2145 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2146 "st@%p ...->f_count=%ld\n",
2147 input, el_socket->sk, sock_tag_entry,
2148 atomic_long_read(&el_socket->file->f_count));
2149 /*
2150 * This is a re-tagging, so release the sock_fd that was
2151 * locked at the time of the 1st tagging.
2152 * There is still the ref from this call's sockfd_lookup() so
2153 * it can be done within the spinlock.
2154 */
2155 sockfd_put(sock_tag_entry->socket);
2156 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2157 &uid_tag_data_entry);
2158 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2159 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2160 prev_tag_ref_entry->num_sock_tags--;
2161 sock_tag_entry->tag = full_tag;
2162 } else {
2163 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2164 input, el_socket->sk);
2165 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2166 GFP_ATOMIC);
2167 if (!sock_tag_entry) {
2168 pr_err("qtaguid: ctrl_tag(%s): "
2169 "socket tag alloc failed\n",
2170 input);
2171 spin_unlock_bh(&sock_tag_list_lock);
2172 res = -ENOMEM;
2173 goto err_tag_unref_put;
2174 }
2175 sock_tag_entry->sk = el_socket->sk;
2176 sock_tag_entry->socket = el_socket;
2177 sock_tag_entry->pid = current->tgid;
2178 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
2179 uid);
2180 spin_lock_bh(&uid_tag_data_tree_lock);
2181 pqd_entry = proc_qtu_data_tree_search(
2182 &proc_qtu_data_tree, current->tgid);
2183 /*
2184 * TODO: remove if, and start failing.
2185 * At first, we want to catch user-space code that is not
2186 * opening the /dev/xt_qtaguid.
2187 */
2188 if (IS_ERR_OR_NULL(pqd_entry))
2189 pr_warn_once(
2190 "qtaguid: %s(): "
2191 "User space forgot to open /dev/xt_qtaguid? "
2192 "pid=%u tgid=%u uid=%u\n", __func__,
2193 current->pid, current->tgid,
2194 current_fsuid());
2195 else
2196 list_add(&sock_tag_entry->list,
2197 &pqd_entry->sock_tag_list);
2198 spin_unlock_bh(&uid_tag_data_tree_lock);
2199
2200 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2201 atomic64_inc(&qtu_events.sockets_tagged);
2202 }
2203 spin_unlock_bh(&sock_tag_list_lock);
2204 /* We keep the ref to the socket (file) until it is untagged */
2205 CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2206 input, sock_tag_entry,
2207 atomic_long_read(&el_socket->file->f_count));
2208 return 0;
2209
2210err_tag_unref_put:
2211 BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2212 tag_ref_entry->num_sock_tags--;
2213 free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2214err_put:
2215 CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2216 input, atomic_long_read(&el_socket->file->f_count) - 1);
2217 /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2218 sockfd_put(el_socket);
2219 return res;
2220
2221err:
2222 CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
2223 return res;
2224}
2225
2226static int ctrl_cmd_untag(const char *input)
2227{
2228 char cmd;
2229 int sock_fd = 0;
2230 struct socket *el_socket;
2231 int res, argc;
2232 struct sock_tag *sock_tag_entry;
2233 struct tag_ref *tag_ref_entry;
2234 struct uid_tag_data *utd_entry;
2235 struct proc_qtu_data *pqd_entry;
2236
2237 argc = sscanf(input, "%c %d", &cmd, &sock_fd);
2238 CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
2239 input, argc, cmd, sock_fd);
2240 if (argc < 2) {
2241 res = -EINVAL;
2242 goto err;
2243 }
2244 el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
2245 if (!el_socket) {
2246 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2247 " sock_fd=%d err=%d\n", input, sock_fd, res);
2248 goto err;
2249 }
2250 CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2251 input, atomic_long_read(&el_socket->file->f_count),
2252 el_socket->sk);
2253 spin_lock_bh(&sock_tag_list_lock);
2254 sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2255 if (!sock_tag_entry) {
2256 spin_unlock_bh(&sock_tag_list_lock);
2257 res = -EINVAL;
2258 goto err_put;
2259 }
2260 /*
2261 * The socket already belongs to the current process
2262 * so it can do whatever it wants to it.
2263 */
2264 rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
2265
2266 tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
2267 BUG_ON(!tag_ref_entry);
2268 BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2269 spin_lock_bh(&uid_tag_data_tree_lock);
2270 pqd_entry = proc_qtu_data_tree_search(
2271 &proc_qtu_data_tree, current->tgid);
2272 /*
2273 * TODO: remove if, and start failing.
2274 * At first, we want to catch user-space code that is not
2275 * opening the /dev/xt_qtaguid.
2276 */
2277 if (IS_ERR_OR_NULL(pqd_entry))
2278 pr_warn_once("qtaguid: %s(): "
2279 "User space forgot to open /dev/xt_qtaguid? "
2280 "pid=%u tgid=%u uid=%u\n", __func__,
2281 current->pid, current->tgid, current_fsuid());
2282 else
2283 list_del(&sock_tag_entry->list);
2284 spin_unlock_bh(&uid_tag_data_tree_lock);
2285 /*
2286 * We don't free tag_ref from the utd_entry here,
2287 * only during a cmd_delete().
2288 */
2289 tag_ref_entry->num_sock_tags--;
2290 spin_unlock_bh(&sock_tag_list_lock);
2291 /*
2292 * Release the sock_fd that was grabbed at tag time,
2293 * and once more for the sockfd_lookup() here.
2294 */
2295 sockfd_put(sock_tag_entry->socket);
2296 CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
2297 input, sock_tag_entry,
2298 atomic_long_read(&el_socket->file->f_count) - 1);
2299 sockfd_put(el_socket);
2300
2301 kfree(sock_tag_entry);
2302 atomic64_inc(&qtu_events.sockets_untagged);
2303
2304 return 0;
2305
2306err_put:
2307 CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
2308 input, atomic_long_read(&el_socket->file->f_count) - 1);
2309 /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2310 sockfd_put(el_socket);
2311 return res;
2312
2313err:
2314 CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
2315 return res;
2316}
2317
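/*
 * Summary of the command strings accepted through the ctrl file, as parsed
 * by the helpers above (arguments in [] default when omitted):
 *
 *   t <sock_fd> [<acct_tag> [<uid>]]   tag a socket
 *   u <sock_fd>                        untag a socket
 *   s <counter_set> <uid>              switch the active counter set
 *   d <acct_tag> [<uid>]               delete matching tags and stats
 */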
2318static int qtaguid_ctrl_parse(const char *input, int count)
2319{
2320 char cmd;
2321 int res;
2322
2323 cmd = input[0];
2324 /* Collect params for commands */
2325 switch (cmd) {
2326 case 'd':
2327 res = ctrl_cmd_delete(input);
2328 break;
2329
2330 case 's':
2331 res = ctrl_cmd_counter_set(input);
2332 break;
2333
2334 case 't':
2335 res = ctrl_cmd_tag(input);
2336 break;
2337
2338 case 'u':
2339 res = ctrl_cmd_untag(input);
2340 break;
2341
2342 default:
2343 res = -EINVAL;
2344 goto err;
2345 }
2346 if (!res)
2347 res = count;
2348err:
2349 CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
2350 return res;
2351}
2352
2353#define MAX_QTAGUID_CTRL_INPUT_LEN 255
2354static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2355 unsigned long count, void *data)
2356{
2357 char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2358
2359 if (unlikely(module_passive))
2360 return count;
2361
2362 if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2363 return -EINVAL;
2364
2365 if (copy_from_user(input_buf, buffer, count))
2366 return -EFAULT;
2367
2368 input_buf[count] = '\0';
2369 return qtaguid_ctrl_parse(input_buf, count);
2370}
2371
2372struct proc_print_info {
2373 char *outp;
2374 char **num_items_returned;
2375 struct iface_stat *iface_entry;
2376 struct tag_stat *ts_entry;
2377 int item_index;
2378 int items_to_skip;
2379 int char_count;
2380};
2381
2382static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
2383{
2384 int len;
2385 struct data_counters *cnts;
2386
2387 if (!ppi->item_index) {
2388 if (ppi->item_index++ < ppi->items_to_skip)
2389 return 0;
2390 len = snprintf(ppi->outp, ppi->char_count,
2391 "idx iface acct_tag_hex uid_tag_int cnt_set "
2392 "rx_bytes rx_packets "
2393 "tx_bytes tx_packets "
2394 "rx_tcp_bytes rx_tcp_packets "
2395 "rx_udp_bytes rx_udp_packets "
2396 "rx_other_bytes rx_other_packets "
2397 "tx_tcp_bytes tx_tcp_packets "
2398 "tx_udp_bytes tx_udp_packets "
2399 "tx_other_bytes tx_other_packets\n");
2400 } else {
2401 tag_t tag = ppi->ts_entry->tn.tag;
2402 uid_t stat_uid = get_uid_from_tag(tag);
2403
2404 if (!can_read_other_uid_stats(stat_uid)) {
2405 CT_DEBUG("qtaguid: stats line: "
2406 "%s 0x%llx %u: insufficient priv "
2407 "from pid=%u tgid=%u uid=%u\n",
2408 ppi->iface_entry->ifname,
2409 get_atag_from_tag(tag), stat_uid,
2410 current->pid, current->tgid, current_fsuid());
2411 return 0;
2412 }
2413 if (ppi->item_index++ < ppi->items_to_skip)
2414 return 0;
2415 cnts = &ppi->ts_entry->counters;
2416 len = snprintf(
2417 ppi->outp, ppi->char_count,
2418 "%d %s 0x%llx %u %u "
2419 "%llu %llu "
2420 "%llu %llu "
2421 "%llu %llu "
2422 "%llu %llu "
2423 "%llu %llu "
2424 "%llu %llu "
2425 "%llu %llu "
2426 "%llu %llu\n",
2427 ppi->item_index,
2428 ppi->iface_entry->ifname,
2429 get_atag_from_tag(tag),
2430 stat_uid,
2431 cnt_set,
2432 dc_sum_bytes(cnts, cnt_set, IFS_RX),
2433 dc_sum_packets(cnts, cnt_set, IFS_RX),
2434 dc_sum_bytes(cnts, cnt_set, IFS_TX),
2435 dc_sum_packets(cnts, cnt_set, IFS_TX),
2436 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2437 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2438 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2439 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2440 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2441 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2442 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2443 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2444 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2445 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2446 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2447 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
2448 }
2449 return len;
2450}
2451
2452static bool pp_sets(struct proc_print_info *ppi)
2453{
2454 int len;
2455 int counter_set;
2456 for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2457 counter_set++) {
2458 len = pp_stats_line(ppi, counter_set);
2459 if (len >= ppi->char_count) {
2460 *ppi->outp = '\0';
2461 return false;
2462 }
2463 if (len) {
2464 ppi->outp += len;
2465 ppi->char_count -= len;
2466 (*ppi->num_items_returned)++;
2467 }
2468 }
2469 return true;
2470}
2471
2472/*
2473 * Procfs reader to get all tag stats using style "1)" as described in
2474 * fs/proc/generic.c
2475 * Groups all protocols tx/rx bytes.
2476 */
2477static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
2478 off_t items_to_skip, int char_count, int *eof,
2479 void *data)
2480{
2481 struct proc_print_info ppi;
2482 int len;
2483
2484 ppi.outp = page;
2485 ppi.item_index = 0;
2486 ppi.char_count = char_count;
2487 ppi.num_items_returned = num_items_returned;
2488 ppi.items_to_skip = items_to_skip;
2489
2490 if (unlikely(module_passive)) {
2491 len = pp_stats_line(&ppi, 0);
2492 /* The header should always be shorter than the buffer. */
2493 BUG_ON(len >= ppi.char_count);
2494 (*num_items_returned)++;
2495 *eof = 1;
2496 return len;
2497 }
2498
2499 CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
2500 "char_count=%d *eof=%d\n", page, *num_items_returned,
2501 items_to_skip, char_count, *eof);
2502
2503 if (*eof)
2504 return 0;
2505
2506 /* The idx is there to help debug when things go belly up. */
2507 len = pp_stats_line(&ppi, 0);
2508 /* Don't advance the outp unless the whole line was printed */
2509 if (len >= ppi.char_count) {
2510 *ppi.outp = '\0';
2511 return ppi.outp - page;
2512 }
2513 if (len) {
2514 ppi.outp += len;
2515 ppi.char_count -= len;
2516 (*num_items_returned)++;
2517 }
2518
2519 spin_lock_bh(&iface_stat_list_lock);
2520 list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
2521 struct rb_node *node;
2522 spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
2523 for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
2524 node;
2525 node = rb_next(node)) {
2526 ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
2527 if (!pp_sets(&ppi)) {
2528 spin_unlock_bh(
2529 &ppi.iface_entry->tag_stat_list_lock);
2530 spin_unlock_bh(&iface_stat_list_lock);
2531 return ppi.outp - page;
2532 }
2533 }
2534 spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
2535 }
2536 spin_unlock_bh(&iface_stat_list_lock);
2537
2538 *eof = 1;
2539 return ppi.outp - page;
2540}
2541
2542/*------------------------------------------*/
2543static int qtudev_open(struct inode *inode, struct file *file)
2544{
2545 struct uid_tag_data *utd_entry;
2546 struct proc_qtu_data *pqd_entry;
2547 struct proc_qtu_data *new_pqd_entry;
2548 int res;
2549 bool utd_entry_found;
2550
2551 if (unlikely(qtu_proc_handling_passive))
2552 return 0;
2553
2554 DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2555 current->pid, current->tgid, current_fsuid());
2556
2557 spin_lock_bh(&uid_tag_data_tree_lock);
2558
2559 /* Look for existing uid data, or alloc one. */
2560 utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
2561 if (IS_ERR_OR_NULL(utd_entry)) {
2562 res = PTR_ERR(utd_entry);
2563 goto err;
2564 }
2565
2566 /* Look for existing PID based proc_data */
2567 pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2568 current->tgid);
2569 if (pqd_entry) {
2570 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2571 "%s already opened\n",
2572 current->pid, current->tgid, current_fsuid(),
2573 QTU_DEV_NAME);
2574 res = -EBUSY;
2575 goto err_unlock_free_utd;
2576 }
2577
2578 new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2579 if (!new_pqd_entry) {
2580 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2581 "proc data alloc failed\n",
2582 current->pid, current->tgid, current_fsuid());
2583 res = -ENOMEM;
2584 goto err_unlock_free_utd;
2585 }
2586 new_pqd_entry->pid = current->tgid;
2587 INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2588 new_pqd_entry->parent_tag_data = utd_entry;
2589 utd_entry->num_pqd++;
2590
2591 proc_qtu_data_tree_insert(new_pqd_entry,
2592 &proc_qtu_data_tree);
2593
2594 spin_unlock_bh(&uid_tag_data_tree_lock);
2595 DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2596 current_fsuid(), new_pqd_entry);
2597 file->private_data = new_pqd_entry;
2598 return 0;
2599
2600err_unlock_free_utd:
2601 if (!utd_entry_found) {
2602 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2603 kfree(utd_entry);
2604 }
2605 spin_unlock_bh(&uid_tag_data_tree_lock);
2606err:
2607 return res;
2608}
2609
2610static int qtudev_release(struct inode *inode, struct file *file)
2611{
2612 struct proc_qtu_data *pqd_entry = file->private_data;
2613 struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data;
2614 struct sock_tag *st_entry;
2615 struct rb_root st_to_free_tree = RB_ROOT;
2616 struct list_head *entry, *next;
2617 struct tag_ref *tr;
2618
2619 if (unlikely(qtu_proc_handling_passive))
2620 return 0;
2621
2622 /*
2623 * Do not trust the current->pid, it might just be a kworker cleaning
2624 * up after a dead proc.
2625 */
2626 DR_DEBUG("qtaguid: qtudev_release(): "
2627 "pid=%u tgid=%u uid=%u "
2628 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
2629 current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
2630 pqd_entry, pqd_entry->pid, utd_entry,
2631 utd_entry->num_active_tags);
2632
2633 spin_lock_bh(&sock_tag_list_lock);
2634 spin_lock_bh(&uid_tag_data_tree_lock);
2635
2636 list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
2637 st_entry = list_entry(entry, struct sock_tag, list);
2638 DR_DEBUG("qtaguid: %s(): "
2639 "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
2640 __func__,
2641 st_entry, st_entry->sk,
2642 current->pid, current->tgid,
2643 pqd_entry->parent_tag_data->uid);
2644
2645 utd_entry = uid_tag_data_tree_search(
2646 &uid_tag_data_tree,
2647 get_uid_from_tag(st_entry->tag));
2648 BUG_ON(IS_ERR_OR_NULL(utd_entry));
2649 DR_DEBUG("qtaguid: %s(): "
2650 "looking for tag=0x%llx in utd_entry=%p\n", __func__,
2651 st_entry->tag, utd_entry);
2652 tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
2653 st_entry->tag);
2654 BUG_ON(!tr);
2655 BUG_ON(tr->num_sock_tags <= 0);
2656 tr->num_sock_tags--;
2657 free_tag_ref_from_utd_entry(tr, utd_entry);
2658
2659 rb_erase(&st_entry->sock_node, &sock_tag_tree);
2660 list_del(&st_entry->list);
2661 /* Can't sockfd_put() within spinlock, do it later. */
2662 sock_tag_tree_insert(st_entry, &st_to_free_tree);
2663
2664 /*
2665 * Try to free the utd_entry if no other proc_qtu_data is
2666 * using it (num_pqd is 0) and it doesn't have active tags
2667 * (num_active_tags is 0).
2668 */
2669 put_utd_entry(utd_entry);
2670 }
2671
2672 rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
2673 BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
2674 pqd_entry->parent_tag_data->num_pqd--;
2675 put_utd_entry(pqd_entry->parent_tag_data);
2676 kfree(pqd_entry);
2677 file->private_data = NULL;
2678
2679 spin_unlock_bh(&uid_tag_data_tree_lock);
2680 spin_unlock_bh(&sock_tag_list_lock);
2681
2682
2683 sock_tag_tree_erase(&st_to_free_tree);
2684
2685 prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
2686 current->pid, current->tgid);
2687 return 0;
2688}
2689
2690/*------------------------------------------*/
2691static const struct file_operations qtudev_fops = {
2692 .owner = THIS_MODULE,
2693 .open = qtudev_open,
2694 .release = qtudev_release,
2695};
2696
2697static struct miscdevice qtu_device = {
2698 .minor = MISC_DYNAMIC_MINOR,
2699 .name = QTU_DEV_NAME,
2700 .fops = &qtudev_fops,
2701 /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
2702};
2703
2704/*------------------------------------------*/
2705static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2706{
2707 int ret;
2708 *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2709 if (!*res_procdir) {
2710 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2711 ret = -ENOMEM;
2712 goto no_dir;
2713 }
2714
2715 xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
2716 *res_procdir);
2717 if (!xt_qtaguid_ctrl_file) {
2718 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2719 " file\n");
2720 ret = -ENOMEM;
2721 goto no_ctrl_entry;
2722 }
2723 xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
2724 xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
2725
2726 xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
2727 *res_procdir);
2728 if (!xt_qtaguid_stats_file) {
2729 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2730 "file\n");
2731 ret = -ENOMEM;
2732 goto no_stats_entry;
2733 }
2734 xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
2735 /*
2736 * TODO: add support counter hacking
2737 * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2738 */
2739 return 0;
2740
2741no_stats_entry:
2742 remove_proc_entry("ctrl", *res_procdir);
2743no_ctrl_entry:
2744 remove_proc_entry("xt_qtaguid", NULL);
2745no_dir:
2746 return ret;
2747}
2748
2749static struct xt_match qtaguid_mt_reg __read_mostly = {
2750 /*
2751 * This module masquerades as the "owner" module so that iptables
2752 * tools can deal with it.
2753 */
2754 .name = "owner",
2755 .revision = 1,
2756 .family = NFPROTO_UNSPEC,
2757 .match = qtaguid_mt,
2758 .matchsize = sizeof(struct xt_qtaguid_match_info),
2759 .me = THIS_MODULE,
2760};
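/*
 * Since the match registers under the name "owner" (revision 1), rules are
 * added with the stock owner-match syntax, e.g. something like:
 *
 *   iptables -A OUTPUT -m owner --uid-owner 10003 -j DROP
 *
 * (illustrative only; the exact options available depend on the userspace
 * iptables build that understands this revision).
 */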
2761
2762static int __init qtaguid_mt_init(void)
2763{
2764 if (qtaguid_proc_register(&xt_qtaguid_procdir)
2765 || iface_stat_init(xt_qtaguid_procdir)
2766 || xt_register_match(&qtaguid_mt_reg)
2767 || misc_register(&qtu_device))
2768 return -1;
2769 return 0;
2770}
2771
2772/*
2773 * TODO: allow unloading of the module.
2774 * For now stats are permanent.
2775 * Kconfig forces 'y' or 'n' and never 'm'.
2776 */
2777
2778module_init(qtaguid_mt_init);
2779MODULE_AUTHOR("jpa <jpa@google.com>");
2780MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
2781MODULE_LICENSE("GPL");
2782MODULE_ALIAS("ipt_owner");
2783MODULE_ALIAS("ip6t_owner");
2784MODULE_ALIAS("ipt_qtaguid");
2785MODULE_ALIAS("ip6t_qtaguid");
diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h
new file mode 100644
index 00000000000..02479d6d317
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_internal.h
@@ -0,0 +1,330 @@
1/*
2 * Kernel iptables module to track stats for packets based on user tags.
3 *
4 * (C) 2011 Google, Inc
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __XT_QTAGUID_INTERNAL_H__
11#define __XT_QTAGUID_INTERNAL_H__
12
13#include <linux/types.h>
14#include <linux/rbtree.h>
15#include <linux/spinlock_types.h>
16#include <linux/workqueue.h>
17
18/* Iface handling */
19#define IDEBUG_MASK (1<<0)
20/* Iptable Matching. Per packet. */
21#define MDEBUG_MASK (1<<1)
22/* Red-black tree handling. Per packet. */
23#define RDEBUG_MASK (1<<2)
24/* procfs ctrl/stats handling */
25#define CDEBUG_MASK (1<<3)
26/* dev and resource tracking */
27#define DDEBUG_MASK (1<<4)
28
 29/* E.g. (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */
30#define DEFAULT_DEBUG_MASK 0
31
32/*
33 * (Un)Define these *DEBUG to compile out/in the pr_debug calls.
34 * All undef: text size ~ 0x3030; all def: ~ 0x4404.
35 */
36#define IDEBUG
37#define MDEBUG
38#define RDEBUG
39#define CDEBUG
40#define DDEBUG
41
42#define MSK_DEBUG(mask, ...) do { \
43 if (unlikely(qtaguid_debug_mask & (mask))) \
44 pr_debug(__VA_ARGS__); \
45 } while (0)
46#ifdef IDEBUG
47#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__)
48#else
49#define IF_DEBUG(...) no_printk(__VA_ARGS__)
50#endif
51#ifdef MDEBUG
52#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__)
53#else
54#define MT_DEBUG(...) no_printk(__VA_ARGS__)
55#endif
56#ifdef RDEBUG
57#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__)
58#else
59#define RB_DEBUG(...) no_printk(__VA_ARGS__)
60#endif
61#ifdef CDEBUG
62#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__)
63#else
64#define CT_DEBUG(...) no_printk(__VA_ARGS__)
65#endif
66#ifdef DDEBUG
67#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__)
68#else
69#define DR_DEBUG(...) no_printk(__VA_ARGS__)
70#endif
71
72extern uint qtaguid_debug_mask;
73
74/*---------------------------------------------------------------------------*/
75/*
76 * Tags:
77 *
78 * They represent what the data usage counters will be tracked against.
79 * By default a tag is just based on the UID.
 80 * The UID is used as the base for policing, and cannot be ignored.
81 * So a tag will always at least represent a UID (uid_tag).
82 *
83 * A tag can be augmented with an "accounting tag" which is associated
84 * with a UID.
85 * User space can set the acct_tag portion of the tag which is then used
86 * with sockets: all data belonging to that socket will be counted against the
87 * tag. The policing is then based on the tag's uid_tag portion,
88 * and stats are collected for the acct_tag portion separately.
89 *
90 * There could be
91 * a: {acct_tag=1, uid_tag=10003}
92 * b: {acct_tag=2, uid_tag=10003}
93 * c: {acct_tag=3, uid_tag=10003}
94 * d: {acct_tag=0, uid_tag=10003}
95 * a, b, and c represent tags associated with specific sockets.
96 * d is for the totals for that uid, including all untagged traffic.
97 * Typically d is used with policing/quota rules.
98 *
99 * We want tag_t big enough to distinguish uid_t and acct_tag.
100 * It might become a struct if needed.
101 * Nothing should be using it as an int.
102 */
103typedef uint64_t tag_t; /* Only used via accessors */
104
105#define TAG_UID_MASK 0xFFFFFFFFULL
106#define TAG_ACCT_MASK (~0xFFFFFFFFULL)
107
108static inline int tag_compare(tag_t t1, tag_t t2)
109{
110 return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
111}
112
113static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
114{
115 return acct_tag | uid;
116}
117static inline tag_t make_tag_from_uid(uid_t uid)
118{
119 return uid;
120}
121static inline uid_t get_uid_from_tag(tag_t tag)
122{
123 return tag & TAG_UID_MASK;
124}
125static inline tag_t get_utag_from_tag(tag_t tag)
126{
127 return tag & TAG_UID_MASK;
128}
129static inline tag_t get_atag_from_tag(tag_t tag)
130{
131 return tag & TAG_ACCT_MASK;
132}
133
134static inline bool valid_atag(tag_t tag)
135{
136 return !(tag & TAG_UID_MASK);
137}
138static inline tag_t make_atag_from_value(uint32_t value)
139{
140 return (uint64_t)value << 32;
141}
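/*
 * A worked example (values purely illustrative) for acct_tag value 1 and
 * uid 10003 (0x2713):
 *   make_atag_from_value(1)                 == 0x0000000100000000
 *   combine_atag_with_uid(atag, 10003)      == 0x0000000100002713
 *   get_uid_from_tag(tag)                   == 10003
 *   get_atag_from_tag(tag)                  == 0x0000000100000000
 *   valid_atag(get_atag_from_tag(tag))      == true (low 32 bits are zero)
 */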
142/*---------------------------------------------------------------------------*/
143
144/*
145 * Maximum number of socket tags that a UID is allowed to have active.
146 * Multiple processes belonging to the same UID contribute towards this limit.
147 * Special UIDs that can impersonate a UID also contribute (e.g. download
148 * manager, ...)
149 */
150#define DEFAULT_MAX_SOCK_TAGS 1024
151
152/*
153 * For now we only track 2 sets of counters.
154 * The default set is 0.
155 * Userspace can activate another set for a given uid being tracked.
156 */
157#define IFS_MAX_COUNTER_SETS 2
158
159enum ifs_tx_rx {
160 IFS_TX,
161 IFS_RX,
162 IFS_MAX_DIRECTIONS
163};
164
165/* For now, TCP, UDP, the rest */
166enum ifs_proto {
167 IFS_TCP,
168 IFS_UDP,
169 IFS_PROTO_OTHER,
170 IFS_MAX_PROTOS
171};
172
173struct byte_packet_counters {
174 uint64_t bytes;
175 uint64_t packets;
176};
177
178struct data_counters {
179 struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
180};
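/*
 * Indexed as bpc[counter_set][direction][protocol]; e.g. (illustrative)
 * bpc[0][IFS_RX][IFS_TCP].bytes is the TCP byte count received on the
 * default counter set 0.
 */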
181
182/* Generic X based nodes used as a base for rb_tree ops */
183struct tag_node {
184 struct rb_node node;
185 tag_t tag;
186};
187
188struct tag_stat {
189 struct tag_node tn;
190 struct data_counters counters;
191 /*
192 * If this tag is acct_tag based, we need to count against the
193 * matching parent uid_tag.
194 */
195 struct data_counters *parent_counters;
196};
197
198struct iface_stat {
199 struct list_head list; /* in iface_stat_list */
200 char *ifname;
201 bool active;
202 /* net_dev is only valid for active iface_stat */
203 struct net_device *net_dev;
204
205 struct byte_packet_counters totals[IFS_MAX_DIRECTIONS];
206 /*
207 * We keep the last_known, because some devices reset their counters
208 * just before NETDEV_UP, while some will reset just before
209 * NETDEV_REGISTER (which is more normal).
210 * So now, if the device didn't do a NETDEV_UNREGISTER and we see
211	 * its current dev stats smaller than what was previously known, we
212 * assume an UNREGISTER and just use the last_known.
213 */
214 struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS];
215 /* last_known is usable when last_known_valid is true */
216 bool last_known_valid;
217
218 struct proc_dir_entry *proc_ptr;
219
220 struct rb_root tag_stat_tree;
221 spinlock_t tag_stat_list_lock;
222};
223
224/* This is needed to create proc_dir_entries from atomic context. */
225struct iface_stat_work {
226 struct work_struct iface_work;
227 struct iface_stat *iface_entry;
228};
229
230/*
231 * Track tag that this socket is transferring data for, and not necessarily
232 * the uid that owns the socket.
233 * This is the tag against which tag_stat.counters will be billed.
234 * These structs need to be looked up by sock and pid.
235 */
236struct sock_tag {
237 struct rb_node sock_node;
238 struct sock *sk; /* Only used as a number, never dereferenced */
239 /* The socket is needed for sockfd_put() */
240 struct socket *socket;
241 /* Used to associate with a given pid */
242 struct list_head list; /* in proc_qtu_data.sock_tag_list */
243 pid_t pid;
244
245 tag_t tag;
246};
247
248struct qtaguid_event_counts {
249 /* Various successful events */
250 atomic64_t sockets_tagged;
251 atomic64_t sockets_untagged;
252 atomic64_t counter_set_changes;
253 atomic64_t delete_cmds;
254 atomic64_t iface_events; /* Number of NETDEV_* events handled */
255
256 atomic64_t match_calls; /* Number of times iptables called mt */
257 /*
258 * match_found_sk_*: numbers related to the netfilter matching
259 * function finding a sock for the sk_buff.
260 * Total skbs processed is sum(match_found*).
261 */
262 atomic64_t match_found_sk; /* An sk was already in the sk_buff. */
263 /* The connection tracker had or didn't have the sk. */
264 atomic64_t match_found_sk_in_ct;
265 atomic64_t match_found_no_sk_in_ct;
266 /*
267 * No sk could be found. No apparent owner. Could happen with
268 * unsolicited traffic.
269 */
270 atomic64_t match_no_sk;
271 /*
272 * The file ptr in the sk_socket wasn't there.
273 * This might happen for traffic while the socket is being closed.
274 */
275 atomic64_t match_no_sk_file;
276};
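
/*
 * Illustrative sketch only, not part of the original header: one reading of
 * the comment above, where the total number of sk_buffs processed is the
 * sum of the match_found_* counters. The function name is hypothetical.
 */
static inline uint64_t qtaguid_total_skbs_example(struct qtaguid_event_counts *ec)
{
	return atomic64_read(&ec->match_found_sk) +
	       atomic64_read(&ec->match_found_sk_in_ct) +
	       atomic64_read(&ec->match_found_no_sk_in_ct);
}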
277
278/* Track the set active_set for the given tag. */
279struct tag_counter_set {
280 struct tag_node tn;
281 int active_set;
282};
283
284/*----------------------------------------------*/
285/*
286 * The qtu uid data is used to track resources that are created directly or
287 * indirectly by processes (uid tracked).
288 * It is shared by the processes with the same uid.
289 * Some of the resources will be counted to prevent further rogue allocations,
290 * some will need freeing once the owner process (uid) exits.
291 */
292struct uid_tag_data {
293 struct rb_node node;
294 uid_t uid;
295
296 /*
297 * For the uid, how many accounting tags have been set.
298 */
299 int num_active_tags;
300 /* Track the number of proc_qtu_data that reference it */
301 int num_pqd;
302 struct rb_root tag_ref_tree;
303 /* No tag_node_tree_lock; use uid_tag_data_tree_lock */
304};
305
306struct tag_ref {
307 struct tag_node tn;
308
309 /*
310 * This tracks the number of active sockets that have a tag on them
311 * which matches this tag_ref.tn.tag.
312 * A tag ref can live on after the sockets are untagged.
313 * A tag ref can only be removed during a tag delete command.
314 */
315 int num_sock_tags;
316};
317
318struct proc_qtu_data {
319 struct rb_node node;
320 pid_t pid;
321
322 struct uid_tag_data *parent_tag_data;
323
324 /* Tracks the sock_tags that need freeing upon this proc's death */
325 struct list_head sock_tag_list;
326 /* No spinlock_t sock_tag_list_lock; use the global one. */
327};
328
329/*----------------------------------------------*/
330#endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */
diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c
new file mode 100644
index 00000000000..39176785c91
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_print.c
@@ -0,0 +1,556 @@
1/*
2 * Pretty printing Support for iptables xt_qtaguid module.
3 *
4 * (C) 2011 Google, Inc
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11/*
12 * Most of the functions in this file just waste time if DEBUG is not defined.
13 * The matching xt_qtaguid_print.h will static inline empty funcs if the needed
14 * debug flags are not defined.
15 * Those funcs that fail to allocate memory will panic as there is no need to
16 * hobble along just pretending to do the requested work.
17 */
18
19#define DEBUG
20
21#include <linux/fs.h>
22#include <linux/gfp.h>
23#include <linux/net.h>
24#include <linux/rbtree.h>
25#include <linux/slab.h>
26#include <linux/spinlock_types.h>
27
28
29#include "xt_qtaguid_internal.h"
30#include "xt_qtaguid_print.h"
31
32#ifdef DDEBUG
33
34static void _bug_on_err_or_null(void *ptr)
35{
36 if (IS_ERR_OR_NULL(ptr)) {
37 pr_err("qtaguid: kmalloc failed\n");
38 BUG();
39 }
40}
41
42char *pp_tag_t(tag_t *tag)
43{
44 char *res;
45
46 if (!tag)
47 res = kasprintf(GFP_ATOMIC, "tag_t@null{}");
48 else
49 res = kasprintf(GFP_ATOMIC,
50 "tag_t@%p{tag=0x%llx, uid=%u}",
51 tag, *tag, get_uid_from_tag(*tag));
52 _bug_on_err_or_null(res);
53 return res;
54}
55
56char *pp_data_counters(struct data_counters *dc, bool showValues)
57{
58 char *res;
59
60 if (!dc)
61 res = kasprintf(GFP_ATOMIC, "data_counters@null{}");
62 else if (showValues)
63 res = kasprintf(
64 GFP_ATOMIC, "data_counters@%p{"
65 "set0{"
66 "rx{"
67 "tcp{b=%llu, p=%llu}, "
68 "udp{b=%llu, p=%llu},"
69 "other{b=%llu, p=%llu}}, "
70 "tx{"
71 "tcp{b=%llu, p=%llu}, "
72 "udp{b=%llu, p=%llu},"
73 "other{b=%llu, p=%llu}}}, "
74 "set1{"
75 "rx{"
76 "tcp{b=%llu, p=%llu}, "
77 "udp{b=%llu, p=%llu},"
78 "other{b=%llu, p=%llu}}, "
79 "tx{"
80 "tcp{b=%llu, p=%llu}, "
81 "udp{b=%llu, p=%llu},"
82 "other{b=%llu, p=%llu}}}}",
83 dc,
84 dc->bpc[0][IFS_RX][IFS_TCP].bytes,
85 dc->bpc[0][IFS_RX][IFS_TCP].packets,
86 dc->bpc[0][IFS_RX][IFS_UDP].bytes,
87 dc->bpc[0][IFS_RX][IFS_UDP].packets,
88 dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes,
89 dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets,
90 dc->bpc[0][IFS_TX][IFS_TCP].bytes,
91 dc->bpc[0][IFS_TX][IFS_TCP].packets,
92 dc->bpc[0][IFS_TX][IFS_UDP].bytes,
93 dc->bpc[0][IFS_TX][IFS_UDP].packets,
94 dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes,
95 dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets,
96 dc->bpc[1][IFS_RX][IFS_TCP].bytes,
97 dc->bpc[1][IFS_RX][IFS_TCP].packets,
98 dc->bpc[1][IFS_RX][IFS_UDP].bytes,
99 dc->bpc[1][IFS_RX][IFS_UDP].packets,
100 dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes,
101 dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets,
102 dc->bpc[1][IFS_TX][IFS_TCP].bytes,
103 dc->bpc[1][IFS_TX][IFS_TCP].packets,
104 dc->bpc[1][IFS_TX][IFS_UDP].bytes,
105 dc->bpc[1][IFS_TX][IFS_UDP].packets,
106 dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes,
107 dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets);
108 else
109 res = kasprintf(GFP_ATOMIC, "data_counters@%p{...}", dc);
110 _bug_on_err_or_null(res);
111 return res;
112}
113
114char *pp_tag_node(struct tag_node *tn)
115{
116 char *tag_str;
117 char *res;
118
119 if (!tn) {
120 res = kasprintf(GFP_ATOMIC, "tag_node@null{}");
121 _bug_on_err_or_null(res);
122 return res;
123 }
124 tag_str = pp_tag_t(&tn->tag);
125 res = kasprintf(GFP_ATOMIC,
126 "tag_node@%p{tag=%s}",
127 tn, tag_str);
128 _bug_on_err_or_null(res);
129 kfree(tag_str);
130 return res;
131}
132
133char *pp_tag_ref(struct tag_ref *tr)
134{
135 char *tn_str;
136 char *res;
137
138 if (!tr) {
139 res = kasprintf(GFP_ATOMIC, "tag_ref@null{}");
140 _bug_on_err_or_null(res);
141 return res;
142 }
143 tn_str = pp_tag_node(&tr->tn);
144 res = kasprintf(GFP_ATOMIC,
145 "tag_ref@%p{%s, num_sock_tags=%d}",
146 tr, tn_str, tr->num_sock_tags);
147 _bug_on_err_or_null(res);
148 kfree(tn_str);
149 return res;
150}
151
152char *pp_tag_stat(struct tag_stat *ts)
153{
154 char *tn_str;
155 char *counters_str;
156 char *parent_counters_str;
157 char *res;
158
159 if (!ts) {
160 res = kasprintf(GFP_ATOMIC, "tag_stat@null{}");
161 _bug_on_err_or_null(res);
162 return res;
163 }
164 tn_str = pp_tag_node(&ts->tn);
165 counters_str = pp_data_counters(&ts->counters, true);
166 parent_counters_str = pp_data_counters(ts->parent_counters, false);
167 res = kasprintf(GFP_ATOMIC,
168 "tag_stat@%p{%s, counters=%s, parent_counters=%s}",
169 ts, tn_str, counters_str, parent_counters_str);
170 _bug_on_err_or_null(res);
171 kfree(tn_str);
172 kfree(counters_str);
173 kfree(parent_counters_str);
174 return res;
175}
176
177char *pp_iface_stat(struct iface_stat *is)
178{
179 char *res;
180 if (!is)
181 res = kasprintf(GFP_ATOMIC, "iface_stat@null{}");
182 else
183 res = kasprintf(GFP_ATOMIC, "iface_stat@%p{"
184 "list=list_head{...}, "
185 "ifname=%s, "
186 "total={rx={bytes=%llu, "
187 "packets=%llu}, "
188 "tx={bytes=%llu, "
189 "packets=%llu}}, "
190 "last_known_valid=%d, "
191 "last_known={rx={bytes=%llu, "
192 "packets=%llu}, "
193 "tx={bytes=%llu, "
194 "packets=%llu}}, "
195 "active=%d, "
196 "net_dev=%p, "
197 "proc_ptr=%p, "
198 "tag_stat_tree=rb_root{...}}",
199 is,
200 is->ifname,
201 is->totals[IFS_RX].bytes,
202 is->totals[IFS_RX].packets,
203 is->totals[IFS_TX].bytes,
204 is->totals[IFS_TX].packets,
205 is->last_known_valid,
206 is->last_known[IFS_RX].bytes,
207 is->last_known[IFS_RX].packets,
208 is->last_known[IFS_TX].bytes,
209 is->last_known[IFS_TX].packets,
210 is->active,
211 is->net_dev,
212 is->proc_ptr);
213 _bug_on_err_or_null(res);
214 return res;
215}
216
217char *pp_sock_tag(struct sock_tag *st)
218{
219 char *tag_str;
220 char *res;
221
222 if (!st) {
223 res = kasprintf(GFP_ATOMIC, "sock_tag@null{}");
224 _bug_on_err_or_null(res);
225 return res;
226 }
227 tag_str = pp_tag_t(&st->tag);
228 res = kasprintf(GFP_ATOMIC, "sock_tag@%p{"
229 "sock_node=rb_node{...}, "
230 "sk=%p socket=%p (f_count=%lu), list=list_head{...}, "
231 "pid=%u, tag=%s}",
232 st, st->sk, st->socket, atomic_long_read(
233 &st->socket->file->f_count),
234 st->pid, tag_str);
235 _bug_on_err_or_null(res);
236 kfree(tag_str);
237 return res;
238}
239
240char *pp_uid_tag_data(struct uid_tag_data *utd)
241{
242 char *res;
243
244 if (!utd)
245 res = kasprintf(GFP_ATOMIC, "uid_tag_data@null{}");
246 else
247 res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{"
248 "uid=%u, num_active_acct_tags=%d, "
249 "num_pqd=%d, "
250 "tag_node_tree=rb_root{...}, "
251 "proc_qtu_data_tree=rb_root{...}}",
252 utd, utd->uid,
253 utd->num_active_tags, utd->num_pqd);
254 _bug_on_err_or_null(res);
255 return res;
256}
257
258char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
259{
260 char *parent_tag_data_str;
261 char *res;
262
263 if (!pqd) {
264 res = kasprintf(GFP_ATOMIC, "proc_qtu_data@null{}");
265 _bug_on_err_or_null(res);
266 return res;
267 }
268 parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data);
269 res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{"
270 "node=rb_node{...}, pid=%u, "
271 "parent_tag_data=%s, "
272 "sock_tag_list=list_head{...}}",
273 pqd, pqd->pid, parent_tag_data_str
274 );
275 _bug_on_err_or_null(res);
276 kfree(parent_tag_data_str);
277 return res;
278}
279
280/*------------------------------------------*/
281void prdebug_sock_tag_tree(int indent_level,
282 struct rb_root *sock_tag_tree)
283{
284 struct rb_node *node;
285 struct sock_tag *sock_tag_entry;
286 char *str;
287
288 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
289 return;
290
291 if (RB_EMPTY_ROOT(sock_tag_tree)) {
292 str = "sock_tag_tree=rb_root{}";
293 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
294 return;
295 }
296
297 str = "sock_tag_tree=rb_root{";
298 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
299 indent_level++;
300 for (node = rb_first(sock_tag_tree);
301 node;
302 node = rb_next(node)) {
303 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
304 str = pp_sock_tag(sock_tag_entry);
305 pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
306 kfree(str);
307 }
308 indent_level--;
309 str = "}";
310 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
311}
312
313void prdebug_sock_tag_list(int indent_level,
314 struct list_head *sock_tag_list)
315{
316 struct sock_tag *sock_tag_entry;
317 char *str;
318
319 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
320 return;
321
322 if (list_empty(sock_tag_list)) {
323 str = "sock_tag_list=list_head{}";
324 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
325 return;
326 }
327
328 str = "sock_tag_list=list_head{";
329 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
330 indent_level++;
331 list_for_each_entry(sock_tag_entry, sock_tag_list, list) {
332 str = pp_sock_tag(sock_tag_entry);
333 pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
334 kfree(str);
335 }
336 indent_level--;
337 str = "}";
338 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
339}
340
341void prdebug_proc_qtu_data_tree(int indent_level,
342 struct rb_root *proc_qtu_data_tree)
343{
344 char *str;
345 struct rb_node *node;
346 struct proc_qtu_data *proc_qtu_data_entry;
347
348 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
349 return;
350
351 if (RB_EMPTY_ROOT(proc_qtu_data_tree)) {
352 str = "proc_qtu_data_tree=rb_root{}";
353 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
354 return;
355 }
356
357 str = "proc_qtu_data_tree=rb_root{";
358 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
359 indent_level++;
360 for (node = rb_first(proc_qtu_data_tree);
361 node;
362 node = rb_next(node)) {
363 proc_qtu_data_entry = rb_entry(node,
364 struct proc_qtu_data,
365 node);
366 str = pp_proc_qtu_data(proc_qtu_data_entry);
367 pr_debug("%*d: %s,\n", indent_level*2, indent_level,
368 str);
369 kfree(str);
370 indent_level++;
371 prdebug_sock_tag_list(indent_level,
372 &proc_qtu_data_entry->sock_tag_list);
373 indent_level--;
374
375 }
376 indent_level--;
377 str = "}";
378 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
379}
380
381void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
382{
383 char *str;
384 struct rb_node *node;
385 struct tag_ref *tag_ref_entry;
386
387 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
388 return;
389
390 if (RB_EMPTY_ROOT(tag_ref_tree)) {
391 str = "tag_ref_tree{}";
392 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
393 return;
394 }
395
396 str = "tag_ref_tree{";
397 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
398 indent_level++;
399 for (node = rb_first(tag_ref_tree);
400 node;
401 node = rb_next(node)) {
402 tag_ref_entry = rb_entry(node,
403 struct tag_ref,
404 tn.node);
405 str = pp_tag_ref(tag_ref_entry);
406 pr_debug("%*d: %s,\n", indent_level*2, indent_level,
407 str);
408 kfree(str);
409 }
410 indent_level--;
411 str = "}";
412 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
413}
414
415void prdebug_uid_tag_data_tree(int indent_level,
416 struct rb_root *uid_tag_data_tree)
417{
418 char *str;
419 struct rb_node *node;
420 struct uid_tag_data *uid_tag_data_entry;
421
422 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
423 return;
424
425 if (RB_EMPTY_ROOT(uid_tag_data_tree)) {
426 str = "uid_tag_data_tree=rb_root{}";
427 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
428 return;
429 }
430
431 str = "uid_tag_data_tree=rb_root{";
432 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
433 indent_level++;
434 for (node = rb_first(uid_tag_data_tree);
435 node;
436 node = rb_next(node)) {
437 uid_tag_data_entry = rb_entry(node, struct uid_tag_data,
438 node);
439 str = pp_uid_tag_data(uid_tag_data_entry);
440 pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
441 kfree(str);
442 if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) {
443 indent_level++;
444 prdebug_tag_ref_tree(indent_level,
445 &uid_tag_data_entry->tag_ref_tree);
446 indent_level--;
447 }
448 }
449 indent_level--;
450 str = "}";
451 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
452}
453
454void prdebug_tag_stat_tree(int indent_level,
455 struct rb_root *tag_stat_tree)
456{
457 char *str;
458 struct rb_node *node;
459 struct tag_stat *ts_entry;
460
461 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
462 return;
463
464 if (RB_EMPTY_ROOT(tag_stat_tree)) {
465 str = "tag_stat_tree{}";
466 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
467 return;
468 }
469
470 str = "tag_stat_tree{";
471 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
472 indent_level++;
473 for (node = rb_first(tag_stat_tree);
474 node;
475 node = rb_next(node)) {
476 ts_entry = rb_entry(node, struct tag_stat, tn.node);
477 str = pp_tag_stat(ts_entry);
478 pr_debug("%*d: %s\n", indent_level*2, indent_level,
479 str);
480 kfree(str);
481 }
482 indent_level--;
483 str = "}";
484 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
485}
486
487void prdebug_iface_stat_list(int indent_level,
488 struct list_head *iface_stat_list)
489{
490 char *str;
491 struct iface_stat *iface_entry;
492
493 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
494 return;
495
496 if (list_empty(iface_stat_list)) {
497 str = "iface_stat_list=list_head{}";
498 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
499 return;
500 }
501
502 str = "iface_stat_list=list_head{";
503 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
504 indent_level++;
505 list_for_each_entry(iface_entry, iface_stat_list, list) {
506 str = pp_iface_stat(iface_entry);
507 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
508 kfree(str);
509
510 spin_lock_bh(&iface_entry->tag_stat_list_lock);
511 if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) {
512 indent_level++;
513 prdebug_tag_stat_tree(indent_level,
514 &iface_entry->tag_stat_tree);
515 indent_level--;
516 }
517 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
518 }
519 indent_level--;
520 str = "}";
521 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
522}
523
524#endif /* ifdef DDEBUG */
525/*------------------------------------------*/
526static const char * const netdev_event_strings[] = {
527 "netdev_unknown",
528 "NETDEV_UP",
529 "NETDEV_DOWN",
530 "NETDEV_REBOOT",
531 "NETDEV_CHANGE",
532 "NETDEV_REGISTER",
533 "NETDEV_UNREGISTER",
534 "NETDEV_CHANGEMTU",
535 "NETDEV_CHANGEADDR",
536 "NETDEV_GOING_DOWN",
537 "NETDEV_CHANGENAME",
538 "NETDEV_FEAT_CHANGE",
539 "NETDEV_BONDING_FAILOVER",
540 "NETDEV_PRE_UP",
541 "NETDEV_PRE_TYPE_CHANGE",
542 "NETDEV_POST_TYPE_CHANGE",
543 "NETDEV_POST_INIT",
544 "NETDEV_UNREGISTER_BATCH",
545 "NETDEV_RELEASE",
546 "NETDEV_NOTIFY_PEERS",
547 "NETDEV_JOIN",
548};
549
550const char *netdev_evt_str(int netdev_event)
551{
552 if (netdev_event < 0
553 || netdev_event >= ARRAY_SIZE(netdev_event_strings))
554 return "bad event num";
555 return netdev_event_strings[netdev_event];
556}
diff --git a/net/netfilter/xt_qtaguid_print.h b/net/netfilter/xt_qtaguid_print.h
new file mode 100644
index 00000000000..b63871a0be5
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_print.h
@@ -0,0 +1,120 @@
1/*
2 * Pretty printing Support for iptables xt_qtaguid module.
3 *
4 * (C) 2011 Google, Inc
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __XT_QTAGUID_PRINT_H__
11#define __XT_QTAGUID_PRINT_H__
12
13#include "xt_qtaguid_internal.h"
14
15#ifdef DDEBUG
16
17char *pp_tag_t(tag_t *tag);
18char *pp_data_counters(struct data_counters *dc, bool showValues);
19char *pp_tag_node(struct tag_node *tn);
20char *pp_tag_ref(struct tag_ref *tr);
21char *pp_tag_stat(struct tag_stat *ts);
22char *pp_iface_stat(struct iface_stat *is);
23char *pp_sock_tag(struct sock_tag *st);
24char *pp_uid_tag_data(struct uid_tag_data *qtd);
25char *pp_proc_qtu_data(struct proc_qtu_data *pqd);
26
27/*------------------------------------------*/
28void prdebug_sock_tag_list(int indent_level,
29 struct list_head *sock_tag_list);
30void prdebug_sock_tag_tree(int indent_level,
31 struct rb_root *sock_tag_tree);
32void prdebug_proc_qtu_data_tree(int indent_level,
33 struct rb_root *proc_qtu_data_tree);
34void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree);
35void prdebug_uid_tag_data_tree(int indent_level,
36 struct rb_root *uid_tag_data_tree);
37void prdebug_tag_stat_tree(int indent_level,
38 struct rb_root *tag_stat_tree);
39void prdebug_iface_stat_list(int indent_level,
40 struct list_head *iface_stat_list);
41
42#else
43
44/*------------------------------------------*/
45static inline char *pp_tag_t(tag_t *tag)
46{
47 return NULL;
48}
49static inline char *pp_data_counters(struct data_counters *dc, bool showValues)
50{
51 return NULL;
52}
53static inline char *pp_tag_node(struct tag_node *tn)
54{
55 return NULL;
56}
57static inline char *pp_tag_ref(struct tag_ref *tr)
58{
59 return NULL;
60}
61static inline char *pp_tag_stat(struct tag_stat *ts)
62{
63 return NULL;
64}
65static inline char *pp_iface_stat(struct iface_stat *is)
66{
67 return NULL;
68}
69static inline char *pp_sock_tag(struct sock_tag *st)
70{
71 return NULL;
72}
73static inline char *pp_uid_tag_data(struct uid_tag_data *qtd)
74{
75 return NULL;
76}
77static inline char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
78{
79 return NULL;
80}
81
82/*------------------------------------------*/
83static inline
84void prdebug_sock_tag_list(int indent_level,
85 struct list_head *sock_tag_list)
86{
87}
88static inline
89void prdebug_sock_tag_tree(int indent_level,
90 struct rb_root *sock_tag_tree)
91{
92}
93static inline
94void prdebug_proc_qtu_data_tree(int indent_level,
95 struct rb_root *proc_qtu_data_tree)
96{
97}
98static inline
99void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
100{
101}
102static inline
103void prdebug_uid_tag_data_tree(int indent_level,
104 struct rb_root *uid_tag_data_tree)
105{
106}
107static inline
108void prdebug_tag_stat_tree(int indent_level,
109 struct rb_root *tag_stat_tree)
110{
111}
112static inline
113void prdebug_iface_stat_list(int indent_level,
114 struct list_head *iface_stat_list)
115{
116}
117#endif
118/*------------------------------------------*/
119const char *netdev_evt_str(int netdev_event);
120#endif /* ifndef __XT_QTAGUID_PRINT_H__ */
diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c
new file mode 100644
index 00000000000..3c72bea2dd6
--- /dev/null
+++ b/net/netfilter/xt_quota2.c
@@ -0,0 +1,381 @@
1/*
2 * xt_quota2 - enhanced xt_quota that can count upwards and in packets
3 * as a minimal accounting match.
4 * by Jan Engelhardt <jengelh@medozas.de>, 2008
5 *
6 * Originally based on xt_quota.c:
7 * netfilter module to enforce network quotas
8 * Sam Johnston <samj@samj.net>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License; either
12 * version 2 of the License, as published by the Free Software Foundation.
13 */
14#include <linux/list.h>
15#include <linux/proc_fs.h>
16#include <linux/skbuff.h>
17#include <linux/spinlock.h>
18#include <asm/atomic.h>
19
20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_quota2.h>
22#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
23#include <linux/netfilter_ipv4/ipt_ULOG.h>
24#endif
25
26/**
27 * @lock: lock to protect quota writers from each other
28 */
29struct xt_quota_counter {
30 u_int64_t quota;
31 spinlock_t lock;
32 struct list_head list;
33 atomic_t ref;
34 char name[sizeof(((struct xt_quota_mtinfo2 *)NULL)->name)];
35 struct proc_dir_entry *procfs_entry;
36};
37
38#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
39/* Harald's favorite number +1 :D From ipt_ULOG.C */
40static int qlog_nl_event = 112;
41module_param_named(event_num, qlog_nl_event, uint, S_IRUGO | S_IWUSR);
42MODULE_PARM_DESC(event_num,
43 "Event number for NETLINK_NFLOG message. 0 disables log."
44 "111 is what ipt_ULOG uses.");
45static struct sock *nflognl;
46#endif
47
48static LIST_HEAD(counter_list);
49static DEFINE_SPINLOCK(counter_list_lock);
50
51static struct proc_dir_entry *proc_xt_quota;
52static unsigned int quota_list_perms = S_IRUGO | S_IWUSR;
53static unsigned int quota_list_uid = 0;
54static unsigned int quota_list_gid = 0;
55module_param_named(perms, quota_list_perms, uint, S_IRUGO | S_IWUSR);
56module_param_named(uid, quota_list_uid, uint, S_IRUGO | S_IWUSR);
57module_param_named(gid, quota_list_gid, uint, S_IRUGO | S_IWUSR);
58
59
60#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
61static void quota2_log(unsigned int hooknum,
62 const struct sk_buff *skb,
63 const struct net_device *in,
64 const struct net_device *out,
65 const char *prefix)
66{
67 ulog_packet_msg_t *pm;
68 struct sk_buff *log_skb;
69 size_t size;
70 struct nlmsghdr *nlh;
71
72 if (!qlog_nl_event)
73 return;
74
75 size = NLMSG_SPACE(sizeof(*pm));
76 size = max(size, (size_t)NLMSG_GOODSIZE);
77 log_skb = alloc_skb(size, GFP_ATOMIC);
78 if (!log_skb) {
79 pr_err("xt_quota2: cannot alloc skb for logging\n");
80 return;
81 }
82
83 /* NLMSG_PUT() uses "goto nlmsg_failure" */
84 nlh = NLMSG_PUT(log_skb, /*pid*/0, /*seq*/0, qlog_nl_event,
85 sizeof(*pm));
86 pm = NLMSG_DATA(nlh);
87 if (skb->tstamp.tv64 == 0)
88 __net_timestamp((struct sk_buff *)skb);
89 pm->data_len = 0;
90 pm->hook = hooknum;
91 if (prefix != NULL)
92 strlcpy(pm->prefix, prefix, sizeof(pm->prefix));
93 else
94 *(pm->prefix) = '\0';
95 if (in)
96 strlcpy(pm->indev_name, in->name, sizeof(pm->indev_name));
97 else
98 pm->indev_name[0] = '\0';
99
100 if (out)
101 strlcpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
102 else
103 pm->outdev_name[0] = '\0';
104
105 NETLINK_CB(log_skb).dst_group = 1;
106 pr_debug("throwing 1 packets to netlink group 1\n");
107 netlink_broadcast(nflognl, log_skb, 0, 1, GFP_ATOMIC);
108
109nlmsg_failure: /* Used within NLMSG_PUT() */
110 pr_debug("xt_quota2: error during NLMSG_PUT\n");
111}
112#else
113static void quota2_log(unsigned int hooknum,
114 const struct sk_buff *skb,
115 const struct net_device *in,
116 const struct net_device *out,
117 const char *prefix)
118{
119}
120#endif /* if+else CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG */
121
122static int quota_proc_read(char *page, char **start, off_t offset,
123 int count, int *eof, void *data)
124{
125 struct xt_quota_counter *e = data;
126 int ret;
127
128 spin_lock_bh(&e->lock);
129 ret = snprintf(page, PAGE_SIZE, "%llu\n", e->quota);
130 spin_unlock_bh(&e->lock);
131 return ret;
132}
133
134static int quota_proc_write(struct file *file, const char __user *input,
135 unsigned long size, void *data)
136{
137 struct xt_quota_counter *e = data;
138 char buf[sizeof("18446744073709551616")];
139
140 if (size > sizeof(buf))
141 size = sizeof(buf);
142 if (copy_from_user(buf, input, size) != 0)
143 return -EFAULT;
144 buf[sizeof(buf)-1] = '\0';
145
146 spin_lock_bh(&e->lock);
147 e->quota = simple_strtoull(buf, NULL, 0);
148 spin_unlock_bh(&e->lock);
149 return size;
150}
151
152static struct xt_quota_counter *
153q2_new_counter(const struct xt_quota_mtinfo2 *q, bool anon)
154{
155 struct xt_quota_counter *e;
156 unsigned int size;
157
158 /* Do not need all the procfs things for anonymous counters. */
159 size = anon ? offsetof(typeof(*e), list) : sizeof(*e);
160 e = kmalloc(size, GFP_KERNEL);
161 if (e == NULL)
162 return NULL;
163
164 e->quota = q->quota;
165 spin_lock_init(&e->lock);
166 if (!anon) {
167 INIT_LIST_HEAD(&e->list);
168 atomic_set(&e->ref, 1);
169 strlcpy(e->name, q->name, sizeof(e->name));
170 }
171 return e;
172}
173
174/**
175 * q2_get_counter - get ref to counter or create new
176 * @name: name of counter
177 */
178static struct xt_quota_counter *
179q2_get_counter(const struct xt_quota_mtinfo2 *q)
180{
181 struct proc_dir_entry *p;
182 struct xt_quota_counter *e = NULL;
183 struct xt_quota_counter *new_e;
184
185 if (*q->name == '\0')
186 return q2_new_counter(q, true);
187
188 /* No need to hold a lock while getting a new counter */
189 new_e = q2_new_counter(q, false);
190 if (new_e == NULL)
191 goto out;
192
193 spin_lock_bh(&counter_list_lock);
194 list_for_each_entry(e, &counter_list, list)
195 if (strcmp(e->name, q->name) == 0) {
196 atomic_inc(&e->ref);
197 spin_unlock_bh(&counter_list_lock);
198 kfree(new_e);
199 pr_debug("xt_quota2: old counter name=%s", e->name);
200 return e;
201 }
202 e = new_e;
203 pr_debug("xt_quota2: new_counter name=%s", e->name);
204 list_add_tail(&e->list, &counter_list);
205 /* The entry having a refcount of 1 is not directly destructible.
206 * This func has not yet returned the new entry, thus iptables
207 * has no references for destroying this entry.
208 * For another rule to try to destroy it, this func would first need
209 * to be re-invoked and acquire a new ref for the same named quota.
210 * Nobody will access the e->procfs_entry either.
211 * So release the lock. */
212 spin_unlock_bh(&counter_list_lock);
213
214 /* create_proc_entry() is not spin_lock happy */
215 p = e->procfs_entry = create_proc_entry(e->name, quota_list_perms,
216 proc_xt_quota);
217
218 if (IS_ERR_OR_NULL(p)) {
219 spin_lock_bh(&counter_list_lock);
220 list_del(&e->list);
221 spin_unlock_bh(&counter_list_lock);
222 goto out;
223 }
224 p->data = e;
225 p->read_proc = quota_proc_read;
226 p->write_proc = quota_proc_write;
227 p->uid = quota_list_uid;
228 p->gid = quota_list_gid;
229 return e;
230
231 out:
232 kfree(e);
233 return NULL;
234}
235
236static int quota_mt2_check(const struct xt_mtchk_param *par)
237{
238 struct xt_quota_mtinfo2 *q = par->matchinfo;
239
240 pr_debug("xt_quota2: check() flags=0x%04x", q->flags);
241
242 if (q->flags & ~XT_QUOTA_MASK)
243 return -EINVAL;
244
245 q->name[sizeof(q->name)-1] = '\0';
246 if (*q->name == '.' || strchr(q->name, '/') != NULL) {
247 printk(KERN_ERR "xt_quota.3: illegal name\n");
248 return -EINVAL;
249 }
250
251 q->master = q2_get_counter(q);
252 if (q->master == NULL) {
253 printk(KERN_ERR "xt_quota.3: memory alloc failure\n");
254 return -ENOMEM;
255 }
256
257 return 0;
258}
259
260static void quota_mt2_destroy(const struct xt_mtdtor_param *par)
261{
262 struct xt_quota_mtinfo2 *q = par->matchinfo;
263 struct xt_quota_counter *e = q->master;
264
265 if (*q->name == '\0') {
266 kfree(e);
267 return;
268 }
269
270 spin_lock_bh(&counter_list_lock);
271 if (!atomic_dec_and_test(&e->ref)) {
272 spin_unlock_bh(&counter_list_lock);
273 return;
274 }
275
276 list_del(&e->list);
277 remove_proc_entry(e->name, proc_xt_quota);
278 spin_unlock_bh(&counter_list_lock);
279 kfree(e);
280}
281
282static bool
283quota_mt2(const struct sk_buff *skb, struct xt_action_param *par)
284{
285 struct xt_quota_mtinfo2 *q = (void *)par->matchinfo;
286 struct xt_quota_counter *e = q->master;
287 bool ret = q->flags & XT_QUOTA_INVERT;
288
289 spin_lock_bh(&e->lock);
290 if (q->flags & XT_QUOTA_GROW) {
291 /*
292 * While no_change is pointless in "grow" mode, we will
293 * implement it here simply to have a consistent behavior.
294 */
295 if (!(q->flags & XT_QUOTA_NO_CHANGE)) {
296 e->quota += (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
297 }
298 ret = true;
299 } else {
300 if (e->quota >= skb->len) {
301 if (!(q->flags & XT_QUOTA_NO_CHANGE))
302 e->quota -= (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
303 ret = !ret;
304 } else {
305 /* We are transitioning, log that fact. */
306 if (e->quota) {
307 quota2_log(par->hooknum,
308 skb,
309 par->in,
310 par->out,
311 q->name);
312 }
313 /* we do not allow even small packets from now on */
314 e->quota = 0;
315 }
316 }
317 spin_unlock_bh(&e->lock);
318 return ret;
319}
320
321static struct xt_match quota_mt2_reg[] __read_mostly = {
322 {
323 .name = "quota2",
324 .revision = 3,
325 .family = NFPROTO_IPV4,
326 .checkentry = quota_mt2_check,
327 .match = quota_mt2,
328 .destroy = quota_mt2_destroy,
329 .matchsize = sizeof(struct xt_quota_mtinfo2),
330 .me = THIS_MODULE,
331 },
332 {
333 .name = "quota2",
334 .revision = 3,
335 .family = NFPROTO_IPV6,
336 .checkentry = quota_mt2_check,
337 .match = quota_mt2,
338 .destroy = quota_mt2_destroy,
339 .matchsize = sizeof(struct xt_quota_mtinfo2),
340 .me = THIS_MODULE,
341 },
342};
343
344static int __init quota_mt2_init(void)
345{
346 int ret;
347 pr_debug("xt_quota2: init()");
348
349#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
350 nflognl = netlink_kernel_create(&init_net,
351 NETLINK_NFLOG, 1, NULL,
352 NULL, THIS_MODULE);
353 if (!nflognl)
354 return -ENOMEM;
355#endif
356
357 proc_xt_quota = proc_mkdir("xt_quota", init_net.proc_net);
358 if (proc_xt_quota == NULL)
359 return -EACCES;
360
361 ret = xt_register_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg));
362 if (ret < 0)
363 remove_proc_entry("xt_quota", init_net.proc_net);
364 pr_debug("xt_quota2: init() %d", ret);
365 return ret;
366}
367
368static void __exit quota_mt2_exit(void)
369{
370 xt_unregister_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg));
371 remove_proc_entry("xt_quota", init_net.proc_net);
372}
373
374module_init(quota_mt2_init);
375module_exit(quota_mt2_exit);
376MODULE_DESCRIPTION("Xtables: countdown quota match; up counter");
377MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
378MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
379MODULE_LICENSE("GPL");
380MODULE_ALIAS("ipt_quota2");
381MODULE_ALIAS("ip6t_quota2");
diff --git a/net/rds/xlist.h b/net/rds/xlist.h
new file mode 100644
index 00000000000..e6b5190dadd
--- /dev/null
+++ b/net/rds/xlist.h
@@ -0,0 +1,80 @@
1#ifndef _LINUX_XLIST_H
2#define _LINUX_XLIST_H
3
4#include <linux/stddef.h>
5#include <linux/poison.h>
6#include <linux/prefetch.h>
7#include <asm/system.h>
8
9struct xlist_head {
10 struct xlist_head *next;
11};
12
13static inline void INIT_XLIST_HEAD(struct xlist_head *list)
14{
15 list->next = NULL;
16}
17
18static inline int xlist_empty(struct xlist_head *head)
19{
20 return head->next == NULL;
21}
22
23static inline void xlist_add(struct xlist_head *new, struct xlist_head *tail,
24 struct xlist_head *head)
25{
26 struct xlist_head *cur;
27 struct xlist_head *check;
28
29 while (1) {
30 cur = head->next;
31 tail->next = cur;
32 check = cmpxchg(&head->next, cur, new);
33 if (check == cur)
34 break;
35 }
36}
37
38static inline struct xlist_head *xlist_del_head(struct xlist_head *head)
39{
40 struct xlist_head *cur;
41 struct xlist_head *check;
42 struct xlist_head *next;
43
44 while (1) {
45 cur = head->next;
46 if (!cur)
47 goto out;
48
49 next = cur->next;
50 check = cmpxchg(&head->next, cur, next);
51 if (check == cur)
52 goto out;
53 }
54out:
55 return cur;
56}
57
58static inline struct xlist_head *xlist_del_head_fast(struct xlist_head *head)
59{
60 struct xlist_head *cur;
61
62 cur = head->next;
63 if (!cur)
64 return NULL;
65
66 head->next = cur->next;
67 return cur;
68}
69
70static inline void xlist_splice(struct xlist_head *list,
71 struct xlist_head *head)
72{
73 struct xlist_head *cur;
74
75 WARN_ON(head->next);
76 cur = xchg(&list->next, NULL);
77 head->next = cur;
78}
79
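/*
 * Illustrative usage sketch, not part of this header: pushing a single node
 * and popping it back. For a single node, the same node is both the "new"
 * head and the "tail" of the chain handed to xlist_add(). The embedding
 * struct and function names are hypothetical; container_of() comes from
 * <linux/kernel.h>.
 */
struct xlist_example_item {
	struct xlist_head node;
	int value;
};

static inline void xlist_push_one_example(struct xlist_head *list,
					  struct xlist_example_item *item)
{
	INIT_XLIST_HEAD(&item->node);
	xlist_add(&item->node, &item->node, list);
}

static inline struct xlist_example_item *
xlist_pop_one_example(struct xlist_head *list)
{
	struct xlist_head *node = xlist_del_head(list);

	return node ? container_of(node, struct xlist_example_item, node)
		    : NULL;
}
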
80#endif
diff --git a/net/tipc/log.h b/net/tipc/log.h
new file mode 100644
index 00000000000..2248d96238e
--- /dev/null
+++ b/net/tipc/log.h
@@ -0,0 +1,67 @@
1/*
2 * net/tipc/log.h: Include file for TIPC print buffer routines
3 *
4 * Copyright (c) 1997-2006, Ericsson AB
5 * Copyright (c) 2005-2007, Wind River Systems
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the names of the copyright holders nor the names of its
17 * contributors may be used to endorse or promote products derived from
18 * this software without specific prior written permission.
19 *
20 * Alternatively, this software may be distributed under the terms of the
21 * GNU General Public License ("GPL") version 2 as published by the Free
22 * Software Foundation.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36
37#ifndef _TIPC_LOG_H
38#define _TIPC_LOG_H
39
40/**
41 * struct print_buf - TIPC print buffer structure
42 * @buf: pointer to character array containing print buffer contents
43 * @size: size of character array
44 * @crs: pointer to first unused space in character array (i.e. final NUL)
45 * @echo: echo output to system console if non-zero
46 */
47
48struct print_buf {
49 char *buf;
50 u32 size;
51 char *crs;
52 int echo;
53};
54
55#define TIPC_PB_MIN_SIZE 64 /* minimum size for a print buffer's array */
56#define TIPC_PB_MAX_STR 512 /* max printable string (with trailing NUL) */
57
58void tipc_printbuf_init(struct print_buf *pb, char *buf, u32 size);
59int tipc_printbuf_validate(struct print_buf *pb);
60
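/*
 * Illustrative sketch, not part of this header: wrapping a caller-owned
 * character array in a print_buf before handing it to the TIPC print
 * routines. Per the TIPC_PB_MIN_SIZE comment above, the array is assumed to
 * be at least that large. The function name is hypothetical.
 */
static inline void tipc_printbuf_example_setup(struct print_buf *pb,
					       char *area)
{
	tipc_printbuf_init(pb, area, TIPC_PB_MIN_SIZE);
}
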
61int tipc_log_resize(int log_size);
62
63struct sk_buff *tipc_log_resize_cmd(const void *req_tlv_area,
64 int req_tlv_space);
65struct sk_buff *tipc_log_dump(void);
66
67#endif