Diffstat (limited to 'net/netfilter')
-rw-r--r--  net/netfilter/nfnetlink_queue.c        1028
-rw-r--r--  net/netfilter/xt_NOTRACK.c               53
-rw-r--r--  net/netfilter/xt_qtaguid.c             2785
-rw-r--r--  net/netfilter/xt_qtaguid_internal.h     330
-rw-r--r--  net/netfilter/xt_qtaguid_print.c        556
-rw-r--r--  net/netfilter/xt_qtaguid_print.h        120
-rw-r--r--  net/netfilter/xt_quota2.c               381
7 files changed, 5253 insertions, 0 deletions
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
new file mode 100644
index 00000000000..a80b0cb03f1
--- /dev/null
+++ b/net/netfilter/nfnetlink_queue.c
@@ -0,0 +1,1028 @@
1/*
2 * This is a module which is used for queueing packets and communicating with
3 * userspace via nfnetlink.
4 *
5 * (C) 2005 by Harald Welte <laforge@netfilter.org>
6 * (C) 2007 by Patrick McHardy <kaber@trash.net>
7 *
8 * Based on the old ipv4-only ip_queue.c:
9 * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
10 * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as
14 * published by the Free Software Foundation.
15 *
16 */
17#include <linux/module.h>
18#include <linux/skbuff.h>
19#include <linux/init.h>
20#include <linux/spinlock.h>
21#include <linux/slab.h>
22#include <linux/notifier.h>
23#include <linux/netdevice.h>
24#include <linux/netfilter.h>
25#include <linux/proc_fs.h>
26#include <linux/netfilter_ipv4.h>
27#include <linux/netfilter_ipv6.h>
28#include <linux/netfilter/nfnetlink.h>
29#include <linux/netfilter/nfnetlink_queue.h>
30#include <linux/list.h>
31#include <net/sock.h>
32#include <net/netfilter/nf_queue.h>
33
34#include <linux/atomic.h>
35
36#ifdef CONFIG_BRIDGE_NETFILTER
37#include "../bridge/br_private.h"
38#endif
39
40#define NFQNL_QMAX_DEFAULT 1024
41
42struct nfqnl_instance {
43 struct hlist_node hlist; /* global list of queues */
44 struct rcu_head rcu;
45
46 int peer_pid;
47 unsigned int queue_maxlen;
48 unsigned int copy_range;
49 unsigned int queue_dropped;
50 unsigned int queue_user_dropped;
51
52
53 u_int16_t queue_num; /* number of this queue */
54 u_int8_t copy_mode;
55/*
 56 * The following fields are dirtied for each queued packet;
 57 * keep them in the same cache line if possible.
58 */
59 spinlock_t lock;
60 unsigned int queue_total;
61 unsigned int id_sequence; /* 'sequence' of pkt ids */
62 struct list_head queue_list; /* packets in queue */
63};
64
65typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
66
67static DEFINE_SPINLOCK(instances_lock);
68
69#define INSTANCE_BUCKETS 16
70static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly;
71
72static inline u_int8_t instance_hashfn(u_int16_t queue_num)
73{
74 return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS;
75}
76
77static struct nfqnl_instance *
78instance_lookup(u_int16_t queue_num)
79{
80 struct hlist_head *head;
81 struct hlist_node *pos;
82 struct nfqnl_instance *inst;
83
84 head = &instance_table[instance_hashfn(queue_num)];
85 hlist_for_each_entry_rcu(inst, pos, head, hlist) {
86 if (inst->queue_num == queue_num)
87 return inst;
88 }
89 return NULL;
90}
91
92static struct nfqnl_instance *
93instance_create(u_int16_t queue_num, int pid)
94{
95 struct nfqnl_instance *inst;
96 unsigned int h;
97 int err;
98
99 spin_lock(&instances_lock);
100 if (instance_lookup(queue_num)) {
101 err = -EEXIST;
102 goto out_unlock;
103 }
104
105 inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
106 if (!inst) {
107 err = -ENOMEM;
108 goto out_unlock;
109 }
110
111 inst->queue_num = queue_num;
112 inst->peer_pid = pid;
113 inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
114 inst->copy_range = 0xfffff;
115 inst->copy_mode = NFQNL_COPY_NONE;
116 spin_lock_init(&inst->lock);
117 INIT_LIST_HEAD(&inst->queue_list);
118
119 if (!try_module_get(THIS_MODULE)) {
120 err = -EAGAIN;
121 goto out_free;
122 }
123
124 h = instance_hashfn(queue_num);
125 hlist_add_head_rcu(&inst->hlist, &instance_table[h]);
126
127 spin_unlock(&instances_lock);
128
129 return inst;
130
131out_free:
132 kfree(inst);
133out_unlock:
134 spin_unlock(&instances_lock);
135 return ERR_PTR(err);
136}
137
138static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
139 unsigned long data);
140
141static void
142instance_destroy_rcu(struct rcu_head *head)
143{
144 struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
145 rcu);
146
147 nfqnl_flush(inst, NULL, 0);
148 kfree(inst);
149 module_put(THIS_MODULE);
150}
151
152static void
153__instance_destroy(struct nfqnl_instance *inst)
154{
155 hlist_del_rcu(&inst->hlist);
156 call_rcu(&inst->rcu, instance_destroy_rcu);
157}
158
159static void
160instance_destroy(struct nfqnl_instance *inst)
161{
162 spin_lock(&instances_lock);
163 __instance_destroy(inst);
164 spin_unlock(&instances_lock);
165}
166
167static inline void
168__enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
169{
170 list_add_tail(&entry->list, &queue->queue_list);
171 queue->queue_total++;
172}
173
174static void
175__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
176{
177 list_del(&entry->list);
178 queue->queue_total--;
179}
180
181static struct nf_queue_entry *
182find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
183{
184 struct nf_queue_entry *entry = NULL, *i;
185
186 spin_lock_bh(&queue->lock);
187
188 list_for_each_entry(i, &queue->queue_list, list) {
189 if (i->id == id) {
190 entry = i;
191 break;
192 }
193 }
194
195 if (entry)
196 __dequeue_entry(queue, entry);
197
198 spin_unlock_bh(&queue->lock);
199
200 return entry;
201}
202
203static void
204nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
205{
206 struct nf_queue_entry *entry, *next;
207
208 spin_lock_bh(&queue->lock);
209 list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
210 if (!cmpfn || cmpfn(entry, data)) {
211 list_del(&entry->list);
212 queue->queue_total--;
213 nf_reinject(entry, NF_DROP);
214 }
215 }
216 spin_unlock_bh(&queue->lock);
217}
218
219static struct sk_buff *
220nfqnl_build_packet_message(struct nfqnl_instance *queue,
221 struct nf_queue_entry *entry,
222 __be32 **packet_id_ptr)
223{
224 sk_buff_data_t old_tail;
225 size_t size;
226 size_t data_len = 0;
227 struct sk_buff *skb;
228 struct nlattr *nla;
229 struct nfqnl_msg_packet_hdr *pmsg;
230 struct nlmsghdr *nlh;
231 struct nfgenmsg *nfmsg;
232 struct sk_buff *entskb = entry->skb;
233 struct net_device *indev;
234 struct net_device *outdev;
235
236 size = NLMSG_SPACE(sizeof(struct nfgenmsg))
237 + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
238 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
239 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
240#ifdef CONFIG_BRIDGE_NETFILTER
241 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
242 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
243#endif
244 + nla_total_size(sizeof(u_int32_t)) /* mark */
245 + nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
246 + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
247
248 outdev = entry->outdev;
249
250 switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
251 case NFQNL_COPY_META:
252 case NFQNL_COPY_NONE:
253 break;
254
255 case NFQNL_COPY_PACKET:
256 if (entskb->ip_summed == CHECKSUM_PARTIAL &&
257 skb_checksum_help(entskb))
258 return NULL;
259
260 data_len = ACCESS_ONCE(queue->copy_range);
261 if (data_len == 0 || data_len > entskb->len)
262 data_len = entskb->len;
263
264 size += nla_total_size(data_len);
265 break;
266 }
267
268
269 skb = alloc_skb(size, GFP_ATOMIC);
270 if (!skb)
271 goto nlmsg_failure;
272
273 old_tail = skb->tail;
274 nlh = NLMSG_PUT(skb, 0, 0,
275 NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
276 sizeof(struct nfgenmsg));
277 nfmsg = NLMSG_DATA(nlh);
278 nfmsg->nfgen_family = entry->pf;
279 nfmsg->version = NFNETLINK_V0;
280 nfmsg->res_id = htons(queue->queue_num);
281
282 nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg));
283 pmsg = nla_data(nla);
284 pmsg->hw_protocol = entskb->protocol;
285 pmsg->hook = entry->hook;
286 *packet_id_ptr = &pmsg->packet_id;
287
288 indev = entry->indev;
289 if (indev) {
290#ifndef CONFIG_BRIDGE_NETFILTER
291 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex));
292#else
293 if (entry->pf == PF_BRIDGE) {
294 /* Case 1: indev is physical input device, we need to
295 * look for bridge group (when called from
296 * netfilter_bridge) */
297 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
298 htonl(indev->ifindex));
299 /* this is the bridge group "brX" */
300 /* rcu_read_lock()ed by __nf_queue */
301 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
302 htonl(br_port_get_rcu(indev)->br->dev->ifindex));
303 } else {
304 /* Case 2: indev is bridge group, we need to look for
305 * physical device (when called from ipv4) */
306 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
307 htonl(indev->ifindex));
308 if (entskb->nf_bridge && entskb->nf_bridge->physindev)
309 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
310 htonl(entskb->nf_bridge->physindev->ifindex));
311 }
312#endif
313 }
314
315 if (outdev) {
316#ifndef CONFIG_BRIDGE_NETFILTER
317 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex));
318#else
319 if (entry->pf == PF_BRIDGE) {
320 /* Case 1: outdev is physical output device, we need to
321 * look for bridge group (when called from
322 * netfilter_bridge) */
323 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
324 htonl(outdev->ifindex));
325 /* this is the bridge group "brX" */
326 /* rcu_read_lock()ed by __nf_queue */
327 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
328 htonl(br_port_get_rcu(outdev)->br->dev->ifindex));
329 } else {
330 /* Case 2: outdev is bridge group, we need to look for
331 * physical output device (when called from ipv4) */
332 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
333 htonl(outdev->ifindex));
334 if (entskb->nf_bridge && entskb->nf_bridge->physoutdev)
335 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
336 htonl(entskb->nf_bridge->physoutdev->ifindex));
337 }
338#endif
339 }
340
341 if (entskb->mark)
342 NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark));
343
344 if (indev && entskb->dev &&
345 entskb->mac_header != entskb->network_header) {
346 struct nfqnl_msg_packet_hw phw;
347 int len = dev_parse_header(entskb, phw.hw_addr);
348 if (len) {
349 phw.hw_addrlen = htons(len);
350 NLA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
351 }
352 }
353
354 if (entskb->tstamp.tv64) {
355 struct nfqnl_msg_packet_timestamp ts;
356 struct timeval tv = ktime_to_timeval(entskb->tstamp);
357 ts.sec = cpu_to_be64(tv.tv_sec);
358 ts.usec = cpu_to_be64(tv.tv_usec);
359
360 NLA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts);
361 }
362
363 if (data_len) {
364 struct nlattr *nla;
365 int sz = nla_attr_size(data_len);
366
367 if (skb_tailroom(skb) < nla_total_size(data_len)) {
368 printk(KERN_WARNING "nf_queue: no tailroom!\n");
369 goto nlmsg_failure;
370 }
371
372 nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len));
373 nla->nla_type = NFQA_PAYLOAD;
374 nla->nla_len = sz;
375
376 if (skb_copy_bits(entskb, 0, nla_data(nla), data_len))
377 BUG();
378 }
379
380 nlh->nlmsg_len = skb->tail - old_tail;
381 return skb;
382
383nlmsg_failure:
384nla_put_failure:
385 if (skb)
386 kfree_skb(skb);
387 if (net_ratelimit())
388 printk(KERN_ERR "nf_queue: error creating packet message\n");
389 return NULL;
390}
391
392static int
393nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
394{
395 struct sk_buff *nskb;
396 struct nfqnl_instance *queue;
397 int err = -ENOBUFS;
398 __be32 *packet_id_ptr;
399
400 /* rcu_read_lock()ed by nf_hook_slow() */
401 queue = instance_lookup(queuenum);
402 if (!queue) {
403 err = -ESRCH;
404 goto err_out;
405 }
406
407 if (queue->copy_mode == NFQNL_COPY_NONE) {
408 err = -EINVAL;
409 goto err_out;
410 }
411
412 nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr);
413 if (nskb == NULL) {
414 err = -ENOMEM;
415 goto err_out;
416 }
417 spin_lock_bh(&queue->lock);
418
419 if (!queue->peer_pid) {
420 err = -EINVAL;
421 goto err_out_free_nskb;
422 }
423 if (queue->queue_total >= queue->queue_maxlen) {
424 queue->queue_dropped++;
425 if (net_ratelimit())
426 printk(KERN_WARNING "nf_queue: full at %d entries, "
 427 "dropping packet(s).\n",
428 queue->queue_total);
429 goto err_out_free_nskb;
430 }
431 entry->id = ++queue->id_sequence;
432 *packet_id_ptr = htonl(entry->id);
433
434 /* nfnetlink_unicast will either free the nskb or add it to a socket */
435 err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT);
436 if (err < 0) {
437 queue->queue_user_dropped++;
438 goto err_out_unlock;
439 }
440
441 __enqueue_entry(queue, entry);
442
443 spin_unlock_bh(&queue->lock);
444 return 0;
445
446err_out_free_nskb:
447 kfree_skb(nskb);
448err_out_unlock:
449 spin_unlock_bh(&queue->lock);
450err_out:
451 return err;
452}
453
454static int
455nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e)
456{
457 struct sk_buff *nskb;
458 int diff;
459
460 diff = data_len - e->skb->len;
461 if (diff < 0) {
462 if (pskb_trim(e->skb, data_len))
463 return -ENOMEM;
464 } else if (diff > 0) {
465 if (data_len > 0xFFFF)
466 return -EINVAL;
467 if (diff > skb_tailroom(e->skb)) {
468 nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
469 diff, GFP_ATOMIC);
470 if (!nskb) {
471 printk(KERN_WARNING "nf_queue: OOM "
472 "in mangle, dropping packet\n");
473 return -ENOMEM;
474 }
475 kfree_skb(e->skb);
476 e->skb = nskb;
477 }
478 skb_put(e->skb, diff);
479 }
480 if (!skb_make_writable(e->skb, data_len))
481 return -ENOMEM;
482 skb_copy_to_linear_data(e->skb, data, data_len);
483 e->skb->ip_summed = CHECKSUM_NONE;
484 return 0;
485}
486
487static int
488nfqnl_set_mode(struct nfqnl_instance *queue,
489 unsigned char mode, unsigned int range)
490{
491 int status = 0;
492
493 spin_lock_bh(&queue->lock);
494 switch (mode) {
495 case NFQNL_COPY_NONE:
496 case NFQNL_COPY_META:
497 queue->copy_mode = mode;
498 queue->copy_range = 0;
499 break;
500
501 case NFQNL_COPY_PACKET:
502 queue->copy_mode = mode;
503 /* we're using struct nlattr which has 16bit nla_len */
504 if (range > 0xffff)
505 queue->copy_range = 0xffff;
506 else
507 queue->copy_range = range;
508 break;
509
510 default:
511 status = -EINVAL;
512
513 }
514 spin_unlock_bh(&queue->lock);
515
516 return status;
517}
518
519static int
520dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
521{
522 if (entry->indev)
523 if (entry->indev->ifindex == ifindex)
524 return 1;
525 if (entry->outdev)
526 if (entry->outdev->ifindex == ifindex)
527 return 1;
528#ifdef CONFIG_BRIDGE_NETFILTER
529 if (entry->skb->nf_bridge) {
530 if (entry->skb->nf_bridge->physindev &&
531 entry->skb->nf_bridge->physindev->ifindex == ifindex)
532 return 1;
533 if (entry->skb->nf_bridge->physoutdev &&
534 entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
535 return 1;
536 }
537#endif
538 return 0;
539}
540
541/* drop all packets with either indev or outdev == ifindex from all queue
542 * instances */
543static void
544nfqnl_dev_drop(int ifindex)
545{
546 int i;
547
548 rcu_read_lock();
549
550 for (i = 0; i < INSTANCE_BUCKETS; i++) {
551 struct hlist_node *tmp;
552 struct nfqnl_instance *inst;
553 struct hlist_head *head = &instance_table[i];
554
555 hlist_for_each_entry_rcu(inst, tmp, head, hlist)
556 nfqnl_flush(inst, dev_cmp, ifindex);
557 }
558
559 rcu_read_unlock();
560}
561
562#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
563
564static int
565nfqnl_rcv_dev_event(struct notifier_block *this,
566 unsigned long event, void *ptr)
567{
568 struct net_device *dev = ptr;
569
570 if (!net_eq(dev_net(dev), &init_net))
571 return NOTIFY_DONE;
572
573 /* Drop any packets associated with the downed device */
574 if (event == NETDEV_DOWN)
575 nfqnl_dev_drop(dev->ifindex);
576 return NOTIFY_DONE;
577}
578
579static struct notifier_block nfqnl_dev_notifier = {
580 .notifier_call = nfqnl_rcv_dev_event,
581};
582
583static int
584nfqnl_rcv_nl_event(struct notifier_block *this,
585 unsigned long event, void *ptr)
586{
587 struct netlink_notify *n = ptr;
588
589 if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
590 int i;
591
592 /* destroy all instances for this pid */
593 spin_lock(&instances_lock);
594 for (i = 0; i < INSTANCE_BUCKETS; i++) {
595 struct hlist_node *tmp, *t2;
596 struct nfqnl_instance *inst;
597 struct hlist_head *head = &instance_table[i];
598
599 hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
600 if ((n->net == &init_net) &&
601 (n->pid == inst->peer_pid))
602 __instance_destroy(inst);
603 }
604 }
605 spin_unlock(&instances_lock);
606 }
607 return NOTIFY_DONE;
608}
609
610static struct notifier_block nfqnl_rtnl_notifier = {
611 .notifier_call = nfqnl_rcv_nl_event,
612};
613
614static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
615 [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
616 [NFQA_MARK] = { .type = NLA_U32 },
617 [NFQA_PAYLOAD] = { .type = NLA_UNSPEC },
618};
619
620static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
621 [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
622 [NFQA_MARK] = { .type = NLA_U32 },
623};
624
625static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid)
626{
627 struct nfqnl_instance *queue;
628
629 queue = instance_lookup(queue_num);
630 if (!queue)
631 return ERR_PTR(-ENODEV);
632
633 if (queue->peer_pid != nlpid)
634 return ERR_PTR(-EPERM);
635
636 return queue;
637}
638
639static struct nfqnl_msg_verdict_hdr*
640verdicthdr_get(const struct nlattr * const nfqa[])
641{
642 struct nfqnl_msg_verdict_hdr *vhdr;
643 unsigned int verdict;
644
645 if (!nfqa[NFQA_VERDICT_HDR])
646 return NULL;
647
648 vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
649 verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK;
650 if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN)
651 return NULL;
652 return vhdr;
653}
654
655static int nfq_id_after(unsigned int id, unsigned int max)
656{
657 return (int)(id - max) > 0;
658}
659
660static int
661nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
662 const struct nlmsghdr *nlh,
663 const struct nlattr * const nfqa[])
664{
665 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
666 struct nf_queue_entry *entry, *tmp;
667 unsigned int verdict, maxid;
668 struct nfqnl_msg_verdict_hdr *vhdr;
669 struct nfqnl_instance *queue;
670 LIST_HEAD(batch_list);
671 u16 queue_num = ntohs(nfmsg->res_id);
672
673 queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid);
674 if (IS_ERR(queue))
675 return PTR_ERR(queue);
676
677 vhdr = verdicthdr_get(nfqa);
678 if (!vhdr)
679 return -EINVAL;
680
681 verdict = ntohl(vhdr->verdict);
682 maxid = ntohl(vhdr->id);
683
684 spin_lock_bh(&queue->lock);
685
686 list_for_each_entry_safe(entry, tmp, &queue->queue_list, list) {
687 if (nfq_id_after(entry->id, maxid))
688 break;
689 __dequeue_entry(queue, entry);
690 list_add_tail(&entry->list, &batch_list);
691 }
692
693 spin_unlock_bh(&queue->lock);
694
695 if (list_empty(&batch_list))
696 return -ENOENT;
697
698 list_for_each_entry_safe(entry, tmp, &batch_list, list) {
699 if (nfqa[NFQA_MARK])
700 entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
701 nf_reinject(entry, verdict);
702 }
703 return 0;
704}
705
706static int
707nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
708 const struct nlmsghdr *nlh,
709 const struct nlattr * const nfqa[])
710{
711 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
712 u_int16_t queue_num = ntohs(nfmsg->res_id);
713
714 struct nfqnl_msg_verdict_hdr *vhdr;
715 struct nfqnl_instance *queue;
716 unsigned int verdict;
717 struct nf_queue_entry *entry;
718
719 queue = instance_lookup(queue_num);
720 if (!queue)
 721 /* verdict_instance_lookup() also checks that the requester owns the queue */
 722 queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid);
723 if (IS_ERR(queue))
724 return PTR_ERR(queue);
725
726 vhdr = verdicthdr_get(nfqa);
727 if (!vhdr)
728 return -EINVAL;
729
730 verdict = ntohl(vhdr->verdict);
731
732 entry = find_dequeue_entry(queue, ntohl(vhdr->id));
733 if (entry == NULL)
734 return -ENOENT;
735
736 if (nfqa[NFQA_PAYLOAD]) {
737 if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]),
738 nla_len(nfqa[NFQA_PAYLOAD]), entry) < 0)
739 verdict = NF_DROP;
740 }
741
742 if (nfqa[NFQA_MARK])
743 entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
744
745 nf_reinject(entry, verdict);
746 return 0;
747}
748
749static int
750nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
751 const struct nlmsghdr *nlh,
752 const struct nlattr * const nfqa[])
753{
754 return -ENOTSUPP;
755}
756
757static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
758 [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) },
759 [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) },
760};
761
762static const struct nf_queue_handler nfqh = {
763 .name = "nf_queue",
764 .outfn = &nfqnl_enqueue_packet,
765};
766
767static int
768nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
769 const struct nlmsghdr *nlh,
770 const struct nlattr * const nfqa[])
771{
772 struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
773 u_int16_t queue_num = ntohs(nfmsg->res_id);
774 struct nfqnl_instance *queue;
775 struct nfqnl_msg_config_cmd *cmd = NULL;
776 int ret = 0;
777
778 if (nfqa[NFQA_CFG_CMD]) {
779 cmd = nla_data(nfqa[NFQA_CFG_CMD]);
780
781 /* Commands without queue context - might sleep */
782 switch (cmd->command) {
783 case NFQNL_CFG_CMD_PF_BIND:
784 return nf_register_queue_handler(ntohs(cmd->pf),
785 &nfqh);
786 case NFQNL_CFG_CMD_PF_UNBIND:
787 return nf_unregister_queue_handler(ntohs(cmd->pf),
788 &nfqh);
789 }
790 }
791
792 rcu_read_lock();
793 queue = instance_lookup(queue_num);
794 if (queue && queue->peer_pid != NETLINK_CB(skb).pid) {
795 ret = -EPERM;
796 goto err_out_unlock;
797 }
798
799 if (cmd != NULL) {
800 switch (cmd->command) {
801 case NFQNL_CFG_CMD_BIND:
802 if (queue) {
803 ret = -EBUSY;
804 goto err_out_unlock;
805 }
806 queue = instance_create(queue_num, NETLINK_CB(skb).pid);
807 if (IS_ERR(queue)) {
808 ret = PTR_ERR(queue);
809 goto err_out_unlock;
810 }
811 break;
812 case NFQNL_CFG_CMD_UNBIND:
813 if (!queue) {
814 ret = -ENODEV;
815 goto err_out_unlock;
816 }
817 instance_destroy(queue);
818 break;
819 case NFQNL_CFG_CMD_PF_BIND:
820 case NFQNL_CFG_CMD_PF_UNBIND:
821 break;
822 default:
823 ret = -ENOTSUPP;
824 break;
825 }
826 }
827
828 if (nfqa[NFQA_CFG_PARAMS]) {
829 struct nfqnl_msg_config_params *params;
830
831 if (!queue) {
832 ret = -ENODEV;
833 goto err_out_unlock;
834 }
835 params = nla_data(nfqa[NFQA_CFG_PARAMS]);
836 nfqnl_set_mode(queue, params->copy_mode,
837 ntohl(params->copy_range));
838 }
839
840 if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) {
841 __be32 *queue_maxlen;
842
843 if (!queue) {
844 ret = -ENODEV;
845 goto err_out_unlock;
846 }
847 queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
848 spin_lock_bh(&queue->lock);
849 queue->queue_maxlen = ntohl(*queue_maxlen);
850 spin_unlock_bh(&queue->lock);
851 }
852
853err_out_unlock:
854 rcu_read_unlock();
855 return ret;
856}
857
858static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
859 [NFQNL_MSG_PACKET] = { .call_rcu = nfqnl_recv_unsupp,
860 .attr_count = NFQA_MAX, },
861 [NFQNL_MSG_VERDICT] = { .call_rcu = nfqnl_recv_verdict,
862 .attr_count = NFQA_MAX,
863 .policy = nfqa_verdict_policy },
864 [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config,
865 .attr_count = NFQA_CFG_MAX,
866 .policy = nfqa_cfg_policy },
867 [NFQNL_MSG_VERDICT_BATCH]={ .call_rcu = nfqnl_recv_verdict_batch,
868 .attr_count = NFQA_MAX,
869 .policy = nfqa_verdict_batch_policy },
870};
871
872static const struct nfnetlink_subsystem nfqnl_subsys = {
873 .name = "nf_queue",
874 .subsys_id = NFNL_SUBSYS_QUEUE,
875 .cb_count = NFQNL_MSG_MAX,
876 .cb = nfqnl_cb,
877};
878
879#ifdef CONFIG_PROC_FS
880struct iter_state {
881 unsigned int bucket;
882};
883
884static struct hlist_node *get_first(struct seq_file *seq)
885{
886 struct iter_state *st = seq->private;
887
888 if (!st)
889 return NULL;
890
891 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
892 if (!hlist_empty(&instance_table[st->bucket]))
893 return instance_table[st->bucket].first;
894 }
895 return NULL;
896}
897
898static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
899{
900 struct iter_state *st = seq->private;
901
902 h = h->next;
903 while (!h) {
904 if (++st->bucket >= INSTANCE_BUCKETS)
905 return NULL;
906
907 h = instance_table[st->bucket].first;
908 }
909 return h;
910}
911
912static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
913{
914 struct hlist_node *head;
915 head = get_first(seq);
916
917 if (head)
918 while (pos && (head = get_next(seq, head)))
919 pos--;
920 return pos ? NULL : head;
921}
922
923static void *seq_start(struct seq_file *seq, loff_t *pos)
924 __acquires(instances_lock)
925{
926 spin_lock(&instances_lock);
927 return get_idx(seq, *pos);
928}
929
930static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
931{
932 (*pos)++;
933 return get_next(s, v);
934}
935
936static void seq_stop(struct seq_file *s, void *v)
937 __releases(instances_lock)
938{
939 spin_unlock(&instances_lock);
940}
941
942static int seq_show(struct seq_file *s, void *v)
943{
944 const struct nfqnl_instance *inst = v;
945
946 return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n",
947 inst->queue_num,
948 inst->peer_pid, inst->queue_total,
949 inst->copy_mode, inst->copy_range,
950 inst->queue_dropped, inst->queue_user_dropped,
951 inst->id_sequence, 1);
952}
953
954static const struct seq_operations nfqnl_seq_ops = {
955 .start = seq_start,
956 .next = seq_next,
957 .stop = seq_stop,
958 .show = seq_show,
959};
960
961static int nfqnl_open(struct inode *inode, struct file *file)
962{
963 return seq_open_private(file, &nfqnl_seq_ops,
964 sizeof(struct iter_state));
965}
966
967static const struct file_operations nfqnl_file_ops = {
968 .owner = THIS_MODULE,
969 .open = nfqnl_open,
970 .read = seq_read,
971 .llseek = seq_lseek,
972 .release = seq_release_private,
973};
974
975#endif /* PROC_FS */
976
977static int __init nfnetlink_queue_init(void)
978{
979 int i, status = -ENOMEM;
980
981 for (i = 0; i < INSTANCE_BUCKETS; i++)
982 INIT_HLIST_HEAD(&instance_table[i]);
983
984 netlink_register_notifier(&nfqnl_rtnl_notifier);
985 status = nfnetlink_subsys_register(&nfqnl_subsys);
986 if (status < 0) {
987 printk(KERN_ERR "nf_queue: failed to create netlink socket\n");
988 goto cleanup_netlink_notifier;
989 }
990
991#ifdef CONFIG_PROC_FS
992 if (!proc_create("nfnetlink_queue", 0440,
993 proc_net_netfilter, &nfqnl_file_ops))
994 goto cleanup_subsys;
995#endif
996
997 register_netdevice_notifier(&nfqnl_dev_notifier);
998 return status;
999
1000#ifdef CONFIG_PROC_FS
1001cleanup_subsys:
1002 nfnetlink_subsys_unregister(&nfqnl_subsys);
1003#endif
1004cleanup_netlink_notifier:
1005 netlink_unregister_notifier(&nfqnl_rtnl_notifier);
1006 return status;
1007}
1008
1009static void __exit nfnetlink_queue_fini(void)
1010{
1011 nf_unregister_queue_handlers(&nfqh);
1012 unregister_netdevice_notifier(&nfqnl_dev_notifier);
1013#ifdef CONFIG_PROC_FS
1014 remove_proc_entry("nfnetlink_queue", proc_net_netfilter);
1015#endif
1016 nfnetlink_subsys_unregister(&nfqnl_subsys);
1017 netlink_unregister_notifier(&nfqnl_rtnl_notifier);
1018
1019 rcu_barrier(); /* Wait for completion of call_rcu()'s */
1020}
1021
1022MODULE_DESCRIPTION("netfilter packet queue handler");
1023MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
1024MODULE_LICENSE("GPL");
1025MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE);
1026
1027module_init(nfnetlink_queue_init);
1028module_exit(nfnetlink_queue_fini);
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
new file mode 100644
index 00000000000..9d782181b6c
--- /dev/null
+++ b/net/netfilter/xt_NOTRACK.c
@@ -0,0 +1,53 @@
1/* This is a module which is used for setting up fake conntracks
2 * on packets so that they are not seen by the conntrack/NAT code.
3 */
4#include <linux/module.h>
5#include <linux/skbuff.h>
6
7#include <linux/netfilter/x_tables.h>
8#include <net/netfilter/nf_conntrack.h>
9
10MODULE_DESCRIPTION("Xtables: Disabling connection tracking for packets");
11MODULE_LICENSE("GPL");
12MODULE_ALIAS("ipt_NOTRACK");
13MODULE_ALIAS("ip6t_NOTRACK");
14
15static unsigned int
16notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
17{
18 /* Previously seen (loopback)? Ignore. */
19 if (skb->nfct != NULL)
20 return XT_CONTINUE;
21
22 /* Attach fake conntrack entry.
 23 If there is a real ct entry corresponding to this packet,
 24 it'll hang around until it times out. We don't deal with it
25 for performance reasons. JK */
26 skb->nfct = &nf_ct_untracked_get()->ct_general;
27 skb->nfctinfo = IP_CT_NEW;
28 nf_conntrack_get(skb->nfct);
29
30 return XT_CONTINUE;
31}
32
33static struct xt_target notrack_tg_reg __read_mostly = {
34 .name = "NOTRACK",
35 .revision = 0,
36 .family = NFPROTO_UNSPEC,
37 .target = notrack_tg,
38 .table = "raw",
39 .me = THIS_MODULE,
40};
41
42static int __init notrack_tg_init(void)
43{
44 return xt_register_target(&notrack_tg_reg);
45}
46
47static void __exit notrack_tg_exit(void)
48{
49 xt_unregister_target(&notrack_tg_reg);
50}
51
52module_init(notrack_tg_init);
53module_exit(notrack_tg_exit);
diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c
new file mode 100644
index 00000000000..08086d680c2
--- /dev/null
+++ b/net/netfilter/xt_qtaguid.c
@@ -0,0 +1,2785 @@
1/*
2 * Kernel iptables module to track stats for packets based on user tags.
3 *
4 * (C) 2011 Google, Inc
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11/*
12 * There are run-time debug flags enabled via the debug_mask module param, or
13 * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14 */
15#define DEBUG
16
17#include <linux/file.h>
18#include <linux/inetdevice.h>
19#include <linux/module.h>
20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_qtaguid.h>
22#include <linux/skbuff.h>
23#include <linux/workqueue.h>
24#include <net/addrconf.h>
25#include <net/sock.h>
26#include <net/tcp.h>
27#include <net/udp.h>
28
29#include <linux/netfilter/xt_socket.h>
30#include "xt_qtaguid_internal.h"
31#include "xt_qtaguid_print.h"
32
33/*
34 * We only use the xt_socket funcs within a similar context to avoid unexpected
35 * return values.
36 */
37#define XT_SOCKET_SUPPORTED_HOOKS \
38 ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
39
40
41static const char *module_procdirname = "xt_qtaguid";
42static struct proc_dir_entry *xt_qtaguid_procdir;
43
44static unsigned int proc_iface_perms = S_IRUGO;
45module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
46
47static struct proc_dir_entry *xt_qtaguid_stats_file;
48static unsigned int proc_stats_perms = S_IRUGO;
49module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
50
51static struct proc_dir_entry *xt_qtaguid_ctrl_file;
52#ifdef CONFIG_ANDROID_PARANOID_NETWORK
53static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
54#else
55static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR;
56#endif
57module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
58
59#ifdef CONFIG_ANDROID_PARANOID_NETWORK
60#include <linux/android_aid.h>
61static gid_t proc_stats_readall_gid = AID_NET_BW_STATS;
62static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT;
63#else
 64/* 0 means don't limit anybody */
65static gid_t proc_stats_readall_gid;
66static gid_t proc_ctrl_write_gid;
67#endif
68module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,
69 S_IRUGO | S_IWUSR);
70module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,
71 S_IRUGO | S_IWUSR);
72
73/*
74 * Limit the number of active tags (via socket tags) for a given UID.
75 * Multiple processes could share the UID.
76 */
77static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
78module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
79
80/*
 81 * After the kernel has initialized this module, it is still possible
82 * to make it passive.
83 * Setting passive to Y:
84 * - the iface stats handling will not act on notifications.
85 * - iptables matches will never match.
86 * - ctrl commands silently succeed.
87 * - stats are always empty.
 88 * This is mostly useful when a bug is suspected.
89 */
90static bool module_passive;
91module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
92
93/*
94 * Control how qtaguid data is tracked per proc/uid.
95 * Setting tag_tracking_passive to Y:
96 * - don't create proc specific structs to track tags
97 * - don't check that active tag stats exceed some limits.
98 * - don't clean up socket tags on process exits.
 99 * This is mostly useful when a bug is suspected.
100 */
101static bool qtu_proc_handling_passive;
102module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
103 S_IRUGO | S_IWUSR);
104
105#define QTU_DEV_NAME "xt_qtaguid"
106
107uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
108module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
109
110/*---------------------------------------------------------------------------*/
111static const char *iface_stat_procdirname = "iface_stat";
112static struct proc_dir_entry *iface_stat_procdir;
113static const char *iface_stat_all_procfilename = "iface_stat_all";
114static struct proc_dir_entry *iface_stat_all_procfile;
115
116/*
117 * Ordering of locks:
118 * outer locks:
119 * iface_stat_list_lock
120 * sock_tag_list_lock
121 * inner locks:
122 * uid_tag_data_tree_lock
123 * tag_counter_set_list_lock
124 * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
125 * is acquired.
126 *
127 * Call tree with all lock holders as of 2011-09-25:
128 *
129 * iface_stat_all_proc_read()
130 * iface_stat_list_lock
131 * (struct iface_stat)
132 *
133 * qtaguid_ctrl_proc_read()
134 * sock_tag_list_lock
135 * (sock_tag_tree)
136 * (struct proc_qtu_data->sock_tag_list)
137 * prdebug_full_state()
138 * sock_tag_list_lock
139 * (sock_tag_tree)
140 * uid_tag_data_tree_lock
141 * (uid_tag_data_tree)
142 * (proc_qtu_data_tree)
143 * iface_stat_list_lock
144 *
145 * qtaguid_stats_proc_read()
146 * iface_stat_list_lock
147 * struct iface_stat->tag_stat_list_lock
148 *
149 * qtudev_open()
150 * uid_tag_data_tree_lock
151 *
152 * qtudev_release()
153 * sock_tag_data_list_lock
154 * uid_tag_data_tree_lock
155 * prdebug_full_state()
156 * sock_tag_list_lock
157 * uid_tag_data_tree_lock
158 * iface_stat_list_lock
159 *
160 * iface_netdev_event_handler()
161 * iface_stat_create()
162 * iface_stat_list_lock
163 * iface_stat_update()
164 * iface_stat_list_lock
165 *
166 * iface_inetaddr_event_handler()
167 * iface_stat_create()
168 * iface_stat_list_lock
169 * iface_stat_update()
170 * iface_stat_list_lock
171 *
172 * iface_inet6addr_event_handler()
173 * iface_stat_create_ipv6()
174 * iface_stat_list_lock
175 * iface_stat_update()
176 * iface_stat_list_lock
177 *
178 * qtaguid_mt()
179 * account_for_uid()
180 * if_tag_stat_update()
181 * get_sock_stat()
182 * sock_tag_list_lock
183 * struct iface_stat->tag_stat_list_lock
184 * tag_stat_update()
185 * get_active_counter_set()
186 * tag_counter_set_list_lock
187 * tag_stat_update()
188 * get_active_counter_set()
189 * tag_counter_set_list_lock
190 *
191 *
192 * qtaguid_ctrl_parse()
193 * ctrl_cmd_delete()
194 * sock_tag_list_lock
195 * tag_counter_set_list_lock
196 * iface_stat_list_lock
197 * struct iface_stat->tag_stat_list_lock
198 * uid_tag_data_tree_lock
199 * ctrl_cmd_counter_set()
200 * tag_counter_set_list_lock
201 * ctrl_cmd_tag()
202 * sock_tag_list_lock
203 * (sock_tag_tree)
204 * get_tag_ref()
205 * uid_tag_data_tree_lock
206 * (uid_tag_data_tree)
207 * uid_tag_data_tree_lock
208 * (proc_qtu_data_tree)
209 * ctrl_cmd_untag()
210 * sock_tag_list_lock
211 * uid_tag_data_tree_lock
212 *
213 */
214static LIST_HEAD(iface_stat_list);
215static DEFINE_SPINLOCK(iface_stat_list_lock);
216
217static struct rb_root sock_tag_tree = RB_ROOT;
218static DEFINE_SPINLOCK(sock_tag_list_lock);
219
220static struct rb_root tag_counter_set_tree = RB_ROOT;
221static DEFINE_SPINLOCK(tag_counter_set_list_lock);
222
223static struct rb_root uid_tag_data_tree = RB_ROOT;
224static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
225
226static struct rb_root proc_qtu_data_tree = RB_ROOT;
227/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
228
229static struct qtaguid_event_counts qtu_events;
230/*----------------------------------------------*/
231static bool can_manipulate_uids(void)
232{
233 /* root pwnd */
234 return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid)
235 || in_egroup_p(proc_ctrl_write_gid);
236}
237
238static bool can_impersonate_uid(uid_t uid)
239{
240 return uid == current_fsuid() || can_manipulate_uids();
241}
242
243static bool can_read_other_uid_stats(uid_t uid)
244{
245 /* root pwnd */
246 return unlikely(!current_fsuid()) || uid == current_fsuid()
247 || unlikely(!proc_stats_readall_gid)
248 || in_egroup_p(proc_stats_readall_gid);
249}
250
251static inline void dc_add_byte_packets(struct data_counters *counters, int set,
252 enum ifs_tx_rx direction,
253 enum ifs_proto ifs_proto,
254 int bytes,
255 int packets)
256{
257 counters->bpc[set][direction][ifs_proto].bytes += bytes;
258 counters->bpc[set][direction][ifs_proto].packets += packets;
259}
260
261static inline uint64_t dc_sum_bytes(struct data_counters *counters,
262 int set,
263 enum ifs_tx_rx direction)
264{
265 return counters->bpc[set][direction][IFS_TCP].bytes
266 + counters->bpc[set][direction][IFS_UDP].bytes
267 + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
268}
269
270static inline uint64_t dc_sum_packets(struct data_counters *counters,
271 int set,
272 enum ifs_tx_rx direction)
273{
274 return counters->bpc[set][direction][IFS_TCP].packets
275 + counters->bpc[set][direction][IFS_UDP].packets
276 + counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
277}
278
279static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
280{
281 struct rb_node *node = root->rb_node;
282
283 while (node) {
284 struct tag_node *data = rb_entry(node, struct tag_node, node);
285 int result;
286 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
287 " node=%p data=%p\n", tag, node, data);
288 result = tag_compare(tag, data->tag);
289 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
290 " data.tag=0x%llx (uid=%u) res=%d\n",
291 tag, data->tag, get_uid_from_tag(data->tag), result);
292 if (result < 0)
293 node = node->rb_left;
294 else if (result > 0)
295 node = node->rb_right;
296 else
297 return data;
298 }
299 return NULL;
300}
301
302static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
303{
304 struct rb_node **new = &(root->rb_node), *parent = NULL;
305
306 /* Figure out where to put new node */
307 while (*new) {
308 struct tag_node *this = rb_entry(*new, struct tag_node,
309 node);
310 int result = tag_compare(data->tag, this->tag);
311 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
312 " (uid=%u)\n", __func__,
313 this->tag,
314 get_uid_from_tag(this->tag));
315 parent = *new;
316 if (result < 0)
317 new = &((*new)->rb_left);
318 else if (result > 0)
319 new = &((*new)->rb_right);
320 else
321 BUG();
322 }
323
324 /* Add new node and rebalance tree. */
325 rb_link_node(&data->node, parent, new);
326 rb_insert_color(&data->node, root);
327}
328
329static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
330{
331 tag_node_tree_insert(&data->tn, root);
332}
333
334static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
335{
336 struct tag_node *node = tag_node_tree_search(root, tag);
337 if (!node)
338 return NULL;
339 return rb_entry(&node->node, struct tag_stat, tn.node);
340}
341
342static void tag_counter_set_tree_insert(struct tag_counter_set *data,
343 struct rb_root *root)
344{
345 tag_node_tree_insert(&data->tn, root);
346}
347
348static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
349 tag_t tag)
350{
351 struct tag_node *node = tag_node_tree_search(root, tag);
352 if (!node)
353 return NULL;
354 return rb_entry(&node->node, struct tag_counter_set, tn.node);
355
356}
357
358static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
359{
360 tag_node_tree_insert(&data->tn, root);
361}
362
363static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
364{
365 struct tag_node *node = tag_node_tree_search(root, tag);
366 if (!node)
367 return NULL;
368 return rb_entry(&node->node, struct tag_ref, tn.node);
369}
370
371static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
372 const struct sock *sk)
373{
374 struct rb_node *node = root->rb_node;
375
376 while (node) {
377 struct sock_tag *data = rb_entry(node, struct sock_tag,
378 sock_node);
379 if (sk < data->sk)
380 node = node->rb_left;
381 else if (sk > data->sk)
382 node = node->rb_right;
383 else
384 return data;
385 }
386 return NULL;
387}
388
389static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
390{
391 struct rb_node **new = &(root->rb_node), *parent = NULL;
392
393 /* Figure out where to put new node */
394 while (*new) {
395 struct sock_tag *this = rb_entry(*new, struct sock_tag,
396 sock_node);
397 parent = *new;
398 if (data->sk < this->sk)
399 new = &((*new)->rb_left);
400 else if (data->sk > this->sk)
401 new = &((*new)->rb_right);
402 else
403 BUG();
404 }
405
406 /* Add new node and rebalance tree. */
407 rb_link_node(&data->sock_node, parent, new);
408 rb_insert_color(&data->sock_node, root);
409}
410
411static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
412{
413 struct rb_node *node;
414 struct sock_tag *st_entry;
415
416 node = rb_first(st_to_free_tree);
417 while (node) {
418 st_entry = rb_entry(node, struct sock_tag, sock_node);
419 node = rb_next(node);
420 CT_DEBUG("qtaguid: %s(): "
421 "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
422 st_entry->sk,
423 st_entry->tag,
424 get_uid_from_tag(st_entry->tag));
425 rb_erase(&st_entry->sock_node, st_to_free_tree);
426 sockfd_put(st_entry->socket);
427 kfree(st_entry);
428 }
429}
430
431static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
432 const pid_t pid)
433{
434 struct rb_node *node = root->rb_node;
435
436 while (node) {
437 struct proc_qtu_data *data = rb_entry(node,
438 struct proc_qtu_data,
439 node);
440 if (pid < data->pid)
441 node = node->rb_left;
442 else if (pid > data->pid)
443 node = node->rb_right;
444 else
445 return data;
446 }
447 return NULL;
448}
449
450static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
451 struct rb_root *root)
452{
453 struct rb_node **new = &(root->rb_node), *parent = NULL;
454
455 /* Figure out where to put new node */
456 while (*new) {
457 struct proc_qtu_data *this = rb_entry(*new,
458 struct proc_qtu_data,
459 node);
460 parent = *new;
461 if (data->pid < this->pid)
462 new = &((*new)->rb_left);
463 else if (data->pid > this->pid)
464 new = &((*new)->rb_right);
465 else
466 BUG();
467 }
468
469 /* Add new node and rebalance tree. */
470 rb_link_node(&data->node, parent, new);
471 rb_insert_color(&data->node, root);
472}
473
474static void uid_tag_data_tree_insert(struct uid_tag_data *data,
475 struct rb_root *root)
476{
477 struct rb_node **new = &(root->rb_node), *parent = NULL;
478
479 /* Figure out where to put new node */
480 while (*new) {
481 struct uid_tag_data *this = rb_entry(*new,
482 struct uid_tag_data,
483 node);
484 parent = *new;
485 if (data->uid < this->uid)
486 new = &((*new)->rb_left);
487 else if (data->uid > this->uid)
488 new = &((*new)->rb_right);
489 else
490 BUG();
491 }
492
493 /* Add new node and rebalance tree. */
494 rb_link_node(&data->node, parent, new);
495 rb_insert_color(&data->node, root);
496}
497
498static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
499 uid_t uid)
500{
501 struct rb_node *node = root->rb_node;
502
503 while (node) {
504 struct uid_tag_data *data = rb_entry(node,
505 struct uid_tag_data,
506 node);
507 if (uid < data->uid)
508 node = node->rb_left;
509 else if (uid > data->uid)
510 node = node->rb_right;
511 else
512 return data;
513 }
514 return NULL;
515}
516
517/*
518 * Allocates a new uid_tag_data struct if needed.
519 * Returns a pointer to the found or allocated uid_tag_data.
520 * Returns a PTR_ERR on failures, and lock is not held.
 521 * If found_res is not NULL:
 522 * sets *found_res to true if the entry already existed (not allocated here),
 523 * sets *found_res to false if a new entry was allocated.
524 */
525struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
526{
527 struct uid_tag_data *utd_entry;
528
529 /* Look for top level uid_tag_data for the UID */
530 utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
531 DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
532
533 if (found_res)
534 *found_res = utd_entry;
535 if (utd_entry)
536 return utd_entry;
537
538 utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
539 if (!utd_entry) {
540 pr_err("qtaguid: get_uid_data(%u): "
541 "tag data alloc failed\n", uid);
542 return ERR_PTR(-ENOMEM);
543 }
544
545 utd_entry->uid = uid;
546 utd_entry->tag_ref_tree = RB_ROOT;
547 uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
548 DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
549 return utd_entry;
550}
551
552/* Never returns NULL. Either PTR_ERR or a valid ptr. */
553static struct tag_ref *new_tag_ref(tag_t new_tag,
554 struct uid_tag_data *utd_entry)
555{
556 struct tag_ref *tr_entry;
557 int res;
558
559 if (utd_entry->num_active_tags + 1 > max_sock_tags) {
560 pr_info("qtaguid: new_tag_ref(0x%llx): "
561 "tag ref alloc quota exceeded. max=%d\n",
562 new_tag, max_sock_tags);
563 res = -EMFILE;
564 goto err_res;
565
566 }
567
568 tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
569 if (!tr_entry) {
570 pr_err("qtaguid: new_tag_ref(0x%llx): "
571 "tag ref alloc failed\n",
572 new_tag);
573 res = -ENOMEM;
574 goto err_res;
575 }
576 tr_entry->tn.tag = new_tag;
577 /* tr_entry->num_sock_tags handled by caller */
578 utd_entry->num_active_tags++;
579 tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
580 DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
581 " inserted new tag ref %p\n",
582 new_tag, tr_entry);
583 return tr_entry;
584
585err_res:
586 return ERR_PTR(res);
587}
588
589static struct tag_ref *lookup_tag_ref(tag_t full_tag,
590 struct uid_tag_data **utd_res)
591{
592 struct uid_tag_data *utd_entry;
593 struct tag_ref *tr_entry;
594 bool found_utd;
595 uid_t uid = get_uid_from_tag(full_tag);
596
597 DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
598 full_tag, uid);
599
600 utd_entry = get_uid_data(uid, &found_utd);
601 if (IS_ERR_OR_NULL(utd_entry)) {
602 if (utd_res)
603 *utd_res = utd_entry;
604 return NULL;
605 }
606
607 tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
608 if (utd_res)
609 *utd_res = utd_entry;
610 DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
611 full_tag, utd_entry, tr_entry);
612 return tr_entry;
613}
614
615/* Never returns NULL. Either PTR_ERR or a valid ptr. */
616static struct tag_ref *get_tag_ref(tag_t full_tag,
617 struct uid_tag_data **utd_res)
618{
619 struct uid_tag_data *utd_entry;
620 struct tag_ref *tr_entry;
621
622 DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
623 full_tag);
624 spin_lock_bh(&uid_tag_data_tree_lock);
625 tr_entry = lookup_tag_ref(full_tag, &utd_entry);
626 BUG_ON(IS_ERR_OR_NULL(utd_entry));
627 if (!tr_entry)
628 tr_entry = new_tag_ref(full_tag, utd_entry);
629
630 spin_unlock_bh(&uid_tag_data_tree_lock);
631 if (utd_res)
632 *utd_res = utd_entry;
633 DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
634 full_tag, utd_entry, tr_entry);
635 return tr_entry;
636}
637
638/* Checks and maybe frees the UID Tag Data entry */
639static void put_utd_entry(struct uid_tag_data *utd_entry)
640{
641 /* Are we done with the UID tag data entry? */
642 if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
643 !utd_entry->num_pqd) {
644 DR_DEBUG("qtaguid: %s(): "
645 "erase utd_entry=%p uid=%u "
646 "by pid=%u tgid=%u uid=%u\n", __func__,
647 utd_entry, utd_entry->uid,
648 current->pid, current->tgid, current_fsuid());
649 BUG_ON(utd_entry->num_active_tags);
650 rb_erase(&utd_entry->node, &uid_tag_data_tree);
651 kfree(utd_entry);
652 } else {
653 DR_DEBUG("qtaguid: %s(): "
654 "utd_entry=%p still has %d tags %d proc_qtu_data\n",
655 __func__, utd_entry, utd_entry->num_active_tags,
656 utd_entry->num_pqd);
657 BUG_ON(!(utd_entry->num_active_tags ||
658 utd_entry->num_pqd));
659 }
660}
661
662/*
663 * If no sock_tags are using this tag_ref,
664 * decrements refcount of utd_entry, removes tr_entry
665 * from utd_entry->tag_ref_tree and frees.
666 */
667static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
668 struct uid_tag_data *utd_entry)
669{
670 DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
671 tr_entry, tr_entry->tn.tag,
672 get_uid_from_tag(tr_entry->tn.tag));
673 if (!tr_entry->num_sock_tags) {
674 BUG_ON(!utd_entry->num_active_tags);
675 utd_entry->num_active_tags--;
676 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
677 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
678 kfree(tr_entry);
679 }
680}
681
682static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
683{
684 struct rb_node *node;
685 struct tag_ref *tr_entry;
686 tag_t acct_tag;
687
688 DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
689 full_tag, get_uid_from_tag(full_tag));
690 acct_tag = get_atag_from_tag(full_tag);
691 node = rb_first(&utd_entry->tag_ref_tree);
692 while (node) {
693 tr_entry = rb_entry(node, struct tag_ref, tn.node);
694 node = rb_next(node);
695 if (!acct_tag || tr_entry->tn.tag == full_tag)
696 free_tag_ref_from_utd_entry(tr_entry, utd_entry);
697 }
698}
699
700static int read_proc_u64(char *page, char **start, off_t off,
701 int count, int *eof, void *data)
702{
703 int len;
704 uint64_t value;
705 char *p = page;
706 uint64_t *iface_entry = data;
707
708 if (!data)
709 return 0;
710
711 value = *iface_entry;
712 p += sprintf(p, "%llu\n", value);
713 len = (p - page) - off;
714 *eof = (len <= count) ? 1 : 0;
715 *start = page + off;
716 return len;
717}
718
719static int read_proc_bool(char *page, char **start, off_t off,
720 int count, int *eof, void *data)
721{
722 int len;
723 bool value;
724 char *p = page;
725 bool *bool_entry = data;
726
727 if (!data)
728 return 0;
729
730 value = *bool_entry;
731 p += sprintf(p, "%u\n", value);
732 len = (p - page) - off;
733 *eof = (len <= count) ? 1 : 0;
734 *start = page + off;
735 return len;
736}
737
738static int get_active_counter_set(tag_t tag)
739{
740 int active_set = 0;
741 struct tag_counter_set *tcs;
742
743 MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
744 " (uid=%u)\n",
745 tag, get_uid_from_tag(tag));
746 /* For now we only handle UID tags for active sets */
747 tag = get_utag_from_tag(tag);
748 spin_lock_bh(&tag_counter_set_list_lock);
749 tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
750 if (tcs)
751 active_set = tcs->active_set;
752 spin_unlock_bh(&tag_counter_set_list_lock);
753 return active_set;
754}
755
756/*
757 * Find the entry for tracking the specified interface.
758 * Caller must hold iface_stat_list_lock
759 */
760static struct iface_stat *get_iface_entry(const char *ifname)
761{
762 struct iface_stat *iface_entry;
763
 764 /* Find the entry for tracking the specified interface */
765 if (ifname == NULL) {
766 pr_info("qtaguid: iface_stat: get() NULL device name\n");
767 return NULL;
768 }
769
770 /* Iterate over interfaces */
771 list_for_each_entry(iface_entry, &iface_stat_list, list) {
772 if (!strcmp(ifname, iface_entry->ifname))
773 goto done;
774 }
775 iface_entry = NULL;
776done:
777 return iface_entry;
778}
779
780static int iface_stat_all_proc_read(char *page, char **num_items_returned,
781 off_t items_to_skip, int char_count,
782 int *eof, void *data)
783{
784 char *outp = page;
785 int item_index = 0;
786 int len;
787 struct iface_stat *iface_entry;
788 struct rtnl_link_stats64 dev_stats, *stats;
789 struct rtnl_link_stats64 no_dev_stats = {0};
790
791 if (unlikely(module_passive)) {
792 *eof = 1;
793 return 0;
794 }
795
796 CT_DEBUG("qtaguid:proc iface_stat_all "
797 "page=%p *num_items_returned=%p off=%ld "
798 "char_count=%d *eof=%d\n", page, *num_items_returned,
799 items_to_skip, char_count, *eof);
800
801 if (*eof)
802 return 0;
803
804 /*
805 * This lock will prevent iface_stat_update() from changing active,
806 * and in turn prevent an interface from unregistering itself.
807 */
808 spin_lock_bh(&iface_stat_list_lock);
809 list_for_each_entry(iface_entry, &iface_stat_list, list) {
810 if (item_index++ < items_to_skip)
811 continue;
812
813 if (iface_entry->active) {
814 stats = dev_get_stats(iface_entry->net_dev,
815 &dev_stats);
816 } else {
817 stats = &no_dev_stats;
818 }
819 len = snprintf(outp, char_count,
820 "%s %d "
821 "%llu %llu %llu %llu "
822 "%llu %llu %llu %llu\n",
823 iface_entry->ifname,
824 iface_entry->active,
825 iface_entry->totals[IFS_RX].bytes,
826 iface_entry->totals[IFS_RX].packets,
827 iface_entry->totals[IFS_TX].bytes,
828 iface_entry->totals[IFS_TX].packets,
829 stats->rx_bytes, stats->rx_packets,
830 stats->tx_bytes, stats->tx_packets);
831 if (len >= char_count) {
832 spin_unlock_bh(&iface_stat_list_lock);
833 *outp = '\0';
834 return outp - page;
835 }
836 outp += len;
837 char_count -= len;
838 (*num_items_returned)++;
839 }
840 spin_unlock_bh(&iface_stat_list_lock);
841
842 *eof = 1;
843 return outp - page;
844}
845
846static void iface_create_proc_worker(struct work_struct *work)
847{
848 struct proc_dir_entry *proc_entry;
849 struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
850 iface_work);
851 struct iface_stat *new_iface = isw->iface_entry;
852
853 /* iface_entries are not deleted, so safe to manipulate. */
854 proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
855 if (IS_ERR_OR_NULL(proc_entry)) {
856 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
857 kfree(isw);
858 return;
859 }
860
861 new_iface->proc_ptr = proc_entry;
862
863 create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
864 read_proc_u64, &new_iface->totals[IFS_TX].bytes);
865 create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
866 read_proc_u64, &new_iface->totals[IFS_RX].bytes);
867 create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
868 read_proc_u64, &new_iface->totals[IFS_TX].packets);
869 create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
870 read_proc_u64, &new_iface->totals[IFS_RX].packets);
871 create_proc_read_entry("active", proc_iface_perms, proc_entry,
872 read_proc_bool, &new_iface->active);
873
874 IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
875 "entry=%p dev=%s\n", new_iface, new_iface->ifname);
876 kfree(isw);
877}
878
879/*
880 * Will set the entry's active state, and
881 * update the net_dev accordingly also.
882 */
883static void _iface_stat_set_active(struct iface_stat *entry,
884 struct net_device *net_dev,
885 bool activate)
886{
887 if (activate) {
888 entry->net_dev = net_dev;
889 entry->active = true;
890 IF_DEBUG("qtaguid: %s(%s): "
891 "enable tracking. rfcnt=%d\n", __func__,
892 entry->ifname,
893 percpu_read(*net_dev->pcpu_refcnt));
894 } else {
895 entry->active = false;
896 entry->net_dev = NULL;
897 IF_DEBUG("qtaguid: %s(%s): "
898 "disable tracking. rfcnt=%d\n", __func__,
899 entry->ifname,
900 percpu_read(*net_dev->pcpu_refcnt));
901
902 }
903}
904
905/* Caller must hold iface_stat_list_lock */
906static struct iface_stat *iface_alloc(struct net_device *net_dev)
907{
908 struct iface_stat *new_iface;
909 struct iface_stat_work *isw;
910
911 new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
912 if (new_iface == NULL) {
913 pr_err("qtaguid: iface_stat: create(%s): "
914 "iface_stat alloc failed\n", net_dev->name);
915 return NULL;
916 }
917 new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
918 if (new_iface->ifname == NULL) {
919 pr_err("qtaguid: iface_stat: create(%s): "
920 "ifname alloc failed\n", net_dev->name);
921 kfree(new_iface);
922 return NULL;
923 }
924 spin_lock_init(&new_iface->tag_stat_list_lock);
925 new_iface->tag_stat_tree = RB_ROOT;
926 _iface_stat_set_active(new_iface, net_dev, true);
927
928 /*
929 * ipv6 notifier chains are atomic :(
930 * No create_proc_read_entry() for you!
931 */
932 isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
933 if (!isw) {
934 pr_err("qtaguid: iface_stat: create(%s): "
935 "work alloc failed\n", new_iface->ifname);
936 _iface_stat_set_active(new_iface, net_dev, false);
937 kfree(new_iface->ifname);
938 kfree(new_iface);
939 return NULL;
940 }
941 isw->iface_entry = new_iface;
942 INIT_WORK(&isw->iface_work, iface_create_proc_worker);
943 schedule_work(&isw->iface_work);
944 list_add(&new_iface->list, &iface_stat_list);
945 return new_iface;
946}
947
948static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
949 struct iface_stat *iface)
950{
951 struct rtnl_link_stats64 dev_stats, *stats;
952 bool stats_rewound;
953
954 stats = dev_get_stats(net_dev, &dev_stats);
955 /* Detect whether the device reset (rewound) its counters. */
956 stats_rewound =
957 (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
958 || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
959
960 IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
961 "bytes rx/tx=%llu/%llu "
962 "active=%d last_known=%d "
963 "stats_rewound=%d\n", __func__,
964 net_dev ? net_dev->name : "?",
965 iface, net_dev,
966 stats->rx_bytes, stats->tx_bytes,
967 iface->active, iface->last_known_valid, stats_rewound);
968
969 if (iface->active && iface->last_known_valid && stats_rewound) {
970 pr_warn_once("qtaguid: iface_stat: %s(%s): "
971 "iface reset its stats unexpectedly\n", __func__,
972 net_dev->name);
973
974 iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes;
975 iface->totals[IFS_TX].packets +=
976 iface->last_known[IFS_TX].packets;
977 iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes;
978 iface->totals[IFS_RX].packets +=
979 iface->last_known[IFS_RX].packets;
980 iface->last_known_valid = false;
981 IF_DEBUG("qtaguid: %s(%s): iface=%p "
982 "used last known bytes rx/tx=%llu/%llu\n", __func__,
983 iface->ifname, iface, iface->last_known[IFS_RX].bytes,
984 iface->last_known[IFS_TX].bytes);
985 }
986}
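/*
 * Editorial note (not part of the original patch): a worked example of the
 * rewind adjustment above, with made-up numbers.  Suppose rx/tx bytes of
 * 5000/7000 were stashed into last_known[] at NETDEV_DOWN and the device
 * later reports rx/tx bytes of 100/200, i.e. it reset its counters.  Both
 * comparisons above see the counters go backwards, so stats_rewound is true
 * and the stashed 5000/7000 (plus the stashed packet counts) are folded into
 * totals[] so that history is not lost; the fresh counters are picked up
 * later by iface_stat_update().
 */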
987
988/*
989 * Create a new entry for tracking the specified interface.
990 * Do nothing if the entry already exists.
991 * Called when an interface is configured with a valid IP address.
992 */
993static void iface_stat_create(struct net_device *net_dev,
994 struct in_ifaddr *ifa)
995{
996 struct in_device *in_dev = NULL;
997 const char *ifname;
998 struct iface_stat *entry;
999 __be32 ipaddr = 0;
1000 struct iface_stat *new_iface;
1001
1002 IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
1003 net_dev ? net_dev->name : "?",
1004 ifa, net_dev);
1005 if (!net_dev) {
1006 pr_err("qtaguid: iface_stat: create(): no net dev\n");
1007 return;
1008 }
1009
1010 ifname = net_dev->name;
1011 if (!ifa) {
1012 in_dev = in_dev_get(net_dev);
1013 if (!in_dev) {
1014 pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
1015 ifname);
1016 return;
1017 }
1018 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
1019 ifname, in_dev);
1020 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1021 IF_DEBUG("qtaguid: iface_stat: create(%s): "
1022 "ifa=%p ifa_label=%s\n",
1023 ifname, ifa,
1024 ifa->ifa_label ? ifa->ifa_label : "(null)");
1025 if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
1026 break;
1027 }
1028 }
1029
1030 if (!ifa) {
1031 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
1032 ifname);
1033 goto done_put;
1034 }
1035 ipaddr = ifa->ifa_local;
1036
1037 spin_lock_bh(&iface_stat_list_lock);
1038 entry = get_iface_entry(ifname);
1039 if (entry != NULL) {
1040 bool activate = !ipv4_is_loopback(ipaddr);
1041 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
1042 ifname, entry);
1043 iface_check_stats_reset_and_adjust(net_dev, entry);
1044 _iface_stat_set_active(entry, net_dev, activate);
1045 IF_DEBUG("qtaguid: %s(%s): "
1046 "tracking now %d on ip=%pI4\n", __func__,
1047 entry->ifname, activate, &ipaddr);
1048 goto done_unlock_put;
1049 } else if (ipv4_is_loopback(ipaddr)) {
1050 IF_DEBUG("qtaguid: iface_stat: create(%s): "
1051 "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr);
1052 goto done_unlock_put;
1053 }
1054
1055 new_iface = iface_alloc(net_dev);
1056 IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1057 "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
1058done_unlock_put:
1059 spin_unlock_bh(&iface_stat_list_lock);
1060done_put:
1061 if (in_dev)
1062 in_dev_put(in_dev);
1063}
1064
1065static void iface_stat_create_ipv6(struct net_device *net_dev,
1066 struct inet6_ifaddr *ifa)
1067{
1068 struct in_device *in_dev;
1069 const char *ifname;
1070 struct iface_stat *entry;
1071 struct iface_stat *new_iface;
1072 int addr_type;
1073
1074 IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1075 ifa, net_dev, net_dev ? net_dev->name : "");
1076 if (!net_dev) {
1077 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1078 return;
1079 }
1080 ifname = net_dev->name;
1081
1082 in_dev = in_dev_get(net_dev);
1083 if (!in_dev) {
1084 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1085 ifname);
1086 return;
1087 }
1088
1089 IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1090 ifname, in_dev);
1091
1092 if (!ifa) {
1093 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1094 ifname);
1095 goto done_put;
1096 }
1097 addr_type = ipv6_addr_type(&ifa->addr);
1098
1099 spin_lock_bh(&iface_stat_list_lock);
1100 entry = get_iface_entry(ifname);
1101 if (entry != NULL) {
1102 bool activate = !(addr_type & IPV6_ADDR_LOOPBACK);
1103 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1104 ifname, entry);
1105 iface_check_stats_reset_and_adjust(net_dev, entry);
1106 _iface_stat_set_active(entry, net_dev, activate);
1107 IF_DEBUG("qtaguid: %s(%s): "
1108 "tracking now %d on ip=%pI6c\n", __func__,
1109 entry->ifname, activate, &ifa->addr);
1110 goto done_unlock_put;
1111 } else if (addr_type & IPV6_ADDR_LOOPBACK) {
1112 IF_DEBUG("qtaguid: %s(%s): "
1113 "ignore loopback dev. ip=%pI6c\n", __func__,
1114 ifname, &ifa->addr);
1115 goto done_unlock_put;
1116 }
1117
1118 new_iface = iface_alloc(net_dev);
1119 IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1120 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
1121
1122done_unlock_put:
1123 spin_unlock_bh(&iface_stat_list_lock);
1124done_put:
1125 in_dev_put(in_dev);
1126}
1127
1128static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1129{
1130 MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1131 return sock_tag_tree_search(&sock_tag_tree, sk);
1132}
1133
1134static struct sock_tag *get_sock_stat(const struct sock *sk)
1135{
1136 struct sock_tag *sock_tag_entry;
1137 MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1138 if (!sk)
1139 return NULL;
1140 spin_lock_bh(&sock_tag_list_lock);
1141 sock_tag_entry = get_sock_stat_nl(sk);
1142 spin_unlock_bh(&sock_tag_list_lock);
1143 return sock_tag_entry;
1144}
1145
1146static void
1147data_counters_update(struct data_counters *dc, int set,
1148 enum ifs_tx_rx direction, int proto, int bytes)
1149{
1150 switch (proto) {
1151 case IPPROTO_TCP:
1152 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1153 break;
1154 case IPPROTO_UDP:
1155 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1156 break;
1157 case IPPROTO_IP:
1158 default:
1159 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1160 1);
1161 break;
1162 }
1163}
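/*
 * Editorial note (not part of the original patch): with this dispatch, a
 * hypothetical 1400-byte received TCP segment accounted against set 0 ends
 * up as dc_add_byte_packets(dc, 0, IFS_RX, IFS_TCP, 1400, 1), i.e. one
 * packet and 1400 bytes added to the TCP rx bucket; anything that is not
 * TCP or UDP lands in IFS_PROTO_OTHER.
 */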
1164
1165/*
1166 * Update stats for the specified interface. Do nothing if the entry
1167 * does not exist (when a device was never configured with an IP address).
1168 * Called when a device is being unregistered.
1169 */
1170static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1171{
1172 struct rtnl_link_stats64 dev_stats, *stats;
1173 struct iface_stat *entry;
1174
1175 stats = dev_get_stats(net_dev, &dev_stats);
1176 spin_lock_bh(&iface_stat_list_lock);
1177 entry = get_iface_entry(net_dev->name);
1178 if (entry == NULL) {
1179 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1180 net_dev->name);
1181 spin_unlock_bh(&iface_stat_list_lock);
1182 return;
1183 }
1184
1185 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1186 net_dev->name, entry);
1187 if (!entry->active) {
1188 IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1189 net_dev->name);
1190 spin_unlock_bh(&iface_stat_list_lock);
1191 return;
1192 }
1193
1194 if (stash_only) {
1195 entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1196 entry->last_known[IFS_TX].packets = stats->tx_packets;
1197 entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1198 entry->last_known[IFS_RX].packets = stats->rx_packets;
1199 entry->last_known_valid = true;
1200 IF_DEBUG("qtaguid: %s(%s): "
1201 "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1202 net_dev->name, stats->rx_bytes, stats->tx_bytes);
1203 spin_unlock_bh(&iface_stat_list_lock);
1204 return;
1205 }
1206 entry->totals[IFS_TX].bytes += stats->tx_bytes;
1207 entry->totals[IFS_TX].packets += stats->tx_packets;
1208 entry->totals[IFS_RX].bytes += stats->rx_bytes;
1209 entry->totals[IFS_RX].packets += stats->rx_packets;
1210 /* We don't need the last_known[] anymore */
1211 entry->last_known_valid = false;
1212 _iface_stat_set_active(entry, net_dev, false);
1213 IF_DEBUG("qtaguid: %s(%s): "
1214 "disable tracking. rx/tx=%llu/%llu\n", __func__,
1215 net_dev->name, stats->rx_bytes, stats->tx_bytes);
1216 spin_unlock_bh(&iface_stat_list_lock);
1217}
1218
1219static void tag_stat_update(struct tag_stat *tag_entry,
1220 enum ifs_tx_rx direction, int proto, int bytes)
1221{
1222 int active_set;
1223 active_set = get_active_counter_set(tag_entry->tn.tag);
1224 MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1225 "dir=%d proto=%d bytes=%d)\n",
1226 tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1227 active_set, direction, proto, bytes);
1228 data_counters_update(&tag_entry->counters, active_set, direction,
1229 proto, bytes);
1230 if (tag_entry->parent_counters)
1231 data_counters_update(tag_entry->parent_counters, active_set,
1232 direction, proto, bytes);
1233}
1234
1235/*
1236 * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1237 * the interface.
1238 * iface_entry->tag_stat_list_lock should be held.
1239 */
1240static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1241 tag_t tag)
1242{
1243 struct tag_stat *new_tag_stat_entry = NULL;
1244 IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1245 " (uid=%u)\n", __func__,
1246 iface_entry, tag, get_uid_from_tag(tag));
1247 new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1248 if (!new_tag_stat_entry) {
1249 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1250 goto done;
1251 }
1252 new_tag_stat_entry->tn.tag = tag;
1253 tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1254done:
1255 return new_tag_stat_entry;
1256}
1257
1258static void if_tag_stat_update(const char *ifname, uid_t uid,
1259 const struct sock *sk, enum ifs_tx_rx direction,
1260 int proto, int bytes)
1261{
1262 struct tag_stat *tag_stat_entry;
1263 tag_t tag, acct_tag;
1264 tag_t uid_tag;
1265 struct data_counters *uid_tag_counters;
1266 struct sock_tag *sock_tag_entry;
1267 struct iface_stat *iface_entry;
1268 struct tag_stat *new_tag_stat;
1269 MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1270 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1271 ifname, uid, sk, direction, proto, bytes);
1272
1273
1274 iface_entry = get_iface_entry(ifname);
1275 if (!iface_entry) {
1276 pr_err("qtaguid: iface_stat: stat_update() %s not found\n",
1277 ifname);
1278 return;
1279 }
1280 /* It is ok to process data when an iface_entry is inactive */
1281
1282 MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1283 ifname, iface_entry);
1284
1285 /*
1286 * Look for a tagged sock; its tag combines the acct_tag with
1287 * the owning uid.
1288 */
1289 sock_tag_entry = get_sock_stat(sk);
1290 if (sock_tag_entry) {
1291 tag = sock_tag_entry->tag;
1292 acct_tag = get_atag_from_tag(tag);
1293 uid_tag = get_utag_from_tag(tag);
1294 } else {
1295 acct_tag = make_atag_from_value(0);
1296 tag = combine_atag_with_uid(acct_tag, uid);
1297 uid_tag = make_tag_from_uid(uid);
1298 }
1299 MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1300 " looking for tag=0x%llx (uid=%u) in ife=%p\n",
1301 tag, get_uid_from_tag(tag), iface_entry);
1302 /* Loop over tag list under this interface for {acct_tag,uid_tag} */
1303 spin_lock_bh(&iface_entry->tag_stat_list_lock);
1304
1305 tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1306 tag);
1307 if (tag_stat_entry) {
1308 /*
1309 * Updating the {acct_tag, uid_tag} entry handles both stats:
1310 * {0, uid_tag} will also get updated.
1311 */
1312 tag_stat_update(tag_stat_entry, direction, proto, bytes);
1313 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1314 return;
1315 }
1316
1317 /* Loop over tag list under this interface for {0,uid_tag} */
1318 tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1319 uid_tag);
1320 if (!tag_stat_entry) {
1321 /* Here: the base uid_tag did not exist */
1322 /*
1323 * No parent counters yet: neither {0, uid_tag} nor
1324 * {acct_tag, uid_tag} stats exist, so create the base entry.
1325 */
1326 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
1327 uid_tag_counters = &new_tag_stat->counters;
1328 } else {
		/* Reuse the existing base entry so tag_stat_update() below
		 * has a valid target when acct_tag is 0. */
		new_tag_stat = tag_stat_entry;
1329 uid_tag_counters = &tag_stat_entry->counters;
1330 }
1331
1332 if (acct_tag) {
1333 new_tag_stat = create_if_tag_stat(iface_entry, tag);
1334 new_tag_stat->parent_counters = uid_tag_counters;
1335 }
1336 tag_stat_update(new_tag_stat, direction, proto, bytes);
1337 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1338}
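/*
 * Editorial note (not part of the original patch): the parenting scheme
 * above means that for a socket tagged with a non-zero acct_tag under uid U,
 * two entries exist per interface: {acct_tag, U} and the base {0, U}.  The
 * {acct_tag, U} entry's parent_counters point at the {0, U} counters, so a
 * single tag_stat_update() on the specific entry also rolls the traffic up
 * into the per-uid totals.
 */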
1339
1340static int iface_netdev_event_handler(struct notifier_block *nb,
1341 unsigned long event, void *ptr) {
1342 struct net_device *dev = ptr;
1343
1344 if (unlikely(module_passive))
1345 return NOTIFY_DONE;
1346
1347 IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1348 "ev=0x%lx/%s netdev=%p->name=%s\n",
1349 event, netdev_evt_str(event), dev, dev ? dev->name : "");
1350
1351 switch (event) {
1352 case NETDEV_UP:
1353 iface_stat_create(dev, NULL);
1354 atomic64_inc(&qtu_events.iface_events);
1355 break;
1356 case NETDEV_DOWN:
1357 case NETDEV_UNREGISTER:
1358 iface_stat_update(dev, event == NETDEV_DOWN);
1359 atomic64_inc(&qtu_events.iface_events);
1360 break;
1361 }
1362 return NOTIFY_DONE;
1363}
1364
1365static int iface_inet6addr_event_handler(struct notifier_block *nb,
1366 unsigned long event, void *ptr)
1367{
1368 struct inet6_ifaddr *ifa = ptr;
1369 struct net_device *dev;
1370
1371 if (unlikely(module_passive))
1372 return NOTIFY_DONE;
1373
1374 IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1375 "ev=0x%lx/%s ifa=%p\n",
1376 event, netdev_evt_str(event), ifa);
1377
1378 switch (event) {
1379 case NETDEV_UP:
1380 BUG_ON(!ifa || !ifa->idev);
1381 dev = (struct net_device *)ifa->idev->dev;
1382 iface_stat_create_ipv6(dev, ifa);
1383 atomic64_inc(&qtu_events.iface_events);
1384 break;
1385 case NETDEV_DOWN:
1386 case NETDEV_UNREGISTER:
1387 BUG_ON(!ifa || !ifa->idev);
1388 dev = (struct net_device *)ifa->idev->dev;
1389 iface_stat_update(dev, event == NETDEV_DOWN);
1390 atomic64_inc(&qtu_events.iface_events);
1391 break;
1392 }
1393 return NOTIFY_DONE;
1394}
1395
1396static int iface_inetaddr_event_handler(struct notifier_block *nb,
1397 unsigned long event, void *ptr)
1398{
1399 struct in_ifaddr *ifa = ptr;
1400 struct net_device *dev;
1401
1402 if (unlikely(module_passive))
1403 return NOTIFY_DONE;
1404
1405 IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1406 "ev=0x%lx/%s ifa=%p\n",
1407 event, netdev_evt_str(event), ifa);
1408
1409 switch (event) {
1410 case NETDEV_UP:
1411 BUG_ON(!ifa || !ifa->ifa_dev);
1412 dev = ifa->ifa_dev->dev;
1413 iface_stat_create(dev, ifa);
1414 atomic64_inc(&qtu_events.iface_events);
1415 break;
1416 case NETDEV_DOWN:
1417 case NETDEV_UNREGISTER:
1418 BUG_ON(!ifa || !ifa->ifa_dev);
1419 dev = ifa->ifa_dev->dev;
1420 iface_stat_update(dev, event == NETDEV_DOWN);
1421 atomic64_inc(&qtu_events.iface_events);
1422 break;
1423 }
1424 return NOTIFY_DONE;
1425}
1426
1427static struct notifier_block iface_netdev_notifier_blk = {
1428 .notifier_call = iface_netdev_event_handler,
1429};
1430
1431static struct notifier_block iface_inetaddr_notifier_blk = {
1432 .notifier_call = iface_inetaddr_event_handler,
1433};
1434
1435static struct notifier_block iface_inet6addr_notifier_blk = {
1436 .notifier_call = iface_inet6addr_event_handler,
1437};
1438
1439static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1440{
1441 int err;
1442
1443 iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1444 if (!iface_stat_procdir) {
1445 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1446 err = -1;
1447 goto err;
1448 }
1449
1450 iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename,
1451 proc_iface_perms,
1452 parent_procdir);
1453 if (!iface_stat_all_procfile) {
1454 pr_err("qtaguid: iface_stat: init "
1455 " failed to create stat_all proc entry\n");
1456 err = -1;
1457 goto err_zap_entry;
1458 }
1459 iface_stat_all_procfile->read_proc = iface_stat_all_proc_read;
1460
1461
1462 err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1463 if (err) {
1464 pr_err("qtaguid: iface_stat: init "
1465 "failed to register dev event handler\n");
1466 goto err_zap_all_stats_entry;
1467 }
1468 err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1469 if (err) {
1470 pr_err("qtaguid: iface_stat: init "
1471 "failed to register ipv4 dev event handler\n");
1472 goto err_unreg_nd;
1473 }
1474
1475 err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1476 if (err) {
1477 pr_err("qtaguid: iface_stat: init "
1478 "failed to register ipv6 dev event handler\n");
1479 goto err_unreg_ip4_addr;
1480 }
1481 return 0;
1482
1483err_unreg_ip4_addr:
1484 unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1485err_unreg_nd:
1486 unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1487err_zap_all_stats_entry:
1488 remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1489err_zap_entry:
1490 remove_proc_entry(iface_stat_procdirname, parent_procdir);
1491err:
1492 return err;
1493}
1494
1495static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1496 struct xt_action_param *par)
1497{
1498 struct sock *sk;
1499 unsigned int hook_mask = (1 << par->hooknum);
1500
1501 MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1502 par->hooknum, par->family);
1503
1504 /*
1505 * Let's not abuse xt_socket_get*_sk(), or else it will
1506 * return garbage SKs.
1507 */
1508 if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1509 return NULL;
1510
1511 switch (par->family) {
1512 case NFPROTO_IPV6:
1513 sk = xt_socket_get6_sk(skb, par);
1514 break;
1515 case NFPROTO_IPV4:
1516 sk = xt_socket_get4_sk(skb, par);
1517 break;
1518 default:
1519 return NULL;
1520 }
1521
1522 /*
1523 * There seem to be issues with the file ptr for TCP_TIME_WAIT SKs.
1524 * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
1525 * Not fixed in 3.0-r3 :(
1526 */
1527 if (sk) {
1528 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1529 "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1530 if (sk->sk_state == TCP_TIME_WAIT) {
1531 xt_socket_put_sk(sk);
1532 sk = NULL;
1533 }
1534 }
1535 return sk;
1536}
1537
1538static void account_for_uid(const struct sk_buff *skb,
1539 const struct sock *alternate_sk, uid_t uid,
1540 struct xt_action_param *par)
1541{
1542 const struct net_device *el_dev;
1543
1544 if (!skb->dev) {
1545 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1546 el_dev = par->in ? : par->out;
1547 } else {
1548 const struct net_device *other_dev;
1549 el_dev = skb->dev;
1550 other_dev = par->in ? : par->out;
1551 if (el_dev != other_dev) {
1552 MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1553 "par->(in/out)=%p %s\n",
1554 par->hooknum, el_dev, el_dev->name, other_dev,
1555 other_dev->name);
1556 }
1557 }
1558
1559 if (unlikely(!el_dev)) {
1560 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1561 } else if (unlikely(!el_dev->name)) {
1562 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1563 } else {
1564 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n",
1565 par->hooknum,
1566 el_dev->name,
1567 el_dev->type);
1568
1569 if_tag_stat_update(el_dev->name, uid,
1570 skb->sk ? skb->sk : alternate_sk,
1571 par->in ? IFS_RX : IFS_TX,
1572 ip_hdr(skb)->protocol, skb->len);
1573 }
1574}
1575
1576static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1577{
1578 const struct xt_qtaguid_match_info *info = par->matchinfo;
1579 const struct file *filp;
1580 bool got_sock = false;
1581 struct sock *sk;
1582 uid_t sock_uid;
1583 bool res;
1584
1585 if (unlikely(module_passive))
1586 return (info->match ^ info->invert) == 0;
1587
1588 MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1589 par->hooknum, skb, par->in, par->out, par->family);
1590
1591 atomic64_inc(&qtu_events.match_calls);
1592 if (skb == NULL) {
1593 res = (info->match ^ info->invert) == 0;
1594 goto ret_res;
1595 }
1596
1597 sk = skb->sk;
1598
1599 if (sk == NULL) {
1600 /*
1601 * A missing sk->sk_socket happens when packets are in-flight
1602 * and the matching socket is already closed and gone.
1603 */
1604 sk = qtaguid_find_sk(skb, par);
1605 /*
1606 * If we got the socket from the find_sk(), we will need to put
1607 * it back, as nf_tproxy_get_sock_v4() got it.
1608 */
1609 got_sock = sk;
1610 if (sk)
1611 atomic64_inc(&qtu_events.match_found_sk_in_ct);
1612 else
1613 atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
1614 } else {
1615 atomic64_inc(&qtu_events.match_found_sk);
1616 }
1617 MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n",
1618 par->hooknum, sk, got_sock, ip_hdr(skb)->protocol);
1619 if (sk != NULL) {
1620 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1621 par->hooknum, sk, sk->sk_socket,
1622 sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1623 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1624 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1625 par->hooknum, filp ? filp->f_cred->fsuid : -1);
1626 }
1627
1628 if (sk == NULL || sk->sk_socket == NULL) {
1629 /*
1630 * Here, the qtaguid_find_sk() using connection tracking
1631 * couldn't find the owner, so for now we just count them
1632 * against the system.
1633 */
1634 /*
1635 * TODO: find a cleaner way to force accounting only.
1636 * For now we only do iface stats when the uid-owner is not
1637 * requested.
1638 */
1639 if (!(info->match & XT_QTAGUID_UID))
1640 account_for_uid(skb, sk, 0, par);
1641 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1642 par->hooknum,
1643 sk ? sk->sk_socket : NULL);
1644 res = (info->match ^ info->invert) == 0;
1645 atomic64_inc(&qtu_events.match_no_sk);
1646 goto put_sock_ret_res;
1647 } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1648 res = false;
1649 goto put_sock_ret_res;
1650 }
1651 filp = sk->sk_socket->file;
1652 if (filp == NULL) {
1653 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
1654 account_for_uid(skb, sk, 0, par);
1655 res = ((info->match ^ info->invert) &
1656 (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
1657 atomic64_inc(&qtu_events.match_no_sk_file);
1658 goto put_sock_ret_res;
1659 }
1660 sock_uid = filp->f_cred->fsuid;
1661 /*
1662 * TODO: find a cleaner way to force accounting only.
1663 * For now we only do iface stats when the uid-owner is not requested.
1664 */
1665 if (!(info->match & XT_QTAGUID_UID))
1666 account_for_uid(skb, sk, sock_uid, par);
1667
1668 /*
1669 * The following two tests fail the match when:
1670 * id not in range AND no inverted condition requested
1671 * or id in range AND inverted condition requested
1672 * Thus (!a && b) || (a && !b) == a ^ b
1673 */
1674 if (info->match & XT_QTAGUID_UID)
1675 if ((filp->f_cred->fsuid >= info->uid_min &&
1676 filp->f_cred->fsuid <= info->uid_max) ^
1677 !(info->invert & XT_QTAGUID_UID)) {
1678 MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1679 par->hooknum);
1680 res = false;
1681 goto put_sock_ret_res;
1682 }
1683 if (info->match & XT_QTAGUID_GID)
1684 if ((filp->f_cred->fsgid >= info->gid_min &&
1685 filp->f_cred->fsgid <= info->gid_max) ^
1686 !(info->invert & XT_QTAGUID_GID)) {
1687 MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1688 par->hooknum);
1689 res = false;
1690 goto put_sock_ret_res;
1691 }
1692
1693 MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
1694 res = true;
1695
1696put_sock_ret_res:
1697 if (got_sock)
1698 xt_socket_put_sk(sk);
1699ret_res:
1700 MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1701 return res;
1702}
1703
1704#ifdef DDEBUG
1705/* This function is not in xt_qtaguid_print.c because of locks visibility */
1706static void prdebug_full_state(int indent_level, const char *fmt, ...)
1707{
1708 va_list args;
1709 char *fmt_buff;
1710 char *buff;
1711
1712 if (likely(!(qtaguid_debug_mask & DDEBUG_MASK)))
1713 return;
1714
1715 fmt_buff = kasprintf(GFP_ATOMIC,
1716 "qtaguid: %s(): %s {\n", __func__, fmt);
1717 BUG_ON(!fmt_buff);
1718 va_start(args, fmt);
1719 buff = kvasprintf(GFP_ATOMIC,
1720 fmt_buff, args);
1721 BUG_ON(!buff);
1722 pr_debug("%s", buff);
1723 kfree(fmt_buff);
1724 kfree(buff);
1725 va_end(args);
1726
1727 spin_lock_bh(&sock_tag_list_lock);
1728 prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
1729 spin_unlock_bh(&sock_tag_list_lock);
1730
1731 spin_lock_bh(&sock_tag_list_lock);
1732 spin_lock_bh(&uid_tag_data_tree_lock);
1733 prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
1734 prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
1735 spin_unlock_bh(&uid_tag_data_tree_lock);
1736 spin_unlock_bh(&sock_tag_list_lock);
1737
1738 spin_lock_bh(&iface_stat_list_lock);
1739 prdebug_iface_stat_list(indent_level, &iface_stat_list);
1740 spin_unlock_bh(&iface_stat_list_lock);
1741
1742 pr_debug("qtaguid: %s(): }\n", __func__);
1743}
1744#else
1745static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
1746#endif
1747
1748/*
1749 * Procfs reader to get all active socket tags using style "1)" as described in
1750 * fs/proc/generic.c
1751 */
1752static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
1753 off_t items_to_skip, int char_count, int *eof,
1754 void *data)
1755{
1756 char *outp = page;
1757 int len;
1758 uid_t uid;
1759 struct rb_node *node;
1760 struct sock_tag *sock_tag_entry;
1761 int item_index = 0;
1762 int indent_level = 0;
1763 long f_count;
1764
1765 if (unlikely(module_passive)) {
1766 *eof = 1;
1767 return 0;
1768 }
1769
1770 if (*eof)
1771 return 0;
1772
1773 CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
1774 page, items_to_skip, char_count, *eof);
1775
1776 spin_lock_bh(&sock_tag_list_lock);
1777 for (node = rb_first(&sock_tag_tree);
1778 node;
1779 node = rb_next(node)) {
1780 if (item_index++ < items_to_skip)
1781 continue;
1782 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1783 uid = get_uid_from_tag(sock_tag_entry->tag);
1784 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1785 "pid=%u\n",
1786 sock_tag_entry->sk,
1787 sock_tag_entry->tag,
1788 uid,
1789 sock_tag_entry->pid
1790 );
1791 f_count = atomic_long_read(
1792 &sock_tag_entry->socket->file->f_count);
1793 len = snprintf(outp, char_count,
1794 "sock=%p tag=0x%llx (uid=%u) pid=%u "
1795 "f_count=%lu\n",
1796 sock_tag_entry->sk,
1797 sock_tag_entry->tag, uid,
1798 sock_tag_entry->pid, f_count);
1799 if (len >= char_count) {
1800 spin_unlock_bh(&sock_tag_list_lock);
1801 *outp = '\0';
1802 return outp - page;
1803 }
1804 outp += len;
1805 char_count -= len;
1806 (*num_items_returned)++;
1807 }
1808 spin_unlock_bh(&sock_tag_list_lock);
1809
1810 if (item_index++ >= items_to_skip) {
1811 len = snprintf(outp, char_count,
1812 "events: sockets_tagged=%llu "
1813 "sockets_untagged=%llu "
1814 "counter_set_changes=%llu "
1815 "delete_cmds=%llu "
1816 "iface_events=%llu "
1817 "match_calls=%llu "
1818 "match_found_sk=%llu "
1819 "match_found_sk_in_ct=%llu "
1820 "match_found_no_sk_in_ct=%llu "
1821 "match_no_sk=%llu "
1822 "match_no_sk_file=%llu\n",
1823 atomic64_read(&qtu_events.sockets_tagged),
1824 atomic64_read(&qtu_events.sockets_untagged),
1825 atomic64_read(&qtu_events.counter_set_changes),
1826 atomic64_read(&qtu_events.delete_cmds),
1827 atomic64_read(&qtu_events.iface_events),
1828 atomic64_read(&qtu_events.match_calls),
1829 atomic64_read(&qtu_events.match_found_sk),
1830 atomic64_read(&qtu_events.match_found_sk_in_ct),
1831 atomic64_read(
1832 &qtu_events.match_found_no_sk_in_ct),
1833 atomic64_read(&qtu_events.match_no_sk),
1834 atomic64_read(&qtu_events.match_no_sk_file));
1835 if (len >= char_count) {
1836 *outp = '\0';
1837 return outp - page;
1838 }
1839 outp += len;
1840 char_count -= len;
1841 (*num_items_returned)++;
1842 }
1843
1844 /* Count the following as part of the last item_index */
1845 if (item_index > items_to_skip) {
1846 prdebug_full_state(indent_level, "proc ctrl");
1847 }
1848
1849 *eof = 1;
1850 return outp - page;
1851}
1852
1853/*
1854 * Delete the socket tags and stat tags associated with a given
1855 * accounting tag and uid.
1856 */
1857static int ctrl_cmd_delete(const char *input)
1858{
1859 char cmd;
1860 uid_t uid;
1861 uid_t entry_uid;
1862 tag_t acct_tag;
1863 tag_t tag;
1864 int res, argc;
1865 struct iface_stat *iface_entry;
1866 struct rb_node *node;
1867 struct sock_tag *st_entry;
1868 struct rb_root st_to_free_tree = RB_ROOT;
1869 struct tag_stat *ts_entry;
1870 struct tag_counter_set *tcs_entry;
1871 struct tag_ref *tr_entry;
1872 struct uid_tag_data *utd_entry;
1873
1874 argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
1875 CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
1876 "user_tag=0x%llx uid=%u\n", input, argc, cmd,
1877 acct_tag, uid);
1878 if (argc < 2) {
1879 res = -EINVAL;
1880 goto err;
1881 }
1882 if (!valid_atag(acct_tag)) {
1883 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
1884 res = -EINVAL;
1885 goto err;
1886 }
1887 if (argc < 3) {
1888 uid = current_fsuid();
1889 } else if (!can_impersonate_uid(uid)) {
1890 pr_info("qtaguid: ctrl_delete(%s): "
1891 "insufficient priv from pid=%u tgid=%u uid=%u\n",
1892 input, current->pid, current->tgid, current_fsuid());
1893 res = -EPERM;
1894 goto err;
1895 }
1896
1897 tag = combine_atag_with_uid(acct_tag, uid);
1898 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1899 "looking for tag=0x%llx (uid=%u)\n",
1900 input, tag, uid);
1901
1902 /* Delete socket tags */
1903 spin_lock_bh(&sock_tag_list_lock);
1904 node = rb_first(&sock_tag_tree);
1905 while (node) {
1906 st_entry = rb_entry(node, struct sock_tag, sock_node);
1907 entry_uid = get_uid_from_tag(st_entry->tag);
1908 node = rb_next(node);
1909 if (entry_uid != uid)
1910 continue;
1911
1912 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
1913 input, st_entry->tag, entry_uid);
1914
1915 if (!acct_tag || st_entry->tag == tag) {
1916 rb_erase(&st_entry->sock_node, &sock_tag_tree);
1917 /* Can't sockfd_put() within spinlock, do it later. */
1918 sock_tag_tree_insert(st_entry, &st_to_free_tree);
1919 tr_entry = lookup_tag_ref(st_entry->tag, NULL);
1920 BUG_ON(tr_entry->num_sock_tags <= 0);
1921 tr_entry->num_sock_tags--;
1922 /*
1923 * TODO: remove this check and start failing.
1924 * It is a hack to tolerate apps that never opened
1925 * /dev/xt_qtaguid: elsewhere we guard with
1926 * "if (IS_ERR_OR_NULL(pqd_entry))" for the same reason,
1927 * so this entry may never have been linked into a list.
1928 */
1929 if (st_entry->list.next && st_entry->list.prev)
1930 list_del(&st_entry->list);
1931 }
1932 }
1933 spin_unlock_bh(&sock_tag_list_lock);
1934
1935 sock_tag_tree_erase(&st_to_free_tree);
1936
1937 /* Delete tag counter-sets */
1938 spin_lock_bh(&tag_counter_set_list_lock);
1939 /* Counter sets are only on the uid tag, not full tag */
1940 tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
1941 if (tcs_entry) {
1942 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1943 "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
1944 input,
1945 tcs_entry->tn.tag,
1946 get_uid_from_tag(tcs_entry->tn.tag),
1947 tcs_entry->active_set);
1948 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
1949 kfree(tcs_entry);
1950 }
1951 spin_unlock_bh(&tag_counter_set_list_lock);
1952
1953 /*
1954 * If acct_tag is 0, then all entries belonging to uid are
1955 * erased.
1956 */
1957 spin_lock_bh(&iface_stat_list_lock);
1958 list_for_each_entry(iface_entry, &iface_stat_list, list) {
1959 spin_lock_bh(&iface_entry->tag_stat_list_lock);
1960 node = rb_first(&iface_entry->tag_stat_tree);
1961 while (node) {
1962 ts_entry = rb_entry(node, struct tag_stat, tn.node);
1963 entry_uid = get_uid_from_tag(ts_entry->tn.tag);
1964 node = rb_next(node);
1965
1966 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1967 "ts tag=0x%llx (uid=%u)\n",
1968 input, ts_entry->tn.tag, entry_uid);
1969
1970 if (entry_uid != uid)
1971 continue;
1972 if (!acct_tag || ts_entry->tn.tag == tag) {
1973 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1974 "erase ts: %s 0x%llx %u\n",
1975 input, iface_entry->ifname,
1976 get_atag_from_tag(ts_entry->tn.tag),
1977 entry_uid);
1978 rb_erase(&ts_entry->tn.node,
1979 &iface_entry->tag_stat_tree);
1980 kfree(ts_entry);
1981 }
1982 }
1983 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1984 }
1985 spin_unlock_bh(&iface_stat_list_lock);
1986
1987 /* Cleanup the uid_tag_data */
1988 spin_lock_bh(&uid_tag_data_tree_lock);
1989 node = rb_first(&uid_tag_data_tree);
1990 while (node) {
1991 utd_entry = rb_entry(node, struct uid_tag_data, node);
1992 entry_uid = utd_entry->uid;
1993 node = rb_next(node);
1994
1995 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1996 "utd uid=%u\n",
1997 input, entry_uid);
1998
1999 if (entry_uid != uid)
2000 continue;
2001 /*
2002 * Go over the tag_refs and free those that no longer have
2003 * sock_tags using them.
2004 */
2005 put_tag_ref_tree(tag, utd_entry);
2006 put_utd_entry(utd_entry);
2007 }
2008 spin_unlock_bh(&uid_tag_data_tree_lock);
2009
2010 atomic64_inc(&qtu_events.delete_cmds);
2011 res = 0;
2012
2013err:
2014 return res;
2015}
2016
2017static int ctrl_cmd_counter_set(const char *input)
2018{
2019 char cmd;
2020 uid_t uid = 0;
2021 tag_t tag;
2022 int res, argc;
2023 struct tag_counter_set *tcs;
2024 int counter_set;
2025
2026 argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2027 CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2028 "set=%d uid=%u\n", input, argc, cmd,
2029 counter_set, uid);
2030 if (argc != 3) {
2031 res = -EINVAL;
2032 goto err;
2033 }
2034 if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2035 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2036 input);
2037 res = -EINVAL;
2038 goto err;
2039 }
2040 if (!can_manipulate_uids()) {
2041 pr_info("qtaguid: ctrl_counterset(%s): "
2042 "insufficient priv from pid=%u tgid=%u uid=%u\n",
2043 input, current->pid, current->tgid, current_fsuid());
2044 res = -EPERM;
2045 goto err;
2046 }
2047
2048 tag = make_tag_from_uid(uid);
2049 spin_lock_bh(&tag_counter_set_list_lock);
2050 tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2051 if (!tcs) {
2052 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2053 if (!tcs) {
2054 spin_unlock_bh(&tag_counter_set_list_lock);
2055 pr_err("qtaguid: ctrl_counterset(%s): "
2056 "failed to alloc counter set\n",
2057 input);
2058 res = -ENOMEM;
2059 goto err;
2060 }
2061 tcs->tn.tag = tag;
2062 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2063 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2064 "(uid=%u) set=%d\n",
2065 input, tag, get_uid_from_tag(tag), counter_set);
2066 }
2067 tcs->active_set = counter_set;
2068 spin_unlock_bh(&tag_counter_set_list_lock);
2069 atomic64_inc(&qtu_events.counter_set_changes);
2070 res = 0;
2071
2072err:
2073 return res;
2074}
2075
2076static int ctrl_cmd_tag(const char *input)
2077{
2078 char cmd;
2079 int sock_fd = 0;
2080 uid_t uid = 0;
2081 tag_t acct_tag = make_atag_from_value(0);
2082 tag_t full_tag;
2083 struct socket *el_socket;
2084 int res, argc;
2085 struct sock_tag *sock_tag_entry;
2086 struct tag_ref *tag_ref_entry;
2087 struct uid_tag_data *uid_tag_data_entry;
2088 struct proc_qtu_data *pqd_entry;
2089
2090 /* Unassigned args will get defaulted later. */
2091 argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
2092 CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2093 "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2094 acct_tag, uid);
2095 if (argc < 2) {
2096 res = -EINVAL;
2097 goto err;
2098 }
2099 el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
2100 if (!el_socket) {
2101 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2102 " sock_fd=%d err=%d\n", input, sock_fd, res);
2103 goto err;
2104 }
2105 CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2106 input, atomic_long_read(&el_socket->file->f_count),
2107 el_socket->sk);
2108 if (argc < 3) {
2109 acct_tag = make_atag_from_value(0);
2110 } else if (!valid_atag(acct_tag)) {
2111 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2112 res = -EINVAL;
2113 goto err_put;
2114 }
2115 CT_DEBUG("qtaguid: ctrl_tag(%s): "
2116 "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2117 "in_group=%d in_egroup=%d\n",
2118 input, current->pid, current->tgid, current_uid(),
2119 current_euid(), current_fsuid(),
2120 in_group_p(proc_ctrl_write_gid),
2121 in_egroup_p(proc_ctrl_write_gid));
2122 if (argc < 4) {
2123 uid = current_fsuid();
2124 } else if (!can_impersonate_uid(uid)) {
2125 pr_info("qtaguid: ctrl_tag(%s): "
2126 "insufficient priv from pid=%u tgid=%u uid=%u\n",
2127 input, current->pid, current->tgid, current_fsuid());
2128 res = -EPERM;
2129 goto err_put;
2130 }
2131 full_tag = combine_atag_with_uid(acct_tag, uid);
2132
2133 spin_lock_bh(&sock_tag_list_lock);
2134 sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2135 tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2136 if (IS_ERR(tag_ref_entry)) {
2137 res = PTR_ERR(tag_ref_entry);
2138 spin_unlock_bh(&sock_tag_list_lock);
2139 goto err_put;
2140 }
2141 tag_ref_entry->num_sock_tags++;
2142 if (sock_tag_entry) {
2143 struct tag_ref *prev_tag_ref_entry;
2144
2145 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2146 "st@%p ...->f_count=%ld\n",
2147 input, el_socket->sk, sock_tag_entry,
2148 atomic_long_read(&el_socket->file->f_count));
2149 /*
2150 * This is a re-tagging, so release the sock_fd that was
2151 * locked at the time of the 1st tagging.
2152 * There is still the ref from this call's sockfd_lookup() so
2153 * it can be done within the spinlock.
2154 */
2155 sockfd_put(sock_tag_entry->socket);
2156 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2157 &uid_tag_data_entry);
2158 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2159 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2160 prev_tag_ref_entry->num_sock_tags--;
2161 sock_tag_entry->tag = full_tag;
2162 } else {
2163 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2164 input, el_socket->sk);
2165 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2166 GFP_ATOMIC);
2167 if (!sock_tag_entry) {
2168 pr_err("qtaguid: ctrl_tag(%s): "
2169 "socket tag alloc failed\n",
2170 input);
2171 spin_unlock_bh(&sock_tag_list_lock);
2172 res = -ENOMEM;
2173 goto err_tag_unref_put;
2174 }
2175 sock_tag_entry->sk = el_socket->sk;
2176 sock_tag_entry->socket = el_socket;
2177 sock_tag_entry->pid = current->tgid;
2178 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
2179 uid);
2180 spin_lock_bh(&uid_tag_data_tree_lock);
2181 pqd_entry = proc_qtu_data_tree_search(
2182 &proc_qtu_data_tree, current->tgid);
2183 /*
2184 * TODO: remove this check and start failing.
2185 * For now, just warn about user-space code that is not
2186 * opening /dev/xt_qtaguid.
2187 */
2188 if (IS_ERR_OR_NULL(pqd_entry))
2189 pr_warn_once(
2190 "qtaguid: %s(): "
2191 "User space forgot to open /dev/xt_qtaguid? "
2192 "pid=%u tgid=%u uid=%u\n", __func__,
2193 current->pid, current->tgid,
2194 current_fsuid());
2195 else
2196 list_add(&sock_tag_entry->list,
2197 &pqd_entry->sock_tag_list);
2198 spin_unlock_bh(&uid_tag_data_tree_lock);
2199
2200 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2201 atomic64_inc(&qtu_events.sockets_tagged);
2202 }
2203 spin_unlock_bh(&sock_tag_list_lock);
2204 /* We keep the ref to the socket (file) until it is untagged */
2205 CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2206 input, sock_tag_entry,
2207 atomic_long_read(&el_socket->file->f_count));
2208 return 0;
2209
2210err_tag_unref_put:
2211 BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2212 tag_ref_entry->num_sock_tags--;
2213 free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2214err_put:
2215 CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2216 input, atomic_long_read(&el_socket->file->f_count) - 1);
2217 /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2218 sockfd_put(el_socket);
2219 return res;
2220
2221err:
2222 CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
2223 return res;
2224}
2225
2226static int ctrl_cmd_untag(const char *input)
2227{
2228 char cmd;
2229 int sock_fd = 0;
2230 struct socket *el_socket;
2231 int res, argc;
2232 struct sock_tag *sock_tag_entry;
2233 struct tag_ref *tag_ref_entry;
2234 struct uid_tag_data *utd_entry;
2235 struct proc_qtu_data *pqd_entry;
2236
2237 argc = sscanf(input, "%c %d", &cmd, &sock_fd);
2238 CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
2239 input, argc, cmd, sock_fd);
2240 if (argc < 2) {
2241 res = -EINVAL;
2242 goto err;
2243 }
2244 el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
2245 if (!el_socket) {
2246 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2247 " sock_fd=%d err=%d\n", input, sock_fd, res);
2248 goto err;
2249 }
2250 CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2251 input, atomic_long_read(&el_socket->file->f_count),
2252 el_socket->sk);
2253 spin_lock_bh(&sock_tag_list_lock);
2254 sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2255 if (!sock_tag_entry) {
2256 spin_unlock_bh(&sock_tag_list_lock);
2257 res = -EINVAL;
2258 goto err_put;
2259 }
2260 /*
2261 * The socket already belongs to the current process
2262 * so it can do whatever it wants to it.
2263 */
2264 rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
2265
2266 tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
2267 BUG_ON(!tag_ref_entry);
2268 BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2269 spin_lock_bh(&uid_tag_data_tree_lock);
2270 pqd_entry = proc_qtu_data_tree_search(
2271 &proc_qtu_data_tree, current->tgid);
2272 /*
2273 * TODO: remove this check and start failing.
2274 * For now, just warn about user-space code that is not
2275 * opening /dev/xt_qtaguid.
2276 */
2277 if (IS_ERR_OR_NULL(pqd_entry))
2278 pr_warn_once("qtaguid: %s(): "
2279 "User space forgot to open /dev/xt_qtaguid? "
2280 "pid=%u tgid=%u uid=%u\n", __func__,
2281 current->pid, current->tgid, current_fsuid());
2282 else
2283 list_del(&sock_tag_entry->list);
2284 spin_unlock_bh(&uid_tag_data_tree_lock);
2285 /*
2286 * We don't free tag_ref from the utd_entry here,
2287 * only during a cmd_delete().
2288 */
2289 tag_ref_entry->num_sock_tags--;
2290 spin_unlock_bh(&sock_tag_list_lock);
2291 /*
2292 * Release the sock_fd that was grabbed at tag time,
2293 * and once more for the sockfd_lookup() here.
2294 */
2295 sockfd_put(sock_tag_entry->socket);
2296 CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
2297 input, sock_tag_entry,
2298 atomic_long_read(&el_socket->file->f_count) - 1);
2299 sockfd_put(el_socket);
2300
2301 kfree(sock_tag_entry);
2302 atomic64_inc(&qtu_events.sockets_untagged);
2303
2304 return 0;
2305
2306err_put:
2307 CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
2308 input, atomic_long_read(&el_socket->file->f_count) - 1);
2309 /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2310 sockfd_put(el_socket);
2311 return res;
2312
2313err:
2314 CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
2315 return res;
2316}
2317
2318static int qtaguid_ctrl_parse(const char *input, int count)
2319{
2320 char cmd;
2321 int res;
2322
2323 cmd = input[0];
2324 /* Collect params for commands */
2325 switch (cmd) {
2326 case 'd':
2327 res = ctrl_cmd_delete(input);
2328 break;
2329
2330 case 's':
2331 res = ctrl_cmd_counter_set(input);
2332 break;
2333
2334 case 't':
2335 res = ctrl_cmd_tag(input);
2336 break;
2337
2338 case 'u':
2339 res = ctrl_cmd_untag(input);
2340 break;
2341
2342 default:
2343 res = -EINVAL;
2344 goto err;
2345 }
2346 if (!res)
2347 res = count;
2348err:
2349 CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
2350 return res;
2351}
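/*
 * Editorial note (not part of the original patch): a minimal user-space
 * sketch of driving the ctrl commands parsed above.  The command strings
 * mirror the sscanf() formats in the ctrl_cmd_*() helpers:
 *   "t <sock_fd> [<acct_tag> [<uid>]]", "u <sock_fd>",
 *   "d <acct_tag> [<uid>]", "s <counter_set> <uid>".
 * The proc path and device node below are assumptions based on the names
 * used in this file (see qtaguid_proc_register() and the /dev/xt_qtaguid
 * warnings).
 */
#if 0	/* illustration only; never built into the kernel */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int qtaguid_write_ctrl(const char *cmd)
{
	/* Assumed path: procdir "xt_qtaguid" under /proc/net, entry "ctrl". */
	int fd = open("/proc/net/xt_qtaguid/ctrl", O_WRONLY);
	int ret;

	if (fd < 0)
		return -1;
	ret = (write(fd, cmd, strlen(cmd)) == (ssize_t)strlen(cmd)) ? 0 : -1;
	close(fd);
	return ret;
}

static int qtaguid_tag_socket(int sock_fd, unsigned long long acct_tag,
			      unsigned int uid)
{
	char cmd[64];

	/*
	 * qtudev_open() expects the tagging process to keep /dev/xt_qtaguid
	 * open so its tags can be cleaned up on release; omitted here for
	 * brevity.
	 */
	snprintf(cmd, sizeof(cmd), "t %d %llu %u", sock_fd, acct_tag, uid);
	return qtaguid_write_ctrl(cmd);
}
#endif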
2352
2353#define MAX_QTAGUID_CTRL_INPUT_LEN 255
2354static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2355 unsigned long count, void *data)
2356{
2357 char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2358
2359 if (unlikely(module_passive))
2360 return count;
2361
2362 if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2363 return -EINVAL;
2364
2365 if (copy_from_user(input_buf, buffer, count))
2366 return -EFAULT;
2367
2368 input_buf[count] = '\0';
2369 return qtaguid_ctrl_parse(input_buf, count);
2370}
2371
2372struct proc_print_info {
2373 char *outp;
2374 char **num_items_returned;
2375 struct iface_stat *iface_entry;
2376 struct tag_stat *ts_entry;
2377 int item_index;
2378 int items_to_skip;
2379 int char_count;
2380};
2381
2382static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
2383{
2384 int len;
2385 struct data_counters *cnts;
2386
2387 if (!ppi->item_index) {
2388 if (ppi->item_index++ < ppi->items_to_skip)
2389 return 0;
2390 len = snprintf(ppi->outp, ppi->char_count,
2391 "idx iface acct_tag_hex uid_tag_int cnt_set "
2392 "rx_bytes rx_packets "
2393 "tx_bytes tx_packets "
2394 "rx_tcp_bytes rx_tcp_packets "
2395 "rx_udp_bytes rx_udp_packets "
2396 "rx_other_bytes rx_other_packets "
2397 "tx_tcp_bytes tx_tcp_packets "
2398 "tx_udp_bytes tx_udp_packets "
2399 "tx_other_bytes tx_other_packets\n");
2400 } else {
2401 tag_t tag = ppi->ts_entry->tn.tag;
2402 uid_t stat_uid = get_uid_from_tag(tag);
2403
2404 if (!can_read_other_uid_stats(stat_uid)) {
2405 CT_DEBUG("qtaguid: stats line: "
2406 "%s 0x%llx %u: insufficient priv "
2407 "from pid=%u tgid=%u uid=%u\n",
2408 ppi->iface_entry->ifname,
2409 get_atag_from_tag(tag), stat_uid,
2410 current->pid, current->tgid, current_fsuid());
2411 return 0;
2412 }
2413 if (ppi->item_index++ < ppi->items_to_skip)
2414 return 0;
2415 cnts = &ppi->ts_entry->counters;
2416 len = snprintf(
2417 ppi->outp, ppi->char_count,
2418 "%d %s 0x%llx %u %u "
2419 "%llu %llu "
2420 "%llu %llu "
2421 "%llu %llu "
2422 "%llu %llu "
2423 "%llu %llu "
2424 "%llu %llu "
2425 "%llu %llu "
2426 "%llu %llu\n",
2427 ppi->item_index,
2428 ppi->iface_entry->ifname,
2429 get_atag_from_tag(tag),
2430 stat_uid,
2431 cnt_set,
2432 dc_sum_bytes(cnts, cnt_set, IFS_RX),
2433 dc_sum_packets(cnts, cnt_set, IFS_RX),
2434 dc_sum_bytes(cnts, cnt_set, IFS_TX),
2435 dc_sum_packets(cnts, cnt_set, IFS_TX),
2436 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2437 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2438 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2439 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2440 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2441 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2442 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2443 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2444 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2445 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2446 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2447 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
2448 }
2449 return len;
2450}
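/*
 * Editorial note (not part of the original patch): given the formats above,
 * a hypothetical data line of the stats output starts with
 *   "2 wlan0 0x0 10004 0 ..."
 * i.e. index, ifname, acct_tag in hex, uid, counter set, followed by the
 * rx/tx byte and packet sums and then the twelve per-protocol
 * {tcp, udp, other} byte/packet values, matching the header line printed
 * for item_index 0.
 */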
2451
2452static bool pp_sets(struct proc_print_info *ppi)
2453{
2454 int len;
2455 int counter_set;
2456 for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2457 counter_set++) {
2458 len = pp_stats_line(ppi, counter_set);
2459 if (len >= ppi->char_count) {
2460 *ppi->outp = '\0';
2461 return false;
2462 }
2463 if (len) {
2464 ppi->outp += len;
2465 ppi->char_count -= len;
2466 (*ppi->num_items_returned)++;
2467 }
2468 }
2469 return true;
2470}
2471
2472/*
2473 * Procfs reader to get all tag stats using style "1)" as described in
2474 * fs/proc/generic.c
2475 * Groups all protocols tx/rx bytes.
2476 */
2477static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
2478 off_t items_to_skip, int char_count, int *eof,
2479 void *data)
2480{
2481 struct proc_print_info ppi;
2482 int len;
2483
2484 ppi.outp = page;
2485 ppi.item_index = 0;
2486 ppi.char_count = char_count;
2487 ppi.num_items_returned = num_items_returned;
2488 ppi.items_to_skip = items_to_skip;
2489
2490 if (unlikely(module_passive)) {
2491 len = pp_stats_line(&ppi, 0);
2492 /* The header should always be shorter than the buffer. */
2493 BUG_ON(len >= ppi.char_count);
2494 (*num_items_returned)++;
2495 *eof = 1;
2496 return len;
2497 }
2498
2499 CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
2500 "char_count=%d *eof=%d\n", page, *num_items_returned,
2501 items_to_skip, char_count, *eof);
2502
2503 if (*eof)
2504 return 0;
2505
2506 /* The idx is there to help debug when things go belly up. */
2507 len = pp_stats_line(&ppi, 0);
2508 /* Don't advance the outp unless the whole line was printed */
2509 if (len >= ppi.char_count) {
2510 *ppi.outp = '\0';
2511 return ppi.outp - page;
2512 }
2513 if (len) {
2514 ppi.outp += len;
2515 ppi.char_count -= len;
2516 (*num_items_returned)++;
2517 }
2518
2519 spin_lock_bh(&iface_stat_list_lock);
2520 list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
2521 struct rb_node *node;
2522 spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
2523 for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
2524 node;
2525 node = rb_next(node)) {
2526 ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
2527 if (!pp_sets(&ppi)) {
2528 spin_unlock_bh(
2529 &ppi.iface_entry->tag_stat_list_lock);
2530 spin_unlock_bh(&iface_stat_list_lock);
2531 return ppi.outp - page;
2532 }
2533 }
2534 spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
2535 }
2536 spin_unlock_bh(&iface_stat_list_lock);
2537
2538 *eof = 1;
2539 return ppi.outp - page;
2540}
2541
2542/*------------------------------------------*/
2543static int qtudev_open(struct inode *inode, struct file *file)
2544{
2545 struct uid_tag_data *utd_entry;
2546 struct proc_qtu_data *pqd_entry;
2547 struct proc_qtu_data *new_pqd_entry;
2548 int res;
2549 bool utd_entry_found;
2550
2551 if (unlikely(qtu_proc_handling_passive))
2552 return 0;
2553
2554 DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2555 current->pid, current->tgid, current_fsuid());
2556
2557 spin_lock_bh(&uid_tag_data_tree_lock);
2558
2559 /* Look for existing uid data, or alloc one. */
2560 utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
2561 if (IS_ERR_OR_NULL(utd_entry)) {
2562 res = PTR_ERR(utd_entry);
2563 goto err;
2564 }
2565
2566 /* Look for existing PID based proc_data */
2567 pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2568 current->tgid);
2569 if (pqd_entry) {
2570 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2571 "%s already opened\n",
2572 current->pid, current->tgid, current_fsuid(),
2573 QTU_DEV_NAME);
2574 res = -EBUSY;
2575 goto err_unlock_free_utd;
2576 }
2577
2578 new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2579 if (!new_pqd_entry) {
2580 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2581 "proc data alloc failed\n",
2582 current->pid, current->tgid, current_fsuid());
2583 res = -ENOMEM;
2584 goto err_unlock_free_utd;
2585 }
2586 new_pqd_entry->pid = current->tgid;
2587 INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2588 new_pqd_entry->parent_tag_data = utd_entry;
2589 utd_entry->num_pqd++;
2590
2591 proc_qtu_data_tree_insert(new_pqd_entry,
2592 &proc_qtu_data_tree);
2593
2594 spin_unlock_bh(&uid_tag_data_tree_lock);
2595 DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2596 current_fsuid(), new_pqd_entry);
2597 file->private_data = new_pqd_entry;
2598 return 0;
2599
2600err_unlock_free_utd:
2601 if (!utd_entry_found) {
2602 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2603 kfree(utd_entry);
2604 }
2605 spin_unlock_bh(&uid_tag_data_tree_lock);
2606err:
2607 return res;
2608}
2609
2610static int qtudev_release(struct inode *inode, struct file *file)
2611{
2612 struct proc_qtu_data *pqd_entry = file->private_data;
2613 struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data;
2614 struct sock_tag *st_entry;
2615 struct rb_root st_to_free_tree = RB_ROOT;
2616 struct list_head *entry, *next;
2617 struct tag_ref *tr;
2618
2619 if (unlikely(qtu_proc_handling_passive))
2620 return 0;
2621
2622 /*
2623 * Do not trust current->pid; it might just be a kworker cleaning
2624 * up after a dead proc.
2625 */
2626 DR_DEBUG("qtaguid: qtudev_release(): "
2627 "pid=%u tgid=%u uid=%u "
2628 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
2629 current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
2630 pqd_entry, pqd_entry->pid, utd_entry,
2631 utd_entry->num_active_tags);
2632
2633 spin_lock_bh(&sock_tag_list_lock);
2634 spin_lock_bh(&uid_tag_data_tree_lock);
2635
2636 list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
2637 st_entry = list_entry(entry, struct sock_tag, list);
2638 DR_DEBUG("qtaguid: %s(): "
2639 "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
2640 __func__,
2641 st_entry, st_entry->sk,
2642 current->pid, current->tgid,
2643 pqd_entry->parent_tag_data->uid);
2644
2645 utd_entry = uid_tag_data_tree_search(
2646 &uid_tag_data_tree,
2647 get_uid_from_tag(st_entry->tag));
2648 BUG_ON(IS_ERR_OR_NULL(utd_entry));
2649 DR_DEBUG("qtaguid: %s(): "
2650 "looking for tag=0x%llx in utd_entry=%p\n", __func__,
2651 st_entry->tag, utd_entry);
2652 tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
2653 st_entry->tag);
2654 BUG_ON(!tr);
2655 BUG_ON(tr->num_sock_tags <= 0);
2656 tr->num_sock_tags--;
2657 free_tag_ref_from_utd_entry(tr, utd_entry);
2658
2659 rb_erase(&st_entry->sock_node, &sock_tag_tree);
2660 list_del(&st_entry->list);
2661 /* Can't sockfd_put() within spinlock, do it later. */
2662 sock_tag_tree_insert(st_entry, &st_to_free_tree);
2663
2664 /*
2665 * Try to free the utd_entry if no other proc_qtu_data is
2666 * using it (num_pqd is 0) and it doesn't have active tags
2667 * (num_active_tags is 0).
2668 */
2669 put_utd_entry(utd_entry);
2670 }
2671
2672 rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
2673 BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
2674 pqd_entry->parent_tag_data->num_pqd--;
2675 put_utd_entry(pqd_entry->parent_tag_data);
2676 kfree(pqd_entry);
2677 file->private_data = NULL;
2678
2679 spin_unlock_bh(&uid_tag_data_tree_lock);
2680 spin_unlock_bh(&sock_tag_list_lock);
2681
2682
2683 sock_tag_tree_erase(&st_to_free_tree);
2684
2685 prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
2686 current->pid, current->tgid);
2687 return 0;
2688}
2689
2690/*------------------------------------------*/
2691static const struct file_operations qtudev_fops = {
2692 .owner = THIS_MODULE,
2693 .open = qtudev_open,
2694 .release = qtudev_release,
2695};
2696
2697static struct miscdevice qtu_device = {
2698 .minor = MISC_DYNAMIC_MINOR,
2699 .name = QTU_DEV_NAME,
2700 .fops = &qtudev_fops,
2701 /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
2702};
2703
2704/*------------------------------------------*/
2705static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2706{
2707 int ret;
2708 *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2709 if (!*res_procdir) {
2710 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2711 ret = -ENOMEM;
2712 goto no_dir;
2713 }
2714
2715 xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
2716 *res_procdir);
2717 if (!xt_qtaguid_ctrl_file) {
2718 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2719 " file\n");
2720 ret = -ENOMEM;
2721 goto no_ctrl_entry;
2722 }
2723 xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
2724 xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
2725
2726 xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
2727 *res_procdir);
2728 if (!xt_qtaguid_stats_file) {
2729 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2730 "file\n");
2731 ret = -ENOMEM;
2732 goto no_stats_entry;
2733 }
2734 xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
2735 /*
2736 * TODO: add support for counter hacking:
2737 * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2738 */
2739 return 0;
2740
2741no_stats_entry:
2742 remove_proc_entry("ctrl", *res_procdir);
2743no_ctrl_entry:
2744 remove_proc_entry("xt_qtaguid", NULL);
2745no_dir:
2746 return ret;
2747}
2748
2749static struct xt_match qtaguid_mt_reg __read_mostly = {
2750 /*
2751 * This module masquerades as the "owner" module so that iptables
2752 * tools can deal with it.
2753 */
2754 .name = "owner",
2755 .revision = 1,
2756 .family = NFPROTO_UNSPEC,
2757 .match = qtaguid_mt,
2758 .matchsize = sizeof(struct xt_qtaguid_match_info),
2759 .me = THIS_MODULE,
2760};
2761
2762static int __init qtaguid_mt_init(void)
2763{
2764 if (qtaguid_proc_register(&xt_qtaguid_procdir)
2765 || iface_stat_init(xt_qtaguid_procdir)
2766 || xt_register_match(&qtaguid_mt_reg)
2767 || misc_register(&qtu_device))
2768 return -1;
2769 return 0;
2770}
2771
2772/*
2773 * TODO: allow unloading of the module.
2774 * For now stats are permanent.
2775 * Kconfig forces 'y/n' and never an 'm'.
2776 */
2777
2778module_init(qtaguid_mt_init);
2779MODULE_AUTHOR("jpa <jpa@google.com>");
2780MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
2781MODULE_LICENSE("GPL");
2782MODULE_ALIAS("ipt_owner");
2783MODULE_ALIAS("ip6t_owner");
2784MODULE_ALIAS("ipt_qtaguid");
2785MODULE_ALIAS("ip6t_qtaguid");
diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h
new file mode 100644
index 00000000000..02479d6d317
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_internal.h
@@ -0,0 +1,330 @@
1/*
2 * Kernel iptables module to track stats for packets based on user tags.
3 *
4 * (C) 2011 Google, Inc
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __XT_QTAGUID_INTERNAL_H__
11#define __XT_QTAGUID_INTERNAL_H__
12
13#include <linux/types.h>
14#include <linux/rbtree.h>
15#include <linux/spinlock_types.h>
16#include <linux/workqueue.h>
17
18/* Iface handling */
19#define IDEBUG_MASK (1<<0)
20/* Iptable Matching. Per packet. */
21#define MDEBUG_MASK (1<<1)
22/* Red-black tree handling. Per packet. */
23#define RDEBUG_MASK (1<<2)
24/* procfs ctrl/stats handling */
25#define CDEBUG_MASK (1<<3)
26/* dev and resource tracking */
27#define DDEBUG_MASK (1<<4)
28
29/* E.g (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */
30#define DEFAULT_DEBUG_MASK 0
31
32/*
33 * (Un)Define these *DEBUG to compile out/in the pr_debug calls.
34 * All undef: text size ~ 0x3030; all def: ~ 0x4404.
35 */
36#define IDEBUG
37#define MDEBUG
38#define RDEBUG
39#define CDEBUG
40#define DDEBUG
41
42#define MSK_DEBUG(mask, ...) do { \
43 if (unlikely(qtaguid_debug_mask & (mask))) \
44 pr_debug(__VA_ARGS__); \
45 } while (0)
46#ifdef IDEBUG
47#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__)
48#else
49#define IF_DEBUG(...) no_printk(__VA_ARGS__)
50#endif
51#ifdef MDEBUG
52#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__)
53#else
54#define MT_DEBUG(...) no_printk(__VA_ARGS__)
55#endif
56#ifdef RDEBUG
57#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__)
58#else
59#define RB_DEBUG(...) no_printk(__VA_ARGS__)
60#endif
61#ifdef CDEBUG
62#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__)
63#else
64#define CT_DEBUG(...) no_printk(__VA_ARGS__)
65#endif
66#ifdef DDEBUG
67#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__)
68#else
69#define DR_DEBUG(...) no_printk(__VA_ARGS__)
70#endif
71
72extern uint qtaguid_debug_mask;
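/*
 * Illustrative sketch only (hypothetical call site, not part of this header):
 * a CT_DEBUG() line compiles to pr_debug() gated by CDEBUG_MASK when CDEBUG
 * is defined, and to no_printk() (type-checked, then discarded) when it is
 * not:
 *
 *	CT_DEBUG("qtaguid: ctrl cmd=%c uid=%u\n", cmd, uid);
 *
 * At runtime the message is only emitted if qtaguid_debug_mask has
 * CDEBUG_MASK set (and pr_debug() itself is enabled via DEBUG or dynamic
 * debug).
 */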
73
74/*---------------------------------------------------------------------------*/
75/*
76 * Tags:
77 *
78 * They represent what the data usage counters will be tracked against.
79 * By default a tag is just based on the UID.
80 * The UID is used as the base for policing, and can not be ignored.
81 * So a tag will always at least represent a UID (uid_tag).
82 *
83 * A tag can be augmented with an "accounting tag" which is associated
84 * with a UID.
85 * User space can set the acct_tag portion of the tag which is then used
86 * with sockets: all data belonging to that socket will be counted against the
87 * tag. The policing is then based on the tag's uid_tag portion,
88 * and stats are collected for the acct_tag portion separately.
89 *
90 * There could be
91 * a: {acct_tag=1, uid_tag=10003}
92 * b: {acct_tag=2, uid_tag=10003}
93 * c: {acct_tag=3, uid_tag=10003}
94 * d: {acct_tag=0, uid_tag=10003}
95 * a, b, and c represent tags associated with specific sockets.
96 * d is for the totals for that uid, including all untagged traffic.
97 * Typically d is used with policing/quota rules.
98 *
99 * We want tag_t big enough to distinguish uid_t and acct_tag.
100 * It might become a struct if needed.
101 * Nothing should be using it as an int.
102 */
103typedef uint64_t tag_t; /* Only used via accessors */
104
105#define TAG_UID_MASK 0xFFFFFFFFULL
106#define TAG_ACCT_MASK (~0xFFFFFFFFULL)
107
108static inline int tag_compare(tag_t t1, tag_t t2)
109{
110 return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
111}
112
113static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
114{
115 return acct_tag | uid;
116}
117static inline tag_t make_tag_from_uid(uid_t uid)
118{
119 return uid;
120}
121static inline uid_t get_uid_from_tag(tag_t tag)
122{
123 return tag & TAG_UID_MASK;
124}
125static inline tag_t get_utag_from_tag(tag_t tag)
126{
127 return tag & TAG_UID_MASK;
128}
129static inline tag_t get_atag_from_tag(tag_t tag)
130{
131 return tag & TAG_ACCT_MASK;
132}
133
134static inline bool valid_atag(tag_t tag)
135{
136 return !(tag & TAG_UID_MASK);
137}
138static inline tag_t make_atag_from_value(uint32_t value)
139{
140 return (uint64_t)value << 32;
141}
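/*
 * Worked example (illustrative only): user space picks accounting value 2
 * for uid 10003. Both halves of the resulting tag stay recoverable through
 * the accessors above:
 *
 *	tag_t t = combine_atag_with_uid(make_atag_from_value(2), 10003);
 *	get_uid_from_tag(t)  == 10003
 *	get_atag_from_tag(t) == make_atag_from_value(2)	(i.e. 2ULL << 32)
 *	valid_atag(get_atag_from_tag(t)) is true
 */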
142/*---------------------------------------------------------------------------*/
143
144/*
145 * Maximum number of socket tags that a UID is allowed to have active.
146 * Multiple processes belonging to the same UID contribute towards this limit.
147 * Special UIDs that can impersonate a UID also contribute (e.g. download
148 * manager, ...)
149 */
150#define DEFAULT_MAX_SOCK_TAGS 1024
151
152/*
153 * For now we only track 2 sets of counters.
154 * The default set is 0.
155 * Userspace can activate another set for a given uid being tracked.
156 */
157#define IFS_MAX_COUNTER_SETS 2
158
159enum ifs_tx_rx {
160 IFS_TX,
161 IFS_RX,
162 IFS_MAX_DIRECTIONS
163};
164
165/* For now, TCP, UDP, the rest */
166enum ifs_proto {
167 IFS_TCP,
168 IFS_UDP,
169 IFS_PROTO_OTHER,
170 IFS_MAX_PROTOS
171};
172
173struct byte_packet_counters {
174 uint64_t bytes;
175 uint64_t packets;
176};
177
178struct data_counters {
179 struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
180};
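/*
 * Minimal sketch (hypothetical helper, not used by the module) of how one
 * packet would be folded into bpc[counter_set][direction][protocol]:
 */
static inline void dc_example_add(struct data_counters *dc, int set,
				  enum ifs_tx_rx direction,
				  enum ifs_proto proto, unsigned int bytes)
{
	dc->bpc[set][direction][proto].bytes += bytes;
	dc->bpc[set][direction][proto].packets++;
}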
181
182/* Generic X based nodes used as a base for rb_tree ops */
183struct tag_node {
184 struct rb_node node;
185 tag_t tag;
186};
187
188struct tag_stat {
189 struct tag_node tn;
190 struct data_counters counters;
191 /*
192 * If this tag is acct_tag based, we need to count against the
193 * matching parent uid_tag.
194 */
195 struct data_counters *parent_counters;
196};
197
198struct iface_stat {
199 struct list_head list; /* in iface_stat_list */
200 char *ifname;
201 bool active;
202 /* net_dev is only valid for active iface_stat */
203 struct net_device *net_dev;
204
205 struct byte_packet_counters totals[IFS_MAX_DIRECTIONS];
206 /*
207 * We keep the last_known, because some devices reset their counters
208 * just before NETDEV_UP, while some will reset just before
209 * NETDEV_REGISTER (which is more normal).
210 * So now, if the device didn't do a NETDEV_UNREGISTER and we see
211 * its current dev stats smaller than what was previously known, we
212 * assume an UNREGISTER and just use the last_known.
213 */
214 struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS];
215 /* last_known is usable when last_known_valid is true */
216 bool last_known_valid;
217
218 struct proc_dir_entry *proc_ptr;
219
220 struct rb_root tag_stat_tree;
221 spinlock_t tag_stat_list_lock;
222};
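/*
 * Sketch of the last_known heuristic described above (hypothetical helper,
 * not the module's actual update path): if a device's counter went backwards
 * without a NETDEV_UNREGISTER, assume a silent reset and fall back to the
 * last known value.
 */
static inline uint64_t iface_example_bytes(uint64_t current_reading,
					   uint64_t last_known,
					   bool last_known_valid)
{
	if (last_known_valid && current_reading < last_known)
		return last_known;	/* assume the device reset its counter */
	return current_reading;
}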
223
224/* This is needed to create proc_dir_entries from atomic context. */
225struct iface_stat_work {
226 struct work_struct iface_work;
227 struct iface_stat *iface_entry;
228};
229
230/*
231 * Track the tag that this socket is transferring data for; it is not
232 * necessarily tied to the uid that owns the socket.
233 * This is the tag against which tag_stat.counters will be billed.
234 * These structs need to be looked up by sock and pid.
235 */
236struct sock_tag {
237 struct rb_node sock_node;
238 struct sock *sk; /* Only used as a number, never dereferenced */
239 /* The socket is needed for sockfd_put() */
240 struct socket *socket;
241 /* Used to associate with a given pid */
242 struct list_head list; /* in proc_qtu_data.sock_tag_list */
243 pid_t pid;
244
245 tag_t tag;
246};
247
248struct qtaguid_event_counts {
249 /* Various successful events */
250 atomic64_t sockets_tagged;
251 atomic64_t sockets_untagged;
252 atomic64_t counter_set_changes;
253 atomic64_t delete_cmds;
254 atomic64_t iface_events; /* Number of NETDEV_* events handled */
255
256 atomic64_t match_calls; /* Number of times iptables called mt */
257 /*
258 * match_found_sk_*: numbers related to the netfilter matching
259 * function finding a sock for the sk_buff.
260 * Total skbs processed is sum(match_found*).
261 */
262 atomic64_t match_found_sk; /* An sk was already in the sk_buff. */
263 /* The connection tracker had or didn't have the sk. */
264 atomic64_t match_found_sk_in_ct;
265 atomic64_t match_found_no_sk_in_ct;
266 /*
267 * No sk could be found. No apparent owner. Could happen with
268 * unsolicited traffic.
269 */
270 atomic64_t match_no_sk;
271 /*
272 * The file ptr in the sk_socket wasn't there.
273 * This might happen for traffic while the socket is being closed.
274 */
275 atomic64_t match_no_sk_file;
276};
277
278/* Track the active counter set (active_set) for the given tag. */
279struct tag_counter_set {
280 struct tag_node tn;
281 int active_set;
282};
283
284/*----------------------------------------------*/
285/*
286 * The qtu uid data is used to track resources that are created directly or
287 * indirectly by processes (uid tracked).
288 * It is shared by the processes with the same uid.
289 * Some of the resources are counted to prevent further rogue allocations,
290 * some will need freeing once the owner process (uid) exits.
291 */
292struct uid_tag_data {
293 struct rb_node node;
294 uid_t uid;
295
296 /*
297 * For the uid, how many accounting tags have been set.
298 */
299 int num_active_tags;
300 /* Track the number of proc_qtu_data that reference it */
301 int num_pqd;
302 struct rb_root tag_ref_tree;
303 /* No tag_node_tree_lock; use uid_tag_data_tree_lock */
304};
305
306struct tag_ref {
307 struct tag_node tn;
308
309 /*
310 * This tracks the number of active sockets that have a tag on them
311 * which matches this tag_ref.tn.tag.
312 * A tag ref can live on after the sockets are untagged.
313 * A tag ref can only be removed during a tag delete command.
314 */
315 int num_sock_tags;
316};
317
318struct proc_qtu_data {
319 struct rb_node node;
320 pid_t pid;
321
322 struct uid_tag_data *parent_tag_data;
323
324 /* Tracks the sock_tags that need freeing upon this proc's death */
325 struct list_head sock_tag_list;
326 /* No spinlock_t sock_tag_list_lock; use the global one. */
327};
328
329/*----------------------------------------------*/
330#endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */
diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c
new file mode 100644
index 00000000000..39176785c91
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_print.c
@@ -0,0 +1,556 @@
1/*
2 * Pretty printing Support for iptables xt_qtaguid module.
3 *
4 * (C) 2011 Google, Inc
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11/*
12 * Most of the functions in this file just waste time if DEBUG is not defined.
13 * The matching xt_qtaguid_print.h will static inline empty funcs if the needed
14 * debug flags are not defined.
15 * Those funcs that fail to allocate memory will panic as there is no need to
16 * hobble along just pretending to do the requested work.
17 */
18
19#define DEBUG
20
21#include <linux/fs.h>
22#include <linux/gfp.h>
23#include <linux/net.h>
24#include <linux/rbtree.h>
25#include <linux/slab.h>
26#include <linux/spinlock_types.h>
27
28
29#include "xt_qtaguid_internal.h"
30#include "xt_qtaguid_print.h"
31
32#ifdef DDEBUG
33
34static void _bug_on_err_or_null(void *ptr)
35{
36 if (IS_ERR_OR_NULL(ptr)) {
37 pr_err("qtaguid: kmalloc failed\n");
38 BUG();
39 }
40}
41
42char *pp_tag_t(tag_t *tag)
43{
44 char *res;
45
46 if (!tag)
47 res = kasprintf(GFP_ATOMIC, "tag_t@null{}");
48 else
49 res = kasprintf(GFP_ATOMIC,
50 "tag_t@%p{tag=0x%llx, uid=%u}",
51 tag, *tag, get_uid_from_tag(*tag));
52 _bug_on_err_or_null(res);
53 return res;
54}
55
56char *pp_data_counters(struct data_counters *dc, bool showValues)
57{
58 char *res;
59
60 if (!dc)
61 res = kasprintf(GFP_ATOMIC, "data_counters@null{}");
62 else if (showValues)
63 res = kasprintf(
64 GFP_ATOMIC, "data_counters@%p{"
65 "set0{"
66 "rx{"
67 "tcp{b=%llu, p=%llu}, "
68 "udp{b=%llu, p=%llu},"
69 "other{b=%llu, p=%llu}}, "
70 "tx{"
71 "tcp{b=%llu, p=%llu}, "
72 "udp{b=%llu, p=%llu},"
73 "other{b=%llu, p=%llu}}}, "
74 "set1{"
75 "rx{"
76 "tcp{b=%llu, p=%llu}, "
77 "udp{b=%llu, p=%llu},"
78 "other{b=%llu, p=%llu}}, "
79 "tx{"
80 "tcp{b=%llu, p=%llu}, "
81 "udp{b=%llu, p=%llu},"
82 "other{b=%llu, p=%llu}}}}",
83 dc,
84 dc->bpc[0][IFS_RX][IFS_TCP].bytes,
85 dc->bpc[0][IFS_RX][IFS_TCP].packets,
86 dc->bpc[0][IFS_RX][IFS_UDP].bytes,
87 dc->bpc[0][IFS_RX][IFS_UDP].packets,
88 dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes,
89 dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets,
90 dc->bpc[0][IFS_TX][IFS_TCP].bytes,
91 dc->bpc[0][IFS_TX][IFS_TCP].packets,
92 dc->bpc[0][IFS_TX][IFS_UDP].bytes,
93 dc->bpc[0][IFS_TX][IFS_UDP].packets,
94 dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes,
95 dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets,
96 dc->bpc[1][IFS_RX][IFS_TCP].bytes,
97 dc->bpc[1][IFS_RX][IFS_TCP].packets,
98 dc->bpc[1][IFS_RX][IFS_UDP].bytes,
99 dc->bpc[1][IFS_RX][IFS_UDP].packets,
100 dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes,
101 dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets,
102 dc->bpc[1][IFS_TX][IFS_TCP].bytes,
103 dc->bpc[1][IFS_TX][IFS_TCP].packets,
104 dc->bpc[1][IFS_TX][IFS_UDP].bytes,
105 dc->bpc[1][IFS_TX][IFS_UDP].packets,
106 dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes,
107 dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets);
108 else
109 res = kasprintf(GFP_ATOMIC, "data_counters@%p{...}", dc);
110 _bug_on_err_or_null(res);
111 return res;
112}
113
114char *pp_tag_node(struct tag_node *tn)
115{
116 char *tag_str;
117 char *res;
118
119 if (!tn) {
120 res = kasprintf(GFP_ATOMIC, "tag_node@null{}");
121 _bug_on_err_or_null(res);
122 return res;
123 }
124 tag_str = pp_tag_t(&tn->tag);
125 res = kasprintf(GFP_ATOMIC,
126 "tag_node@%p{tag=%s}",
127 tn, tag_str);
128 _bug_on_err_or_null(res);
129 kfree(tag_str);
130 return res;
131}
132
133char *pp_tag_ref(struct tag_ref *tr)
134{
135 char *tn_str;
136 char *res;
137
138 if (!tr) {
139 res = kasprintf(GFP_ATOMIC, "tag_ref@null{}");
140 _bug_on_err_or_null(res);
141 return res;
142 }
143 tn_str = pp_tag_node(&tr->tn);
144 res = kasprintf(GFP_ATOMIC,
145 "tag_ref@%p{%s, num_sock_tags=%d}",
146 tr, tn_str, tr->num_sock_tags);
147 _bug_on_err_or_null(res);
148 kfree(tn_str);
149 return res;
150}
151
152char *pp_tag_stat(struct tag_stat *ts)
153{
154 char *tn_str;
155 char *counters_str;
156 char *parent_counters_str;
157 char *res;
158
159 if (!ts) {
160 res = kasprintf(GFP_ATOMIC, "tag_stat@null{}");
161 _bug_on_err_or_null(res);
162 return res;
163 }
164 tn_str = pp_tag_node(&ts->tn);
165 counters_str = pp_data_counters(&ts->counters, true);
166 parent_counters_str = pp_data_counters(ts->parent_counters, false);
167 res = kasprintf(GFP_ATOMIC,
168 "tag_stat@%p{%s, counters=%s, parent_counters=%s}",
169 ts, tn_str, counters_str, parent_counters_str);
170 _bug_on_err_or_null(res);
171 kfree(tn_str);
172 kfree(counters_str);
173 kfree(parent_counters_str);
174 return res;
175}
176
177char *pp_iface_stat(struct iface_stat *is)
178{
179 char *res;
180 if (!is)
181 res = kasprintf(GFP_ATOMIC, "iface_stat@null{}");
182 else
183 res = kasprintf(GFP_ATOMIC, "iface_stat@%p{"
184 "list=list_head{...}, "
185 "ifname=%s, "
186 "total={rx={bytes=%llu, "
187 "packets=%llu}, "
188 "tx={bytes=%llu, "
189 "packets=%llu}}, "
190 "last_known_valid=%d, "
191 "last_known={rx={bytes=%llu, "
192 "packets=%llu}, "
193 "tx={bytes=%llu, "
194 "packets=%llu}}, "
195 "active=%d, "
196 "net_dev=%p, "
197 "proc_ptr=%p, "
198 "tag_stat_tree=rb_root{...}}",
199 is,
200 is->ifname,
201 is->totals[IFS_RX].bytes,
202 is->totals[IFS_RX].packets,
203 is->totals[IFS_TX].bytes,
204 is->totals[IFS_TX].packets,
205 is->last_known_valid,
206 is->last_known[IFS_RX].bytes,
207 is->last_known[IFS_RX].packets,
208 is->last_known[IFS_TX].bytes,
209 is->last_known[IFS_TX].packets,
210 is->active,
211 is->net_dev,
212 is->proc_ptr);
213 _bug_on_err_or_null(res);
214 return res;
215}
216
217char *pp_sock_tag(struct sock_tag *st)
218{
219 char *tag_str;
220 char *res;
221
222 if (!st) {
223 res = kasprintf(GFP_ATOMIC, "sock_tag@null{}");
224 _bug_on_err_or_null(res);
225 return res;
226 }
227 tag_str = pp_tag_t(&st->tag);
228 res = kasprintf(GFP_ATOMIC, "sock_tag@%p{"
229 "sock_node=rb_node{...}, "
230 "sk=%p socket=%p (f_count=%lu), list=list_head{...}, "
231 "pid=%u, tag=%s}",
232 st, st->sk, st->socket, atomic_long_read(
233 &st->socket->file->f_count),
234 st->pid, tag_str);
235 _bug_on_err_or_null(res);
236 kfree(tag_str);
237 return res;
238}
239
240char *pp_uid_tag_data(struct uid_tag_data *utd)
241{
242 char *res;
243
244 if (!utd)
245 res = kasprintf(GFP_ATOMIC, "uid_tag_data@null{}");
246 else
247 res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{"
248 "uid=%u, num_active_acct_tags=%d, "
249 "num_pqd=%d, "
250 "tag_node_tree=rb_root{...}, "
251 "proc_qtu_data_tree=rb_root{...}}",
252 utd, utd->uid,
253 utd->num_active_tags, utd->num_pqd);
254 _bug_on_err_or_null(res);
255 return res;
256}
257
258char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
259{
260 char *parent_tag_data_str;
261 char *res;
262
263 if (!pqd) {
264 res = kasprintf(GFP_ATOMIC, "proc_qtu_data@null{}");
265 _bug_on_err_or_null(res);
266 return res;
267 }
268 parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data);
269 res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{"
270 "node=rb_node{...}, pid=%u, "
271 "parent_tag_data=%s, "
272 "sock_tag_list=list_head{...}}",
273 pqd, pqd->pid, parent_tag_data_str
274 );
275 _bug_on_err_or_null(res);
276 kfree(parent_tag_data_str);
277 return res;
278}
279
280/*------------------------------------------*/
281void prdebug_sock_tag_tree(int indent_level,
282 struct rb_root *sock_tag_tree)
283{
284 struct rb_node *node;
285 struct sock_tag *sock_tag_entry;
286 char *str;
287
288 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
289 return;
290
291 if (RB_EMPTY_ROOT(sock_tag_tree)) {
292 str = "sock_tag_tree=rb_root{}";
293 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
294 return;
295 }
296
297 str = "sock_tag_tree=rb_root{";
298 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
299 indent_level++;
300 for (node = rb_first(sock_tag_tree);
301 node;
302 node = rb_next(node)) {
303 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
304 str = pp_sock_tag(sock_tag_entry);
305 pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
306 kfree(str);
307 }
308 indent_level--;
309 str = "}";
310 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
311}
312
313void prdebug_sock_tag_list(int indent_level,
314 struct list_head *sock_tag_list)
315{
316 struct sock_tag *sock_tag_entry;
317 char *str;
318
319 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
320 return;
321
322 if (list_empty(sock_tag_list)) {
323 str = "sock_tag_list=list_head{}";
324 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
325 return;
326 }
327
328 str = "sock_tag_list=list_head{";
329 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
330 indent_level++;
331 list_for_each_entry(sock_tag_entry, sock_tag_list, list) {
332 str = pp_sock_tag(sock_tag_entry);
333 pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
334 kfree(str);
335 }
336 indent_level--;
337 str = "}";
338 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
339}
340
341void prdebug_proc_qtu_data_tree(int indent_level,
342 struct rb_root *proc_qtu_data_tree)
343{
344 char *str;
345 struct rb_node *node;
346 struct proc_qtu_data *proc_qtu_data_entry;
347
348 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
349 return;
350
351 if (RB_EMPTY_ROOT(proc_qtu_data_tree)) {
352 str = "proc_qtu_data_tree=rb_root{}";
353 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
354 return;
355 }
356
357 str = "proc_qtu_data_tree=rb_root{";
358 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
359 indent_level++;
360 for (node = rb_first(proc_qtu_data_tree);
361 node;
362 node = rb_next(node)) {
363 proc_qtu_data_entry = rb_entry(node,
364 struct proc_qtu_data,
365 node);
366 str = pp_proc_qtu_data(proc_qtu_data_entry);
367 pr_debug("%*d: %s,\n", indent_level*2, indent_level,
368 str);
369 kfree(str);
370 indent_level++;
371 prdebug_sock_tag_list(indent_level,
372 &proc_qtu_data_entry->sock_tag_list);
373 indent_level--;
374
375 }
376 indent_level--;
377 str = "}";
378 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
379}
380
381void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
382{
383 char *str;
384 struct rb_node *node;
385 struct tag_ref *tag_ref_entry;
386
387 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
388 return;
389
390 if (RB_EMPTY_ROOT(tag_ref_tree)) {
391 str = "tag_ref_tree{}";
392 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
393 return;
394 }
395
396 str = "tag_ref_tree{";
397 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
398 indent_level++;
399 for (node = rb_first(tag_ref_tree);
400 node;
401 node = rb_next(node)) {
402 tag_ref_entry = rb_entry(node,
403 struct tag_ref,
404 tn.node);
405 str = pp_tag_ref(tag_ref_entry);
406 pr_debug("%*d: %s,\n", indent_level*2, indent_level,
407 str);
408 kfree(str);
409 }
410 indent_level--;
411 str = "}";
412 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
413}
414
415void prdebug_uid_tag_data_tree(int indent_level,
416 struct rb_root *uid_tag_data_tree)
417{
418 char *str;
419 struct rb_node *node;
420 struct uid_tag_data *uid_tag_data_entry;
421
422 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
423 return;
424
425 if (RB_EMPTY_ROOT(uid_tag_data_tree)) {
426 str = "uid_tag_data_tree=rb_root{}";
427 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
428 return;
429 }
430
431 str = "uid_tag_data_tree=rb_root{";
432 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
433 indent_level++;
434 for (node = rb_first(uid_tag_data_tree);
435 node;
436 node = rb_next(node)) {
437 uid_tag_data_entry = rb_entry(node, struct uid_tag_data,
438 node);
439 str = pp_uid_tag_data(uid_tag_data_entry);
440 pr_debug("%*d: %s,\n", indent_level*2, indent_level, str);
441 kfree(str);
442 if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) {
443 indent_level++;
444 prdebug_tag_ref_tree(indent_level,
445 &uid_tag_data_entry->tag_ref_tree);
446 indent_level--;
447 }
448 }
449 indent_level--;
450 str = "}";
451 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
452}
453
454void prdebug_tag_stat_tree(int indent_level,
455 struct rb_root *tag_stat_tree)
456{
457 char *str;
458 struct rb_node *node;
459 struct tag_stat *ts_entry;
460
461 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
462 return;
463
464 if (RB_EMPTY_ROOT(tag_stat_tree)) {
465 str = "tag_stat_tree{}";
466 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
467 return;
468 }
469
470 str = "tag_stat_tree{";
471 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
472 indent_level++;
473 for (node = rb_first(tag_stat_tree);
474 node;
475 node = rb_next(node)) {
476 ts_entry = rb_entry(node, struct tag_stat, tn.node);
477 str = pp_tag_stat(ts_entry);
478 pr_debug("%*d: %s\n", indent_level*2, indent_level,
479 str);
480 kfree(str);
481 }
482 indent_level--;
483 str = "}";
484 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
485}
486
487void prdebug_iface_stat_list(int indent_level,
488 struct list_head *iface_stat_list)
489{
490 char *str;
491 struct iface_stat *iface_entry;
492
493 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
494 return;
495
496 if (list_empty(iface_stat_list)) {
497 str = "iface_stat_list=list_head{}";
498 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
499 return;
500 }
501
502 str = "iface_stat_list=list_head{";
503 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
504 indent_level++;
505 list_for_each_entry(iface_entry, iface_stat_list, list) {
506 str = pp_iface_stat(iface_entry);
507 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
508 kfree(str);
509
510 spin_lock_bh(&iface_entry->tag_stat_list_lock);
511 if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) {
512 indent_level++;
513 prdebug_tag_stat_tree(indent_level,
514 &iface_entry->tag_stat_tree);
515 indent_level--;
516 }
517 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
518 }
519 indent_level--;
520 str = "}";
521 pr_debug("%*d: %s\n", indent_level*2, indent_level, str);
522}
523
524#endif /* ifdef DDEBUG */
525/*------------------------------------------*/
526static const char * const netdev_event_strings[] = {
527 "netdev_unknown",
528 "NETDEV_UP",
529 "NETDEV_DOWN",
530 "NETDEV_REBOOT",
531 "NETDEV_CHANGE",
532 "NETDEV_REGISTER",
533 "NETDEV_UNREGISTER",
534 "NETDEV_CHANGEMTU",
535 "NETDEV_CHANGEADDR",
536 "NETDEV_GOING_DOWN",
537 "NETDEV_CHANGENAME",
538 "NETDEV_FEAT_CHANGE",
539 "NETDEV_BONDING_FAILOVER",
540 "NETDEV_PRE_UP",
541 "NETDEV_PRE_TYPE_CHANGE",
542 "NETDEV_POST_TYPE_CHANGE",
543 "NETDEV_POST_INIT",
544 "NETDEV_UNREGISTER_BATCH",
545 "NETDEV_RELEASE",
546 "NETDEV_NOTIFY_PEERS",
547 "NETDEV_JOIN",
548};
549
550const char *netdev_evt_str(int netdev_event)
551{
552 if (netdev_event < 0
553 || netdev_event >= ARRAY_SIZE(netdev_event_strings))
554 return "bad event num";
555 return netdev_event_strings[netdev_event];
556}
diff --git a/net/netfilter/xt_qtaguid_print.h b/net/netfilter/xt_qtaguid_print.h
new file mode 100644
index 00000000000..b63871a0be5
--- /dev/null
+++ b/net/netfilter/xt_qtaguid_print.h
@@ -0,0 +1,120 @@
1/*
2 * Pretty printing Support for iptables xt_qtaguid module.
3 *
4 * (C) 2011 Google, Inc
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __XT_QTAGUID_PRINT_H__
11#define __XT_QTAGUID_PRINT_H__
12
13#include "xt_qtaguid_internal.h"
14
15#ifdef DDEBUG
16
17char *pp_tag_t(tag_t *tag);
18char *pp_data_counters(struct data_counters *dc, bool showValues);
19char *pp_tag_node(struct tag_node *tn);
20char *pp_tag_ref(struct tag_ref *tr);
21char *pp_tag_stat(struct tag_stat *ts);
22char *pp_iface_stat(struct iface_stat *is);
23char *pp_sock_tag(struct sock_tag *st);
24char *pp_uid_tag_data(struct uid_tag_data *qtd);
25char *pp_proc_qtu_data(struct proc_qtu_data *pqd);
26
27/*------------------------------------------*/
28void prdebug_sock_tag_list(int indent_level,
29 struct list_head *sock_tag_list);
30void prdebug_sock_tag_tree(int indent_level,
31 struct rb_root *sock_tag_tree);
32void prdebug_proc_qtu_data_tree(int indent_level,
33 struct rb_root *proc_qtu_data_tree);
34void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree);
35void prdebug_uid_tag_data_tree(int indent_level,
36 struct rb_root *uid_tag_data_tree);
37void prdebug_tag_stat_tree(int indent_level,
38 struct rb_root *tag_stat_tree);
39void prdebug_iface_stat_list(int indent_level,
40 struct list_head *iface_stat_list);
41
42#else
43
44/*------------------------------------------*/
45static inline char *pp_tag_t(tag_t *tag)
46{
47 return NULL;
48}
49static inline char *pp_data_counters(struct data_counters *dc, bool showValues)
50{
51 return NULL;
52}
53static inline char *pp_tag_node(struct tag_node *tn)
54{
55 return NULL;
56}
57static inline char *pp_tag_ref(struct tag_ref *tr)
58{
59 return NULL;
60}
61static inline char *pp_tag_stat(struct tag_stat *ts)
62{
63 return NULL;
64}
65static inline char *pp_iface_stat(struct iface_stat *is)
66{
67 return NULL;
68}
69static inline char *pp_sock_tag(struct sock_tag *st)
70{
71 return NULL;
72}
73static inline char *pp_uid_tag_data(struct uid_tag_data *qtd)
74{
75 return NULL;
76}
77static inline char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
78{
79 return NULL;
80}
81
82/*------------------------------------------*/
83static inline
84void prdebug_sock_tag_list(int indent_level,
85 struct list_head *sock_tag_list)
86{
87}
88static inline
89void prdebug_sock_tag_tree(int indent_level,
90 struct rb_root *sock_tag_tree)
91{
92}
93static inline
94void prdebug_proc_qtu_data_tree(int indent_level,
95 struct rb_root *proc_qtu_data_tree)
96{
97}
98static inline
99void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
100{
101}
102static inline
103void prdebug_uid_tag_data_tree(int indent_level,
104 struct rb_root *uid_tag_data_tree)
105{
106}
107static inline
108void prdebug_tag_stat_tree(int indent_level,
109 struct rb_root *tag_stat_tree)
110{
111}
112static inline
113void prdebug_iface_stat_list(int indent_level,
114 struct list_head *iface_stat_list)
115{
116}
117#endif
118/*------------------------------------------*/
119const char *netdev_evt_str(int netdev_event);
120#endif /* ifndef __XT_QTAGUID_PRINT_H__ */
diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c
new file mode 100644
index 00000000000..3c72bea2dd6
--- /dev/null
+++ b/net/netfilter/xt_quota2.c
@@ -0,0 +1,381 @@
1/*
2 * xt_quota2 - enhanced xt_quota that can count upwards and in packets
3 * as a minimal accounting match.
4 * by Jan Engelhardt <jengelh@medozas.de>, 2008
5 *
6 * Originally based on xt_quota.c:
7 * netfilter module to enforce network quotas
8 * Sam Johnston <samj@samj.net>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License; either
12 * version 2 of the License, as published by the Free Software Foundation.
13 */
14#include <linux/list.h>
15#include <linux/proc_fs.h>
16#include <linux/skbuff.h>
17#include <linux/spinlock.h>
18#include <asm/atomic.h>
19
20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter/xt_quota2.h>
22#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
23#include <linux/netfilter_ipv4/ipt_ULOG.h>
24#endif
25
26/**
27 * @lock: lock to protect quota writers from each other
28 */
29struct xt_quota_counter {
30 u_int64_t quota;
31 spinlock_t lock;
32 struct list_head list;
33 atomic_t ref;
34 char name[sizeof(((struct xt_quota_mtinfo2 *)NULL)->name)];
35 struct proc_dir_entry *procfs_entry;
36};
37
38#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
39/* Harald's favorite number +1 :D From ipt_ULOG.c */
40static unsigned int qlog_nl_event = 112;
41module_param_named(event_num, qlog_nl_event, uint, S_IRUGO | S_IWUSR);
42MODULE_PARM_DESC(event_num,
43 "Event number for NETLINK_NFLOG message. 0 disables log."
44 "111 is what ipt_ULOG uses.");
45static struct sock *nflognl;
46#endif
47
48static LIST_HEAD(counter_list);
49static DEFINE_SPINLOCK(counter_list_lock);
50
51static struct proc_dir_entry *proc_xt_quota;
52static unsigned int quota_list_perms = S_IRUGO | S_IWUSR;
53static unsigned int quota_list_uid = 0;
54static unsigned int quota_list_gid = 0;
55module_param_named(perms, quota_list_perms, uint, S_IRUGO | S_IWUSR);
56module_param_named(uid, quota_list_uid, uint, S_IRUGO | S_IWUSR);
57module_param_named(gid, quota_list_gid, uint, S_IRUGO | S_IWUSR);
58
59
60#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
61static void quota2_log(unsigned int hooknum,
62 const struct sk_buff *skb,
63 const struct net_device *in,
64 const struct net_device *out,
65 const char *prefix)
66{
67 ulog_packet_msg_t *pm;
68 struct sk_buff *log_skb;
69 size_t size;
70 struct nlmsghdr *nlh;
71
72 if (!qlog_nl_event)
73 return;
74
75 size = NLMSG_SPACE(sizeof(*pm));
76 size = max(size, (size_t)NLMSG_GOODSIZE);
77 log_skb = alloc_skb(size, GFP_ATOMIC);
78 if (!log_skb) {
79 pr_err("xt_quota2: cannot alloc skb for logging\n");
80 return;
81 }
82
83 /* NLMSG_PUT() uses "goto nlmsg_failure" */
84 nlh = NLMSG_PUT(log_skb, /*pid*/0, /*seq*/0, qlog_nl_event,
85 sizeof(*pm));
86 pm = NLMSG_DATA(nlh);
87 if (skb->tstamp.tv64 == 0)
88 __net_timestamp((struct sk_buff *)skb);
89 pm->data_len = 0;
90 pm->hook = hooknum;
91 if (prefix != NULL)
92 strlcpy(pm->prefix, prefix, sizeof(pm->prefix));
93 else
94 *(pm->prefix) = '\0';
95 if (in)
96 strlcpy(pm->indev_name, in->name, sizeof(pm->indev_name));
97 else
98 pm->indev_name[0] = '\0';
99
100 if (out)
101 strlcpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
102 else
103 pm->outdev_name[0] = '\0';
104
105 NETLINK_CB(log_skb).dst_group = 1;
106 pr_debug("throwing 1 packet to netlink group 1\n");
107 netlink_broadcast(nflognl, log_skb, 0, 1, GFP_ATOMIC);
108
109nlmsg_failure: /* Used within NLMSG_PUT() */
110 pr_debug("xt_quota2: error during NLMSG_PUT\n");
111}
112#else
113static void quota2_log(unsigned int hooknum,
114 const struct sk_buff *skb,
115 const struct net_device *in,
116 const struct net_device *out,
117 const char *prefix)
118{
119}
120#endif /* if+else CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG */
121
122static int quota_proc_read(char *page, char **start, off_t offset,
123 int count, int *eof, void *data)
124{
125 struct xt_quota_counter *e = data;
126 int ret;
127
128 spin_lock_bh(&e->lock);
129 ret = snprintf(page, PAGE_SIZE, "%llu\n", e->quota);
130 spin_unlock_bh(&e->lock);
131 return ret;
132}
133
134static int quota_proc_write(struct file *file, const char __user *input,
135 unsigned long size, void *data)
136{
137 struct xt_quota_counter *e = data;
138 char buf[sizeof("18446744073709551616")];
139
140 if (size > sizeof(buf) - 1)
141 size = sizeof(buf) - 1;
142 if (copy_from_user(buf, input, size) != 0)
143 return -EFAULT;
144 buf[size] = '\0';
145
146 spin_lock_bh(&e->lock);
147 e->quota = simple_strtoull(buf, NULL, 0);
148 spin_unlock_bh(&e->lock);
149 return size;
150}
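/*
 * Usage sketch (assuming a named counter "wlan_bytes" exists): each named
 * counter gets a file under /proc/net/xt_quota/ that can be read to query
 * the remaining quota and written to replace it, e.g.:
 *
 *	cat /proc/net/xt_quota/wlan_bytes
 *	echo 5000000 > /proc/net/xt_quota/wlan_bytes
 */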
151
152static struct xt_quota_counter *
153q2_new_counter(const struct xt_quota_mtinfo2 *q, bool anon)
154{
155 struct xt_quota_counter *e;
156 unsigned int size;
157
158 /* Do not need all the procfs things for anonymous counters. */
159 size = anon ? offsetof(typeof(*e), list) : sizeof(*e);
160 e = kmalloc(size, GFP_KERNEL);
161 if (e == NULL)
162 return NULL;
163
164 e->quota = q->quota;
165 spin_lock_init(&e->lock);
166 if (!anon) {
167 INIT_LIST_HEAD(&e->list);
168 atomic_set(&e->ref, 1);
169 strlcpy(e->name, q->name, sizeof(e->name));
170 }
171 return e;
172}
173
174/**
175 * q2_get_counter - get ref to counter or create new
176 * @name: name of counter
177 */
178static struct xt_quota_counter *
179q2_get_counter(const struct xt_quota_mtinfo2 *q)
180{
181 struct proc_dir_entry *p;
182 struct xt_quota_counter *e = NULL;
183 struct xt_quota_counter *new_e;
184
185 if (*q->name == '\0')
186 return q2_new_counter(q, true);
187
188 /* No need to hold a lock while getting a new counter */
189 new_e = q2_new_counter(q, false);
190 if (new_e == NULL)
191 goto out;
192
193 spin_lock_bh(&counter_list_lock);
194 list_for_each_entry(e, &counter_list, list)
195 if (strcmp(e->name, q->name) == 0) {
196 atomic_inc(&e->ref);
197 spin_unlock_bh(&counter_list_lock);
198 kfree(new_e);
199 pr_debug("xt_quota2: old counter name=%s", e->name);
200 return e;
201 }
202 e = new_e;
203 pr_debug("xt_quota2: new_counter name=%s", e->name);
204 list_add_tail(&e->list, &counter_list);
205 /* The entry having a refcount of 1 is not directly destructible.
206 * This func has not yet returned the new entry, so iptables holds
207 * no reference it could use to destroy this entry.
208 * For another rule to destroy it, this func would first have to be
209 * re-invoked and acquire a new ref for the same named quota.
210 * Nobody will access e->procfs_entry either.
211 * So it is safe to release the lock here. */
212 spin_unlock_bh(&counter_list_lock);
213
214 /* create_proc_entry() is not spin_lock happy */
215 p = e->procfs_entry = create_proc_entry(e->name, quota_list_perms,
216 proc_xt_quota);
217
218 if (IS_ERR_OR_NULL(p)) {
219 spin_lock_bh(&counter_list_lock);
220 list_del(&e->list);
221 spin_unlock_bh(&counter_list_lock);
222 goto out;
223 }
224 p->data = e;
225 p->read_proc = quota_proc_read;
226 p->write_proc = quota_proc_write;
227 p->uid = quota_list_uid;
228 p->gid = quota_list_gid;
229 return e;
230
231 out:
232 kfree(e);
233 return NULL;
234}
235
236static int quota_mt2_check(const struct xt_mtchk_param *par)
237{
238 struct xt_quota_mtinfo2 *q = par->matchinfo;
239
240 pr_debug("xt_quota2: check() flags=0x%04x", q->flags);
241
242 if (q->flags & ~XT_QUOTA_MASK)
243 return -EINVAL;
244
245 q->name[sizeof(q->name)-1] = '\0';
246 if (*q->name == '.' || strchr(q->name, '/') != NULL) {
247 printk(KERN_ERR "xt_quota.3: illegal name\n");
248 return -EINVAL;
249 }
250
251 q->master = q2_get_counter(q);
252 if (q->master == NULL) {
253 printk(KERN_ERR "xt_quota.3: memory alloc failure\n");
254 return -ENOMEM;
255 }
256
257 return 0;
258}
259
260static void quota_mt2_destroy(const struct xt_mtdtor_param *par)
261{
262 struct xt_quota_mtinfo2 *q = par->matchinfo;
263 struct xt_quota_counter *e = q->master;
264
265 if (*q->name == '\0') {
266 kfree(e);
267 return;
268 }
269
270 spin_lock_bh(&counter_list_lock);
271 if (!atomic_dec_and_test(&e->ref)) {
272 spin_unlock_bh(&counter_list_lock);
273 return;
274 }
275
276 list_del(&e->list);
277 remove_proc_entry(e->name, proc_xt_quota);
278 spin_unlock_bh(&counter_list_lock);
279 kfree(e);
280}
281
282static bool
283quota_mt2(const struct sk_buff *skb, struct xt_action_param *par)
284{
285 struct xt_quota_mtinfo2 *q = (void *)par->matchinfo;
286 struct xt_quota_counter *e = q->master;
287 bool ret = q->flags & XT_QUOTA_INVERT;
288
289 spin_lock_bh(&e->lock);
290 if (q->flags & XT_QUOTA_GROW) {
291 /*
292 * While no_change is pointless in "grow" mode, we will
293 * implement it here simply to have a consistent behavior.
294 */
295 if (!(q->flags & XT_QUOTA_NO_CHANGE)) {
296 e->quota += (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
297 }
298 ret = true;
299 } else {
300 if (e->quota >= skb->len) {
301 if (!(q->flags & XT_QUOTA_NO_CHANGE))
302 e->quota -= (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len;
303 ret = !ret;
304 } else {
305 /* We are transitioning, log that fact. */
306 if (e->quota) {
307 quota2_log(par->hooknum,
308 skb,
309 par->in,
310 par->out,
311 q->name);
312 }
313 /* we do not allow even small packets from now on */
314 e->quota = 0;
315 }
316 }
317 spin_unlock_bh(&e->lock);
318 return ret;
319}
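/*
 * Rule sketch (userspace side; option names assume the matching iptables
 * "quota2" extension from xtables-addons is installed, and the interface
 * names are placeholders): count 10 MB down against a named counter, or
 * count upwards per interface in grow mode:
 *
 *	iptables -A OUTPUT -o rmnet0 -m quota2 --name data_cap \
 *		--quota 10000000 -j ACCEPT
 *	iptables -A OUTPUT -o wlan0 -m quota2 --name wlan_up --grow -j ACCEPT
 */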
320
321static struct xt_match quota_mt2_reg[] __read_mostly = {
322 {
323 .name = "quota2",
324 .revision = 3,
325 .family = NFPROTO_IPV4,
326 .checkentry = quota_mt2_check,
327 .match = quota_mt2,
328 .destroy = quota_mt2_destroy,
329 .matchsize = sizeof(struct xt_quota_mtinfo2),
330 .me = THIS_MODULE,
331 },
332 {
333 .name = "quota2",
334 .revision = 3,
335 .family = NFPROTO_IPV6,
336 .checkentry = quota_mt2_check,
337 .match = quota_mt2,
338 .destroy = quota_mt2_destroy,
339 .matchsize = sizeof(struct xt_quota_mtinfo2),
340 .me = THIS_MODULE,
341 },
342};
343
344static int __init quota_mt2_init(void)
345{
346 int ret;
347 pr_debug("xt_quota2: init()");
348
349#ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG
350 nflognl = netlink_kernel_create(&init_net,
351 NETLINK_NFLOG, 1, NULL,
352 NULL, THIS_MODULE);
353 if (!nflognl)
354 return -ENOMEM;
355#endif
356
357 proc_xt_quota = proc_mkdir("xt_quota", init_net.proc_net);
358 if (proc_xt_quota == NULL)
359 return -EACCES;
360
361 ret = xt_register_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg));
362 if (ret < 0)
363 remove_proc_entry("xt_quota", init_net.proc_net);
364 pr_debug("xt_quota2: init() %d", ret);
365 return ret;
366}
367
368static void __exit quota_mt2_exit(void)
369{
370 xt_unregister_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg));
371 remove_proc_entry("xt_quota", init_net.proc_net);
372}
373
374module_init(quota_mt2_init);
375module_exit(quota_mt2_exit);
376MODULE_DESCRIPTION("Xtables: countdown quota match; up counter");
377MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
378MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
379MODULE_LICENSE("GPL");
380MODULE_ALIAS("ipt_quota2");
381MODULE_ALIAS("ip6t_quota2");