author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-22 10:38:37 -0500
---|---|---
committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-22 10:38:37 -0500
commit | fcc9d2e5a6c89d22b8b773a64fb4ad21ac318446 | (patch)
tree | a57612d1888735a2ec7972891b68c1ac5ec8faea | /net/netfilter
parent | 8dea78da5cee153b8af9c07a2745f6c55057fe12 | (diff)
Diffstat (limited to 'net/netfilter')
-rw-r--r-- | net/netfilter/nfnetlink_queue.c | 1028
-rw-r--r-- | net/netfilter/xt_NOTRACK.c | 53
-rw-r--r-- | net/netfilter/xt_qtaguid.c | 2785
-rw-r--r-- | net/netfilter/xt_qtaguid_internal.h | 330
-rw-r--r-- | net/netfilter/xt_qtaguid_print.c | 556
-rw-r--r-- | net/netfilter/xt_qtaguid_print.h | 120
-rw-r--r-- | net/netfilter/xt_quota2.c | 381
7 files changed, 5253 insertions, 0 deletions
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
new file mode 100644
index 00000000000..a80b0cb03f1
--- /dev/null
+++ b/net/netfilter/nfnetlink_queue.c
@@ -0,0 +1,1028 @@
1 | /* | ||
2 | * This is a module which is used for queueing packets and communicating with | ||
3 | * userspace via nfnetlink. | ||
4 | * | ||
5 | * (C) 2005 by Harald Welte <laforge@netfilter.org> | ||
6 | * (C) 2007 by Patrick McHardy <kaber@trash.net> | ||
7 | * | ||
8 | * Based on the old ipv4-only ip_queue.c: | ||
9 | * (C) 2000-2002 James Morris <jmorris@intercode.com.au> | ||
10 | * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org> | ||
11 | * | ||
12 | * This program is free software; you can redistribute it and/or modify | ||
13 | * it under the terms of the GNU General Public License version 2 as | ||
14 | * published by the Free Software Foundation. | ||
15 | * | ||
16 | */ | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/skbuff.h> | ||
19 | #include <linux/init.h> | ||
20 | #include <linux/spinlock.h> | ||
21 | #include <linux/slab.h> | ||
22 | #include <linux/notifier.h> | ||
23 | #include <linux/netdevice.h> | ||
24 | #include <linux/netfilter.h> | ||
25 | #include <linux/proc_fs.h> | ||
26 | #include <linux/netfilter_ipv4.h> | ||
27 | #include <linux/netfilter_ipv6.h> | ||
28 | #include <linux/netfilter/nfnetlink.h> | ||
29 | #include <linux/netfilter/nfnetlink_queue.h> | ||
30 | #include <linux/list.h> | ||
31 | #include <net/sock.h> | ||
32 | #include <net/netfilter/nf_queue.h> | ||
33 | |||
34 | #include <linux/atomic.h> | ||
35 | |||
36 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
37 | #include "../bridge/br_private.h" | ||
38 | #endif | ||
39 | |||
40 | #define NFQNL_QMAX_DEFAULT 1024 | ||
41 | |||
42 | struct nfqnl_instance { | ||
43 | struct hlist_node hlist; /* global list of queues */ | ||
44 | struct rcu_head rcu; | ||
45 | |||
46 | int peer_pid; | ||
47 | unsigned int queue_maxlen; | ||
48 | unsigned int copy_range; | ||
49 | unsigned int queue_dropped; | ||
50 | unsigned int queue_user_dropped; | ||
51 | |||
52 | |||
53 | u_int16_t queue_num; /* number of this queue */ | ||
54 | u_int8_t copy_mode; | ||
55 | /* | ||
56 | * Following fields are dirtied for each queued packet, | ||
57 | * keep them in same cache line if possible. | ||
58 | */ | ||
59 | spinlock_t lock; | ||
60 | unsigned int queue_total; | ||
61 | unsigned int id_sequence; /* 'sequence' of pkt ids */ | ||
62 | struct list_head queue_list; /* packets in queue */ | ||
63 | }; | ||
64 | |||
65 | typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); | ||
66 | |||
67 | static DEFINE_SPINLOCK(instances_lock); | ||
68 | |||
69 | #define INSTANCE_BUCKETS 16 | ||
70 | static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly; | ||
71 | |||
72 | static inline u_int8_t instance_hashfn(u_int16_t queue_num) | ||
73 | { | ||
74 | return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; | ||
75 | } | ||
76 | |||
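A quick sanity check of the bucket math above, as a standalone userspace sketch (values illustrative, not taken from a running kernel): the hash ORs the high byte of the queue number into the low bits before taking the modulus, so distinct queue numbers can land in the same bucket, e.g. queues 1 and 256 both hash to bucket 1.

```c
/* Userspace sketch of instance_hashfn() above; illustrative only. */
#include <stdint.h>
#include <stdio.h>

#define INSTANCE_BUCKETS 16

static uint8_t instance_hashfn(uint16_t queue_num)
{
	return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS;
}

int main(void)
{
	/* (0|0)%16 = 0, (0|1)%16 = 1, ((256>>8)|256)%16 = 257%16 = 1 */
	printf("%u %u %u\n", instance_hashfn(0), instance_hashfn(1),
	       instance_hashfn(256));
	return 0;
}
```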
77 | static struct nfqnl_instance * | ||
78 | instance_lookup(u_int16_t queue_num) | ||
79 | { | ||
80 | struct hlist_head *head; | ||
81 | struct hlist_node *pos; | ||
82 | struct nfqnl_instance *inst; | ||
83 | |||
84 | head = &instance_table[instance_hashfn(queue_num)]; | ||
85 | hlist_for_each_entry_rcu(inst, pos, head, hlist) { | ||
86 | if (inst->queue_num == queue_num) | ||
87 | return inst; | ||
88 | } | ||
89 | return NULL; | ||
90 | } | ||
91 | |||
92 | static struct nfqnl_instance * | ||
93 | instance_create(u_int16_t queue_num, int pid) | ||
94 | { | ||
95 | struct nfqnl_instance *inst; | ||
96 | unsigned int h; | ||
97 | int err; | ||
98 | |||
99 | spin_lock(&instances_lock); | ||
100 | if (instance_lookup(queue_num)) { | ||
101 | err = -EEXIST; | ||
102 | goto out_unlock; | ||
103 | } | ||
104 | |||
105 | inst = kzalloc(sizeof(*inst), GFP_ATOMIC); | ||
106 | if (!inst) { | ||
107 | err = -ENOMEM; | ||
108 | goto out_unlock; | ||
109 | } | ||
110 | |||
111 | inst->queue_num = queue_num; | ||
112 | inst->peer_pid = pid; | ||
113 | inst->queue_maxlen = NFQNL_QMAX_DEFAULT; | ||
114 | inst->copy_range = 0xfffff; | ||
115 | inst->copy_mode = NFQNL_COPY_NONE; | ||
116 | spin_lock_init(&inst->lock); | ||
117 | INIT_LIST_HEAD(&inst->queue_list); | ||
118 | |||
119 | if (!try_module_get(THIS_MODULE)) { | ||
120 | err = -EAGAIN; | ||
121 | goto out_free; | ||
122 | } | ||
123 | |||
124 | h = instance_hashfn(queue_num); | ||
125 | hlist_add_head_rcu(&inst->hlist, &instance_table[h]); | ||
126 | |||
127 | spin_unlock(&instances_lock); | ||
128 | |||
129 | return inst; | ||
130 | |||
131 | out_free: | ||
132 | kfree(inst); | ||
133 | out_unlock: | ||
134 | spin_unlock(&instances_lock); | ||
135 | return ERR_PTR(err); | ||
136 | } | ||
137 | |||
138 | static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, | ||
139 | unsigned long data); | ||
140 | |||
141 | static void | ||
142 | instance_destroy_rcu(struct rcu_head *head) | ||
143 | { | ||
144 | struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance, | ||
145 | rcu); | ||
146 | |||
147 | nfqnl_flush(inst, NULL, 0); | ||
148 | kfree(inst); | ||
149 | module_put(THIS_MODULE); | ||
150 | } | ||
151 | |||
152 | static void | ||
153 | __instance_destroy(struct nfqnl_instance *inst) | ||
154 | { | ||
155 | hlist_del_rcu(&inst->hlist); | ||
156 | call_rcu(&inst->rcu, instance_destroy_rcu); | ||
157 | } | ||
158 | |||
159 | static void | ||
160 | instance_destroy(struct nfqnl_instance *inst) | ||
161 | { | ||
162 | spin_lock(&instances_lock); | ||
163 | __instance_destroy(inst); | ||
164 | spin_unlock(&instances_lock); | ||
165 | } | ||
166 | |||
167 | static inline void | ||
168 | __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) | ||
169 | { | ||
170 | list_add_tail(&entry->list, &queue->queue_list); | ||
171 | queue->queue_total++; | ||
172 | } | ||
173 | |||
174 | static void | ||
175 | __dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) | ||
176 | { | ||
177 | list_del(&entry->list); | ||
178 | queue->queue_total--; | ||
179 | } | ||
180 | |||
181 | static struct nf_queue_entry * | ||
182 | find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) | ||
183 | { | ||
184 | struct nf_queue_entry *entry = NULL, *i; | ||
185 | |||
186 | spin_lock_bh(&queue->lock); | ||
187 | |||
188 | list_for_each_entry(i, &queue->queue_list, list) { | ||
189 | if (i->id == id) { | ||
190 | entry = i; | ||
191 | break; | ||
192 | } | ||
193 | } | ||
194 | |||
195 | if (entry) | ||
196 | __dequeue_entry(queue, entry); | ||
197 | |||
198 | spin_unlock_bh(&queue->lock); | ||
199 | |||
200 | return entry; | ||
201 | } | ||
202 | |||
203 | static void | ||
204 | nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) | ||
205 | { | ||
206 | struct nf_queue_entry *entry, *next; | ||
207 | |||
208 | spin_lock_bh(&queue->lock); | ||
209 | list_for_each_entry_safe(entry, next, &queue->queue_list, list) { | ||
210 | if (!cmpfn || cmpfn(entry, data)) { | ||
211 | list_del(&entry->list); | ||
212 | queue->queue_total--; | ||
213 | nf_reinject(entry, NF_DROP); | ||
214 | } | ||
215 | } | ||
216 | spin_unlock_bh(&queue->lock); | ||
217 | } | ||
218 | |||
219 | static struct sk_buff * | ||
220 | nfqnl_build_packet_message(struct nfqnl_instance *queue, | ||
221 | struct nf_queue_entry *entry, | ||
222 | __be32 **packet_id_ptr) | ||
223 | { | ||
224 | sk_buff_data_t old_tail; | ||
225 | size_t size; | ||
226 | size_t data_len = 0; | ||
227 | struct sk_buff *skb; | ||
228 | struct nlattr *nla; | ||
229 | struct nfqnl_msg_packet_hdr *pmsg; | ||
230 | struct nlmsghdr *nlh; | ||
231 | struct nfgenmsg *nfmsg; | ||
232 | struct sk_buff *entskb = entry->skb; | ||
233 | struct net_device *indev; | ||
234 | struct net_device *outdev; | ||
235 | |||
236 | size = NLMSG_SPACE(sizeof(struct nfgenmsg)) | ||
237 | + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) | ||
238 | + nla_total_size(sizeof(u_int32_t)) /* ifindex */ | ||
239 | + nla_total_size(sizeof(u_int32_t)) /* ifindex */ | ||
240 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
241 | + nla_total_size(sizeof(u_int32_t)) /* ifindex */ | ||
242 | + nla_total_size(sizeof(u_int32_t)) /* ifindex */ | ||
243 | #endif | ||
244 | + nla_total_size(sizeof(u_int32_t)) /* mark */ | ||
245 | + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) | ||
246 | + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); | ||
247 | |||
248 | outdev = entry->outdev; | ||
249 | |||
250 | switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { | ||
251 | case NFQNL_COPY_META: | ||
252 | case NFQNL_COPY_NONE: | ||
253 | break; | ||
254 | |||
255 | case NFQNL_COPY_PACKET: | ||
256 | if (entskb->ip_summed == CHECKSUM_PARTIAL && | ||
257 | skb_checksum_help(entskb)) | ||
258 | return NULL; | ||
259 | |||
260 | data_len = ACCESS_ONCE(queue->copy_range); | ||
261 | if (data_len == 0 || data_len > entskb->len) | ||
262 | data_len = entskb->len; | ||
263 | |||
264 | size += nla_total_size(data_len); | ||
265 | break; | ||
266 | } | ||
267 | |||
268 | |||
269 | skb = alloc_skb(size, GFP_ATOMIC); | ||
270 | if (!skb) | ||
271 | goto nlmsg_failure; | ||
272 | |||
273 | old_tail = skb->tail; | ||
274 | nlh = NLMSG_PUT(skb, 0, 0, | ||
275 | NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, | ||
276 | sizeof(struct nfgenmsg)); | ||
277 | nfmsg = NLMSG_DATA(nlh); | ||
278 | nfmsg->nfgen_family = entry->pf; | ||
279 | nfmsg->version = NFNETLINK_V0; | ||
280 | nfmsg->res_id = htons(queue->queue_num); | ||
281 | |||
282 | nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg)); | ||
283 | pmsg = nla_data(nla); | ||
284 | pmsg->hw_protocol = entskb->protocol; | ||
285 | pmsg->hook = entry->hook; | ||
286 | *packet_id_ptr = &pmsg->packet_id; | ||
287 | |||
288 | indev = entry->indev; | ||
289 | if (indev) { | ||
290 | #ifndef CONFIG_BRIDGE_NETFILTER | ||
291 | NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex)); | ||
292 | #else | ||
293 | if (entry->pf == PF_BRIDGE) { | ||
294 | /* Case 1: indev is physical input device, we need to | ||
295 | * look for bridge group (when called from | ||
296 | * netfilter_bridge) */ | ||
297 | NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV, | ||
298 | htonl(indev->ifindex)); | ||
299 | /* this is the bridge group "brX" */ | ||
300 | /* rcu_read_lock()ed by __nf_queue */ | ||
301 | NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, | ||
302 | htonl(br_port_get_rcu(indev)->br->dev->ifindex)); | ||
303 | } else { | ||
304 | /* Case 2: indev is bridge group, we need to look for | ||
305 | * physical device (when called from ipv4) */ | ||
306 | NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, | ||
307 | htonl(indev->ifindex)); | ||
308 | if (entskb->nf_bridge && entskb->nf_bridge->physindev) | ||
309 | NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV, | ||
310 | htonl(entskb->nf_bridge->physindev->ifindex)); | ||
311 | } | ||
312 | #endif | ||
313 | } | ||
314 | |||
315 | if (outdev) { | ||
316 | #ifndef CONFIG_BRIDGE_NETFILTER | ||
317 | NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex)); | ||
318 | #else | ||
319 | if (entry->pf == PF_BRIDGE) { | ||
320 | /* Case 1: outdev is physical output device, we need to | ||
321 | * look for bridge group (when called from | ||
322 | * netfilter_bridge) */ | ||
323 | NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV, | ||
324 | htonl(outdev->ifindex)); | ||
325 | /* this is the bridge group "brX" */ | ||
326 | /* rcu_read_lock()ed by __nf_queue */ | ||
327 | NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, | ||
328 | htonl(br_port_get_rcu(outdev)->br->dev->ifindex)); | ||
329 | } else { | ||
330 | /* Case 2: outdev is bridge group, we need to look for | ||
331 | * physical output device (when called from ipv4) */ | ||
332 | NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, | ||
333 | htonl(outdev->ifindex)); | ||
334 | if (entskb->nf_bridge && entskb->nf_bridge->physoutdev) | ||
335 | NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV, | ||
336 | htonl(entskb->nf_bridge->physoutdev->ifindex)); | ||
337 | } | ||
338 | #endif | ||
339 | } | ||
340 | |||
341 | if (entskb->mark) | ||
342 | NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark)); | ||
343 | |||
344 | if (indev && entskb->dev && | ||
345 | entskb->mac_header != entskb->network_header) { | ||
346 | struct nfqnl_msg_packet_hw phw; | ||
347 | int len = dev_parse_header(entskb, phw.hw_addr); | ||
348 | if (len) { | ||
349 | phw.hw_addrlen = htons(len); | ||
350 | NLA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); | ||
351 | } | ||
352 | } | ||
353 | |||
354 | if (entskb->tstamp.tv64) { | ||
355 | struct nfqnl_msg_packet_timestamp ts; | ||
356 | struct timeval tv = ktime_to_timeval(entskb->tstamp); | ||
357 | ts.sec = cpu_to_be64(tv.tv_sec); | ||
358 | ts.usec = cpu_to_be64(tv.tv_usec); | ||
359 | |||
360 | NLA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); | ||
361 | } | ||
362 | |||
363 | if (data_len) { | ||
364 | struct nlattr *nla; | ||
365 | int sz = nla_attr_size(data_len); | ||
366 | |||
367 | if (skb_tailroom(skb) < nla_total_size(data_len)) { | ||
368 | printk(KERN_WARNING "nf_queue: no tailroom!\n"); | ||
369 | goto nlmsg_failure; | ||
370 | } | ||
371 | |||
372 | nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); | ||
373 | nla->nla_type = NFQA_PAYLOAD; | ||
374 | nla->nla_len = sz; | ||
375 | |||
376 | if (skb_copy_bits(entskb, 0, nla_data(nla), data_len)) | ||
377 | BUG(); | ||
378 | } | ||
379 | |||
380 | nlh->nlmsg_len = skb->tail - old_tail; | ||
381 | return skb; | ||
382 | |||
383 | nlmsg_failure: | ||
384 | nla_put_failure: | ||
385 | if (skb) | ||
386 | kfree_skb(skb); | ||
387 | if (net_ratelimit()) | ||
388 | printk(KERN_ERR "nf_queue: error creating packet message\n"); | ||
389 | return NULL; | ||
390 | } | ||
391 | |||
392 | static int | ||
393 | nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) | ||
394 | { | ||
395 | struct sk_buff *nskb; | ||
396 | struct nfqnl_instance *queue; | ||
397 | int err = -ENOBUFS; | ||
398 | __be32 *packet_id_ptr; | ||
399 | |||
400 | /* rcu_read_lock()ed by nf_hook_slow() */ | ||
401 | queue = instance_lookup(queuenum); | ||
402 | if (!queue) { | ||
403 | err = -ESRCH; | ||
404 | goto err_out; | ||
405 | } | ||
406 | |||
407 | if (queue->copy_mode == NFQNL_COPY_NONE) { | ||
408 | err = -EINVAL; | ||
409 | goto err_out; | ||
410 | } | ||
411 | |||
412 | nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr); | ||
413 | if (nskb == NULL) { | ||
414 | err = -ENOMEM; | ||
415 | goto err_out; | ||
416 | } | ||
417 | spin_lock_bh(&queue->lock); | ||
418 | |||
419 | if (!queue->peer_pid) { | ||
420 | err = -EINVAL; | ||
421 | goto err_out_free_nskb; | ||
422 | } | ||
423 | if (queue->queue_total >= queue->queue_maxlen) { | ||
424 | queue->queue_dropped++; | ||
425 | if (net_ratelimit()) | ||
426 | printk(KERN_WARNING "nf_queue: full at %d entries, " | ||
427 | "dropping packets(s).\n", | ||
428 | queue->queue_total); | ||
429 | goto err_out_free_nskb; | ||
430 | } | ||
431 | entry->id = ++queue->id_sequence; | ||
432 | *packet_id_ptr = htonl(entry->id); | ||
433 | |||
434 | /* nfnetlink_unicast will either free the nskb or add it to a socket */ | ||
435 | err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); | ||
436 | if (err < 0) { | ||
437 | queue->queue_user_dropped++; | ||
438 | goto err_out_unlock; | ||
439 | } | ||
440 | |||
441 | __enqueue_entry(queue, entry); | ||
442 | |||
443 | spin_unlock_bh(&queue->lock); | ||
444 | return 0; | ||
445 | |||
446 | err_out_free_nskb: | ||
447 | kfree_skb(nskb); | ||
448 | err_out_unlock: | ||
449 | spin_unlock_bh(&queue->lock); | ||
450 | err_out: | ||
451 | return err; | ||
452 | } | ||
453 | |||
454 | static int | ||
455 | nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e) | ||
456 | { | ||
457 | struct sk_buff *nskb; | ||
458 | int diff; | ||
459 | |||
460 | diff = data_len - e->skb->len; | ||
461 | if (diff < 0) { | ||
462 | if (pskb_trim(e->skb, data_len)) | ||
463 | return -ENOMEM; | ||
464 | } else if (diff > 0) { | ||
465 | if (data_len > 0xFFFF) | ||
466 | return -EINVAL; | ||
467 | if (diff > skb_tailroom(e->skb)) { | ||
468 | nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), | ||
469 | diff, GFP_ATOMIC); | ||
470 | if (!nskb) { | ||
471 | printk(KERN_WARNING "nf_queue: OOM " | ||
472 | "in mangle, dropping packet\n"); | ||
473 | return -ENOMEM; | ||
474 | } | ||
475 | kfree_skb(e->skb); | ||
476 | e->skb = nskb; | ||
477 | } | ||
478 | skb_put(e->skb, diff); | ||
479 | } | ||
480 | if (!skb_make_writable(e->skb, data_len)) | ||
481 | return -ENOMEM; | ||
482 | skb_copy_to_linear_data(e->skb, data, data_len); | ||
483 | e->skb->ip_summed = CHECKSUM_NONE; | ||
484 | return 0; | ||
485 | } | ||
486 | |||
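The mangle path above runs when userspace hands back a verdict that carries an NFQA_PAYLOAD attribute. A minimal sketch of how that looks from a libnetfilter_queue callback, assuming the stock nfq_get_payload()/nfq_set_verdict() API (error handling elided; the payload edit is a toy):

```c
#include <stdint.h>
#include <arpa/inet.h>		/* ntohl */
#include <linux/netfilter.h>	/* NF_ACCEPT */
#include <libnetfilter_queue/libnetfilter_queue.h>

static int mangle_cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
		     struct nfq_data *nfa, void *data)
{
	struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa);
	unsigned char *payload;	/* older lib versions use char ** here */
	int len = nfq_get_payload(nfa, &payload);

	if (!ph)
		return -1;
	if (len > 0)
		payload[len - 1] ^= 0xff;	/* toy in-place edit */

	/* a non-zero data_len/buf pair here is what drives nfqnl_mangle() */
	return nfq_set_verdict(qh, ntohl(ph->packet_id), NF_ACCEPT,
			       len > 0 ? (uint32_t)len : 0,
			       len > 0 ? payload : NULL);
}
```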
487 | static int | ||
488 | nfqnl_set_mode(struct nfqnl_instance *queue, | ||
489 | unsigned char mode, unsigned int range) | ||
490 | { | ||
491 | int status = 0; | ||
492 | |||
493 | spin_lock_bh(&queue->lock); | ||
494 | switch (mode) { | ||
495 | case NFQNL_COPY_NONE: | ||
496 | case NFQNL_COPY_META: | ||
497 | queue->copy_mode = mode; | ||
498 | queue->copy_range = 0; | ||
499 | break; | ||
500 | |||
501 | case NFQNL_COPY_PACKET: | ||
502 | queue->copy_mode = mode; | ||
503 | /* we're using struct nlattr which has 16bit nla_len */ | ||
504 | if (range > 0xffff) | ||
505 | queue->copy_range = 0xffff; | ||
506 | else | ||
507 | queue->copy_range = range; | ||
508 | break; | ||
509 | |||
510 | default: | ||
511 | status = -EINVAL; | ||
512 | |||
513 | } | ||
514 | spin_unlock_bh(&queue->lock); | ||
515 | |||
516 | return status; | ||
517 | } | ||
518 | |||
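From userspace, the copy mode and range negotiated by nfqnl_set_mode() are usually configured through libnetfilter_queue. A minimal sketch, assuming the standard nfq_create_queue()/nfq_set_mode() API; queue number 0 is arbitrary, and 0xffff is also the cap the kernel enforces above because nla_len is only 16 bits wide:

```c
#include <libnetfilter_queue/libnetfilter_queue.h>

/* Sketch: bind queue 0 and request full-packet copies (assumed API). */
static struct nfq_q_handle *setup(struct nfq_handle *h, nfq_callback *cb)
{
	struct nfq_q_handle *qh = nfq_create_queue(h, 0, cb, NULL);

	if (qh)
		nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff);
	return qh;
}
```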
519 | static int | ||
520 | dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) | ||
521 | { | ||
522 | if (entry->indev) | ||
523 | if (entry->indev->ifindex == ifindex) | ||
524 | return 1; | ||
525 | if (entry->outdev) | ||
526 | if (entry->outdev->ifindex == ifindex) | ||
527 | return 1; | ||
528 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
529 | if (entry->skb->nf_bridge) { | ||
530 | if (entry->skb->nf_bridge->physindev && | ||
531 | entry->skb->nf_bridge->physindev->ifindex == ifindex) | ||
532 | return 1; | ||
533 | if (entry->skb->nf_bridge->physoutdev && | ||
534 | entry->skb->nf_bridge->physoutdev->ifindex == ifindex) | ||
535 | return 1; | ||
536 | } | ||
537 | #endif | ||
538 | return 0; | ||
539 | } | ||
540 | |||
541 | /* drop all packets with either indev or outdev == ifindex from all queue | ||
542 | * instances */ | ||
543 | static void | ||
544 | nfqnl_dev_drop(int ifindex) | ||
545 | { | ||
546 | int i; | ||
547 | |||
548 | rcu_read_lock(); | ||
549 | |||
550 | for (i = 0; i < INSTANCE_BUCKETS; i++) { | ||
551 | struct hlist_node *tmp; | ||
552 | struct nfqnl_instance *inst; | ||
553 | struct hlist_head *head = &instance_table[i]; | ||
554 | |||
555 | hlist_for_each_entry_rcu(inst, tmp, head, hlist) | ||
556 | nfqnl_flush(inst, dev_cmp, ifindex); | ||
557 | } | ||
558 | |||
559 | rcu_read_unlock(); | ||
560 | } | ||
561 | |||
562 | #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) | ||
563 | |||
564 | static int | ||
565 | nfqnl_rcv_dev_event(struct notifier_block *this, | ||
566 | unsigned long event, void *ptr) | ||
567 | { | ||
568 | struct net_device *dev = ptr; | ||
569 | |||
570 | if (!net_eq(dev_net(dev), &init_net)) | ||
571 | return NOTIFY_DONE; | ||
572 | |||
573 | /* Drop any packets associated with the downed device */ | ||
574 | if (event == NETDEV_DOWN) | ||
575 | nfqnl_dev_drop(dev->ifindex); | ||
576 | return NOTIFY_DONE; | ||
577 | } | ||
578 | |||
579 | static struct notifier_block nfqnl_dev_notifier = { | ||
580 | .notifier_call = nfqnl_rcv_dev_event, | ||
581 | }; | ||
582 | |||
583 | static int | ||
584 | nfqnl_rcv_nl_event(struct notifier_block *this, | ||
585 | unsigned long event, void *ptr) | ||
586 | { | ||
587 | struct netlink_notify *n = ptr; | ||
588 | |||
589 | if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { | ||
590 | int i; | ||
591 | |||
592 | /* destroy all instances for this pid */ | ||
593 | spin_lock(&instances_lock); | ||
594 | for (i = 0; i < INSTANCE_BUCKETS; i++) { | ||
595 | struct hlist_node *tmp, *t2; | ||
596 | struct nfqnl_instance *inst; | ||
597 | struct hlist_head *head = &instance_table[i]; | ||
598 | |||
599 | hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { | ||
600 | if ((n->net == &init_net) && | ||
601 | (n->pid == inst->peer_pid)) | ||
602 | __instance_destroy(inst); | ||
603 | } | ||
604 | } | ||
605 | spin_unlock(&instances_lock); | ||
606 | } | ||
607 | return NOTIFY_DONE; | ||
608 | } | ||
609 | |||
610 | static struct notifier_block nfqnl_rtnl_notifier = { | ||
611 | .notifier_call = nfqnl_rcv_nl_event, | ||
612 | }; | ||
613 | |||
614 | static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { | ||
615 | [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, | ||
616 | [NFQA_MARK] = { .type = NLA_U32 }, | ||
617 | [NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, | ||
618 | }; | ||
619 | |||
620 | static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { | ||
621 | [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, | ||
622 | [NFQA_MARK] = { .type = NLA_U32 }, | ||
623 | }; | ||
624 | |||
625 | static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) | ||
626 | { | ||
627 | struct nfqnl_instance *queue; | ||
628 | |||
629 | queue = instance_lookup(queue_num); | ||
630 | if (!queue) | ||
631 | return ERR_PTR(-ENODEV); | ||
632 | |||
633 | if (queue->peer_pid != nlpid) | ||
634 | return ERR_PTR(-EPERM); | ||
635 | |||
636 | return queue; | ||
637 | } | ||
638 | |||
639 | static struct nfqnl_msg_verdict_hdr* | ||
640 | verdicthdr_get(const struct nlattr * const nfqa[]) | ||
641 | { | ||
642 | struct nfqnl_msg_verdict_hdr *vhdr; | ||
643 | unsigned int verdict; | ||
644 | |||
645 | if (!nfqa[NFQA_VERDICT_HDR]) | ||
646 | return NULL; | ||
647 | |||
648 | vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); | ||
649 | verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK; | ||
650 | if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN) | ||
651 | return NULL; | ||
652 | return vhdr; | ||
653 | } | ||
654 | |||
655 | static int nfq_id_after(unsigned int id, unsigned int max) | ||
656 | { | ||
657 | return (int)(id - max) > 0; | ||
658 | } | ||
659 | |||
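nfq_id_after() uses the signed-difference idiom so the comparison stays correct when the 32-bit packet id counter wraps around. A standalone check (assumes the usual two's-complement int):

```c
#include <stdio.h>

static int nfq_id_after(unsigned int id, unsigned int max)
{
	return (int)(id - max) > 0;
}

int main(void)
{
	/* a naive "id > max" would get the wrapped case wrong */
	printf("%d\n", nfq_id_after(2, 1));		/* 1: after */
	printf("%d\n", nfq_id_after(1, 0xffffffff));	/* 1: 1 follows the wrap */
	printf("%d\n", nfq_id_after(0xffffffff, 1));	/* 0: not after */
	return 0;
}
```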
660 | static int | ||
661 | nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, | ||
662 | const struct nlmsghdr *nlh, | ||
663 | const struct nlattr * const nfqa[]) | ||
664 | { | ||
665 | struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); | ||
666 | struct nf_queue_entry *entry, *tmp; | ||
667 | unsigned int verdict, maxid; | ||
668 | struct nfqnl_msg_verdict_hdr *vhdr; | ||
669 | struct nfqnl_instance *queue; | ||
670 | LIST_HEAD(batch_list); | ||
671 | u16 queue_num = ntohs(nfmsg->res_id); | ||
672 | |||
673 | queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); | ||
674 | if (IS_ERR(queue)) | ||
675 | return PTR_ERR(queue); | ||
676 | |||
677 | vhdr = verdicthdr_get(nfqa); | ||
678 | if (!vhdr) | ||
679 | return -EINVAL; | ||
680 | |||
681 | verdict = ntohl(vhdr->verdict); | ||
682 | maxid = ntohl(vhdr->id); | ||
683 | |||
684 | spin_lock_bh(&queue->lock); | ||
685 | |||
686 | list_for_each_entry_safe(entry, tmp, &queue->queue_list, list) { | ||
687 | if (nfq_id_after(entry->id, maxid)) | ||
688 | break; | ||
689 | __dequeue_entry(queue, entry); | ||
690 | list_add_tail(&entry->list, &batch_list); | ||
691 | } | ||
692 | |||
693 | spin_unlock_bh(&queue->lock); | ||
694 | |||
695 | if (list_empty(&batch_list)) | ||
696 | return -ENOENT; | ||
697 | |||
698 | list_for_each_entry_safe(entry, tmp, &batch_list, list) { | ||
699 | if (nfqa[NFQA_MARK]) | ||
700 | entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); | ||
701 | nf_reinject(entry, verdict); | ||
702 | } | ||
703 | return 0; | ||
704 | } | ||
705 | |||
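Batch verdicts let a single netlink message resolve every queued packet whose id is at or below vhdr->id. Recent libnetfilter_queue releases expose this as nfq_set_verdict_batch(), which maps onto the NFQNL_MSG_VERDICT_BATCH handler above; treating that API as an assumption, a sketch:

```c
#include <stdint.h>
#include <linux/netfilter.h>	/* NF_ACCEPT */
#include <libnetfilter_queue/libnetfilter_queue.h>

/* Accept every packet with id <= maxid in one message (assumed API). */
static int accept_up_to(struct nfq_q_handle *qh, uint32_t maxid)
{
	return nfq_set_verdict_batch(qh, maxid, NF_ACCEPT);
}
```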
706 | static int | ||
707 | nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, | ||
708 | const struct nlmsghdr *nlh, | ||
709 | const struct nlattr * const nfqa[]) | ||
710 | { | ||
711 | struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); | ||
712 | u_int16_t queue_num = ntohs(nfmsg->res_id); | ||
713 | |||
714 | struct nfqnl_msg_verdict_hdr *vhdr; | ||
715 | struct nfqnl_instance *queue; | ||
716 | unsigned int verdict; | ||
717 | struct nf_queue_entry *entry; | ||
718 | |||
719 | /* pid-aware lookup: only the socket that bound this queue | ||
720 |  * may issue verdicts on it (a bare instance_lookup() here | ||
721 |  * would skip that check) */ | ||
722 | queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); | ||
723 | if (IS_ERR(queue)) | ||
724 | return PTR_ERR(queue); | ||
725 | |||
726 | vhdr = verdicthdr_get(nfqa); | ||
727 | if (!vhdr) | ||
728 | return -EINVAL; | ||
729 | |||
730 | verdict = ntohl(vhdr->verdict); | ||
731 | |||
732 | entry = find_dequeue_entry(queue, ntohl(vhdr->id)); | ||
733 | if (entry == NULL) | ||
734 | return -ENOENT; | ||
735 | |||
736 | if (nfqa[NFQA_PAYLOAD]) { | ||
737 | if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), | ||
738 | nla_len(nfqa[NFQA_PAYLOAD]), entry) < 0) | ||
739 | verdict = NF_DROP; | ||
740 | } | ||
741 | |||
742 | if (nfqa[NFQA_MARK]) | ||
743 | entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); | ||
744 | |||
745 | nf_reinject(entry, verdict); | ||
746 | return 0; | ||
747 | } | ||
748 | |||
749 | static int | ||
750 | nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, | ||
751 | const struct nlmsghdr *nlh, | ||
752 | const struct nlattr * const nfqa[]) | ||
753 | { | ||
754 | return -ENOTSUPP; | ||
755 | } | ||
756 | |||
757 | static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { | ||
758 | [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) }, | ||
759 | [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, | ||
760 | }; | ||
761 | |||
762 | static const struct nf_queue_handler nfqh = { | ||
763 | .name = "nf_queue", | ||
764 | .outfn = &nfqnl_enqueue_packet, | ||
765 | }; | ||
766 | |||
767 | static int | ||
768 | nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, | ||
769 | const struct nlmsghdr *nlh, | ||
770 | const struct nlattr * const nfqa[]) | ||
771 | { | ||
772 | struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); | ||
773 | u_int16_t queue_num = ntohs(nfmsg->res_id); | ||
774 | struct nfqnl_instance *queue; | ||
775 | struct nfqnl_msg_config_cmd *cmd = NULL; | ||
776 | int ret = 0; | ||
777 | |||
778 | if (nfqa[NFQA_CFG_CMD]) { | ||
779 | cmd = nla_data(nfqa[NFQA_CFG_CMD]); | ||
780 | |||
781 | /* Commands without queue context - might sleep */ | ||
782 | switch (cmd->command) { | ||
783 | case NFQNL_CFG_CMD_PF_BIND: | ||
784 | return nf_register_queue_handler(ntohs(cmd->pf), | ||
785 | &nfqh); | ||
786 | case NFQNL_CFG_CMD_PF_UNBIND: | ||
787 | return nf_unregister_queue_handler(ntohs(cmd->pf), | ||
788 | &nfqh); | ||
789 | } | ||
790 | } | ||
791 | |||
792 | rcu_read_lock(); | ||
793 | queue = instance_lookup(queue_num); | ||
794 | if (queue && queue->peer_pid != NETLINK_CB(skb).pid) { | ||
795 | ret = -EPERM; | ||
796 | goto err_out_unlock; | ||
797 | } | ||
798 | |||
799 | if (cmd != NULL) { | ||
800 | switch (cmd->command) { | ||
801 | case NFQNL_CFG_CMD_BIND: | ||
802 | if (queue) { | ||
803 | ret = -EBUSY; | ||
804 | goto err_out_unlock; | ||
805 | } | ||
806 | queue = instance_create(queue_num, NETLINK_CB(skb).pid); | ||
807 | if (IS_ERR(queue)) { | ||
808 | ret = PTR_ERR(queue); | ||
809 | goto err_out_unlock; | ||
810 | } | ||
811 | break; | ||
812 | case NFQNL_CFG_CMD_UNBIND: | ||
813 | if (!queue) { | ||
814 | ret = -ENODEV; | ||
815 | goto err_out_unlock; | ||
816 | } | ||
817 | instance_destroy(queue); | ||
818 | break; | ||
819 | case NFQNL_CFG_CMD_PF_BIND: | ||
820 | case NFQNL_CFG_CMD_PF_UNBIND: | ||
821 | break; | ||
822 | default: | ||
823 | ret = -ENOTSUPP; | ||
824 | break; | ||
825 | } | ||
826 | } | ||
827 | |||
828 | if (nfqa[NFQA_CFG_PARAMS]) { | ||
829 | struct nfqnl_msg_config_params *params; | ||
830 | |||
831 | if (!queue) { | ||
832 | ret = -ENODEV; | ||
833 | goto err_out_unlock; | ||
834 | } | ||
835 | params = nla_data(nfqa[NFQA_CFG_PARAMS]); | ||
836 | nfqnl_set_mode(queue, params->copy_mode, | ||
837 | ntohl(params->copy_range)); | ||
838 | } | ||
839 | |||
840 | if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) { | ||
841 | __be32 *queue_maxlen; | ||
842 | |||
843 | if (!queue) { | ||
844 | ret = -ENODEV; | ||
845 | goto err_out_unlock; | ||
846 | } | ||
847 | queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); | ||
848 | spin_lock_bh(&queue->lock); | ||
849 | queue->queue_maxlen = ntohl(*queue_maxlen); | ||
850 | spin_unlock_bh(&queue->lock); | ||
851 | } | ||
852 | |||
853 | err_out_unlock: | ||
854 | rcu_read_unlock(); | ||
855 | return ret; | ||
856 | } | ||
857 | |||
858 | static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { | ||
859 | [NFQNL_MSG_PACKET] = { .call_rcu = nfqnl_recv_unsupp, | ||
860 | .attr_count = NFQA_MAX, }, | ||
861 | [NFQNL_MSG_VERDICT] = { .call_rcu = nfqnl_recv_verdict, | ||
862 | .attr_count = NFQA_MAX, | ||
863 | .policy = nfqa_verdict_policy }, | ||
864 | [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, | ||
865 | .attr_count = NFQA_CFG_MAX, | ||
866 | .policy = nfqa_cfg_policy }, | ||
867 | [NFQNL_MSG_VERDICT_BATCH]={ .call_rcu = nfqnl_recv_verdict_batch, | ||
868 | .attr_count = NFQA_MAX, | ||
869 | .policy = nfqa_verdict_batch_policy }, | ||
870 | }; | ||
871 | |||
872 | static const struct nfnetlink_subsystem nfqnl_subsys = { | ||
873 | .name = "nf_queue", | ||
874 | .subsys_id = NFNL_SUBSYS_QUEUE, | ||
875 | .cb_count = NFQNL_MSG_MAX, | ||
876 | .cb = nfqnl_cb, | ||
877 | }; | ||
878 | |||
879 | #ifdef CONFIG_PROC_FS | ||
880 | struct iter_state { | ||
881 | unsigned int bucket; | ||
882 | }; | ||
883 | |||
884 | static struct hlist_node *get_first(struct seq_file *seq) | ||
885 | { | ||
886 | struct iter_state *st = seq->private; | ||
887 | |||
888 | if (!st) | ||
889 | return NULL; | ||
890 | |||
891 | for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { | ||
892 | if (!hlist_empty(&instance_table[st->bucket])) | ||
893 | return instance_table[st->bucket].first; | ||
894 | } | ||
895 | return NULL; | ||
896 | } | ||
897 | |||
898 | static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) | ||
899 | { | ||
900 | struct iter_state *st = seq->private; | ||
901 | |||
902 | h = h->next; | ||
903 | while (!h) { | ||
904 | if (++st->bucket >= INSTANCE_BUCKETS) | ||
905 | return NULL; | ||
906 | |||
907 | h = instance_table[st->bucket].first; | ||
908 | } | ||
909 | return h; | ||
910 | } | ||
911 | |||
912 | static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) | ||
913 | { | ||
914 | struct hlist_node *head; | ||
915 | head = get_first(seq); | ||
916 | |||
917 | if (head) | ||
918 | while (pos && (head = get_next(seq, head))) | ||
919 | pos--; | ||
920 | return pos ? NULL : head; | ||
921 | } | ||
922 | |||
923 | static void *seq_start(struct seq_file *seq, loff_t *pos) | ||
924 | __acquires(instances_lock) | ||
925 | { | ||
926 | spin_lock(&instances_lock); | ||
927 | return get_idx(seq, *pos); | ||
928 | } | ||
929 | |||
930 | static void *seq_next(struct seq_file *s, void *v, loff_t *pos) | ||
931 | { | ||
932 | (*pos)++; | ||
933 | return get_next(s, v); | ||
934 | } | ||
935 | |||
936 | static void seq_stop(struct seq_file *s, void *v) | ||
937 | __releases(instances_lock) | ||
938 | { | ||
939 | spin_unlock(&instances_lock); | ||
940 | } | ||
941 | |||
942 | static int seq_show(struct seq_file *s, void *v) | ||
943 | { | ||
944 | const struct nfqnl_instance *inst = v; | ||
945 | |||
946 | return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", | ||
947 | inst->queue_num, | ||
948 | inst->peer_pid, inst->queue_total, | ||
949 | inst->copy_mode, inst->copy_range, | ||
950 | inst->queue_dropped, inst->queue_user_dropped, | ||
951 | inst->id_sequence, 1); | ||
952 | } | ||
953 | |||
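Reading this back from the format string: each line of /proc/net/netfilter/nfnetlink_queue reports, in order, the queue number, the peer netlink pid, the packets currently queued, the copy mode, the copy range, kernel-side drops, drops on the userspace socket, the last packet id handed out, and a constant 1.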
954 | static const struct seq_operations nfqnl_seq_ops = { | ||
955 | .start = seq_start, | ||
956 | .next = seq_next, | ||
957 | .stop = seq_stop, | ||
958 | .show = seq_show, | ||
959 | }; | ||
960 | |||
961 | static int nfqnl_open(struct inode *inode, struct file *file) | ||
962 | { | ||
963 | return seq_open_private(file, &nfqnl_seq_ops, | ||
964 | sizeof(struct iter_state)); | ||
965 | } | ||
966 | |||
967 | static const struct file_operations nfqnl_file_ops = { | ||
968 | .owner = THIS_MODULE, | ||
969 | .open = nfqnl_open, | ||
970 | .read = seq_read, | ||
971 | .llseek = seq_lseek, | ||
972 | .release = seq_release_private, | ||
973 | }; | ||
974 | |||
975 | #endif /* PROC_FS */ | ||
976 | |||
977 | static int __init nfnetlink_queue_init(void) | ||
978 | { | ||
979 | int i, status = -ENOMEM; | ||
980 | |||
981 | for (i = 0; i < INSTANCE_BUCKETS; i++) | ||
982 | INIT_HLIST_HEAD(&instance_table[i]); | ||
983 | |||
984 | netlink_register_notifier(&nfqnl_rtnl_notifier); | ||
985 | status = nfnetlink_subsys_register(&nfqnl_subsys); | ||
986 | if (status < 0) { | ||
987 | printk(KERN_ERR "nf_queue: failed to create netlink socket\n"); | ||
988 | goto cleanup_netlink_notifier; | ||
989 | } | ||
990 | |||
991 | #ifdef CONFIG_PROC_FS | ||
992 | if (!proc_create("nfnetlink_queue", 0440, | ||
993 | proc_net_netfilter, &nfqnl_file_ops)) | ||
994 | goto cleanup_subsys; | ||
995 | #endif | ||
996 | |||
997 | register_netdevice_notifier(&nfqnl_dev_notifier); | ||
998 | return status; | ||
999 | |||
1000 | #ifdef CONFIG_PROC_FS | ||
1001 | cleanup_subsys: | ||
1002 | nfnetlink_subsys_unregister(&nfqnl_subsys); | ||
1003 | #endif | ||
1004 | cleanup_netlink_notifier: | ||
1005 | netlink_unregister_notifier(&nfqnl_rtnl_notifier); | ||
1006 | return status; | ||
1007 | } | ||
1008 | |||
1009 | static void __exit nfnetlink_queue_fini(void) | ||
1010 | { | ||
1011 | nf_unregister_queue_handlers(&nfqh); | ||
1012 | unregister_netdevice_notifier(&nfqnl_dev_notifier); | ||
1013 | #ifdef CONFIG_PROC_FS | ||
1014 | remove_proc_entry("nfnetlink_queue", proc_net_netfilter); | ||
1015 | #endif | ||
1016 | nfnetlink_subsys_unregister(&nfqnl_subsys); | ||
1017 | netlink_unregister_notifier(&nfqnl_rtnl_notifier); | ||
1018 | |||
1019 | rcu_barrier(); /* Wait for completion of call_rcu()'s */ | ||
1020 | } | ||
1021 | |||
1022 | MODULE_DESCRIPTION("netfilter packet queue handler"); | ||
1023 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | ||
1024 | MODULE_LICENSE("GPL"); | ||
1025 | MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE); | ||
1026 | |||
1027 | module_init(nfnetlink_queue_init); | ||
1028 | module_exit(nfnetlink_queue_fini); | ||
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
new file mode 100644
index 00000000000..9d782181b6c
--- /dev/null
+++ b/net/netfilter/xt_NOTRACK.c
@@ -0,0 +1,53 @@
1 | /* This is a module which is used for setting up fake conntracks | ||
2 | * on packets so that they are not seen by the conntrack/NAT code. | ||
3 | */ | ||
4 | #include <linux/module.h> | ||
5 | #include <linux/skbuff.h> | ||
6 | |||
7 | #include <linux/netfilter/x_tables.h> | ||
8 | #include <net/netfilter/nf_conntrack.h> | ||
9 | |||
10 | MODULE_DESCRIPTION("Xtables: Disabling connection tracking for packets"); | ||
11 | MODULE_LICENSE("GPL"); | ||
12 | MODULE_ALIAS("ipt_NOTRACK"); | ||
13 | MODULE_ALIAS("ip6t_NOTRACK"); | ||
14 | |||
15 | static unsigned int | ||
16 | notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) | ||
17 | { | ||
18 | /* Previously seen (loopback)? Ignore. */ | ||
19 | if (skb->nfct != NULL) | ||
20 | return XT_CONTINUE; | ||
21 | |||
22 | /* Attach fake conntrack entry. | ||
23 | If there is a real ct entry corresponding to this packet, | ||
24 | it'll hang around till timing out. We don't deal with it | ||
25 | for performance reasons. JK */ | ||
26 | skb->nfct = &nf_ct_untracked_get()->ct_general; | ||
27 | skb->nfctinfo = IP_CT_NEW; | ||
28 | nf_conntrack_get(skb->nfct); | ||
29 | |||
30 | return XT_CONTINUE; | ||
31 | } | ||
32 | |||
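Because the registration below sets .table = "raw", the target can only be used from the raw table, which runs before connection tracking; for example, `iptables -t raw -A PREROUTING -p udp --dport 53 -j NOTRACK` exempts inbound DNS traffic from conntrack.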
33 | static struct xt_target notrack_tg_reg __read_mostly = { | ||
34 | .name = "NOTRACK", | ||
35 | .revision = 0, | ||
36 | .family = NFPROTO_UNSPEC, | ||
37 | .target = notrack_tg, | ||
38 | .table = "raw", | ||
39 | .me = THIS_MODULE, | ||
40 | }; | ||
41 | |||
42 | static int __init notrack_tg_init(void) | ||
43 | { | ||
44 | return xt_register_target(¬rack_tg_reg); | ||
45 | } | ||
46 | |||
47 | static void __exit notrack_tg_exit(void) | ||
48 | { | ||
49 | xt_unregister_target(¬rack_tg_reg); | ||
50 | } | ||
51 | |||
52 | module_init(notrack_tg_init); | ||
53 | module_exit(notrack_tg_exit); | ||
diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c
new file mode 100644
index 00000000000..08086d680c2
--- /dev/null
+++ b/net/netfilter/xt_qtaguid.c
@@ -0,0 +1,2785 @@
1 | /* | ||
2 | * Kernel iptables module to track stats for packets based on user tags. | ||
3 | * | ||
4 | * (C) 2011 Google, Inc | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | /* | ||
12 | * There are run-time debug flags enabled via the debug_mask module param, or | ||
13 | * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h. | ||
14 | */ | ||
15 | #define DEBUG | ||
16 | |||
17 | #include <linux/file.h> | ||
18 | #include <linux/inetdevice.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/netfilter/x_tables.h> | ||
21 | #include <linux/netfilter/xt_qtaguid.h> | ||
22 | #include <linux/skbuff.h> | ||
23 | #include <linux/workqueue.h> | ||
24 | #include <net/addrconf.h> | ||
25 | #include <net/sock.h> | ||
26 | #include <net/tcp.h> | ||
27 | #include <net/udp.h> | ||
28 | |||
29 | #include <linux/netfilter/xt_socket.h> | ||
30 | #include "xt_qtaguid_internal.h" | ||
31 | #include "xt_qtaguid_print.h" | ||
32 | |||
33 | /* | ||
34 | * We only use the xt_socket funcs within a similar context to avoid unexpected | ||
35 | * return values. | ||
36 | */ | ||
37 | #define XT_SOCKET_SUPPORTED_HOOKS \ | ||
38 | ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) | ||
39 | |||
40 | |||
41 | static const char *module_procdirname = "xt_qtaguid"; | ||
42 | static struct proc_dir_entry *xt_qtaguid_procdir; | ||
43 | |||
44 | static unsigned int proc_iface_perms = S_IRUGO; | ||
45 | module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR); | ||
46 | |||
47 | static struct proc_dir_entry *xt_qtaguid_stats_file; | ||
48 | static unsigned int proc_stats_perms = S_IRUGO; | ||
49 | module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR); | ||
50 | |||
51 | static struct proc_dir_entry *xt_qtaguid_ctrl_file; | ||
52 | #ifdef CONFIG_ANDROID_PARANOID_NETWORK | ||
53 | static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO; | ||
54 | #else | ||
55 | static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR; | ||
56 | #endif | ||
57 | module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR); | ||
58 | |||
59 | #ifdef CONFIG_ANDROID_PARANOID_NETWORK | ||
60 | #include <linux/android_aid.h> | ||
61 | static gid_t proc_stats_readall_gid = AID_NET_BW_STATS; | ||
62 | static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT; | ||
63 | #else | ||
64 | /* 0 means, don't limit anybody */ | ||
65 | static gid_t proc_stats_readall_gid; | ||
66 | static gid_t proc_ctrl_write_gid; | ||
67 | #endif | ||
68 | module_param_named(stats_readall_gid, proc_stats_readall_gid, uint, | ||
69 | S_IRUGO | S_IWUSR); | ||
70 | module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint, | ||
71 | S_IRUGO | S_IWUSR); | ||
72 | |||
73 | /* | ||
74 | * Limit the number of active tags (via socket tags) for a given UID. | ||
75 | * Multiple processes could share the UID. | ||
76 | */ | ||
77 | static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS; | ||
78 | module_param(max_sock_tags, int, S_IRUGO | S_IWUSR); | ||
79 | |||
80 | /* | ||
81 | * After the kernel has initialized this module, it is still possible | ||
82 | * to make it passive. | ||
83 | * Setting passive to Y: | ||
84 | * - the iface stats handling will not act on notifications. | ||
85 | * - iptables matches will never match. | ||
86 | * - ctrl commands silently succeed. | ||
87 | * - stats are always empty. | ||
88 | * This is mostly useful when a bug is suspected. | ||
89 | */ | ||
90 | static bool module_passive; | ||
91 | module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR); | ||
92 | |||
93 | /* | ||
94 | * Control how qtaguid data is tracked per proc/uid. | ||
95 | * Setting tag_tracking_passive to Y: | ||
96 | * - don't create proc specific structs to track tags | ||
97 | * - don't check that active tag stats exceed some limits. | ||
98 | * - don't clean up socket tags on process exits. | ||
99 | * This is mostly useful when a bug is suspected. | ||
100 | */ | ||
101 | static bool qtu_proc_handling_passive; | ||
102 | module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool, | ||
103 | S_IRUGO | S_IWUSR); | ||
104 | |||
105 | #define QTU_DEV_NAME "xt_qtaguid" | ||
106 | |||
107 | uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK; | ||
108 | module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR); | ||
109 | |||
110 | /*---------------------------------------------------------------------------*/ | ||
111 | static const char *iface_stat_procdirname = "iface_stat"; | ||
112 | static struct proc_dir_entry *iface_stat_procdir; | ||
113 | static const char *iface_stat_all_procfilename = "iface_stat_all"; | ||
114 | static struct proc_dir_entry *iface_stat_all_procfile; | ||
115 | |||
116 | /* | ||
117 | * Ordering of locks: | ||
118 | * outer locks: | ||
119 | * iface_stat_list_lock | ||
120 | * sock_tag_list_lock | ||
121 | * inner locks: | ||
122 | * uid_tag_data_tree_lock | ||
123 | * tag_counter_set_list_lock | ||
124 | * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock | ||
125 | * is acquired. | ||
126 | * | ||
127 | * Call tree with all lock holders as of 2011-09-25: | ||
128 | * | ||
129 | * iface_stat_all_proc_read() | ||
130 | * iface_stat_list_lock | ||
131 | * (struct iface_stat) | ||
132 | * | ||
133 | * qtaguid_ctrl_proc_read() | ||
134 | * sock_tag_list_lock | ||
135 | * (sock_tag_tree) | ||
136 | * (struct proc_qtu_data->sock_tag_list) | ||
137 | * prdebug_full_state() | ||
138 | * sock_tag_list_lock | ||
139 | * (sock_tag_tree) | ||
140 | * uid_tag_data_tree_lock | ||
141 | * (uid_tag_data_tree) | ||
142 | * (proc_qtu_data_tree) | ||
143 | * iface_stat_list_lock | ||
144 | * | ||
145 | * qtaguid_stats_proc_read() | ||
146 | * iface_stat_list_lock | ||
147 | * struct iface_stat->tag_stat_list_lock | ||
148 | * | ||
149 | * qtudev_open() | ||
150 | * uid_tag_data_tree_lock | ||
151 | * | ||
152 | * qtudev_release() | ||
153 | * sock_tag_data_list_lock | ||
154 | * uid_tag_data_tree_lock | ||
155 | * prdebug_full_state() | ||
156 | * sock_tag_list_lock | ||
157 | * uid_tag_data_tree_lock | ||
158 | * iface_stat_list_lock | ||
159 | * | ||
160 | * iface_netdev_event_handler() | ||
161 | * iface_stat_create() | ||
162 | * iface_stat_list_lock | ||
163 | * iface_stat_update() | ||
164 | * iface_stat_list_lock | ||
165 | * | ||
166 | * iface_inetaddr_event_handler() | ||
167 | * iface_stat_create() | ||
168 | * iface_stat_list_lock | ||
169 | * iface_stat_update() | ||
170 | * iface_stat_list_lock | ||
171 | * | ||
172 | * iface_inet6addr_event_handler() | ||
173 | * iface_stat_create_ipv6() | ||
174 | * iface_stat_list_lock | ||
175 | * iface_stat_update() | ||
176 | * iface_stat_list_lock | ||
177 | * | ||
178 | * qtaguid_mt() | ||
179 | * account_for_uid() | ||
180 | * if_tag_stat_update() | ||
181 | * get_sock_stat() | ||
182 | * sock_tag_list_lock | ||
183 | * struct iface_stat->tag_stat_list_lock | ||
184 | * tag_stat_update() | ||
185 | * get_active_counter_set() | ||
186 | * tag_counter_set_list_lock | ||
187 | * tag_stat_update() | ||
188 | * get_active_counter_set() | ||
189 | * tag_counter_set_list_lock | ||
190 | * | ||
191 | * | ||
192 | * qtaguid_ctrl_parse() | ||
193 | * ctrl_cmd_delete() | ||
194 | * sock_tag_list_lock | ||
195 | * tag_counter_set_list_lock | ||
196 | * iface_stat_list_lock | ||
197 | * struct iface_stat->tag_stat_list_lock | ||
198 | * uid_tag_data_tree_lock | ||
199 | * ctrl_cmd_counter_set() | ||
200 | * tag_counter_set_list_lock | ||
201 | * ctrl_cmd_tag() | ||
202 | * sock_tag_list_lock | ||
203 | * (sock_tag_tree) | ||
204 | * get_tag_ref() | ||
205 | * uid_tag_data_tree_lock | ||
206 | * (uid_tag_data_tree) | ||
207 | * uid_tag_data_tree_lock | ||
208 | * (proc_qtu_data_tree) | ||
209 | * ctrl_cmd_untag() | ||
210 | * sock_tag_list_lock | ||
211 | * uid_tag_data_tree_lock | ||
212 | * | ||
213 | */ | ||
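To make the ordering rule concrete, an illustrative userspace model (pthread mutexes standing in for the spinlocks; this is not kernel code): any path that needs both an outer and an inner lock takes the outer one first, which keeps the lock graph acyclic and therefore deadlock-free.

```c
#include <pthread.h>

/* Names mirror two of the locks above; the pairing is illustrative. */
static pthread_mutex_t sock_tag_list_lock = PTHREAD_MUTEX_INITIALIZER;     /* outer */
static pthread_mutex_t uid_tag_data_tree_lock = PTHREAD_MUTEX_INITIALIZER; /* inner */

static void tag_path_locked(void (*body)(void))
{
	pthread_mutex_lock(&sock_tag_list_lock);	/* outer first ... */
	pthread_mutex_lock(&uid_tag_data_tree_lock);	/* ... then inner */
	body();
	pthread_mutex_unlock(&uid_tag_data_tree_lock);	/* release in reverse */
	pthread_mutex_unlock(&sock_tag_list_lock);
}
```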
214 | static LIST_HEAD(iface_stat_list); | ||
215 | static DEFINE_SPINLOCK(iface_stat_list_lock); | ||
216 | |||
217 | static struct rb_root sock_tag_tree = RB_ROOT; | ||
218 | static DEFINE_SPINLOCK(sock_tag_list_lock); | ||
219 | |||
220 | static struct rb_root tag_counter_set_tree = RB_ROOT; | ||
221 | static DEFINE_SPINLOCK(tag_counter_set_list_lock); | ||
222 | |||
223 | static struct rb_root uid_tag_data_tree = RB_ROOT; | ||
224 | static DEFINE_SPINLOCK(uid_tag_data_tree_lock); | ||
225 | |||
226 | static struct rb_root proc_qtu_data_tree = RB_ROOT; | ||
227 | /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */ | ||
228 | |||
229 | static struct qtaguid_event_counts qtu_events; | ||
230 | /*----------------------------------------------*/ | ||
231 | static bool can_manipulate_uids(void) | ||
232 | { | ||
233 | /* root pwnd */ | ||
234 | return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid) | ||
235 | || in_egroup_p(proc_ctrl_write_gid); | ||
236 | } | ||
237 | |||
238 | static bool can_impersonate_uid(uid_t uid) | ||
239 | { | ||
240 | return uid == current_fsuid() || can_manipulate_uids(); | ||
241 | } | ||
242 | |||
243 | static bool can_read_other_uid_stats(uid_t uid) | ||
244 | { | ||
245 | /* root pwnd */ | ||
246 | return unlikely(!current_fsuid()) || uid == current_fsuid() | ||
247 | || unlikely(!proc_stats_readall_gid) | ||
248 | || in_egroup_p(proc_stats_readall_gid); | ||
249 | } | ||
250 | |||
251 | static inline void dc_add_byte_packets(struct data_counters *counters, int set, | ||
252 | enum ifs_tx_rx direction, | ||
253 | enum ifs_proto ifs_proto, | ||
254 | int bytes, | ||
255 | int packets) | ||
256 | { | ||
257 | counters->bpc[set][direction][ifs_proto].bytes += bytes; | ||
258 | counters->bpc[set][direction][ifs_proto].packets += packets; | ||
259 | } | ||
260 | |||
261 | static inline uint64_t dc_sum_bytes(struct data_counters *counters, | ||
262 | int set, | ||
263 | enum ifs_tx_rx direction) | ||
264 | { | ||
265 | return counters->bpc[set][direction][IFS_TCP].bytes | ||
266 | + counters->bpc[set][direction][IFS_UDP].bytes | ||
267 | + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes; | ||
268 | } | ||
269 | |||
270 | static inline uint64_t dc_sum_packets(struct data_counters *counters, | ||
271 | int set, | ||
272 | enum ifs_tx_rx direction) | ||
273 | { | ||
274 | return counters->bpc[set][direction][IFS_TCP].packets | ||
275 | + counters->bpc[set][direction][IFS_UDP].packets | ||
276 | + counters->bpc[set][direction][IFS_PROTO_OTHER].packets; | ||
277 | } | ||
278 | |||
279 | static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag) | ||
280 | { | ||
281 | struct rb_node *node = root->rb_node; | ||
282 | |||
283 | while (node) { | ||
284 | struct tag_node *data = rb_entry(node, struct tag_node, node); | ||
285 | int result; | ||
286 | RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " | ||
287 | " node=%p data=%p\n", tag, node, data); | ||
288 | result = tag_compare(tag, data->tag); | ||
289 | RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " | ||
290 | " data.tag=0x%llx (uid=%u) res=%d\n", | ||
291 | tag, data->tag, get_uid_from_tag(data->tag), result); | ||
292 | if (result < 0) | ||
293 | node = node->rb_left; | ||
294 | else if (result > 0) | ||
295 | node = node->rb_right; | ||
296 | else | ||
297 | return data; | ||
298 | } | ||
299 | return NULL; | ||
300 | } | ||
301 | |||
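The tag_t keys ordered by tag_compare() above pack two values into 64 bits. Per the conventions in xt_qtaguid_internal.h (that header is not part of this hunk, so the layout is stated here as an assumption): the accounting tag lives in the high 32 bits and the uid in the low 32. A sketch:

```c
/* Assumed tag_t layout: acct tag in bits 63..32, uid in bits 31..0. */
#include <stdint.h>

typedef uint64_t tag_t;

static tag_t make_tag(uint32_t acct_tag, uint32_t uid)
{
	return ((tag_t)acct_tag << 32) | uid;
}

static uint32_t get_uid_from_tag(tag_t tag)
{
	return (uint32_t)(tag & 0xffffffffULL);
}
```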
302 | static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root) | ||
303 | { | ||
304 | struct rb_node **new = &(root->rb_node), *parent = NULL; | ||
305 | |||
306 | /* Figure out where to put new node */ | ||
307 | while (*new) { | ||
308 | struct tag_node *this = rb_entry(*new, struct tag_node, | ||
309 | node); | ||
310 | int result = tag_compare(data->tag, this->tag); | ||
311 | RB_DEBUG("qtaguid: %s(): tag=0x%llx" | ||
312 | " (uid=%u)\n", __func__, | ||
313 | this->tag, | ||
314 | get_uid_from_tag(this->tag)); | ||
315 | parent = *new; | ||
316 | if (result < 0) | ||
317 | new = &((*new)->rb_left); | ||
318 | else if (result > 0) | ||
319 | new = &((*new)->rb_right); | ||
320 | else | ||
321 | BUG(); | ||
322 | } | ||
323 | |||
324 | /* Add new node and rebalance tree. */ | ||
325 | rb_link_node(&data->node, parent, new); | ||
326 | rb_insert_color(&data->node, root); | ||
327 | } | ||
328 | |||
329 | static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root) | ||
330 | { | ||
331 | tag_node_tree_insert(&data->tn, root); | ||
332 | } | ||
333 | |||
334 | static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag) | ||
335 | { | ||
336 | struct tag_node *node = tag_node_tree_search(root, tag); | ||
337 | if (!node) | ||
338 | return NULL; | ||
339 | return rb_entry(&node->node, struct tag_stat, tn.node); | ||
340 | } | ||
341 | |||
342 | static void tag_counter_set_tree_insert(struct tag_counter_set *data, | ||
343 | struct rb_root *root) | ||
344 | { | ||
345 | tag_node_tree_insert(&data->tn, root); | ||
346 | } | ||
347 | |||
348 | static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root, | ||
349 | tag_t tag) | ||
350 | { | ||
351 | struct tag_node *node = tag_node_tree_search(root, tag); | ||
352 | if (!node) | ||
353 | return NULL; | ||
354 | return rb_entry(&node->node, struct tag_counter_set, tn.node); | ||
355 | |||
356 | } | ||
357 | |||
358 | static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root) | ||
359 | { | ||
360 | tag_node_tree_insert(&data->tn, root); | ||
361 | } | ||
362 | |||
363 | static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag) | ||
364 | { | ||
365 | struct tag_node *node = tag_node_tree_search(root, tag); | ||
366 | if (!node) | ||
367 | return NULL; | ||
368 | return rb_entry(&node->node, struct tag_ref, tn.node); | ||
369 | } | ||
370 | |||
371 | static struct sock_tag *sock_tag_tree_search(struct rb_root *root, | ||
372 | const struct sock *sk) | ||
373 | { | ||
374 | struct rb_node *node = root->rb_node; | ||
375 | |||
376 | while (node) { | ||
377 | struct sock_tag *data = rb_entry(node, struct sock_tag, | ||
378 | sock_node); | ||
379 | if (sk < data->sk) | ||
380 | node = node->rb_left; | ||
381 | else if (sk > data->sk) | ||
382 | node = node->rb_right; | ||
383 | else | ||
384 | return data; | ||
385 | } | ||
386 | return NULL; | ||
387 | } | ||
388 | |||
389 | static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root) | ||
390 | { | ||
391 | struct rb_node **new = &(root->rb_node), *parent = NULL; | ||
392 | |||
393 | /* Figure out where to put new node */ | ||
394 | while (*new) { | ||
395 | struct sock_tag *this = rb_entry(*new, struct sock_tag, | ||
396 | sock_node); | ||
397 | parent = *new; | ||
398 | if (data->sk < this->sk) | ||
399 | new = &((*new)->rb_left); | ||
400 | else if (data->sk > this->sk) | ||
401 | new = &((*new)->rb_right); | ||
402 | else | ||
403 | BUG(); | ||
404 | } | ||
405 | |||
406 | /* Add new node and rebalance tree. */ | ||
407 | rb_link_node(&data->sock_node, parent, new); | ||
408 | rb_insert_color(&data->sock_node, root); | ||
409 | } | ||
410 | |||
411 | static void sock_tag_tree_erase(struct rb_root *st_to_free_tree) | ||
412 | { | ||
413 | struct rb_node *node; | ||
414 | struct sock_tag *st_entry; | ||
415 | |||
416 | node = rb_first(st_to_free_tree); | ||
417 | while (node) { | ||
418 | st_entry = rb_entry(node, struct sock_tag, sock_node); | ||
419 | node = rb_next(node); | ||
420 | CT_DEBUG("qtaguid: %s(): " | ||
421 | "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__, | ||
422 | st_entry->sk, | ||
423 | st_entry->tag, | ||
424 | get_uid_from_tag(st_entry->tag)); | ||
425 | rb_erase(&st_entry->sock_node, st_to_free_tree); | ||
426 | sockfd_put(st_entry->socket); | ||
427 | kfree(st_entry); | ||
428 | } | ||
429 | } | ||
430 | |||
431 | static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root, | ||
432 | const pid_t pid) | ||
433 | { | ||
434 | struct rb_node *node = root->rb_node; | ||
435 | |||
436 | while (node) { | ||
437 | struct proc_qtu_data *data = rb_entry(node, | ||
438 | struct proc_qtu_data, | ||
439 | node); | ||
440 | if (pid < data->pid) | ||
441 | node = node->rb_left; | ||
442 | else if (pid > data->pid) | ||
443 | node = node->rb_right; | ||
444 | else | ||
445 | return data; | ||
446 | } | ||
447 | return NULL; | ||
448 | } | ||
449 | |||
450 | static void proc_qtu_data_tree_insert(struct proc_qtu_data *data, | ||
451 | struct rb_root *root) | ||
452 | { | ||
453 | struct rb_node **new = &(root->rb_node), *parent = NULL; | ||
454 | |||
455 | /* Figure out where to put new node */ | ||
456 | while (*new) { | ||
457 | struct proc_qtu_data *this = rb_entry(*new, | ||
458 | struct proc_qtu_data, | ||
459 | node); | ||
460 | parent = *new; | ||
461 | if (data->pid < this->pid) | ||
462 | new = &((*new)->rb_left); | ||
463 | else if (data->pid > this->pid) | ||
464 | new = &((*new)->rb_right); | ||
465 | else | ||
466 | BUG(); | ||
467 | } | ||
468 | |||
469 | /* Add new node and rebalance tree. */ | ||
470 | rb_link_node(&data->node, parent, new); | ||
471 | rb_insert_color(&data->node, root); | ||
472 | } | ||
473 | |||
474 | static void uid_tag_data_tree_insert(struct uid_tag_data *data, | ||
475 | struct rb_root *root) | ||
476 | { | ||
477 | struct rb_node **new = &(root->rb_node), *parent = NULL; | ||
478 | |||
479 | /* Figure out where to put new node */ | ||
480 | while (*new) { | ||
481 | struct uid_tag_data *this = rb_entry(*new, | ||
482 | struct uid_tag_data, | ||
483 | node); | ||
484 | parent = *new; | ||
485 | if (data->uid < this->uid) | ||
486 | new = &((*new)->rb_left); | ||
487 | else if (data->uid > this->uid) | ||
488 | new = &((*new)->rb_right); | ||
489 | else | ||
490 | BUG(); | ||
491 | } | ||
492 | |||
493 | /* Add new node and rebalance tree. */ | ||
494 | rb_link_node(&data->node, parent, new); | ||
495 | rb_insert_color(&data->node, root); | ||
496 | } | ||
497 | |||
498 | static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root, | ||
499 | uid_t uid) | ||
500 | { | ||
501 | struct rb_node *node = root->rb_node; | ||
502 | |||
503 | while (node) { | ||
504 | struct uid_tag_data *data = rb_entry(node, | ||
505 | struct uid_tag_data, | ||
506 | node); | ||
507 | if (uid < data->uid) | ||
508 | node = node->rb_left; | ||
509 | else if (uid > data->uid) | ||
510 | node = node->rb_right; | ||
511 | else | ||
512 | return data; | ||
513 | } | ||
514 | return NULL; | ||
515 | } | ||
516 | |||
517 | /* | ||
518 | * Allocates a new uid_tag_data struct if needed. | ||
519 | * Returns a pointer to the found or allocated uid_tag_data. | ||
520 | * Returns a PTR_ERR on failures, and lock is not held. | ||
521 | * If found is not NULL: | ||
522 | * sets *found to true if not allocated. | ||
523 | * sets *found to false if allocated. | ||
524 | */ | ||
525 | struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res) | ||
526 | { | ||
527 | struct uid_tag_data *utd_entry; | ||
528 | |||
529 | /* Look for top level uid_tag_data for the UID */ | ||
530 | utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid); | ||
531 | DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry); | ||
532 | |||
533 | if (found_res) | ||
534 | *found_res = (utd_entry != NULL); | ||
535 | if (utd_entry) | ||
536 | return utd_entry; | ||
537 | |||
538 | utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC); | ||
539 | if (!utd_entry) { | ||
540 | pr_err("qtaguid: get_uid_data(%u): " | ||
541 | "tag data alloc failed\n", uid); | ||
542 | return ERR_PTR(-ENOMEM); | ||
543 | } | ||
544 | |||
545 | utd_entry->uid = uid; | ||
546 | utd_entry->tag_ref_tree = RB_ROOT; | ||
547 | uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree); | ||
548 | DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry); | ||
549 | return utd_entry; | ||
550 | } | ||
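/*
 * Caller-side sketch (hedged): found_res lets a caller distinguish a
 * pre-existing entry from a fresh allocation.  Note that get_uid_data()
 * never returns NULL, so IS_ERR() is the only failure check needed:
 *
 *	bool existed;
 *	struct uid_tag_data *utd = get_uid_data(uid, &existed);
 *
 *	if (IS_ERR(utd))
 *		return PTR_ERR(utd);
 *	if (!existed)
 *		pr_debug("first reference to uid %u\n", uid);
 */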
551 | |||
552 | /* Never returns NULL. Either PTR_ERR or a valid ptr. */ | ||
553 | static struct tag_ref *new_tag_ref(tag_t new_tag, | ||
554 | struct uid_tag_data *utd_entry) | ||
555 | { | ||
556 | struct tag_ref *tr_entry; | ||
557 | int res; | ||
558 | |||
559 | if (utd_entry->num_active_tags + 1 > max_sock_tags) { | ||
560 | pr_info("qtaguid: new_tag_ref(0x%llx): " | ||
561 | "tag ref alloc quota exceeded. max=%d\n", | ||
562 | new_tag, max_sock_tags); | ||
563 | res = -EMFILE; | ||
564 | goto err_res; | ||
565 | |||
566 | } | ||
567 | |||
568 | tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC); | ||
569 | if (!tr_entry) { | ||
570 | pr_err("qtaguid: new_tag_ref(0x%llx): " | ||
571 | "tag ref alloc failed\n", | ||
572 | new_tag); | ||
573 | res = -ENOMEM; | ||
574 | goto err_res; | ||
575 | } | ||
576 | tr_entry->tn.tag = new_tag; | ||
577 | /* tr_entry->num_sock_tags handled by caller */ | ||
578 | utd_entry->num_active_tags++; | ||
579 | tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree); | ||
580 | DR_DEBUG("qtaguid: new_tag_ref(0x%llx): " | ||
581 | " inserted new tag ref %p\n", | ||
582 | new_tag, tr_entry); | ||
583 | return tr_entry; | ||
584 | |||
585 | err_res: | ||
586 | return ERR_PTR(res); | ||
587 | } | ||
588 | |||
589 | static struct tag_ref *lookup_tag_ref(tag_t full_tag, | ||
590 | struct uid_tag_data **utd_res) | ||
591 | { | ||
592 | struct uid_tag_data *utd_entry; | ||
593 | struct tag_ref *tr_entry; | ||
594 | bool found_utd; | ||
595 | uid_t uid = get_uid_from_tag(full_tag); | ||
596 | |||
597 | DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n", | ||
598 | full_tag, uid); | ||
599 | |||
600 | utd_entry = get_uid_data(uid, &found_utd); | ||
601 | if (IS_ERR_OR_NULL(utd_entry)) { | ||
602 | if (utd_res) | ||
603 | *utd_res = utd_entry; | ||
604 | return NULL; | ||
605 | } | ||
606 | |||
607 | tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag); | ||
608 | if (utd_res) | ||
609 | *utd_res = utd_entry; | ||
610 | DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n", | ||
611 | full_tag, utd_entry, tr_entry); | ||
612 | return tr_entry; | ||
613 | } | ||
614 | |||
615 | /* Never returns NULL. Either PTR_ERR or a valid ptr. */ | ||
616 | static struct tag_ref *get_tag_ref(tag_t full_tag, | ||
617 | struct uid_tag_data **utd_res) | ||
618 | { | ||
619 | struct uid_tag_data *utd_entry; | ||
620 | struct tag_ref *tr_entry; | ||
621 | |||
622 | DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n", | ||
623 | full_tag); | ||
624 | spin_lock_bh(&uid_tag_data_tree_lock); | ||
625 | tr_entry = lookup_tag_ref(full_tag, &utd_entry); | ||
626 | BUG_ON(IS_ERR_OR_NULL(utd_entry)); | ||
627 | if (!tr_entry) | ||
628 | tr_entry = new_tag_ref(full_tag, utd_entry); | ||
629 | |||
630 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
631 | if (utd_res) | ||
632 | *utd_res = utd_entry; | ||
633 | DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n", | ||
634 | full_tag, utd_entry, tr_entry); | ||
635 | return tr_entry; | ||
636 | } | ||
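/*
 * Locking sketch (as read from the code above): lookup_tag_ref() and
 * new_tag_ref() run with uid_tag_data_tree_lock held by their caller,
 * while get_tag_ref() takes and releases that lock itself, making the
 * lookup-or-create atomic.  External code therefore uses the latter,
 * and per the "handled by caller" note in new_tag_ref() it is the
 * caller that bumps the refcount:
 *
 *	tr_entry = get_tag_ref(full_tag, NULL);
 *	if (IS_ERR(tr_entry))
 *		return PTR_ERR(tr_entry);
 *	tr_entry->num_sock_tags++;
 */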
637 | |||
638 | /* Checks and maybe frees the UID Tag Data entry */ | ||
639 | static void put_utd_entry(struct uid_tag_data *utd_entry) | ||
640 | { | ||
641 | /* Are we done with the UID tag data entry? */ | ||
642 | if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) && | ||
643 | !utd_entry->num_pqd) { | ||
644 | DR_DEBUG("qtaguid: %s(): " | ||
645 | "erase utd_entry=%p uid=%u " | ||
646 | "by pid=%u tgid=%u uid=%u\n", __func__, | ||
647 | utd_entry, utd_entry->uid, | ||
648 | current->pid, current->tgid, current_fsuid()); | ||
649 | BUG_ON(utd_entry->num_active_tags); | ||
650 | rb_erase(&utd_entry->node, &uid_tag_data_tree); | ||
651 | kfree(utd_entry); | ||
652 | } else { | ||
653 | DR_DEBUG("qtaguid: %s(): " | ||
654 | "utd_entry=%p still has %d tags %d proc_qtu_data\n", | ||
655 | __func__, utd_entry, utd_entry->num_active_tags, | ||
656 | utd_entry->num_pqd); | ||
657 | BUG_ON(!(utd_entry->num_active_tags || | ||
658 | utd_entry->num_pqd)); | ||
659 | } | ||
660 | } | ||
661 | |||
662 | /* | ||
663 | * If no sock_tags are using this tag_ref, | ||
664 | * decrements refcount of utd_entry, removes tr_entry | ||
665 | * from utd_entry->tag_ref_tree and frees. | ||
666 | */ | ||
667 | static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry, | ||
668 | struct uid_tag_data *utd_entry) | ||
669 | { | ||
670 | DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__, | ||
671 | tr_entry, tr_entry->tn.tag, | ||
672 | get_uid_from_tag(tr_entry->tn.tag)); | ||
673 | if (!tr_entry->num_sock_tags) { | ||
674 | BUG_ON(!utd_entry->num_active_tags); | ||
675 | utd_entry->num_active_tags--; | ||
676 | rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree); | ||
677 | DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry); | ||
678 | kfree(tr_entry); | ||
679 | } | ||
680 | } | ||
681 | |||
682 | static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry) | ||
683 | { | ||
684 | struct rb_node *node; | ||
685 | struct tag_ref *tr_entry; | ||
686 | tag_t acct_tag; | ||
687 | |||
688 | DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__, | ||
689 | full_tag, get_uid_from_tag(full_tag)); | ||
690 | acct_tag = get_atag_from_tag(full_tag); | ||
691 | node = rb_first(&utd_entry->tag_ref_tree); | ||
692 | while (node) { | ||
693 | tr_entry = rb_entry(node, struct tag_ref, tn.node); | ||
694 | node = rb_next(node); | ||
695 | if (!acct_tag || tr_entry->tn.tag == full_tag) | ||
696 | free_tag_ref_from_utd_entry(tr_entry, utd_entry); | ||
697 | } | ||
698 | } | ||
699 | |||
700 | static int read_proc_u64(char *page, char **start, off_t off, | ||
701 | int count, int *eof, void *data) | ||
702 | { | ||
703 | int len; | ||
704 | uint64_t value; | ||
705 | char *p = page; | ||
706 | uint64_t *iface_entry = data; | ||
707 | |||
708 | if (!data) | ||
709 | return 0; | ||
710 | |||
711 | value = *iface_entry; | ||
712 | p += sprintf(p, "%llu\n", value); | ||
713 | len = (p - page) - off; | ||
714 | *eof = (len <= count) ? 1 : 0; | ||
715 | *start = page + off; | ||
716 | return len; | ||
717 | } | ||
718 | |||
719 | static int read_proc_bool(char *page, char **start, off_t off, | ||
720 | int count, int *eof, void *data) | ||
721 | { | ||
722 | int len; | ||
723 | bool value; | ||
724 | char *p = page; | ||
725 | bool *bool_entry = data; | ||
726 | |||
727 | if (!data) | ||
728 | return 0; | ||
729 | |||
730 | value = *bool_entry; | ||
731 | p += sprintf(p, "%u\n", value); | ||
732 | len = (p - page) - off; | ||
733 | *eof = (len <= count) ? 1 : 0; | ||
734 | *start = page + off; | ||
735 | return len; | ||
736 | } | ||
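/*
 * Userspace-side sketch (hedged: the exact /proc layout comes from
 * iface_stat_procdirname and the module's parent proc dir, assumed
 * here to be /proc/net/xt_qtaguid/iface_stat/<dev>/).  Each
 * per-interface file prints a single number, so reading one is just:
 *
 *	#include <stdio.h>
 *
 *	static unsigned long long read_iface_u64(const char *dev,
 *						 const char *file)
 *	{
 *		char path[256];
 *		unsigned long long v = 0;
 *		FILE *f;
 *
 *		snprintf(path, sizeof(path),
 *			 "/proc/net/xt_qtaguid/iface_stat/%s/%s",
 *			 dev, file);
 *		f = fopen(path, "r");
 *		if (f) {
 *			if (fscanf(f, "%llu", &v) != 1)
 *				v = 0;
 *			fclose(f);
 *		}
 *		return v;
 *	}
 */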
737 | |||
738 | static int get_active_counter_set(tag_t tag) | ||
739 | { | ||
740 | int active_set = 0; | ||
741 | struct tag_counter_set *tcs; | ||
742 | |||
743 | MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)" | ||
744 | " (uid=%u)\n", | ||
745 | tag, get_uid_from_tag(tag)); | ||
746 | /* For now we only handle UID tags for active sets */ | ||
747 | tag = get_utag_from_tag(tag); | ||
748 | spin_lock_bh(&tag_counter_set_list_lock); | ||
749 | tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); | ||
750 | if (tcs) | ||
751 | active_set = tcs->active_set; | ||
752 | spin_unlock_bh(&tag_counter_set_list_lock); | ||
753 | return active_set; | ||
754 | } | ||
755 | |||
756 | /* | ||
757 | * Find the entry for tracking the specified interface. | ||
758 | * Caller must hold iface_stat_list_lock | ||
759 | */ | ||
760 | static struct iface_stat *get_iface_entry(const char *ifname) | ||
761 | { | ||
762 | struct iface_stat *iface_entry; | ||
763 | |||
764 | /* Reject lookups without a device name */ | ||
765 | if (ifname == NULL) { | ||
766 | pr_info("qtaguid: iface_stat: get() NULL device name\n"); | ||
767 | return NULL; | ||
768 | } | ||
769 | |||
770 | /* Iterate over interfaces */ | ||
771 | list_for_each_entry(iface_entry, &iface_stat_list, list) { | ||
772 | if (!strcmp(ifname, iface_entry->ifname)) | ||
773 | goto done; | ||
774 | } | ||
775 | iface_entry = NULL; | ||
776 | done: | ||
777 | return iface_entry; | ||
778 | } | ||
779 | |||
780 | static int iface_stat_all_proc_read(char *page, char **num_items_returned, | ||
781 | off_t items_to_skip, int char_count, | ||
782 | int *eof, void *data) | ||
783 | { | ||
784 | char *outp = page; | ||
785 | int item_index = 0; | ||
786 | int len; | ||
787 | struct iface_stat *iface_entry; | ||
788 | struct rtnl_link_stats64 dev_stats, *stats; | ||
789 | struct rtnl_link_stats64 no_dev_stats = {0}; | ||
790 | |||
791 | if (unlikely(module_passive)) { | ||
792 | *eof = 1; | ||
793 | return 0; | ||
794 | } | ||
795 | |||
796 | CT_DEBUG("qtaguid:proc iface_stat_all " | ||
797 | "page=%p *num_items_returned=%p off=%ld " | ||
798 | "char_count=%d *eof=%d\n", page, *num_items_returned, | ||
799 | items_to_skip, char_count, *eof); | ||
800 | |||
801 | if (*eof) | ||
802 | return 0; | ||
803 | |||
804 | /* | ||
805 | * This lock will prevent iface_stat_update() from changing active, | ||
806 | * and in turn prevent an interface from unregistering itself. | ||
807 | */ | ||
808 | spin_lock_bh(&iface_stat_list_lock); | ||
809 | list_for_each_entry(iface_entry, &iface_stat_list, list) { | ||
810 | if (item_index++ < items_to_skip) | ||
811 | continue; | ||
812 | |||
813 | if (iface_entry->active) { | ||
814 | stats = dev_get_stats(iface_entry->net_dev, | ||
815 | &dev_stats); | ||
816 | } else { | ||
817 | stats = &no_dev_stats; | ||
818 | } | ||
819 | len = snprintf(outp, char_count, | ||
820 | "%s %d " | ||
821 | "%llu %llu %llu %llu " | ||
822 | "%llu %llu %llu %llu\n", | ||
823 | iface_entry->ifname, | ||
824 | iface_entry->active, | ||
825 | iface_entry->totals[IFS_RX].bytes, | ||
826 | iface_entry->totals[IFS_RX].packets, | ||
827 | iface_entry->totals[IFS_TX].bytes, | ||
828 | iface_entry->totals[IFS_TX].packets, | ||
829 | stats->rx_bytes, stats->rx_packets, | ||
830 | stats->tx_bytes, stats->tx_packets); | ||
831 | if (len >= char_count) { | ||
832 | spin_unlock_bh(&iface_stat_list_lock); | ||
833 | *outp = '\0'; | ||
834 | return outp - page; | ||
835 | } | ||
836 | outp += len; | ||
837 | char_count -= len; | ||
838 | (*num_items_returned)++; | ||
839 | } | ||
840 | spin_unlock_bh(&iface_stat_list_lock); | ||
841 | |||
842 | *eof = 1; | ||
843 | return outp - page; | ||
844 | } | ||
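/*
 * Illustrative output line (hedged values) for the snprintf() format
 * above:
 *
 *	wlan0 1 123456 789 23456 512 1122334 900 445566 600
 *
 * i.e. ifname, active flag, the accumulated rx bytes/packets and
 * tx bytes/packets from totals[], then the live rx bytes/packets and
 * tx bytes/packets from dev_get_stats() (all zeros when inactive).
 */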
845 | |||
846 | static void iface_create_proc_worker(struct work_struct *work) | ||
847 | { | ||
848 | struct proc_dir_entry *proc_entry; | ||
849 | struct iface_stat_work *isw = container_of(work, struct iface_stat_work, | ||
850 | iface_work); | ||
851 | struct iface_stat *new_iface = isw->iface_entry; | ||
852 | |||
853 | /* iface_entries are not deleted, so safe to manipulate. */ | ||
854 | proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir); | ||
855 | if (IS_ERR_OR_NULL(proc_entry)) { | ||
856 | pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n"); | ||
857 | kfree(isw); | ||
858 | return; | ||
859 | } | ||
860 | |||
861 | new_iface->proc_ptr = proc_entry; | ||
862 | |||
863 | create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry, | ||
864 | read_proc_u64, &new_iface->totals[IFS_TX].bytes); | ||
865 | create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry, | ||
866 | read_proc_u64, &new_iface->totals[IFS_RX].bytes); | ||
867 | create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry, | ||
868 | read_proc_u64, &new_iface->totals[IFS_TX].packets); | ||
869 | create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry, | ||
870 | read_proc_u64, &new_iface->totals[IFS_RX].packets); | ||
871 | create_proc_read_entry("active", proc_iface_perms, proc_entry, | ||
872 | read_proc_bool, &new_iface->active); | ||
873 | |||
874 | IF_DEBUG("qtaguid: iface_stat: create_proc(): done " | ||
875 | "entry=%p dev=%s\n", new_iface, new_iface->ifname); | ||
876 | kfree(isw); | ||
877 | } | ||
878 | |||
879 | /* | ||
880 | * Will set the entry's active state, and | ||
881 | * update the net_dev accordingly also. | ||
882 | */ | ||
883 | static void _iface_stat_set_active(struct iface_stat *entry, | ||
884 | struct net_device *net_dev, | ||
885 | bool activate) | ||
886 | { | ||
887 | if (activate) { | ||
888 | entry->net_dev = net_dev; | ||
889 | entry->active = true; | ||
890 | IF_DEBUG("qtaguid: %s(%s): " | ||
891 | "enable tracking. rfcnt=%d\n", __func__, | ||
892 | entry->ifname, | ||
893 | percpu_read(*net_dev->pcpu_refcnt)); | ||
894 | } else { | ||
895 | entry->active = false; | ||
896 | entry->net_dev = NULL; | ||
897 | IF_DEBUG("qtaguid: %s(%s): " | ||
898 | "disable tracking. rfcnt=%d\n", __func__, | ||
899 | entry->ifname, | ||
900 | percpu_read(*net_dev->pcpu_refcnt)); | ||
901 | |||
902 | } | ||
903 | } | ||
904 | |||
905 | /* Caller must hold iface_stat_list_lock */ | ||
906 | static struct iface_stat *iface_alloc(struct net_device *net_dev) | ||
907 | { | ||
908 | struct iface_stat *new_iface; | ||
909 | struct iface_stat_work *isw; | ||
910 | |||
911 | new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC); | ||
912 | if (new_iface == NULL) { | ||
913 | pr_err("qtaguid: iface_stat: create(%s): " | ||
914 | "iface_stat alloc failed\n", net_dev->name); | ||
915 | return NULL; | ||
916 | } | ||
917 | new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC); | ||
918 | if (new_iface->ifname == NULL) { | ||
919 | pr_err("qtaguid: iface_stat: create(%s): " | ||
920 | "ifname alloc failed\n", net_dev->name); | ||
921 | kfree(new_iface); | ||
922 | return NULL; | ||
923 | } | ||
924 | spin_lock_init(&new_iface->tag_stat_list_lock); | ||
925 | new_iface->tag_stat_tree = RB_ROOT; | ||
926 | _iface_stat_set_active(new_iface, net_dev, true); | ||
927 | |||
928 | /* | ||
929 | * ipv6 notifier chains are atomic :( | ||
930 | * No create_proc_read_entry() for you! | ||
931 | */ | ||
932 | isw = kmalloc(sizeof(*isw), GFP_ATOMIC); | ||
933 | if (!isw) { | ||
934 | pr_err("qtaguid: iface_stat: create(%s): " | ||
935 | "work alloc failed\n", new_iface->ifname); | ||
936 | _iface_stat_set_active(new_iface, net_dev, false); | ||
937 | kfree(new_iface->ifname); | ||
938 | kfree(new_iface); | ||
939 | return NULL; | ||
940 | } | ||
941 | isw->iface_entry = new_iface; | ||
942 | INIT_WORK(&isw->iface_work, iface_create_proc_worker); | ||
943 | schedule_work(&isw->iface_work); | ||
944 | list_add(&new_iface->list, &iface_stat_list); | ||
945 | return new_iface; | ||
946 | } | ||
947 | |||
948 | static void iface_check_stats_reset_and_adjust(struct net_device *net_dev, | ||
949 | struct iface_stat *iface) | ||
950 | { | ||
951 | struct rtnl_link_stats64 dev_stats, *stats; | ||
952 | bool stats_rewound; | ||
953 | |||
954 | stats = dev_get_stats(net_dev, &dev_stats); | ||
955 | /* Detect whether the device's stats went backwards (counter reset) */ | ||
956 | stats_rewound = | ||
957 | (stats->rx_bytes < iface->last_known[IFS_RX].bytes) | ||
958 | || (stats->tx_bytes < iface->last_known[IFS_TX].bytes); | ||
959 | |||
960 | IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p " | ||
961 | "bytes rx/tx=%llu/%llu " | ||
962 | "active=%d last_known=%d " | ||
963 | "stats_rewound=%d\n", __func__, | ||
964 | net_dev ? net_dev->name : "?", | ||
965 | iface, net_dev, | ||
966 | stats->rx_bytes, stats->tx_bytes, | ||
967 | iface->active, iface->last_known_valid, stats_rewound); | ||
968 | |||
969 | if (iface->active && iface->last_known_valid && stats_rewound) { | ||
970 | pr_warn_once("qtaguid: iface_stat: %s(%s): " | ||
971 | "iface reset its stats unexpectedly\n", __func__, | ||
972 | net_dev->name); | ||
973 | |||
974 | iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes; | ||
975 | iface->totals[IFS_TX].packets += | ||
976 | iface->last_known[IFS_TX].packets; | ||
977 | iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes; | ||
978 | iface->totals[IFS_RX].packets += | ||
979 | iface->last_known[IFS_RX].packets; | ||
980 | iface->last_known_valid = false; | ||
981 | IF_DEBUG("qtaguid: %s(%s): iface=%p " | ||
982 | "used last known bytes rx/tx=%llu/%llu\n", __func__, | ||
983 | iface->ifname, iface, iface->last_known[IFS_RX].bytes, | ||
984 | iface->last_known[IFS_TX].bytes); | ||
985 | } | ||
986 | } | ||
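/*
 * Numeric example of the rewind handling above: if last_known TX was
 * 1000 bytes and the driver now reports 200, the stashed 1000 bytes
 * are folded into totals[IFS_TX] so no history is lost,
 * last_known_valid is cleared, and the device's fresh count (200)
 * simply accumulates from its new baseline on the next update.
 */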
987 | |||
988 | /* | ||
989 | * Create a new entry for tracking the specified interface. | ||
990 | * Do nothing if the entry already exists. | ||
991 | * Called when an interface is configured with a valid IP address. | ||
992 | */ | ||
993 | static void iface_stat_create(struct net_device *net_dev, | ||
994 | struct in_ifaddr *ifa) | ||
995 | { | ||
996 | struct in_device *in_dev = NULL; | ||
997 | const char *ifname; | ||
998 | struct iface_stat *entry; | ||
999 | __be32 ipaddr = 0; | ||
1000 | struct iface_stat *new_iface; | ||
1001 | |||
1002 | IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n", | ||
1003 | net_dev ? net_dev->name : "?", | ||
1004 | ifa, net_dev); | ||
1005 | if (!net_dev) { | ||
1006 | pr_err("qtaguid: iface_stat: create(): no net dev\n"); | ||
1007 | return; | ||
1008 | } | ||
1009 | |||
1010 | ifname = net_dev->name; | ||
1011 | if (!ifa) { | ||
1012 | in_dev = in_dev_get(net_dev); | ||
1013 | if (!in_dev) { | ||
1014 | pr_err("qtaguid: iface_stat: create(%s): no inet dev\n", | ||
1015 | ifname); | ||
1016 | return; | ||
1017 | } | ||
1018 | IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n", | ||
1019 | ifname, in_dev); | ||
1020 | for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { | ||
1021 | IF_DEBUG("qtaguid: iface_stat: create(%s): " | ||
1022 | "ifa=%p ifa_label=%s\n", | ||
1023 | ifname, ifa, | ||
1024 | ifa->ifa_label ? ifa->ifa_label : "(null)"); | ||
1025 | if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label)) | ||
1026 | break; | ||
1027 | } | ||
1028 | } | ||
1029 | |||
1030 | if (!ifa) { | ||
1031 | IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n", | ||
1032 | ifname); | ||
1033 | goto done_put; | ||
1034 | } | ||
1035 | ipaddr = ifa->ifa_local; | ||
1036 | |||
1037 | spin_lock_bh(&iface_stat_list_lock); | ||
1038 | entry = get_iface_entry(ifname); | ||
1039 | if (entry != NULL) { | ||
1040 | bool activate = !ipv4_is_loopback(ipaddr); | ||
1041 | IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n", | ||
1042 | ifname, entry); | ||
1043 | iface_check_stats_reset_and_adjust(net_dev, entry); | ||
1044 | _iface_stat_set_active(entry, net_dev, activate); | ||
1045 | IF_DEBUG("qtaguid: %s(%s): " | ||
1046 | "tracking now %d on ip=%pI4\n", __func__, | ||
1047 | entry->ifname, activate, &ipaddr); | ||
1048 | goto done_unlock_put; | ||
1049 | } else if (ipv4_is_loopback(ipaddr)) { | ||
1050 | IF_DEBUG("qtaguid: iface_stat: create(%s): " | ||
1051 | "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr); | ||
1052 | goto done_unlock_put; | ||
1053 | } | ||
1054 | |||
1055 | new_iface = iface_alloc(net_dev); | ||
1056 | IF_DEBUG("qtaguid: iface_stat: create(%s): done " | ||
1057 | "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr); | ||
1058 | done_unlock_put: | ||
1059 | spin_unlock_bh(&iface_stat_list_lock); | ||
1060 | done_put: | ||
1061 | if (in_dev) | ||
1062 | in_dev_put(in_dev); | ||
1063 | } | ||
1064 | |||
1065 | static void iface_stat_create_ipv6(struct net_device *net_dev, | ||
1066 | struct inet6_ifaddr *ifa) | ||
1067 | { | ||
1068 | struct in_device *in_dev; | ||
1069 | const char *ifname; | ||
1070 | struct iface_stat *entry; | ||
1071 | struct iface_stat *new_iface; | ||
1072 | int addr_type; | ||
1073 | |||
1074 | IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n", | ||
1075 | ifa, net_dev, net_dev ? net_dev->name : ""); | ||
1076 | if (!net_dev) { | ||
1077 | pr_err("qtaguid: iface_stat: create6(): no net dev!\n"); | ||
1078 | return; | ||
1079 | } | ||
1080 | ifname = net_dev->name; | ||
1081 | |||
1082 | in_dev = in_dev_get(net_dev); | ||
1083 | if (!in_dev) { | ||
1084 | pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n", | ||
1085 | ifname); | ||
1086 | return; | ||
1087 | } | ||
1088 | |||
1089 | IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n", | ||
1090 | ifname, in_dev); | ||
1091 | |||
1092 | if (!ifa) { | ||
1093 | IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n", | ||
1094 | ifname); | ||
1095 | goto done_put; | ||
1096 | } | ||
1097 | addr_type = ipv6_addr_type(&ifa->addr); | ||
1098 | |||
1099 | spin_lock_bh(&iface_stat_list_lock); | ||
1100 | entry = get_iface_entry(ifname); | ||
1101 | if (entry != NULL) { | ||
1102 | bool activate = !(addr_type & IPV6_ADDR_LOOPBACK); | ||
1103 | IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, | ||
1104 | ifname, entry); | ||
1105 | iface_check_stats_reset_and_adjust(net_dev, entry); | ||
1106 | _iface_stat_set_active(entry, net_dev, activate); | ||
1107 | IF_DEBUG("qtaguid: %s(%s): " | ||
1108 | "tracking now %d on ip=%pI6c\n", __func__, | ||
1109 | entry->ifname, activate, &ifa->addr); | ||
1110 | goto done_unlock_put; | ||
1111 | } else if (addr_type & IPV6_ADDR_LOOPBACK) { | ||
1112 | IF_DEBUG("qtaguid: %s(%s): " | ||
1113 | "ignore loopback dev. ip=%pI6c\n", __func__, | ||
1114 | ifname, &ifa->addr); | ||
1115 | goto done_unlock_put; | ||
1116 | } | ||
1117 | |||
1118 | new_iface = iface_alloc(net_dev); | ||
1119 | IF_DEBUG("qtaguid: iface_stat: create6(%s): done " | ||
1120 | "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr); | ||
1121 | |||
1122 | done_unlock_put: | ||
1123 | spin_unlock_bh(&iface_stat_list_lock); | ||
1124 | done_put: | ||
1125 | in_dev_put(in_dev); | ||
1126 | } | ||
1127 | |||
1128 | static struct sock_tag *get_sock_stat_nl(const struct sock *sk) | ||
1129 | { | ||
1130 | MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk); | ||
1131 | return sock_tag_tree_search(&sock_tag_tree, sk); | ||
1132 | } | ||
1133 | |||
1134 | static struct sock_tag *get_sock_stat(const struct sock *sk) | ||
1135 | { | ||
1136 | struct sock_tag *sock_tag_entry; | ||
1137 | MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk); | ||
1138 | if (!sk) | ||
1139 | return NULL; | ||
1140 | spin_lock_bh(&sock_tag_list_lock); | ||
1141 | sock_tag_entry = get_sock_stat_nl(sk); | ||
1142 | spin_unlock_bh(&sock_tag_list_lock); | ||
1143 | return sock_tag_entry; | ||
1144 | } | ||
1145 | |||
1146 | static void | ||
1147 | data_counters_update(struct data_counters *dc, int set, | ||
1148 | enum ifs_tx_rx direction, int proto, int bytes) | ||
1149 | { | ||
1150 | switch (proto) { | ||
1151 | case IPPROTO_TCP: | ||
1152 | dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1); | ||
1153 | break; | ||
1154 | case IPPROTO_UDP: | ||
1155 | dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1); | ||
1156 | break; | ||
1157 | case IPPROTO_IP: | ||
1158 | default: | ||
1159 | dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes, | ||
1160 | 1); | ||
1161 | break; | ||
1162 | } | ||
1163 | } | ||
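/*
 * Example: a 1500-byte TCP segment received while counter set 0 is
 * active would be accounted as
 *
 *	data_counters_update(dc, 0, IFS_RX, IPPROTO_TCP, 1500);
 *
 * bumping the IFS_TCP bucket by 1500 bytes and one packet; anything
 * that is neither TCP nor UDP lands in IFS_PROTO_OTHER.
 */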
1164 | |||
1165 | /* | ||
1166 | * Update stats for the specified interface. Do nothing if the entry | ||
1167 | * does not exist (when a device was never configured with an IP address). | ||
1168 | * Called when a device is being unregistered. | ||
1169 | */ | ||
1170 | static void iface_stat_update(struct net_device *net_dev, bool stash_only) | ||
1171 | { | ||
1172 | struct rtnl_link_stats64 dev_stats, *stats; | ||
1173 | struct iface_stat *entry; | ||
1174 | |||
1175 | stats = dev_get_stats(net_dev, &dev_stats); | ||
1176 | spin_lock_bh(&iface_stat_list_lock); | ||
1177 | entry = get_iface_entry(net_dev->name); | ||
1178 | if (entry == NULL) { | ||
1179 | IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n", | ||
1180 | net_dev->name); | ||
1181 | spin_unlock_bh(&iface_stat_list_lock); | ||
1182 | return; | ||
1183 | } | ||
1184 | |||
1185 | IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, | ||
1186 | net_dev->name, entry); | ||
1187 | if (!entry->active) { | ||
1188 | IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__, | ||
1189 | net_dev->name); | ||
1190 | spin_unlock_bh(&iface_stat_list_lock); | ||
1191 | return; | ||
1192 | } | ||
1193 | |||
1194 | if (stash_only) { | ||
1195 | entry->last_known[IFS_TX].bytes = stats->tx_bytes; | ||
1196 | entry->last_known[IFS_TX].packets = stats->tx_packets; | ||
1197 | entry->last_known[IFS_RX].bytes = stats->rx_bytes; | ||
1198 | entry->last_known[IFS_RX].packets = stats->rx_packets; | ||
1199 | entry->last_known_valid = true; | ||
1200 | IF_DEBUG("qtaguid: %s(%s): " | ||
1201 | "dev stats stashed rx/tx=%llu/%llu\n", __func__, | ||
1202 | net_dev->name, stats->rx_bytes, stats->tx_bytes); | ||
1203 | spin_unlock_bh(&iface_stat_list_lock); | ||
1204 | return; | ||
1205 | } | ||
1206 | entry->totals[IFS_TX].bytes += stats->tx_bytes; | ||
1207 | entry->totals[IFS_TX].packets += stats->tx_packets; | ||
1208 | entry->totals[IFS_RX].bytes += stats->rx_bytes; | ||
1209 | entry->totals[IFS_RX].packets += stats->rx_packets; | ||
1210 | /* We don't need the last_known[] anymore */ | ||
1211 | entry->last_known_valid = false; | ||
1212 | _iface_stat_set_active(entry, net_dev, false); | ||
1213 | IF_DEBUG("qtaguid: %s(%s): " | ||
1214 | "disable tracking. rx/tx=%llu/%llu\n", __func__, | ||
1215 | net_dev->name, stats->rx_bytes, stats->tx_bytes); | ||
1216 | spin_unlock_bh(&iface_stat_list_lock); | ||
1217 | } | ||
1218 | |||
1219 | static void tag_stat_update(struct tag_stat *tag_entry, | ||
1220 | enum ifs_tx_rx direction, int proto, int bytes) | ||
1221 | { | ||
1222 | int active_set; | ||
1223 | active_set = get_active_counter_set(tag_entry->tn.tag); | ||
1224 | MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d " | ||
1225 | "dir=%d proto=%d bytes=%d)\n", | ||
1226 | tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag), | ||
1227 | active_set, direction, proto, bytes); | ||
1228 | data_counters_update(&tag_entry->counters, active_set, direction, | ||
1229 | proto, bytes); | ||
1230 | if (tag_entry->parent_counters) | ||
1231 | data_counters_update(tag_entry->parent_counters, active_set, | ||
1232 | direction, proto, bytes); | ||
1233 | } | ||
1234 | |||
1235 | /* | ||
1236 | * Create a new entry for tracking the specified {acct_tag,uid_tag} within | ||
1237 | * the interface. | ||
1238 | * iface_entry->tag_stat_list_lock should be held. | ||
1239 | */ | ||
1240 | static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry, | ||
1241 | tag_t tag) | ||
1242 | { | ||
1243 | struct tag_stat *new_tag_stat_entry = NULL; | ||
1244 | IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx" | ||
1245 | " (uid=%u)\n", __func__, | ||
1246 | iface_entry, tag, get_uid_from_tag(tag)); | ||
1247 | new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC); | ||
1248 | if (!new_tag_stat_entry) { | ||
1249 | pr_err("qtaguid: iface_stat: tag stat alloc failed\n"); | ||
1250 | goto done; | ||
1251 | } | ||
1252 | new_tag_stat_entry->tn.tag = tag; | ||
1253 | tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree); | ||
1254 | done: | ||
1255 | return new_tag_stat_entry; | ||
1256 | } | ||
1257 | |||
1258 | static void if_tag_stat_update(const char *ifname, uid_t uid, | ||
1259 | const struct sock *sk, enum ifs_tx_rx direction, | ||
1260 | int proto, int bytes) | ||
1261 | { | ||
1262 | struct tag_stat *tag_stat_entry; | ||
1263 | tag_t tag, acct_tag; | ||
1264 | tag_t uid_tag; | ||
1265 | struct data_counters *uid_tag_counters; | ||
1266 | struct sock_tag *sock_tag_entry; | ||
1267 | struct iface_stat *iface_entry; | ||
1268 | struct tag_stat *new_tag_stat; | ||
1269 | MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s " | ||
1270 | "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n", | ||
1271 | ifname, uid, sk, direction, proto, bytes); | ||
1272 | |||
1273 | |||
1274 | iface_entry = get_iface_entry(ifname); | ||
1275 | if (!iface_entry) { | ||
1276 | pr_err("qtaguid: iface_stat: stat_update() %s not found\n", | ||
1277 | ifname); | ||
1278 | return; | ||
1279 | } | ||
1280 | /* It is ok to process data when an iface_entry is inactive */ | ||
1281 | |||
1282 | MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n", | ||
1283 | ifname, iface_entry); | ||
1284 | |||
1285 | /* | ||
1286 | * Look for a tagged sock. | ||
1287 | * It will have an acct_tag and an owning uid packed into its tag. | ||
1288 | */ | ||
1289 | sock_tag_entry = get_sock_stat(sk); | ||
1290 | if (sock_tag_entry) { | ||
1291 | tag = sock_tag_entry->tag; | ||
1292 | acct_tag = get_atag_from_tag(tag); | ||
1293 | uid_tag = get_utag_from_tag(tag); | ||
1294 | } else { | ||
1295 | acct_tag = make_atag_from_value(0); | ||
1296 | tag = combine_atag_with_uid(acct_tag, uid); | ||
1297 | uid_tag = make_tag_from_uid(uid); | ||
1298 | } | ||
1299 | MT_DEBUG("qtaguid: iface_stat: stat_update(): " | ||
1300 | " looking for tag=0x%llx (uid=%u) in ife=%p\n", | ||
1301 | tag, get_uid_from_tag(tag), iface_entry); | ||
1302 | /* Loop over tag list under this interface for {acct_tag,uid_tag} */ | ||
1303 | spin_lock_bh(&iface_entry->tag_stat_list_lock); | ||
1304 | |||
1305 | tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, | ||
1306 | tag); | ||
1307 | if (tag_stat_entry) { | ||
1308 | /* | ||
1309 | * Updating the {acct_tag, uid_tag} entry handles both stats: | ||
1310 | * {0, uid_tag} will also get updated. | ||
1311 | */ | ||
1312 | tag_stat_update(tag_stat_entry, direction, proto, bytes); | ||
1313 | spin_unlock_bh(&iface_entry->tag_stat_list_lock); | ||
1314 | return; | ||
1315 | } | ||
1316 | |||
1317 | /* Loop over tag list under this interface for {0,uid_tag} */ | ||
1318 | tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, | ||
1319 | uid_tag); | ||
1320 | if (!tag_stat_entry) { | ||
1321 | /* Here: the base uid_tag did not exist */ | ||
1322 | /* | ||
1323 | * No parent counters yet, so neither {0, uid_tag} stats | ||
1324 | * nor {acct_tag, uid_tag} stats exist. | ||
1325 | */ | ||
1326 | new_tag_stat = create_if_tag_stat(iface_entry, uid_tag); | ||
1327 | uid_tag_counters = &new_tag_stat->counters; | ||
1328 | } else { | ||
1329 | uid_tag_counters = &tag_stat_entry->counters; | ||
1330 | } | ||
1331 | |||
1332 | if (acct_tag) { | ||
1333 | new_tag_stat = create_if_tag_stat(iface_entry, tag); | ||
1334 | new_tag_stat->parent_counters = uid_tag_counters; | ||
1335 | } | ||
1336 | tag_stat_update(new_tag_stat, direction, proto, bytes); | ||
1337 | spin_unlock_bh(&iface_entry->tag_stat_list_lock); | ||
1338 | } | ||
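/*
 * Tag layout assumed by the lookups above (per the helper names from
 * xt_qtaguid_internal.h): a tag_t packs the accounting tag in its
 * upper 32 bits and the uid in its lower 32 bits, so
 *
 *	tag     = combine_atag_with_uid(acct_tag, uid);
 *	uid_tag = make_tag_from_uid(uid);	(accounting part zero)
 *	uid     = get_uid_from_tag(tag);
 *
 * which is why searching for uid_tag finds the {0, uid_tag} "parent"
 * entry whose counters also absorb, via parent_counters, every update
 * made to an {acct_tag, uid_tag} child.
 */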
1339 | |||
1340 | static int iface_netdev_event_handler(struct notifier_block *nb, | ||
1341 | unsigned long event, void *ptr) { | ||
1342 | struct net_device *dev = ptr; | ||
1343 | |||
1344 | if (unlikely(module_passive)) | ||
1345 | return NOTIFY_DONE; | ||
1346 | |||
1347 | IF_DEBUG("qtaguid: iface_stat: netdev_event(): " | ||
1348 | "ev=0x%lx/%s netdev=%p->name=%s\n", | ||
1349 | event, netdev_evt_str(event), dev, dev ? dev->name : ""); | ||
1350 | |||
1351 | switch (event) { | ||
1352 | case NETDEV_UP: | ||
1353 | iface_stat_create(dev, NULL); | ||
1354 | atomic64_inc(&qtu_events.iface_events); | ||
1355 | break; | ||
1356 | case NETDEV_DOWN: | ||
1357 | case NETDEV_UNREGISTER: | ||
1358 | iface_stat_update(dev, event == NETDEV_DOWN); | ||
1359 | atomic64_inc(&qtu_events.iface_events); | ||
1360 | break; | ||
1361 | } | ||
1362 | return NOTIFY_DONE; | ||
1363 | } | ||
1364 | |||
1365 | static int iface_inet6addr_event_handler(struct notifier_block *nb, | ||
1366 | unsigned long event, void *ptr) | ||
1367 | { | ||
1368 | struct inet6_ifaddr *ifa = ptr; | ||
1369 | struct net_device *dev; | ||
1370 | |||
1371 | if (unlikely(module_passive)) | ||
1372 | return NOTIFY_DONE; | ||
1373 | |||
1374 | IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): " | ||
1375 | "ev=0x%lx/%s ifa=%p\n", | ||
1376 | event, netdev_evt_str(event), ifa); | ||
1377 | |||
1378 | switch (event) { | ||
1379 | case NETDEV_UP: | ||
1380 | BUG_ON(!ifa || !ifa->idev); | ||
1381 | dev = (struct net_device *)ifa->idev->dev; | ||
1382 | iface_stat_create_ipv6(dev, ifa); | ||
1383 | atomic64_inc(&qtu_events.iface_events); | ||
1384 | break; | ||
1385 | case NETDEV_DOWN: | ||
1386 | case NETDEV_UNREGISTER: | ||
1387 | BUG_ON(!ifa || !ifa->idev); | ||
1388 | dev = (struct net_device *)ifa->idev->dev; | ||
1389 | iface_stat_update(dev, event == NETDEV_DOWN); | ||
1390 | atomic64_inc(&qtu_events.iface_events); | ||
1391 | break; | ||
1392 | } | ||
1393 | return NOTIFY_DONE; | ||
1394 | } | ||
1395 | |||
1396 | static int iface_inetaddr_event_handler(struct notifier_block *nb, | ||
1397 | unsigned long event, void *ptr) | ||
1398 | { | ||
1399 | struct in_ifaddr *ifa = ptr; | ||
1400 | struct net_device *dev; | ||
1401 | |||
1402 | if (unlikely(module_passive)) | ||
1403 | return NOTIFY_DONE; | ||
1404 | |||
1405 | IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): " | ||
1406 | "ev=0x%lx/%s ifa=%p\n", | ||
1407 | event, netdev_evt_str(event), ifa); | ||
1408 | |||
1409 | switch (event) { | ||
1410 | case NETDEV_UP: | ||
1411 | BUG_ON(!ifa || !ifa->ifa_dev); | ||
1412 | dev = ifa->ifa_dev->dev; | ||
1413 | iface_stat_create(dev, ifa); | ||
1414 | atomic64_inc(&qtu_events.iface_events); | ||
1415 | break; | ||
1416 | case NETDEV_DOWN: | ||
1417 | case NETDEV_UNREGISTER: | ||
1418 | BUG_ON(!ifa || !ifa->ifa_dev); | ||
1419 | dev = ifa->ifa_dev->dev; | ||
1420 | iface_stat_update(dev, event == NETDEV_DOWN); | ||
1421 | atomic64_inc(&qtu_events.iface_events); | ||
1422 | break; | ||
1423 | } | ||
1424 | return NOTIFY_DONE; | ||
1425 | } | ||
1426 | |||
1427 | static struct notifier_block iface_netdev_notifier_blk = { | ||
1428 | .notifier_call = iface_netdev_event_handler, | ||
1429 | }; | ||
1430 | |||
1431 | static struct notifier_block iface_inetaddr_notifier_blk = { | ||
1432 | .notifier_call = iface_inetaddr_event_handler, | ||
1433 | }; | ||
1434 | |||
1435 | static struct notifier_block iface_inet6addr_notifier_blk = { | ||
1436 | .notifier_call = iface_inet6addr_event_handler, | ||
1437 | }; | ||
1438 | |||
1439 | static int __init iface_stat_init(struct proc_dir_entry *parent_procdir) | ||
1440 | { | ||
1441 | int err; | ||
1442 | |||
1443 | iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir); | ||
1444 | if (!iface_stat_procdir) { | ||
1445 | pr_err("qtaguid: iface_stat: init failed to create proc entry\n"); | ||
1446 | err = -ENOMEM; | ||
1447 | goto err; | ||
1448 | } | ||
1449 | |||
1450 | iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename, | ||
1451 | proc_iface_perms, | ||
1452 | parent_procdir); | ||
1453 | if (!iface_stat_all_procfile) { | ||
1454 | pr_err("qtaguid: iface_stat: init " | ||
1455 | " failed to create stat_all proc entry\n"); | ||
1456 | err = -ENOMEM; | ||
1457 | goto err_zap_entry; | ||
1458 | } | ||
1459 | iface_stat_all_procfile->read_proc = iface_stat_all_proc_read; | ||
1460 | |||
1461 | |||
1462 | err = register_netdevice_notifier(&iface_netdev_notifier_blk); | ||
1463 | if (err) { | ||
1464 | pr_err("qtaguid: iface_stat: init " | ||
1465 | "failed to register dev event handler\n"); | ||
1466 | goto err_zap_all_stats_entry; | ||
1467 | } | ||
1468 | err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk); | ||
1469 | if (err) { | ||
1470 | pr_err("qtaguid: iface_stat: init " | ||
1471 | "failed to register ipv4 dev event handler\n"); | ||
1472 | goto err_unreg_nd; | ||
1473 | } | ||
1474 | |||
1475 | err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk); | ||
1476 | if (err) { | ||
1477 | pr_err("qtaguid: iface_stat: init " | ||
1478 | "failed to register ipv6 dev event handler\n"); | ||
1479 | goto err_unreg_ip4_addr; | ||
1480 | } | ||
1481 | return 0; | ||
1482 | |||
1483 | err_unreg_ip4_addr: | ||
1484 | unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk); | ||
1485 | err_unreg_nd: | ||
1486 | unregister_netdevice_notifier(&iface_netdev_notifier_blk); | ||
1487 | err_zap_all_stats_entry: | ||
1488 | remove_proc_entry(iface_stat_all_procfilename, parent_procdir); | ||
1489 | err_zap_entry: | ||
1490 | remove_proc_entry(iface_stat_procdirname, parent_procdir); | ||
1491 | err: | ||
1492 | return err; | ||
1493 | } | ||
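/*
 * The error ladder above is the usual kernel unwind pattern: each
 * successfully acquired resource gets its own label, and a later
 * failure jumps to the label that tears down everything acquired so
 * far, in reverse order of setup.
 */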
1494 | |||
1495 | static struct sock *qtaguid_find_sk(const struct sk_buff *skb, | ||
1496 | struct xt_action_param *par) | ||
1497 | { | ||
1498 | struct sock *sk; | ||
1499 | unsigned int hook_mask = (1 << par->hooknum); | ||
1500 | |||
1501 | MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb, | ||
1502 | par->hooknum, par->family); | ||
1503 | |||
1504 | /* | ||
1505 | * Let's not abuse the xt_socket_get*_sk(), or else it will | ||
1506 | * return garbage SKs. | ||
1507 | */ | ||
1508 | if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS)) | ||
1509 | return NULL; | ||
1510 | |||
1511 | switch (par->family) { | ||
1512 | case NFPROTO_IPV6: | ||
1513 | sk = xt_socket_get6_sk(skb, par); | ||
1514 | break; | ||
1515 | case NFPROTO_IPV4: | ||
1516 | sk = xt_socket_get4_sk(skb, par); | ||
1517 | break; | ||
1518 | default: | ||
1519 | return NULL; | ||
1520 | } | ||
1521 | |||
1522 | /* | ||
1523 | * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs. | ||
1524 | * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959 | ||
1525 | * Not fixed in 3.0-r3 :( | ||
1526 | */ | ||
1527 | if (sk) { | ||
1528 | MT_DEBUG("qtaguid: %p->sk_proto=%u " | ||
1529 | "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state); | ||
1530 | if (sk->sk_state == TCP_TIME_WAIT) { | ||
1531 | xt_socket_put_sk(sk); | ||
1532 | sk = NULL; | ||
1533 | } | ||
1534 | } | ||
1535 | return sk; | ||
1536 | } | ||
1537 | |||
1538 | static void account_for_uid(const struct sk_buff *skb, | ||
1539 | const struct sock *alternate_sk, uid_t uid, | ||
1540 | struct xt_action_param *par) | ||
1541 | { | ||
1542 | const struct net_device *el_dev; | ||
1543 | |||
1544 | if (!skb->dev) { | ||
1545 | MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); | ||
1546 | el_dev = par->in ? : par->out; | ||
1547 | } else { | ||
1548 | const struct net_device *other_dev; | ||
1549 | el_dev = skb->dev; | ||
1550 | other_dev = par->in ? : par->out; | ||
1551 | if (el_dev != other_dev) { | ||
1552 | MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " | ||
1553 | "par->(in/out)=%p %s\n", | ||
1554 | par->hooknum, el_dev, el_dev->name, other_dev, | ||
1555 | other_dev->name); | ||
1556 | } | ||
1557 | } | ||
1558 | |||
1559 | if (unlikely(!el_dev)) { | ||
1560 | pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum); | ||
1561 | } else if (unlikely(!el_dev->name)) { | ||
1562 | pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum); | ||
1563 | } else { | ||
1564 | MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n", | ||
1565 | par->hooknum, | ||
1566 | el_dev->name, | ||
1567 | el_dev->type); | ||
1568 | |||
1569 | if_tag_stat_update(el_dev->name, uid, | ||
1570 | skb->sk ? skb->sk : alternate_sk, | ||
1571 | par->in ? IFS_RX : IFS_TX, | ||
1572 | ip_hdr(skb)->protocol, skb->len); | ||
1573 | } | ||
1574 | } | ||
1575 | |||
1576 | static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) | ||
1577 | { | ||
1578 | const struct xt_qtaguid_match_info *info = par->matchinfo; | ||
1579 | const struct file *filp; | ||
1580 | bool got_sock = false; | ||
1581 | struct sock *sk; | ||
1582 | uid_t sock_uid; | ||
1583 | bool res; | ||
1584 | |||
1585 | if (unlikely(module_passive)) | ||
1586 | return (info->match ^ info->invert) == 0; | ||
1587 | |||
1588 | MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n", | ||
1589 | par->hooknum, skb, par->in, par->out, par->family); | ||
1590 | |||
1591 | atomic64_inc(&qtu_events.match_calls); | ||
1592 | if (skb == NULL) { | ||
1593 | res = (info->match ^ info->invert) == 0; | ||
1594 | goto ret_res; | ||
1595 | } | ||
1596 | |||
1597 | sk = skb->sk; | ||
1598 | |||
1599 | if (sk == NULL) { | ||
1600 | /* | ||
1601 | * A missing sk happens when packets are in-flight | ||
1602 | * and the matching socket is already closed and gone. | ||
1603 | */ | ||
1604 | sk = qtaguid_find_sk(skb, par); | ||
1605 | /* | ||
1606 | * If we got the socket from the find_sk(), we will need to put | ||
1607 | * it back, as nf_tproxy_get_sock_v4() got it. | ||
1608 | */ | ||
1609 | got_sock = sk; | ||
1610 | if (sk) | ||
1611 | atomic64_inc(&qtu_events.match_found_sk_in_ct); | ||
1612 | else | ||
1613 | atomic64_inc(&qtu_events.match_found_no_sk_in_ct); | ||
1614 | } else { | ||
1615 | atomic64_inc(&qtu_events.match_found_sk); | ||
1616 | } | ||
1617 | MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n", | ||
1618 | par->hooknum, sk, got_sock, ip_hdr(skb)->protocol); | ||
1619 | if (sk != NULL) { | ||
1620 | MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n", | ||
1621 | par->hooknum, sk, sk->sk_socket, | ||
1622 | sk->sk_socket ? sk->sk_socket->file : (void *)-1LL); | ||
1623 | filp = sk->sk_socket ? sk->sk_socket->file : NULL; | ||
1624 | MT_DEBUG("qtaguid[%d]: filp...uid=%u\n", | ||
1625 | par->hooknum, filp ? filp->f_cred->fsuid : -1); | ||
1626 | } | ||
1627 | |||
1628 | if (sk == NULL || sk->sk_socket == NULL) { | ||
1629 | /* | ||
1630 | * Here, the qtaguid_find_sk() using connection tracking | ||
1631 | * couldn't find the owner, so for now we just count them | ||
1632 | * against the system. | ||
1633 | */ | ||
1634 | /* | ||
1635 | * TODO: unhack how to force just accounting. | ||
1636 | * For now we only do iface stats when the uid-owner is not | ||
1637 | * requested. | ||
1638 | */ | ||
1639 | if (!(info->match & XT_QTAGUID_UID)) | ||
1640 | account_for_uid(skb, sk, 0, par); | ||
1641 | MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n", | ||
1642 | par->hooknum, | ||
1643 | sk ? sk->sk_socket : NULL); | ||
1644 | res = (info->match ^ info->invert) == 0; | ||
1645 | atomic64_inc(&qtu_events.match_no_sk); | ||
1646 | goto put_sock_ret_res; | ||
1647 | } else if (info->match & info->invert & XT_QTAGUID_SOCKET) { | ||
1648 | res = false; | ||
1649 | goto put_sock_ret_res; | ||
1650 | } | ||
1651 | filp = sk->sk_socket->file; | ||
1652 | if (filp == NULL) { | ||
1653 | MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum); | ||
1654 | account_for_uid(skb, sk, 0, par); | ||
1655 | res = ((info->match ^ info->invert) & | ||
1656 | (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0; | ||
1657 | atomic64_inc(&qtu_events.match_no_sk_file); | ||
1658 | goto put_sock_ret_res; | ||
1659 | } | ||
1660 | sock_uid = filp->f_cred->fsuid; | ||
1661 | /* | ||
1662 | * TODO: unhack how to force just accounting. | ||
1663 | * For now we only do iface stats when the uid-owner is not requested | ||
1664 | */ | ||
1665 | if (!(info->match & XT_QTAGUID_UID)) | ||
1666 | account_for_uid(skb, sk, sock_uid, par); | ||
1667 | |||
1668 | /* | ||
1669 | * The following two tests fail the match when: | ||
1670 | * id not in range AND no inverted condition requested | ||
1671 | * or id in range AND inverted condition requested | ||
1672 | * Thus (!a && b) || (a && !b) == a ^ b | ||
1673 | */ | ||
1674 | if (info->match & XT_QTAGUID_UID) | ||
1675 | if ((filp->f_cred->fsuid >= info->uid_min && | ||
1676 | filp->f_cred->fsuid <= info->uid_max) ^ | ||
1677 | !(info->invert & XT_QTAGUID_UID)) { | ||
1678 | MT_DEBUG("qtaguid[%d]: leaving uid not matching\n", | ||
1679 | par->hooknum); | ||
1680 | res = false; | ||
1681 | goto put_sock_ret_res; | ||
1682 | } | ||
1683 | if (info->match & XT_QTAGUID_GID) | ||
1684 | if ((filp->f_cred->fsgid >= info->gid_min && | ||
1685 | filp->f_cred->fsgid <= info->gid_max) ^ | ||
1686 | !(info->invert & XT_QTAGUID_GID)) { | ||
1687 | MT_DEBUG("qtaguid[%d]: leaving gid not matching\n", | ||
1688 | par->hooknum); | ||
1689 | res = false; | ||
1690 | goto put_sock_ret_res; | ||
1691 | } | ||
1692 | |||
1693 | MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum); | ||
1694 | res = true; | ||
1695 | |||
1696 | put_sock_ret_res: | ||
1697 | if (got_sock) | ||
1698 | xt_socket_put_sk(sk); | ||
1699 | ret_res: | ||
1700 | MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res); | ||
1701 | return res; | ||
1702 | } | ||
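/*
 * Worked example of the "a ^ b" tests above: for a uid range match
 * with no inversion, !(info->invert & XT_QTAGUID_UID) is 1.  An fsuid
 * inside [uid_min, uid_max] gives 1 ^ 1 == 0, so matching proceeds;
 * an fsuid outside the range gives 0 ^ 1 == 1 and the function leaves
 * with res = false.  With inversion requested the second operand
 * becomes 0 and both outcomes flip, exactly as the truth-table
 * comment states.
 */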
1703 | |||
1704 | #ifdef DDEBUG | ||
1705 | /* This function is not in xt_qtaguid_print.c because of locks visibility */ | ||
1706 | static void prdebug_full_state(int indent_level, const char *fmt, ...) | ||
1707 | { | ||
1708 | va_list args; | ||
1709 | char *fmt_buff; | ||
1710 | char *buff; | ||
1711 | |||
1712 | if (likely(!(qtaguid_debug_mask & DDEBUG_MASK))) | ||
1713 | return; | ||
1714 | |||
1715 | fmt_buff = kasprintf(GFP_ATOMIC, | ||
1716 | "qtaguid: %s(): %s {\n", __func__, fmt); | ||
1717 | BUG_ON(!fmt_buff); | ||
1718 | va_start(args, fmt); | ||
1719 | buff = kvasprintf(GFP_ATOMIC, | ||
1720 | fmt_buff, args); | ||
1721 | BUG_ON(!buff); | ||
1722 | pr_debug("%s", buff); | ||
1723 | kfree(fmt_buff); | ||
1724 | kfree(buff); | ||
1725 | va_end(args); | ||
1726 | |||
1727 | spin_lock_bh(&sock_tag_list_lock); | ||
1728 | prdebug_sock_tag_tree(indent_level, &sock_tag_tree); | ||
1729 | spin_unlock_bh(&sock_tag_list_lock); | ||
1730 | |||
1731 | spin_lock_bh(&sock_tag_list_lock); | ||
1732 | spin_lock_bh(&uid_tag_data_tree_lock); | ||
1733 | prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree); | ||
1734 | prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree); | ||
1735 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
1736 | spin_unlock_bh(&sock_tag_list_lock); | ||
1737 | |||
1738 | spin_lock_bh(&iface_stat_list_lock); | ||
1739 | prdebug_iface_stat_list(indent_level, &iface_stat_list); | ||
1740 | spin_unlock_bh(&iface_stat_list_lock); | ||
1741 | |||
1742 | pr_debug("qtaguid: %s(): }\n", __func__); | ||
1743 | } | ||
1744 | #else | ||
1745 | static void prdebug_full_state(int indent_level, const char *fmt, ...) {} | ||
1746 | #endif | ||
1747 | |||
1748 | /* | ||
1749 | * Procfs reader to get all active socket tags using style "1)" as described in | ||
1750 | * fs/proc/generic.c | ||
1751 | */ | ||
1752 | static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned, | ||
1753 | off_t items_to_skip, int char_count, int *eof, | ||
1754 | void *data) | ||
1755 | { | ||
1756 | char *outp = page; | ||
1757 | int len; | ||
1758 | uid_t uid; | ||
1759 | struct rb_node *node; | ||
1760 | struct sock_tag *sock_tag_entry; | ||
1761 | int item_index = 0; | ||
1762 | int indent_level = 0; | ||
1763 | long f_count; | ||
1764 | |||
1765 | if (unlikely(module_passive)) { | ||
1766 | *eof = 1; | ||
1767 | return 0; | ||
1768 | } | ||
1769 | |||
1770 | if (*eof) | ||
1771 | return 0; | ||
1772 | |||
1773 | CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n", | ||
1774 | page, items_to_skip, char_count, *eof); | ||
1775 | |||
1776 | spin_lock_bh(&sock_tag_list_lock); | ||
1777 | for (node = rb_first(&sock_tag_tree); | ||
1778 | node; | ||
1779 | node = rb_next(node)) { | ||
1780 | if (item_index++ < items_to_skip) | ||
1781 | continue; | ||
1782 | sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); | ||
1783 | uid = get_uid_from_tag(sock_tag_entry->tag); | ||
1784 | CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) " | ||
1785 | "pid=%u\n", | ||
1786 | sock_tag_entry->sk, | ||
1787 | sock_tag_entry->tag, | ||
1788 | uid, | ||
1789 | sock_tag_entry->pid | ||
1790 | ); | ||
1791 | f_count = atomic_long_read( | ||
1792 | &sock_tag_entry->socket->file->f_count); | ||
1793 | len = snprintf(outp, char_count, | ||
1794 | "sock=%p tag=0x%llx (uid=%u) pid=%u " | ||
1795 | "f_count=%lu\n", | ||
1796 | sock_tag_entry->sk, | ||
1797 | sock_tag_entry->tag, uid, | ||
1798 | sock_tag_entry->pid, f_count); | ||
1799 | if (len >= char_count) { | ||
1800 | spin_unlock_bh(&sock_tag_list_lock); | ||
1801 | *outp = '\0'; | ||
1802 | return outp - page; | ||
1803 | } | ||
1804 | outp += len; | ||
1805 | char_count -= len; | ||
1806 | (*num_items_returned)++; | ||
1807 | } | ||
1808 | spin_unlock_bh(&sock_tag_list_lock); | ||
1809 | |||
1810 | if (item_index++ >= items_to_skip) { | ||
1811 | len = snprintf(outp, char_count, | ||
1812 | "events: sockets_tagged=%llu " | ||
1813 | "sockets_untagged=%llu " | ||
1814 | "counter_set_changes=%llu " | ||
1815 | "delete_cmds=%llu " | ||
1816 | "iface_events=%llu " | ||
1817 | "match_calls=%llu " | ||
1818 | "match_found_sk=%llu " | ||
1819 | "match_found_sk_in_ct=%llu " | ||
1820 | "match_found_no_sk_in_ct=%llu " | ||
1821 | "match_no_sk=%llu " | ||
1822 | "match_no_sk_file=%llu\n", | ||
1823 | atomic64_read(&qtu_events.sockets_tagged), | ||
1824 | atomic64_read(&qtu_events.sockets_untagged), | ||
1825 | atomic64_read(&qtu_events.counter_set_changes), | ||
1826 | atomic64_read(&qtu_events.delete_cmds), | ||
1827 | atomic64_read(&qtu_events.iface_events), | ||
1828 | atomic64_read(&qtu_events.match_calls), | ||
1829 | atomic64_read(&qtu_events.match_found_sk), | ||
1830 | atomic64_read(&qtu_events.match_found_sk_in_ct), | ||
1831 | atomic64_read( | ||
1832 | &qtu_events.match_found_no_sk_in_ct), | ||
1833 | atomic64_read(&qtu_events.match_no_sk), | ||
1834 | atomic64_read(&qtu_events.match_no_sk_file)); | ||
1835 | if (len >= char_count) { | ||
1836 | *outp = '\0'; | ||
1837 | return outp - page; | ||
1838 | } | ||
1839 | outp += len; | ||
1840 | char_count -= len; | ||
1841 | (*num_items_returned)++; | ||
1842 | } | ||
1843 | |||
1844 | /* Count the following as part of the last item_index */ | ||
1845 | if (item_index > items_to_skip) { | ||
1846 | prdebug_full_state(indent_level, "proc ctrl"); | ||
1847 | } | ||
1848 | |||
1849 | *eof = 1; | ||
1850 | return outp - page; | ||
1851 | } | ||
1852 | |||
1853 | /* | ||
1854 | * Delete socket tags and stat tags associated with a given | ||
1855 | * accounting tag and uid. | ||
1856 | */ | ||
1857 | static int ctrl_cmd_delete(const char *input) | ||
1858 | { | ||
1859 | char cmd; | ||
1860 | uid_t uid; | ||
1861 | uid_t entry_uid; | ||
1862 | tag_t acct_tag; | ||
1863 | tag_t tag; | ||
1864 | int res, argc; | ||
1865 | struct iface_stat *iface_entry; | ||
1866 | struct rb_node *node; | ||
1867 | struct sock_tag *st_entry; | ||
1868 | struct rb_root st_to_free_tree = RB_ROOT; | ||
1869 | struct tag_stat *ts_entry; | ||
1870 | struct tag_counter_set *tcs_entry; | ||
1871 | struct tag_ref *tr_entry; | ||
1872 | struct uid_tag_data *utd_entry; | ||
1873 | |||
1874 | argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid); | ||
1875 | CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c " | ||
1876 | "user_tag=0x%llx uid=%u\n", input, argc, cmd, | ||
1877 | acct_tag, uid); | ||
1878 | if (argc < 2) { | ||
1879 | res = -EINVAL; | ||
1880 | goto err; | ||
1881 | } | ||
1882 | if (!valid_atag(acct_tag)) { | ||
1883 | pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input); | ||
1884 | res = -EINVAL; | ||
1885 | goto err; | ||
1886 | } | ||
1887 | if (argc < 3) { | ||
1888 | uid = current_fsuid(); | ||
1889 | } else if (!can_impersonate_uid(uid)) { | ||
1890 | pr_info("qtaguid: ctrl_delete(%s): " | ||
1891 | "insufficient priv from pid=%u tgid=%u uid=%u\n", | ||
1892 | input, current->pid, current->tgid, current_fsuid()); | ||
1893 | res = -EPERM; | ||
1894 | goto err; | ||
1895 | } | ||
1896 | |||
1897 | tag = combine_atag_with_uid(acct_tag, uid); | ||
1898 | CT_DEBUG("qtaguid: ctrl_delete(%s): " | ||
1899 | "looking for tag=0x%llx (uid=%u)\n", | ||
1900 | input, tag, uid); | ||
1901 | |||
1902 | /* Delete socket tags */ | ||
1903 | spin_lock_bh(&sock_tag_list_lock); | ||
1904 | node = rb_first(&sock_tag_tree); | ||
1905 | while (node) { | ||
1906 | st_entry = rb_entry(node, struct sock_tag, sock_node); | ||
1907 | entry_uid = get_uid_from_tag(st_entry->tag); | ||
1908 | node = rb_next(node); | ||
1909 | if (entry_uid != uid) | ||
1910 | continue; | ||
1911 | |||
1912 | CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n", | ||
1913 | input, st_entry->tag, entry_uid); | ||
1914 | |||
1915 | if (!acct_tag || st_entry->tag == tag) { | ||
1916 | rb_erase(&st_entry->sock_node, &sock_tag_tree); | ||
1917 | /* Can't sockfd_put() within spinlock, do it later. */ | ||
1918 | sock_tag_tree_insert(st_entry, &st_to_free_tree); | ||
1919 | tr_entry = lookup_tag_ref(st_entry->tag, NULL); | ||
1920 | BUG_ON(tr_entry->num_sock_tags <= 0); | ||
1921 | tr_entry->num_sock_tags--; | ||
1922 | /* | ||
1923 | * TODO: remove if, and start failing. | ||
1924 | * This is a hack to work around the fact that in some | ||
1925 | * places we have "if (IS_ERR_OR_NULL(pqd_entry))" | ||
1926 | * and are trying to work around apps | ||
1927 | * that didn't open the /dev/xt_qtaguid. | ||
1928 | */ | ||
1929 | if (st_entry->list.next && st_entry->list.prev) | ||
1930 | list_del(&st_entry->list); | ||
1931 | } | ||
1932 | } | ||
1933 | spin_unlock_bh(&sock_tag_list_lock); | ||
1934 | |||
1935 | sock_tag_tree_erase(&st_to_free_tree); | ||
1936 | |||
1937 | /* Delete tag counter-sets */ | ||
1938 | spin_lock_bh(&tag_counter_set_list_lock); | ||
1939 | /* Counter sets are only on the uid tag, not full tag */ | ||
1940 | tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag); | ||
1941 | if (tcs_entry) { | ||
1942 | CT_DEBUG("qtaguid: ctrl_delete(%s): " | ||
1943 | "erase tcs: tag=0x%llx (uid=%u) set=%d\n", | ||
1944 | input, | ||
1945 | tcs_entry->tn.tag, | ||
1946 | get_uid_from_tag(tcs_entry->tn.tag), | ||
1947 | tcs_entry->active_set); | ||
1948 | rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree); | ||
1949 | kfree(tcs_entry); | ||
1950 | } | ||
1951 | spin_unlock_bh(&tag_counter_set_list_lock); | ||
1952 | |||
1953 | /* | ||
1954 | * If acct_tag is 0, then all entries belonging to uid are | ||
1955 | * erased. | ||
1956 | */ | ||
1957 | spin_lock_bh(&iface_stat_list_lock); | ||
1958 | list_for_each_entry(iface_entry, &iface_stat_list, list) { | ||
1959 | spin_lock_bh(&iface_entry->tag_stat_list_lock); | ||
1960 | node = rb_first(&iface_entry->tag_stat_tree); | ||
1961 | while (node) { | ||
1962 | ts_entry = rb_entry(node, struct tag_stat, tn.node); | ||
1963 | entry_uid = get_uid_from_tag(ts_entry->tn.tag); | ||
1964 | node = rb_next(node); | ||
1965 | |||
1966 | CT_DEBUG("qtaguid: ctrl_delete(%s): " | ||
1967 | "ts tag=0x%llx (uid=%u)\n", | ||
1968 | input, ts_entry->tn.tag, entry_uid); | ||
1969 | |||
1970 | if (entry_uid != uid) | ||
1971 | continue; | ||
1972 | if (!acct_tag || ts_entry->tn.tag == tag) { | ||
1973 | CT_DEBUG("qtaguid: ctrl_delete(%s): " | ||
1974 | "erase ts: %s 0x%llx %u\n", | ||
1975 | input, iface_entry->ifname, | ||
1976 | get_atag_from_tag(ts_entry->tn.tag), | ||
1977 | entry_uid); | ||
1978 | rb_erase(&ts_entry->tn.node, | ||
1979 | &iface_entry->tag_stat_tree); | ||
1980 | kfree(ts_entry); | ||
1981 | } | ||
1982 | } | ||
1983 | spin_unlock_bh(&iface_entry->tag_stat_list_lock); | ||
1984 | } | ||
1985 | spin_unlock_bh(&iface_stat_list_lock); | ||
1986 | |||
1987 | /* Cleanup the uid_tag_data */ | ||
1988 | spin_lock_bh(&uid_tag_data_tree_lock); | ||
1989 | node = rb_first(&uid_tag_data_tree); | ||
1990 | while (node) { | ||
1991 | utd_entry = rb_entry(node, struct uid_tag_data, node); | ||
1992 | entry_uid = utd_entry->uid; | ||
1993 | node = rb_next(node); | ||
1994 | |||
1995 | CT_DEBUG("qtaguid: ctrl_delete(%s): " | ||
1996 | "utd uid=%u\n", | ||
1997 | input, entry_uid); | ||
1998 | |||
1999 | if (entry_uid != uid) | ||
2000 | continue; | ||
2001 | /* | ||
2002 | * Go over the tag_refs, and those that don't have | ||
2003 | * sock_tags using them are freed. | ||
2004 | */ | ||
2005 | put_tag_ref_tree(tag, utd_entry); | ||
2006 | put_utd_entry(utd_entry); | ||
2007 | } | ||
2008 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
2009 | |||
2010 | atomic64_inc(&qtu_events.delete_cmds); | ||
2011 | res = 0; | ||
2012 | |||
2013 | err: | ||
2014 | return res; | ||
2015 | } | ||
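/*
 * Ctrl-file sketch (hedged: the control file is assumed to live at
 * /proc/net/xt_qtaguid/ctrl).  Per the sscanf() format "%c %llu %u",
 * writing "d 0 10005" deletes every socket tag, counter set and
 * per-interface tag_stat belonging to uid 10005, while a non-zero
 * second argument restricts deletion to that single accounting tag.
 */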
2016 | |||
2017 | static int ctrl_cmd_counter_set(const char *input) | ||
2018 | { | ||
2019 | char cmd; | ||
2020 | uid_t uid = 0; | ||
2021 | tag_t tag; | ||
2022 | int res, argc; | ||
2023 | struct tag_counter_set *tcs; | ||
2024 | int counter_set; | ||
2025 | |||
2026 | argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid); | ||
2027 | CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c " | ||
2028 | "set=%d uid=%u\n", input, argc, cmd, | ||
2029 | counter_set, uid); | ||
2030 | if (argc != 3) { | ||
2031 | res = -EINVAL; | ||
2032 | goto err; | ||
2033 | } | ||
2034 | if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) { | ||
2035 | pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n", | ||
2036 | input); | ||
2037 | res = -EINVAL; | ||
2038 | goto err; | ||
2039 | } | ||
2040 | if (!can_manipulate_uids()) { | ||
2041 | pr_info("qtaguid: ctrl_counterset(%s): " | ||
2042 | "insufficient priv from pid=%u tgid=%u uid=%u\n", | ||
2043 | input, current->pid, current->tgid, current_fsuid()); | ||
2044 | res = -EPERM; | ||
2045 | goto err; | ||
2046 | } | ||
2047 | |||
2048 | tag = make_tag_from_uid(uid); | ||
2049 | spin_lock_bh(&tag_counter_set_list_lock); | ||
2050 | tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); | ||
2051 | if (!tcs) { | ||
2052 | tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC); | ||
2053 | if (!tcs) { | ||
2054 | spin_unlock_bh(&tag_counter_set_list_lock); | ||
2055 | pr_err("qtaguid: ctrl_counterset(%s): " | ||
2056 | "failed to alloc counter set\n", | ||
2057 | input); | ||
2058 | res = -ENOMEM; | ||
2059 | goto err; | ||
2060 | } | ||
2061 | tcs->tn.tag = tag; | ||
2062 | tag_counter_set_tree_insert(tcs, &tag_counter_set_tree); | ||
2063 | CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx " | ||
2064 | "(uid=%u) set=%d\n", | ||
2065 | input, tag, get_uid_from_tag(tag), counter_set); | ||
2066 | } | ||
2067 | tcs->active_set = counter_set; | ||
2068 | spin_unlock_bh(&tag_counter_set_list_lock); | ||
2069 | atomic64_inc(&qtu_events.counter_set_changes); | ||
2070 | res = 0; | ||
2071 | |||
2072 | err: | ||
2073 | return res; | ||
2074 | } | ||
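Per the sscanf() above, the command is the literal character 's' followed by a decimal set index (0 or 1, given IFS_MAX_COUNTER_SETS) and the uid, and the caller must pass can_manipulate_uids(). A one-line sketch, with the same hypothetical ctrl_fd as before:

    /* Hypothetical: bill uid 10005's future traffic to counter set 1. */
    dprintf(ctrl_fd, "s %d %u", 1, 10005u);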
2075 | |||
2076 | static int ctrl_cmd_tag(const char *input) | ||
2077 | { | ||
2078 | char cmd; | ||
2079 | int sock_fd = 0; | ||
2080 | uid_t uid = 0; | ||
2081 | tag_t acct_tag = make_atag_from_value(0); | ||
2082 | tag_t full_tag; | ||
2083 | struct socket *el_socket; | ||
2084 | int res, argc; | ||
2085 | struct sock_tag *sock_tag_entry; | ||
2086 | struct tag_ref *tag_ref_entry; | ||
2087 | struct uid_tag_data *uid_tag_data_entry; | ||
2088 | struct proc_qtu_data *pqd_entry; | ||
2089 | |||
2090 | /* Unassigned args will get defaulted later. */ | ||
2091 | argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid); | ||
2092 | CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d " | ||
2093 | "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd, | ||
2094 | acct_tag, uid); | ||
2095 | if (argc < 2) { | ||
2096 | res = -EINVAL; | ||
2097 | goto err; | ||
2098 | } | ||
2099 | el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ | ||
2100 | if (!el_socket) { | ||
2101 | pr_info("qtaguid: ctrl_tag(%s): failed to lookup" | ||
2102 | " sock_fd=%d err=%d\n", input, sock_fd, res); | ||
2103 | goto err; | ||
2104 | } | ||
2105 | CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n", | ||
2106 | input, atomic_long_read(&el_socket->file->f_count), | ||
2107 | el_socket->sk); | ||
2108 | if (argc < 3) { | ||
2109 | acct_tag = make_atag_from_value(0); | ||
2110 | } else if (!valid_atag(acct_tag)) { | ||
2111 | pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input); | ||
2112 | res = -EINVAL; | ||
2113 | goto err_put; | ||
2114 | } | ||
2115 | CT_DEBUG("qtaguid: ctrl_tag(%s): " | ||
2116 | "pid=%u tgid=%u uid=%u euid=%u fsuid=%u " | ||
2117 | "in_group=%d in_egroup=%d\n", | ||
2118 | input, current->pid, current->tgid, current_uid(), | ||
2119 | current_euid(), current_fsuid(), | ||
2120 | in_group_p(proc_ctrl_write_gid), | ||
2121 | in_egroup_p(proc_ctrl_write_gid)); | ||
2122 | if (argc < 4) { | ||
2123 | uid = current_fsuid(); | ||
2124 | } else if (!can_impersonate_uid(uid)) { | ||
2125 | pr_info("qtaguid: ctrl_tag(%s): " | ||
2126 | "insufficient priv from pid=%u tgid=%u uid=%u\n", | ||
2127 | input, current->pid, current->tgid, current_fsuid()); | ||
2128 | res = -EPERM; | ||
2129 | goto err_put; | ||
2130 | } | ||
2131 | full_tag = combine_atag_with_uid(acct_tag, uid); | ||
2132 | |||
2133 | spin_lock_bh(&sock_tag_list_lock); | ||
2134 | sock_tag_entry = get_sock_stat_nl(el_socket->sk); | ||
2135 | tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry); | ||
2136 | if (IS_ERR(tag_ref_entry)) { | ||
2137 | res = PTR_ERR(tag_ref_entry); | ||
2138 | spin_unlock_bh(&sock_tag_list_lock); | ||
2139 | goto err_put; | ||
2140 | } | ||
2141 | tag_ref_entry->num_sock_tags++; | ||
2142 | if (sock_tag_entry) { | ||
2143 | struct tag_ref *prev_tag_ref_entry; | ||
2144 | |||
2145 | CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p " | ||
2146 | "st@%p ...->f_count=%ld\n", | ||
2147 | input, el_socket->sk, sock_tag_entry, | ||
2148 | atomic_long_read(&el_socket->file->f_count)); | ||
2149 | /* | ||
2150 | * This is a re-tagging, so release the sock_fd that was | ||
2151 | * locked at the time of the 1st tagging. | ||
2152 | * There is still the ref from this call's sockfd_lookup() so | ||
2153 | * it can be done within the spinlock. | ||
2154 | */ | ||
2155 | sockfd_put(sock_tag_entry->socket); | ||
2156 | prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, | ||
2157 | &uid_tag_data_entry); | ||
2158 | BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry)); | ||
2159 | BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0); | ||
2160 | prev_tag_ref_entry->num_sock_tags--; | ||
2161 | sock_tag_entry->tag = full_tag; | ||
2162 | } else { | ||
2163 | CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n", | ||
2164 | input, el_socket->sk); | ||
2165 | sock_tag_entry = kzalloc(sizeof(*sock_tag_entry), | ||
2166 | GFP_ATOMIC); | ||
2167 | if (!sock_tag_entry) { | ||
2168 | pr_err("qtaguid: ctrl_tag(%s): " | ||
2169 | "socket tag alloc failed\n", | ||
2170 | input); | ||
2171 | spin_unlock_bh(&sock_tag_list_lock); | ||
2172 | res = -ENOMEM; | ||
2173 | goto err_tag_unref_put; | ||
2174 | } | ||
2175 | sock_tag_entry->sk = el_socket->sk; | ||
2176 | sock_tag_entry->socket = el_socket; | ||
2177 | sock_tag_entry->pid = current->tgid; | ||
2178 | sock_tag_entry->tag = combine_atag_with_uid(acct_tag, | ||
2179 | uid); | ||
2180 | spin_lock_bh(&uid_tag_data_tree_lock); | ||
2181 | pqd_entry = proc_qtu_data_tree_search( | ||
2182 | &proc_qtu_data_tree, current->tgid); | ||
2183 | /* | ||
2184 | * TODO: remove this if and start failing. | ||
2185 | * At first, we want to catch user-space code that is not | ||
2186 | * opening the /dev/xt_qtaguid. | ||
2187 | */ | ||
2188 | if (IS_ERR_OR_NULL(pqd_entry)) | ||
2189 | pr_warn_once( | ||
2190 | "qtaguid: %s(): " | ||
2191 | "User space forgot to open /dev/xt_qtaguid? " | ||
2192 | "pid=%u tgid=%u uid=%u\n", __func__, | ||
2193 | current->pid, current->tgid, | ||
2194 | current_fsuid()); | ||
2195 | else | ||
2196 | list_add(&sock_tag_entry->list, | ||
2197 | &pqd_entry->sock_tag_list); | ||
2198 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
2199 | |||
2200 | sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree); | ||
2201 | atomic64_inc(&qtu_events.sockets_tagged); | ||
2202 | } | ||
2203 | spin_unlock_bh(&sock_tag_list_lock); | ||
2204 | /* We keep the ref to the socket (file) until it is untagged */ | ||
2205 | CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n", | ||
2206 | input, sock_tag_entry, | ||
2207 | atomic_long_read(&el_socket->file->f_count)); | ||
2208 | return 0; | ||
2209 | |||
2210 | err_tag_unref_put: | ||
2211 | BUG_ON(tag_ref_entry->num_sock_tags <= 0); | ||
2212 | tag_ref_entry->num_sock_tags--; | ||
2213 | free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry); | ||
2214 | err_put: | ||
2215 | CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n", | ||
2216 | input, atomic_long_read(&el_socket->file->f_count) - 1); | ||
2217 | /* Release the sock_fd that was grabbed by sockfd_lookup(). */ | ||
2218 | sockfd_put(el_socket); | ||
2219 | return res; | ||
2220 | |||
2221 | err: | ||
2222 | CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input); | ||
2223 | return res; | ||
2224 | } | ||
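Per the sscanf() and the argc defaults above, the tag command accepts three arities; sketches with a hypothetical ctrl_fd and socket fd sk (the tag must be sent in decimal, with the low 32 bits clear so valid_atag() accepts it):

    /* Hypothetical fragments; ctrl_fd and sk are assumed open. */
    dprintf(ctrl_fd, "t %d", sk);                   /* acct_tag 0, own uid */
    dprintf(ctrl_fd, "t %d %llu", sk, 42ULL << 32); /* own uid */
    dprintf(ctrl_fd, "t %d %llu %u", sk, 42ULL << 32,
            10005u);                  /* another uid; needs privilege */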
2225 | |||
2226 | static int ctrl_cmd_untag(const char *input) | ||
2227 | { | ||
2228 | char cmd; | ||
2229 | int sock_fd = 0; | ||
2230 | struct socket *el_socket; | ||
2231 | int res, argc; | ||
2232 | struct sock_tag *sock_tag_entry; | ||
2233 | struct tag_ref *tag_ref_entry; | ||
2234 | struct uid_tag_data *utd_entry; | ||
2235 | struct proc_qtu_data *pqd_entry; | ||
2236 | |||
2237 | argc = sscanf(input, "%c %d", &cmd, &sock_fd); | ||
2238 | CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n", | ||
2239 | input, argc, cmd, sock_fd); | ||
2240 | if (argc < 2) { | ||
2241 | res = -EINVAL; | ||
2242 | goto err; | ||
2243 | } | ||
2244 | el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ | ||
2245 | if (!el_socket) { | ||
2246 | pr_info("qtaguid: ctrl_untag(%s): failed to lookup" | ||
2247 | " sock_fd=%d err=%d\n", input, sock_fd, res); | ||
2248 | goto err; | ||
2249 | } | ||
2250 | CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n", | ||
2251 | input, atomic_long_read(&el_socket->file->f_count), | ||
2252 | el_socket->sk); | ||
2253 | spin_lock_bh(&sock_tag_list_lock); | ||
2254 | sock_tag_entry = get_sock_stat_nl(el_socket->sk); | ||
2255 | if (!sock_tag_entry) { | ||
2256 | spin_unlock_bh(&sock_tag_list_lock); | ||
2257 | res = -EINVAL; | ||
2258 | goto err_put; | ||
2259 | } | ||
2260 | /* | ||
2261 | * The socket already belongs to the current process | ||
2262 | * so it can do whatever it wants to it. | ||
2263 | */ | ||
2264 | rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree); | ||
2265 | |||
2266 | tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry); | ||
2267 | BUG_ON(!tag_ref_entry); | ||
2268 | BUG_ON(tag_ref_entry->num_sock_tags <= 0); | ||
2269 | spin_lock_bh(&uid_tag_data_tree_lock); | ||
2270 | pqd_entry = proc_qtu_data_tree_search( | ||
2271 | &proc_qtu_data_tree, current->tgid); | ||
2272 | /* | ||
2273 | * TODO: remove this if and start failing. | ||
2274 | * At first, we want to catch user-space code that is not | ||
2275 | * opening the /dev/xt_qtaguid. | ||
2276 | */ | ||
2277 | if (IS_ERR_OR_NULL(pqd_entry)) | ||
2278 | pr_warn_once("qtaguid: %s(): " | ||
2279 | "User space forgot to open /dev/xt_qtaguid? " | ||
2280 | "pid=%u tgid=%u uid=%u\n", __func__, | ||
2281 | current->pid, current->tgid, current_fsuid()); | ||
2282 | else | ||
2283 | list_del(&sock_tag_entry->list); | ||
2284 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
2285 | /* | ||
2286 | * We don't free tag_ref from the utd_entry here, | ||
2287 | * only during a cmd_delete(). | ||
2288 | */ | ||
2289 | tag_ref_entry->num_sock_tags--; | ||
2290 | spin_unlock_bh(&sock_tag_list_lock); | ||
2291 | /* | ||
2292 | * Release the sock_fd that was grabbed at tag time, | ||
2293 | * and once more for the sockfd_lookup() here. | ||
2294 | */ | ||
2295 | sockfd_put(sock_tag_entry->socket); | ||
2296 | CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n", | ||
2297 | input, sock_tag_entry, | ||
2298 | atomic_long_read(&el_socket->file->f_count) - 1); | ||
2299 | sockfd_put(el_socket); | ||
2300 | |||
2301 | kfree(sock_tag_entry); | ||
2302 | atomic64_inc(&qtu_events.sockets_untagged); | ||
2303 | |||
2304 | return 0; | ||
2305 | |||
2306 | err_put: | ||
2307 | CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n", | ||
2308 | input, atomic_long_read(&el_socket->file->f_count) - 1); | ||
2309 | /* Release the sock_fd that was grabbed by sockfd_lookup(). */ | ||
2310 | sockfd_put(el_socket); | ||
2311 | return res; | ||
2312 | |||
2313 | err: | ||
2314 | CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input); | ||
2315 | return res; | ||
2316 | } | ||
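End to end, a minimal userspace sketch under stated assumptions: the ctrl file is reachable at /proc/net/xt_qtaguid/ctrl and the misc device at /dev/xt_qtaguid (names taken from the registration code below; the exact paths depend on the kernel's proc layout). Error handling is omitted.

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
            int sk = socket(AF_INET, SOCK_DGRAM, 0);
            /* Registering with the module lets it clean up our tags if we
             * die without untagging (see qtudev_release() below) and
             * avoids the pr_warn_once() above. */
            int qtu = open("/dev/xt_qtaguid", O_RDONLY);
            int ctrl = open("/proc/net/xt_qtaguid/ctrl", O_WRONLY);

            /* 't <sock_fd> <acct_tag>': tag value 42 sits in the upper
             * 32 bits; uid is omitted, so it defaults to our fsuid. */
            dprintf(ctrl, "t %d %llu", sk, 42ULL << 32);

            /* ... traffic on sk is now billed to the tag ... */

            dprintf(ctrl, "u %d", sk);      /* 'u <sock_fd>': untag */

            close(ctrl);
            close(qtu);
            close(sk);
            return 0;
    }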
2317 | |||
2318 | static int qtaguid_ctrl_parse(const char *input, int count) | ||
2319 | { | ||
2320 | char cmd; | ||
2321 | int res; | ||
2322 | |||
2323 | cmd = input[0]; | ||
2324 | /* Dispatch to the per-command parsers */ | ||
2325 | switch (cmd) { | ||
2326 | case 'd': | ||
2327 | res = ctrl_cmd_delete(input); | ||
2328 | break; | ||
2329 | |||
2330 | case 's': | ||
2331 | res = ctrl_cmd_counter_set(input); | ||
2332 | break; | ||
2333 | |||
2334 | case 't': | ||
2335 | res = ctrl_cmd_tag(input); | ||
2336 | break; | ||
2337 | |||
2338 | case 'u': | ||
2339 | res = ctrl_cmd_untag(input); | ||
2340 | break; | ||
2341 | |||
2342 | default: | ||
2343 | res = -EINVAL; | ||
2344 | goto err; | ||
2345 | } | ||
2346 | if (!res) | ||
2347 | res = count; | ||
2348 | err: | ||
2349 | CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res); | ||
2350 | return res; | ||
2351 | } | ||
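Collecting the line formats accepted by the dispatcher (the 'd' parse happens earlier in the file, so its form is inferred; the others follow the sscanf() patterns in the handlers above):

    /* d <acct_tag> [<uid>]              erase a uid's stats/tag refs
     * s <counter_set> <uid>             select the active counter set
     * t <sock_fd> [<acct_tag> [<uid>]]  tag a socket
     * u <sock_fd>                       untag a socket
     */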
2352 | |||
2353 | #define MAX_QTAGUID_CTRL_INPUT_LEN 255 | ||
2354 | static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer, | ||
2355 | unsigned long count, void *data) | ||
2356 | { | ||
2357 | char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN]; | ||
2358 | |||
2359 | if (unlikely(module_passive)) | ||
2360 | return count; | ||
2361 | |||
2362 | if (count >= MAX_QTAGUID_CTRL_INPUT_LEN) | ||
2363 | return -EINVAL; | ||
2364 | |||
2365 | if (copy_from_user(input_buf, buffer, count)) | ||
2366 | return -EFAULT; | ||
2367 | |||
2368 | input_buf[count] = '\0'; | ||
2369 | return qtaguid_ctrl_parse(input_buf, count); | ||
2370 | } | ||
2371 | |||
2372 | struct proc_print_info { | ||
2373 | char *outp; | ||
2374 | char **num_items_returned; | ||
2375 | struct iface_stat *iface_entry; | ||
2376 | struct tag_stat *ts_entry; | ||
2377 | int item_index; | ||
2378 | int items_to_skip; | ||
2379 | int char_count; | ||
2380 | }; | ||
2381 | |||
2382 | static int pp_stats_line(struct proc_print_info *ppi, int cnt_set) | ||
2383 | { | ||
2384 | int len; | ||
2385 | struct data_counters *cnts; | ||
2386 | |||
2387 | if (!ppi->item_index) { | ||
2388 | if (ppi->item_index++ < ppi->items_to_skip) | ||
2389 | return 0; | ||
2390 | len = snprintf(ppi->outp, ppi->char_count, | ||
2391 | "idx iface acct_tag_hex uid_tag_int cnt_set " | ||
2392 | "rx_bytes rx_packets " | ||
2393 | "tx_bytes tx_packets " | ||
2394 | "rx_tcp_bytes rx_tcp_packets " | ||
2395 | "rx_udp_bytes rx_udp_packets " | ||
2396 | "rx_other_bytes rx_other_packets " | ||
2397 | "tx_tcp_bytes tx_tcp_packets " | ||
2398 | "tx_udp_bytes tx_udp_packets " | ||
2399 | "tx_other_bytes tx_other_packets\n"); | ||
2400 | } else { | ||
2401 | tag_t tag = ppi->ts_entry->tn.tag; | ||
2402 | uid_t stat_uid = get_uid_from_tag(tag); | ||
2403 | |||
2404 | if (!can_read_other_uid_stats(stat_uid)) { | ||
2405 | CT_DEBUG("qtaguid: stats line: " | ||
2406 | "%s 0x%llx %u: insufficient priv " | ||
2407 | "from pid=%u tgid=%u uid=%u\n", | ||
2408 | ppi->iface_entry->ifname, | ||
2409 | get_atag_from_tag(tag), stat_uid, | ||
2410 | current->pid, current->tgid, current_fsuid()); | ||
2411 | return 0; | ||
2412 | } | ||
2413 | if (ppi->item_index++ < ppi->items_to_skip) | ||
2414 | return 0; | ||
2415 | cnts = &ppi->ts_entry->counters; | ||
2416 | len = snprintf( | ||
2417 | ppi->outp, ppi->char_count, | ||
2418 | "%d %s 0x%llx %u %u " | ||
2419 | "%llu %llu " | ||
2420 | "%llu %llu " | ||
2421 | "%llu %llu " | ||
2422 | "%llu %llu " | ||
2423 | "%llu %llu " | ||
2424 | "%llu %llu " | ||
2425 | "%llu %llu " | ||
2426 | "%llu %llu\n", | ||
2427 | ppi->item_index, | ||
2428 | ppi->iface_entry->ifname, | ||
2429 | get_atag_from_tag(tag), | ||
2430 | stat_uid, | ||
2431 | cnt_set, | ||
2432 | dc_sum_bytes(cnts, cnt_set, IFS_RX), | ||
2433 | dc_sum_packets(cnts, cnt_set, IFS_RX), | ||
2434 | dc_sum_bytes(cnts, cnt_set, IFS_TX), | ||
2435 | dc_sum_packets(cnts, cnt_set, IFS_TX), | ||
2436 | cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, | ||
2437 | cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, | ||
2438 | cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, | ||
2439 | cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, | ||
2440 | cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, | ||
2441 | cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, | ||
2442 | cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, | ||
2443 | cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, | ||
2444 | cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, | ||
2445 | cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, | ||
2446 | cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, | ||
2447 | cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); | ||
2448 | } | ||
2449 | return len; | ||
2450 | } | ||
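For reference, the two snprintf() formats above yield a header row plus one row per (tag, counter set). With hypothetical values (uid 10005, acct_tag 42<<32 on wlan0, set 0, idx arbitrary), a line pair would look like:

    idx iface acct_tag_hex uid_tag_int cnt_set rx_bytes rx_packets tx_bytes tx_packets rx_tcp_bytes rx_tcp_packets rx_udp_bytes rx_udp_packets rx_other_bytes rx_other_packets tx_tcp_bytes tx_tcp_packets tx_udp_bytes tx_udp_packets tx_other_bytes tx_other_packets
    2 wlan0 0x2a00000000 10005 0 1024 4 512 3 800 3 224 1 0 0 512 3 0 0 0 0

Note that rx_bytes/rx_packets and tx_bytes/tx_packets are the dc_sum_* totals of the three per-protocol buckets that follow them.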
2451 | |||
2452 | static bool pp_sets(struct proc_print_info *ppi) | ||
2453 | { | ||
2454 | int len; | ||
2455 | int counter_set; | ||
2456 | for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS; | ||
2457 | counter_set++) { | ||
2458 | len = pp_stats_line(ppi, counter_set); | ||
2459 | if (len >= ppi->char_count) { | ||
2460 | *ppi->outp = '\0'; | ||
2461 | return false; | ||
2462 | } | ||
2463 | if (len) { | ||
2464 | ppi->outp += len; | ||
2465 | ppi->char_count -= len; | ||
2466 | (*ppi->num_items_returned)++; | ||
2467 | } | ||
2468 | } | ||
2469 | return true; | ||
2470 | } | ||
2471 | |||
2472 | /* | ||
2473 | * Procfs reader to get all tag stats using style "1)" as described in | ||
2474 | * fs/proc/generic.c | ||
2475 | * Groups all protocols tx/rx bytes. | ||
2476 | */ | ||
2477 | static int qtaguid_stats_proc_read(char *page, char **num_items_returned, | ||
2478 | off_t items_to_skip, int char_count, int *eof, | ||
2479 | void *data) | ||
2480 | { | ||
2481 | struct proc_print_info ppi; | ||
2482 | int len; | ||
2483 | |||
2484 | ppi.outp = page; | ||
2485 | ppi.item_index = 0; | ||
2486 | ppi.char_count = char_count; | ||
2487 | ppi.num_items_returned = num_items_returned; | ||
2488 | ppi.items_to_skip = items_to_skip; | ||
2489 | |||
2490 | if (unlikely(module_passive)) { | ||
2491 | len = pp_stats_line(&ppi, 0); | ||
2492 | /* The header should always be shorter than the buffer. */ | ||
2493 | BUG_ON(len >= ppi.char_count); | ||
2494 | (*num_items_returned)++; | ||
2495 | *eof = 1; | ||
2496 | return len; | ||
2497 | } | ||
2498 | |||
2499 | CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld " | ||
2500 | "char_count=%d *eof=%d\n", page, *num_items_returned, | ||
2501 | items_to_skip, char_count, *eof); | ||
2502 | |||
2503 | if (*eof) | ||
2504 | return 0; | ||
2505 | |||
2506 | /* The idx is there to help debug when things go belly up. */ | ||
2507 | len = pp_stats_line(&ppi, 0); | ||
2508 | /* Don't advance the outp unless the whole line was printed */ | ||
2509 | if (len >= ppi.char_count) { | ||
2510 | *ppi.outp = '\0'; | ||
2511 | return ppi.outp - page; | ||
2512 | } | ||
2513 | if (len) { | ||
2514 | ppi.outp += len; | ||
2515 | ppi.char_count -= len; | ||
2516 | (*num_items_returned)++; | ||
2517 | } | ||
2518 | |||
2519 | spin_lock_bh(&iface_stat_list_lock); | ||
2520 | list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) { | ||
2521 | struct rb_node *node; | ||
2522 | spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock); | ||
2523 | for (node = rb_first(&ppi.iface_entry->tag_stat_tree); | ||
2524 | node; | ||
2525 | node = rb_next(node)) { | ||
2526 | ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node); | ||
2527 | if (!pp_sets(&ppi)) { | ||
2528 | spin_unlock_bh( | ||
2529 | &ppi.iface_entry->tag_stat_list_lock); | ||
2530 | spin_unlock_bh(&iface_stat_list_lock); | ||
2531 | return ppi.outp - page; | ||
2532 | } | ||
2533 | } | ||
2534 | spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock); | ||
2535 | } | ||
2536 | spin_unlock_bh(&iface_stat_list_lock); | ||
2537 | |||
2538 | *eof = 1; | ||
2539 | return ppi.outp - page; | ||
2540 | } | ||
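Reading the stats is an ordinary sequential read of the proc file; a hedged userspace sketch (path as registered below, assuming the usual /proc/net mount point; the kernel side paginates via items_to_skip and *eof):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[4096];
            ssize_t n;
            int fd = open("/proc/net/xt_qtaguid/stats", O_RDONLY);

            /* Each read() continues where the previous one left off. */
            while ((n = read(fd, buf, sizeof(buf))) > 0)
                    fwrite(buf, 1, (size_t)n, stdout);
            close(fd);
            return 0;
    }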
2541 | |||
2542 | /*------------------------------------------*/ | ||
2543 | static int qtudev_open(struct inode *inode, struct file *file) | ||
2544 | { | ||
2545 | struct uid_tag_data *utd_entry; | ||
2546 | struct proc_qtu_data *pqd_entry; | ||
2547 | struct proc_qtu_data *new_pqd_entry; | ||
2548 | int res; | ||
2549 | bool utd_entry_found; | ||
2550 | |||
2551 | if (unlikely(qtu_proc_handling_passive)) | ||
2552 | return 0; | ||
2553 | |||
2554 | DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n", | ||
2555 | current->pid, current->tgid, current_fsuid()); | ||
2556 | |||
2557 | spin_lock_bh(&uid_tag_data_tree_lock); | ||
2558 | |||
2559 | /* Look for existing uid data, or alloc one. */ | ||
2560 | utd_entry = get_uid_data(current_fsuid(), &utd_entry_found); | ||
2561 | if (IS_ERR_OR_NULL(utd_entry)) { | ||
2562 | res = PTR_ERR(utd_entry); | ||
2563 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
2564 | goto err; | ||
2565 | } | ||
2566 | /* Look for existing PID based proc_data */ | ||
2567 | pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree, | ||
2568 | current->tgid); | ||
2569 | if (pqd_entry) { | ||
2570 | pr_err("qtaguid: qtudev_open(): %u/%u %u " | ||
2571 | "%s already opened\n", | ||
2572 | current->pid, current->tgid, current_fsuid(), | ||
2573 | QTU_DEV_NAME); | ||
2574 | res = -EBUSY; | ||
2575 | goto err_unlock_free_utd; | ||
2576 | } | ||
2577 | |||
2578 | new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC); | ||
2579 | if (!new_pqd_entry) { | ||
2580 | pr_err("qtaguid: qtudev_open(): %u/%u %u: " | ||
2581 | "proc data alloc failed\n", | ||
2582 | current->pid, current->tgid, current_fsuid()); | ||
2583 | res = -ENOMEM; | ||
2584 | goto err_unlock_free_utd; | ||
2585 | } | ||
2586 | new_pqd_entry->pid = current->tgid; | ||
2587 | INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list); | ||
2588 | new_pqd_entry->parent_tag_data = utd_entry; | ||
2589 | utd_entry->num_pqd++; | ||
2590 | |||
2591 | proc_qtu_data_tree_insert(new_pqd_entry, | ||
2592 | &proc_qtu_data_tree); | ||
2593 | |||
2594 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
2595 | DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n", | ||
2596 | current_fsuid(), new_pqd_entry); | ||
2597 | file->private_data = new_pqd_entry; | ||
2598 | return 0; | ||
2599 | |||
2600 | err_unlock_free_utd: | ||
2601 | if (!utd_entry_found) { | ||
2602 | rb_erase(&utd_entry->node, &uid_tag_data_tree); | ||
2603 | kfree(utd_entry); | ||
2604 | } | ||
2605 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
2606 | err: | ||
2607 | return res; | ||
2608 | } | ||
2609 | |||
2610 | static int qtudev_release(struct inode *inode, struct file *file) | ||
2611 | { | ||
2612 | struct proc_qtu_data *pqd_entry = file->private_data; | ||
2613 | struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data; | ||
2614 | struct sock_tag *st_entry; | ||
2615 | struct rb_root st_to_free_tree = RB_ROOT; | ||
2616 | struct list_head *entry, *next; | ||
2617 | struct tag_ref *tr; | ||
2618 | |||
2619 | if (unlikely(qtu_proc_handling_passive)) | ||
2620 | return 0; | ||
2621 | |||
2622 | /* | ||
2623 | * Do not trust the current->pid, it might just be a kworker cleaning | ||
2624 | * up after a dead proc. | ||
2625 | */ | ||
2626 | DR_DEBUG("qtaguid: qtudev_release(): " | ||
2627 | "pid=%u tgid=%u uid=%u " | ||
2628 | "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n", | ||
2629 | current->pid, current->tgid, pqd_entry->parent_tag_data->uid, | ||
2630 | pqd_entry, pqd_entry->pid, utd_entry, | ||
2631 | utd_entry->num_active_tags); | ||
2632 | |||
2633 | spin_lock_bh(&sock_tag_list_lock); | ||
2634 | spin_lock_bh(&uid_tag_data_tree_lock); | ||
2635 | |||
2636 | list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) { | ||
2637 | st_entry = list_entry(entry, struct sock_tag, list); | ||
2638 | DR_DEBUG("qtaguid: %s(): " | ||
2639 | "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n", | ||
2640 | __func__, | ||
2641 | st_entry, st_entry->sk, | ||
2642 | current->pid, current->tgid, | ||
2643 | pqd_entry->parent_tag_data->uid); | ||
2644 | |||
2645 | utd_entry = uid_tag_data_tree_search( | ||
2646 | &uid_tag_data_tree, | ||
2647 | get_uid_from_tag(st_entry->tag)); | ||
2648 | BUG_ON(IS_ERR_OR_NULL(utd_entry)); | ||
2649 | DR_DEBUG("qtaguid: %s(): " | ||
2650 | "looking for tag=0x%llx in utd_entry=%p\n", __func__, | ||
2651 | st_entry->tag, utd_entry); | ||
2652 | tr = tag_ref_tree_search(&utd_entry->tag_ref_tree, | ||
2653 | st_entry->tag); | ||
2654 | BUG_ON(!tr); | ||
2655 | BUG_ON(tr->num_sock_tags <= 0); | ||
2656 | tr->num_sock_tags--; | ||
2657 | free_tag_ref_from_utd_entry(tr, utd_entry); | ||
2658 | |||
2659 | rb_erase(&st_entry->sock_node, &sock_tag_tree); | ||
2660 | list_del(&st_entry->list); | ||
2661 | /* Can't sockfd_put() within spinlock, do it later. */ | ||
2662 | sock_tag_tree_insert(st_entry, &st_to_free_tree); | ||
2663 | |||
2664 | /* | ||
2665 | * Try to free the utd_entry if no other proc_qtu_data is | ||
2666 | * using it (num_pqd is 0) and it doesn't have active tags | ||
2667 | * (num_active_tags is 0). | ||
2668 | */ | ||
2669 | put_utd_entry(utd_entry); | ||
2670 | } | ||
2671 | |||
2672 | rb_erase(&pqd_entry->node, &proc_qtu_data_tree); | ||
2673 | BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1); | ||
2674 | pqd_entry->parent_tag_data->num_pqd--; | ||
2675 | put_utd_entry(pqd_entry->parent_tag_data); | ||
2676 | kfree(pqd_entry); | ||
2677 | file->private_data = NULL; | ||
2678 | |||
2679 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
2680 | spin_unlock_bh(&sock_tag_list_lock); | ||
2681 | |||
2682 | |||
2683 | sock_tag_tree_erase(&st_to_free_tree); | ||
2684 | |||
2685 | prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__, | ||
2686 | current->pid, current->tgid); | ||
2687 | return 0; | ||
2688 | } | ||
2689 | |||
2690 | /*------------------------------------------*/ | ||
2691 | static const struct file_operations qtudev_fops = { | ||
2692 | .owner = THIS_MODULE, | ||
2693 | .open = qtudev_open, | ||
2694 | .release = qtudev_release, | ||
2695 | }; | ||
2696 | |||
2697 | static struct miscdevice qtu_device = { | ||
2698 | .minor = MISC_DYNAMIC_MINOR, | ||
2699 | .name = QTU_DEV_NAME, | ||
2700 | .fops = &qtudev_fops, | ||
2701 | /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */ | ||
2702 | }; | ||
2703 | |||
2704 | /*------------------------------------------*/ | ||
2705 | static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir) | ||
2706 | { | ||
2707 | int ret; | ||
2708 | *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net); | ||
2709 | if (!*res_procdir) { | ||
2710 | pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n"); | ||
2711 | ret = -ENOMEM; | ||
2712 | goto no_dir; | ||
2713 | } | ||
2714 | |||
2715 | xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms, | ||
2716 | *res_procdir); | ||
2717 | if (!xt_qtaguid_ctrl_file) { | ||
2718 | pr_err("qtaguid: failed to create xt_qtaguid/ctrl " | ||
2719 | " file\n"); | ||
2720 | ret = -ENOMEM; | ||
2721 | goto no_ctrl_entry; | ||
2722 | } | ||
2723 | xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read; | ||
2724 | xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write; | ||
2725 | |||
2726 | xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms, | ||
2727 | *res_procdir); | ||
2728 | if (!xt_qtaguid_stats_file) { | ||
2729 | pr_err("qtaguid: failed to create xt_qtaguid/stats " | ||
2730 | "file\n"); | ||
2731 | ret = -ENOMEM; | ||
2732 | goto no_stats_entry; | ||
2733 | } | ||
2734 | xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read; | ||
2735 | /* | ||
2736 | * TODO: add support for counter hacking | ||
2737 | * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write; | ||
2738 | */ | ||
2739 | return 0; | ||
2740 | |||
2741 | no_stats_entry: | ||
2742 | remove_proc_entry("ctrl", *res_procdir); | ||
2743 | no_ctrl_entry: | ||
2744 | remove_proc_entry("xt_qtaguid", NULL); | ||
2745 | no_dir: | ||
2746 | return ret; | ||
2747 | } | ||
2748 | |||
2749 | static struct xt_match qtaguid_mt_reg __read_mostly = { | ||
2750 | /* | ||
2751 | * This module masquerades as the "owner" module so that iptables | ||
2752 | * tools can deal with it. | ||
2753 | */ | ||
2754 | .name = "owner", | ||
2755 | .revision = 1, | ||
2756 | .family = NFPROTO_UNSPEC, | ||
2757 | .match = qtaguid_mt, | ||
2758 | .matchsize = sizeof(struct xt_qtaguid_match_info), | ||
2759 | .me = THIS_MODULE, | ||
2760 | }; | ||
2761 | |||
2762 | static int __init qtaguid_mt_init(void) | ||
2763 | { | ||
2764 | if (qtaguid_proc_register(&xt_qtaguid_procdir) | ||
2765 | || iface_stat_init(xt_qtaguid_procdir) | ||
2766 | || xt_register_match(&qtaguid_mt_reg) | ||
2767 | || misc_register(&qtu_device)) | ||
2768 | return -1; | ||
2769 | return 0; | ||
2770 | } | ||
2771 | |||
2772 | /* | ||
2773 | * TODO: allow unloading of the module. | ||
2774 | * For now, stats are permanent. | ||
2775 | * Kconfig forces 'y' or 'n' and never an 'm'. | ||
2776 | */ | ||
2777 | |||
2778 | module_init(qtaguid_mt_init); | ||
2779 | MODULE_AUTHOR("jpa <jpa@google.com>"); | ||
2780 | MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats"); | ||
2781 | MODULE_LICENSE("GPL"); | ||
2782 | MODULE_ALIAS("ipt_owner"); | ||
2783 | MODULE_ALIAS("ip6t_owner"); | ||
2784 | MODULE_ALIAS("ipt_qtaguid"); | ||
2785 | MODULE_ALIAS("ip6t_qtaguid"); | ||
diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h new file mode 100644 index 00000000000..02479d6d317 --- /dev/null +++ b/net/netfilter/xt_qtaguid_internal.h | |||
@@ -0,0 +1,330 @@ | |||
1 | /* | ||
2 | * Kernel iptables module to track stats for packets based on user tags. | ||
3 | * | ||
4 | * (C) 2011 Google, Inc | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | #ifndef __XT_QTAGUID_INTERNAL_H__ | ||
11 | #define __XT_QTAGUID_INTERNAL_H__ | ||
12 | |||
13 | #include <linux/types.h> | ||
14 | #include <linux/rbtree.h> | ||
15 | #include <linux/spinlock_types.h> | ||
16 | #include <linux/workqueue.h> | ||
17 | |||
18 | /* Iface handling */ | ||
19 | #define IDEBUG_MASK (1<<0) | ||
20 | /* Iptable Matching. Per packet. */ | ||
21 | #define MDEBUG_MASK (1<<1) | ||
22 | /* Red-black tree handling. Per packet. */ | ||
23 | #define RDEBUG_MASK (1<<2) | ||
24 | /* procfs ctrl/stats handling */ | ||
25 | #define CDEBUG_MASK (1<<3) | ||
26 | /* dev and resource tracking */ | ||
27 | #define DDEBUG_MASK (1<<4) | ||
28 | |||
29 | /* E.g. (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */ | ||
30 | #define DEFAULT_DEBUG_MASK 0 | ||
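Spelling out the example in the comment: (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) is (1<<0) | (1<<3) | (1<<4) = 0x19, so a build that wants iface, ctrl, and dev/resource tracing by default could override the zero above:

    /* Hypothetical override; not part of this patch: */
    /* #define DEFAULT_DEBUG_MASK (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */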
31 | |||
32 | /* | ||
33 | * (Un)Define these *DEBUG to compile out/in the pr_debug calls. | ||
34 | * All undef: text size ~ 0x3030; all def: ~ 0x4404. | ||
35 | */ | ||
36 | #define IDEBUG | ||
37 | #define MDEBUG | ||
38 | #define RDEBUG | ||
39 | #define CDEBUG | ||
40 | #define DDEBUG | ||
41 | |||
42 | #define MSK_DEBUG(mask, ...) do { \ | ||
43 | if (unlikely(qtaguid_debug_mask & (mask))) \ | ||
44 | pr_debug(__VA_ARGS__); \ | ||
45 | } while (0) | ||
46 | #ifdef IDEBUG | ||
47 | #define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__) | ||
48 | #else | ||
49 | #define IF_DEBUG(...) no_printk(__VA_ARGS__) | ||
50 | #endif | ||
51 | #ifdef MDEBUG | ||
52 | #define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__) | ||
53 | #else | ||
54 | #define MT_DEBUG(...) no_printk(__VA_ARGS__) | ||
55 | #endif | ||
56 | #ifdef RDEBUG | ||
57 | #define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__) | ||
58 | #else | ||
59 | #define RB_DEBUG(...) no_printk(__VA_ARGS__) | ||
60 | #endif | ||
61 | #ifdef CDEBUG | ||
62 | #define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__) | ||
63 | #else | ||
64 | #define CT_DEBUG(...) no_printk(__VA_ARGS__) | ||
65 | #endif | ||
66 | #ifdef DDEBUG | ||
67 | #define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__) | ||
68 | #else | ||
69 | #define DR_DEBUG(...) no_printk(__VA_ARGS__) | ||
70 | #endif | ||
71 | |||
72 | extern uint qtaguid_debug_mask; | ||
73 | |||
74 | /*---------------------------------------------------------------------------*/ | ||
75 | /* | ||
76 | * Tags: | ||
77 | * | ||
78 | * They represent what the data usage counters will be tracked against. | ||
79 | * By default a tag is just based on the UID. | ||
80 | * The UID is used as the base for policing, and cannot be ignored. | ||
81 | * So a tag will always at least represent a UID (uid_tag). | ||
82 | * | ||
83 | * A tag can be augmented with an "accounting tag" which is associated | ||
84 | * with a UID. | ||
85 | * User space can set the acct_tag portion of the tag which is then used | ||
86 | * with sockets: all data belonging to that socket will be counted against the | ||
87 | * tag. The policing is then based on the tag's uid_tag portion, | ||
88 | * and stats are collected for the acct_tag portion separately. | ||
89 | * | ||
90 | * There could be | ||
91 | * a: {acct_tag=1, uid_tag=10003} | ||
92 | * b: {acct_tag=2, uid_tag=10003} | ||
93 | * c: {acct_tag=3, uid_tag=10003} | ||
94 | * d: {acct_tag=0, uid_tag=10003} | ||
95 | * a, b, and c represent tags associated with specific sockets. | ||
96 | * d is for the totals for that uid, including all untagged traffic. | ||
97 | * Typically d is used with policing/quota rules. | ||
98 | * | ||
99 | * We want tag_t big enough to distinguish uid_t and acct_tag. | ||
100 | * It might become a struct if needed. | ||
101 | * Nothing should be using it as an int. | ||
102 | */ | ||
103 | typedef uint64_t tag_t; /* Only used via accessors */ | ||
104 | |||
105 | #define TAG_UID_MASK 0xFFFFFFFFULL | ||
106 | #define TAG_ACCT_MASK (~0xFFFFFFFFULL) | ||
107 | |||
108 | static inline int tag_compare(tag_t t1, tag_t t2) | ||
109 | { | ||
110 | return t1 < t2 ? -1 : t1 == t2 ? 0 : 1; | ||
111 | } | ||
112 | |||
113 | static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid) | ||
114 | { | ||
115 | return acct_tag | uid; | ||
116 | } | ||
117 | static inline tag_t make_tag_from_uid(uid_t uid) | ||
118 | { | ||
119 | return uid; | ||
120 | } | ||
121 | static inline uid_t get_uid_from_tag(tag_t tag) | ||
122 | { | ||
123 | return tag & TAG_UID_MASK; | ||
124 | } | ||
125 | static inline tag_t get_utag_from_tag(tag_t tag) | ||
126 | { | ||
127 | return tag & TAG_UID_MASK; | ||
128 | } | ||
129 | static inline tag_t get_atag_from_tag(tag_t tag) | ||
130 | { | ||
131 | return tag & TAG_ACCT_MASK; | ||
132 | } | ||
133 | |||
134 | static inline bool valid_atag(tag_t tag) | ||
135 | { | ||
136 | return !(tag & TAG_UID_MASK); | ||
137 | } | ||
138 | static inline tag_t make_atag_from_value(uint32_t value) | ||
139 | { | ||
140 | return (uint64_t)value << 32; | ||
141 | } | ||
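A worked illustration of the accessors (values hypothetical; 42 is 0x2a and 10005 is 0x2715):

    tag_t atag = make_atag_from_value(42);           /* 0x0000002a00000000 */
    tag_t full = combine_atag_with_uid(atag, 10005); /* 0x0000002a00002715 */
    /* get_uid_from_tag(full)  == 10005 */
    /* get_atag_from_tag(full) == atag  */
    /* valid_atag(atag) holds: its uid bits are all zero. */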
142 | /*---------------------------------------------------------------------------*/ | ||
143 | |||
144 | /* | ||
145 | * Maximum number of socket tags that a UID is allowed to have active. | ||
146 | * Multiple processes belonging to the same UID contribute towards this limit. | ||
147 | * Special UIDs that can impersonate a UID also contribute (e.g. download | ||
148 | * manager, ...) | ||
149 | */ | ||
150 | #define DEFAULT_MAX_SOCK_TAGS 1024 | ||
151 | |||
152 | /* | ||
153 | * For now we only track 2 sets of counters. | ||
154 | * The default set is 0. | ||
155 | * Userspace can activate another set for a given uid being tracked. | ||
156 | */ | ||
157 | #define IFS_MAX_COUNTER_SETS 2 | ||
158 | |||
159 | enum ifs_tx_rx { | ||
160 | IFS_TX, | ||
161 | IFS_RX, | ||
162 | IFS_MAX_DIRECTIONS | ||
163 | }; | ||
164 | |||
165 | /* For now, TCP, UDP, the rest */ | ||
166 | enum ifs_proto { | ||
167 | IFS_TCP, | ||
168 | IFS_UDP, | ||
169 | IFS_PROTO_OTHER, | ||
170 | IFS_MAX_PROTOS | ||
171 | }; | ||
172 | |||
173 | struct byte_packet_counters { | ||
174 | uint64_t bytes; | ||
175 | uint64_t packets; | ||
176 | }; | ||
177 | |||
178 | struct data_counters { | ||
179 | struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS]; | ||
180 | }; | ||
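The dc_sum_bytes()/dc_sum_packets() helpers used by the stats printer are defined in xt_qtaguid.c, outside this hunk; a plausible sketch, summing one set and direction across the three protocol buckets:

    static inline uint64_t dc_sum_bytes(struct data_counters *counters,
                                        int set, enum ifs_tx_rx direction)
    {
            return counters->bpc[set][direction][IFS_TCP].bytes
                    + counters->bpc[set][direction][IFS_UDP].bytes
                    + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
    }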
181 | |||
182 | /* Generic tag-based nodes used as a base for rb_tree ops */ | ||
183 | struct tag_node { | ||
184 | struct rb_node node; | ||
185 | tag_t tag; | ||
186 | }; | ||
187 | |||
188 | struct tag_stat { | ||
189 | struct tag_node tn; | ||
190 | struct data_counters counters; | ||
191 | /* | ||
192 | * If this tag is acct_tag based, we need to count against the | ||
193 | * matching parent uid_tag. | ||
194 | */ | ||
195 | struct data_counters *parent_counters; | ||
196 | }; | ||
197 | |||
198 | struct iface_stat { | ||
199 | struct list_head list; /* in iface_stat_list */ | ||
200 | char *ifname; | ||
201 | bool active; | ||
202 | /* net_dev is only valid for active iface_stat */ | ||
203 | struct net_device *net_dev; | ||
204 | |||
205 | struct byte_packet_counters totals[IFS_MAX_DIRECTIONS]; | ||
206 | /* | ||
207 | * We keep the last_known, because some devices reset their counters | ||
208 | * just before NETDEV_UP, while some will reset just before | ||
209 | * NETDEV_REGISTER (which is more normal). | ||
210 | * So now, if the device didn't do a NETDEV_UNREGISTER and we see | ||
211 | * its current dev stats smaller than what was previously known, we | ||
212 | * assume an UNREGISTER and just use the last_known. | ||
213 | */ | ||
214 | struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS]; | ||
215 | /* last_known is usable when last_known_valid is true */ | ||
216 | bool last_known_valid; | ||
217 | |||
218 | struct proc_dir_entry *proc_ptr; | ||
219 | |||
220 | struct rb_root tag_stat_tree; | ||
221 | spinlock_t tag_stat_list_lock; | ||
222 | }; | ||
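A hedged sketch of the last_known heuristic described in the comment above (the real update logic lives in the iface_stat code in xt_qtaguid.c; this helper is hypothetical):

    static uint64_t iface_rx_delta(struct iface_stat *is, uint64_t dev_rx_bytes)
    {
            if (!is->last_known_valid)
                    return dev_rx_bytes;
            /* Device counters going backwards imply a missed
             * NETDEV_UNREGISTER and a reset; count from zero again. */
            if (dev_rx_bytes < is->last_known[IFS_RX].bytes)
                    return dev_rx_bytes;
            return dev_rx_bytes - is->last_known[IFS_RX].bytes;
    }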
223 | |||
224 | /* This is needed to create proc_dir_entries from atomic context. */ | ||
225 | struct iface_stat_work { | ||
226 | struct work_struct iface_work; | ||
227 | struct iface_stat *iface_entry; | ||
228 | }; | ||
229 | |||
230 | /* | ||
231 | * Tracks the tag that this socket is transferring data for, which is not | ||
232 | * necessarily the uid that owns the socket. | ||
233 | * This is the tag against which tag_stat.counters will be billed. | ||
234 | * These structs need to be looked up by sock and pid. | ||
235 | */ | ||
236 | struct sock_tag { | ||
237 | struct rb_node sock_node; | ||
238 | struct sock *sk; /* Only used as a number, never dereferenced */ | ||
239 | /* The socket is needed for sockfd_put() */ | ||
240 | struct socket *socket; | ||
241 | /* Used to associate with a given pid */ | ||
242 | struct list_head list; /* in proc_qtu_data.sock_tag_list */ | ||
243 | pid_t pid; | ||
244 | |||
245 | tag_t tag; | ||
246 | }; | ||
247 | |||
248 | struct qtaguid_event_counts { | ||
249 | /* Various successful events */ | ||
250 | atomic64_t sockets_tagged; | ||
251 | atomic64_t sockets_untagged; | ||
252 | atomic64_t counter_set_changes; | ||
253 | atomic64_t delete_cmds; | ||
254 | atomic64_t iface_events; /* Number of NETDEV_* events handled */ | ||
255 | |||
256 | atomic64_t match_calls; /* Number of times iptables called mt */ | ||
257 | /* | ||
258 | * match_found_sk_*: numbers related to the netfilter matching | ||
259 | * function finding a sock for the sk_buff. | ||
260 | * Total skbs processed is sum(match_found*). | ||
261 | */ | ||
262 | atomic64_t match_found_sk; /* An sk was already in the sk_buff. */ | ||
263 | /* The connection tracker had or didn't have the sk. */ | ||
264 | atomic64_t match_found_sk_in_ct; | ||
265 | atomic64_t match_found_no_sk_in_ct; | ||
266 | /* | ||
267 | * No sk could be found. No apparent owner. Could happen with | ||
268 | * unsolicited traffic. | ||
269 | */ | ||
270 | atomic64_t match_no_sk; | ||
271 | /* | ||
272 | * The file ptr in the sk_socket wasn't there. | ||
273 | * This might happen for traffic while the socket is being closed. | ||
274 | */ | ||
275 | atomic64_t match_no_sk_file; | ||
276 | }; | ||
277 | |||
278 | /* Track the active counter set for the given tag. */ | ||
279 | struct tag_counter_set { | ||
280 | struct tag_node tn; | ||
281 | int active_set; | ||
282 | }; | ||
283 | |||
284 | /*----------------------------------------------*/ | ||
285 | /* | ||
286 | * The qtu uid data is used to track resources that are created directly or | ||
287 | * indirectly by processes (uid tracked). | ||
288 | * It is shared by the processes with the same uid. | ||
289 | * Some of the resources are counted to prevent further rogue allocations; | ||
290 | * some need freeing once the owning processes (uid) exit. | ||
291 | */ | ||
292 | struct uid_tag_data { | ||
293 | struct rb_node node; | ||
294 | uid_t uid; | ||
295 | |||
296 | /* | ||
297 | * For the uid, how many accounting tags have been set. | ||
298 | */ | ||
299 | int num_active_tags; | ||
300 | /* Track the number of proc_qtu_data that reference it */ | ||
301 | int num_pqd; | ||
302 | struct rb_root tag_ref_tree; | ||
303 | /* No tag_node_tree_lock; use uid_tag_data_tree_lock */ | ||
304 | }; | ||
305 | |||
306 | struct tag_ref { | ||
307 | struct tag_node tn; | ||
308 | |||
309 | /* | ||
310 | * This tracks the number of active sockets that have a tag on them | ||
311 | * which matches this tag_ref.tn.tag. | ||
312 | * A tag ref can live on after the sockets are untagged. | ||
313 | * A tag ref can only be removed during a tag delete command. | ||
314 | */ | ||
315 | int num_sock_tags; | ||
316 | }; | ||
317 | |||
318 | struct proc_qtu_data { | ||
319 | struct rb_node node; | ||
320 | pid_t pid; | ||
321 | |||
322 | struct uid_tag_data *parent_tag_data; | ||
323 | |||
324 | /* Tracks the sock_tags that need freeing upon this proc's death */ | ||
325 | struct list_head sock_tag_list; | ||
326 | /* No spinlock_t sock_tag_list_lock; use the global one. */ | ||
327 | }; | ||
328 | |||
329 | /*----------------------------------------------*/ | ||
330 | #endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */ | ||
diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c new file mode 100644 index 00000000000..39176785c91 --- /dev/null +++ b/net/netfilter/xt_qtaguid_print.c | |||
@@ -0,0 +1,556 @@ | |||
1 | /* | ||
2 | * Pretty printing Support for iptables xt_qtaguid module. | ||
3 | * | ||
4 | * (C) 2011 Google, Inc | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | /* | ||
12 | * Most of the functions in this file just waste time if DEBUG is not defined. | ||
13 | * The matching xt_qtaguid_print.h provides empty static inline funcs if the | ||
14 | * needed debug flags are not defined. | ||
15 | * Those funcs that fail to allocate memory will panic, as there is no point | ||
16 | * hobbling along just pretending to do the requested work. | ||
17 | */ | ||
18 | |||
19 | #define DEBUG | ||
20 | |||
21 | #include <linux/fs.h> | ||
22 | #include <linux/gfp.h> | ||
23 | #include <linux/net.h> | ||
24 | #include <linux/rbtree.h> | ||
25 | #include <linux/slab.h> | ||
26 | #include <linux/spinlock_types.h> | ||
27 | |||
28 | |||
29 | #include "xt_qtaguid_internal.h" | ||
30 | #include "xt_qtaguid_print.h" | ||
31 | |||
32 | #ifdef DDEBUG | ||
33 | |||
34 | static void _bug_on_err_or_null(void *ptr) | ||
35 | { | ||
36 | if (IS_ERR_OR_NULL(ptr)) { | ||
37 | pr_err("qtaguid: kmalloc failed\n"); | ||
38 | BUG(); | ||
39 | } | ||
40 | } | ||
41 | |||
42 | char *pp_tag_t(tag_t *tag) | ||
43 | { | ||
44 | char *res; | ||
45 | |||
46 | if (!tag) | ||
47 | res = kasprintf(GFP_ATOMIC, "tag_t@null{}"); | ||
48 | else | ||
49 | res = kasprintf(GFP_ATOMIC, | ||
50 | "tag_t@%p{tag=0x%llx, uid=%u}", | ||
51 | tag, *tag, get_uid_from_tag(*tag)); | ||
52 | _bug_on_err_or_null(res); | ||
53 | return res; | ||
54 | } | ||
55 | |||
56 | char *pp_data_counters(struct data_counters *dc, bool showValues) | ||
57 | { | ||
58 | char *res; | ||
59 | |||
60 | if (!dc) | ||
61 | res = kasprintf(GFP_ATOMIC, "data_counters@null{}"); | ||
62 | else if (showValues) | ||
63 | res = kasprintf( | ||
64 | GFP_ATOMIC, "data_counters@%p{" | ||
65 | "set0{" | ||
66 | "rx{" | ||
67 | "tcp{b=%llu, p=%llu}, " | ||
68 | "udp{b=%llu, p=%llu}," | ||
69 | "other{b=%llu, p=%llu}}, " | ||
70 | "tx{" | ||
71 | "tcp{b=%llu, p=%llu}, " | ||
72 | "udp{b=%llu, p=%llu}," | ||
73 | "other{b=%llu, p=%llu}}}, " | ||
74 | "set1{" | ||
75 | "rx{" | ||
76 | "tcp{b=%llu, p=%llu}, " | ||
77 | "udp{b=%llu, p=%llu}," | ||
78 | "other{b=%llu, p=%llu}}, " | ||
79 | "tx{" | ||
80 | "tcp{b=%llu, p=%llu}, " | ||
81 | "udp{b=%llu, p=%llu}," | ||
82 | "other{b=%llu, p=%llu}}}}", | ||
83 | dc, | ||
84 | dc->bpc[0][IFS_RX][IFS_TCP].bytes, | ||
85 | dc->bpc[0][IFS_RX][IFS_TCP].packets, | ||
86 | dc->bpc[0][IFS_RX][IFS_UDP].bytes, | ||
87 | dc->bpc[0][IFS_RX][IFS_UDP].packets, | ||
88 | dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes, | ||
89 | dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets, | ||
90 | dc->bpc[0][IFS_TX][IFS_TCP].bytes, | ||
91 | dc->bpc[0][IFS_TX][IFS_TCP].packets, | ||
92 | dc->bpc[0][IFS_TX][IFS_UDP].bytes, | ||
93 | dc->bpc[0][IFS_TX][IFS_UDP].packets, | ||
94 | dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes, | ||
95 | dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets, | ||
96 | dc->bpc[1][IFS_RX][IFS_TCP].bytes, | ||
97 | dc->bpc[1][IFS_RX][IFS_TCP].packets, | ||
98 | dc->bpc[1][IFS_RX][IFS_UDP].bytes, | ||
99 | dc->bpc[1][IFS_RX][IFS_UDP].packets, | ||
100 | dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes, | ||
101 | dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets, | ||
102 | dc->bpc[1][IFS_TX][IFS_TCP].bytes, | ||
103 | dc->bpc[1][IFS_TX][IFS_TCP].packets, | ||
104 | dc->bpc[1][IFS_TX][IFS_UDP].bytes, | ||
105 | dc->bpc[1][IFS_TX][IFS_UDP].packets, | ||
106 | dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes, | ||
107 | dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets); | ||
108 | else | ||
109 | res = kasprintf(GFP_ATOMIC, "data_counters@%p{...}", dc); | ||
110 | _bug_on_err_or_null(res); | ||
111 | return res; | ||
112 | } | ||
113 | |||
114 | char *pp_tag_node(struct tag_node *tn) | ||
115 | { | ||
116 | char *tag_str; | ||
117 | char *res; | ||
118 | |||
119 | if (!tn) { | ||
120 | res = kasprintf(GFP_ATOMIC, "tag_node@null{}"); | ||
121 | _bug_on_err_or_null(res); | ||
122 | return res; | ||
123 | } | ||
124 | tag_str = pp_tag_t(&tn->tag); | ||
125 | res = kasprintf(GFP_ATOMIC, | ||
126 | "tag_node@%p{tag=%s}", | ||
127 | tn, tag_str); | ||
128 | _bug_on_err_or_null(res); | ||
129 | kfree(tag_str); | ||
130 | return res; | ||
131 | } | ||
132 | |||
133 | char *pp_tag_ref(struct tag_ref *tr) | ||
134 | { | ||
135 | char *tn_str; | ||
136 | char *res; | ||
137 | |||
138 | if (!tr) { | ||
139 | res = kasprintf(GFP_ATOMIC, "tag_ref@null{}"); | ||
140 | _bug_on_err_or_null(res); | ||
141 | return res; | ||
142 | } | ||
143 | tn_str = pp_tag_node(&tr->tn); | ||
144 | res = kasprintf(GFP_ATOMIC, | ||
145 | "tag_ref@%p{%s, num_sock_tags=%d}", | ||
146 | tr, tn_str, tr->num_sock_tags); | ||
147 | _bug_on_err_or_null(res); | ||
148 | kfree(tn_str); | ||
149 | return res; | ||
150 | } | ||
151 | |||
152 | char *pp_tag_stat(struct tag_stat *ts) | ||
153 | { | ||
154 | char *tn_str; | ||
155 | char *counters_str; | ||
156 | char *parent_counters_str; | ||
157 | char *res; | ||
158 | |||
159 | if (!ts) { | ||
160 | res = kasprintf(GFP_ATOMIC, "tag_stat@null{}"); | ||
161 | _bug_on_err_or_null(res); | ||
162 | return res; | ||
163 | } | ||
164 | tn_str = pp_tag_node(&ts->tn); | ||
165 | counters_str = pp_data_counters(&ts->counters, true); | ||
166 | parent_counters_str = pp_data_counters(ts->parent_counters, false); | ||
167 | res = kasprintf(GFP_ATOMIC, | ||
168 | "tag_stat@%p{%s, counters=%s, parent_counters=%s}", | ||
169 | ts, tn_str, counters_str, parent_counters_str); | ||
170 | _bug_on_err_or_null(res); | ||
171 | kfree(tn_str); | ||
172 | kfree(counters_str); | ||
173 | kfree(parent_counters_str); | ||
174 | return res; | ||
175 | } | ||
176 | |||
177 | char *pp_iface_stat(struct iface_stat *is) | ||
178 | { | ||
179 | char *res; | ||
180 | if (!is) | ||
181 | res = kasprintf(GFP_ATOMIC, "iface_stat@null{}"); | ||
182 | else | ||
183 | res = kasprintf(GFP_ATOMIC, "iface_stat@%p{" | ||
184 | "list=list_head{...}, " | ||
185 | "ifname=%s, " | ||
186 | "total={rx={bytes=%llu, " | ||
187 | "packets=%llu}, " | ||
188 | "tx={bytes=%llu, " | ||
189 | "packets=%llu}}, " | ||
190 | "last_known_valid=%d, " | ||
191 | "last_known={rx={bytes=%llu, " | ||
192 | "packets=%llu}, " | ||
193 | "tx={bytes=%llu, " | ||
194 | "packets=%llu}}, " | ||
195 | "active=%d, " | ||
196 | "net_dev=%p, " | ||
197 | "proc_ptr=%p, " | ||
198 | "tag_stat_tree=rb_root{...}}", | ||
199 | is, | ||
200 | is->ifname, | ||
201 | is->totals[IFS_RX].bytes, | ||
202 | is->totals[IFS_RX].packets, | ||
203 | is->totals[IFS_TX].bytes, | ||
204 | is->totals[IFS_TX].packets, | ||
205 | is->last_known_valid, | ||
206 | is->last_known[IFS_RX].bytes, | ||
207 | is->last_known[IFS_RX].packets, | ||
208 | is->last_known[IFS_TX].bytes, | ||
209 | is->last_known[IFS_TX].packets, | ||
210 | is->active, | ||
211 | is->net_dev, | ||
212 | is->proc_ptr); | ||
213 | _bug_on_err_or_null(res); | ||
214 | return res; | ||
215 | } | ||
216 | |||
217 | char *pp_sock_tag(struct sock_tag *st) | ||
218 | { | ||
219 | char *tag_str; | ||
220 | char *res; | ||
221 | |||
222 | if (!st) { | ||
223 | res = kasprintf(GFP_ATOMIC, "sock_tag@null{}"); | ||
224 | _bug_on_err_or_null(res); | ||
225 | return res; | ||
226 | } | ||
227 | tag_str = pp_tag_t(&st->tag); | ||
228 | res = kasprintf(GFP_ATOMIC, "sock_tag@%p{" | ||
229 | "sock_node=rb_node{...}, " | ||
230 | "sk=%p socket=%p (f_count=%lu), list=list_head{...}, " | ||
231 | "pid=%u, tag=%s}", | ||
232 | st, st->sk, st->socket, atomic_long_read( | ||
233 | &st->socket->file->f_count), | ||
234 | st->pid, tag_str); | ||
235 | _bug_on_err_or_null(res); | ||
236 | kfree(tag_str); | ||
237 | return res; | ||
238 | } | ||
239 | |||
240 | char *pp_uid_tag_data(struct uid_tag_data *utd) | ||
241 | { | ||
242 | char *res; | ||
243 | |||
244 | if (!utd) | ||
245 | res = kasprintf(GFP_ATOMIC, "uid_tag_data@null{}"); | ||
246 | else | ||
247 | res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{" | ||
248 | "uid=%u, num_active_acct_tags=%d, " | ||
249 | "num_pqd=%d, " | ||
250 | "tag_node_tree=rb_root{...}, " | ||
251 | "proc_qtu_data_tree=rb_root{...}}", | ||
252 | utd, utd->uid, | ||
253 | utd->num_active_tags, utd->num_pqd); | ||
254 | _bug_on_err_or_null(res); | ||
255 | return res; | ||
256 | } | ||
257 | |||
258 | char *pp_proc_qtu_data(struct proc_qtu_data *pqd) | ||
259 | { | ||
260 | char *parent_tag_data_str; | ||
261 | char *res; | ||
262 | |||
263 | if (!pqd) { | ||
264 | res = kasprintf(GFP_ATOMIC, "proc_qtu_data@null{}"); | ||
265 | _bug_on_err_or_null(res); | ||
266 | return res; | ||
267 | } | ||
268 | parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data); | ||
269 | res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{" | ||
270 | "node=rb_node{...}, pid=%u, " | ||
271 | "parent_tag_data=%s, " | ||
272 | "sock_tag_list=list_head{...}}", | ||
273 | pqd, pqd->pid, parent_tag_data_str | ||
274 | ); | ||
275 | _bug_on_err_or_null(res); | ||
276 | kfree(parent_tag_data_str); | ||
277 | return res; | ||
278 | } | ||
279 | |||
280 | /*------------------------------------------*/ | ||
281 | void prdebug_sock_tag_tree(int indent_level, | ||
282 | struct rb_root *sock_tag_tree) | ||
283 | { | ||
284 | struct rb_node *node; | ||
285 | struct sock_tag *sock_tag_entry; | ||
286 | char *str; | ||
287 | |||
288 | if (likely(!(qtaguid_debug_mask & DDEBUG_MASK))) | ||
289 | return; | ||
290 | |||
291 | if (RB_EMPTY_ROOT(sock_tag_tree)) { | ||
292 | str = "sock_tag_tree=rb_root{}"; | ||
293 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
294 | return; | ||
295 | } | ||
296 | |||
297 | str = "sock_tag_tree=rb_root{"; | ||
298 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
299 | indent_level++; | ||
300 | for (node = rb_first(sock_tag_tree); | ||
301 | node; | ||
302 | node = rb_next(node)) { | ||
303 | sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); | ||
304 | str = pp_sock_tag(sock_tag_entry); | ||
305 | pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); | ||
306 | kfree(str); | ||
307 | } | ||
308 | indent_level--; | ||
309 | str = "}"; | ||
310 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
311 | } | ||
312 | |||
313 | void prdebug_sock_tag_list(int indent_level, | ||
314 | struct list_head *sock_tag_list) | ||
315 | { | ||
316 | struct sock_tag *sock_tag_entry; | ||
317 | char *str; | ||
318 | |||
319 | if (likely(!(qtaguid_debug_mask & DDEBUG_MASK))) | ||
320 | return; | ||
321 | |||
322 | if (list_empty(sock_tag_list)) { | ||
323 | str = "sock_tag_list=list_head{}"; | ||
324 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
325 | return; | ||
326 | } | ||
327 | |||
328 | str = "sock_tag_list=list_head{"; | ||
329 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
330 | indent_level++; | ||
331 | list_for_each_entry(sock_tag_entry, sock_tag_list, list) { | ||
332 | str = pp_sock_tag(sock_tag_entry); | ||
333 | pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); | ||
334 | kfree(str); | ||
335 | } | ||
336 | indent_level--; | ||
337 | str = "}"; | ||
338 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
339 | } | ||
340 | |||
341 | void prdebug_proc_qtu_data_tree(int indent_level, | ||
342 | struct rb_root *proc_qtu_data_tree) | ||
343 | { | ||
344 | char *str; | ||
345 | struct rb_node *node; | ||
346 | struct proc_qtu_data *proc_qtu_data_entry; | ||
347 | |||
348 | if (likely(!(qtaguid_debug_mask & DDEBUG_MASK))) | ||
349 | return; | ||
350 | |||
351 | if (RB_EMPTY_ROOT(proc_qtu_data_tree)) { | ||
352 | str = "proc_qtu_data_tree=rb_root{}"; | ||
353 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
354 | return; | ||
355 | } | ||
356 | |||
357 | str = "proc_qtu_data_tree=rb_root{"; | ||
358 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
359 | indent_level++; | ||
360 | for (node = rb_first(proc_qtu_data_tree); | ||
361 | node; | ||
362 | node = rb_next(node)) { | ||
363 | proc_qtu_data_entry = rb_entry(node, | ||
364 | struct proc_qtu_data, | ||
365 | node); | ||
366 | str = pp_proc_qtu_data(proc_qtu_data_entry); | ||
367 | pr_debug("%*d: %s,\n", indent_level*2, indent_level, | ||
368 | str); | ||
369 | kfree(str); | ||
370 | indent_level++; | ||
371 | prdebug_sock_tag_list(indent_level, | ||
372 | &proc_qtu_data_entry->sock_tag_list); | ||
373 | indent_level--; | ||
374 | |||
375 | } | ||
376 | indent_level--; | ||
377 | str = "}"; | ||
378 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
379 | } | ||
380 | |||
381 | void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree) | ||
382 | { | ||
383 | char *str; | ||
384 | struct rb_node *node; | ||
385 | struct tag_ref *tag_ref_entry; | ||
386 | |||
387 | if (likely(!(qtaguid_debug_mask & DDEBUG_MASK))) | ||
388 | return; | ||
389 | |||
390 | if (RB_EMPTY_ROOT(tag_ref_tree)) { | ||
391 | str = "tag_ref_tree{}"; | ||
392 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
393 | return; | ||
394 | } | ||
395 | |||
396 | str = "tag_ref_tree{"; | ||
397 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
398 | indent_level++; | ||
399 | for (node = rb_first(tag_ref_tree); | ||
400 | node; | ||
401 | node = rb_next(node)) { | ||
402 | tag_ref_entry = rb_entry(node, | ||
403 | struct tag_ref, | ||
404 | tn.node); | ||
405 | str = pp_tag_ref(tag_ref_entry); | ||
406 | pr_debug("%*d: %s,\n", indent_level*2, indent_level, | ||
407 | str); | ||
408 | kfree(str); | ||
409 | } | ||
410 | indent_level--; | ||
411 | str = "}"; | ||
412 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
413 | } | ||
414 | |||
415 | void prdebug_uid_tag_data_tree(int indent_level, | ||
416 | struct rb_root *uid_tag_data_tree) | ||
417 | { | ||
418 | char *str; | ||
419 | struct rb_node *node; | ||
420 | struct uid_tag_data *uid_tag_data_entry; | ||
421 | |||
422 | if (likely(!(qtaguid_debug_mask & DDEBUG_MASK))) | ||
423 | return; | ||
424 | |||
425 | if (RB_EMPTY_ROOT(uid_tag_data_tree)) { | ||
426 | str = "uid_tag_data_tree=rb_root{}"; | ||
427 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
428 | return; | ||
429 | } | ||
430 | |||
431 | str = "uid_tag_data_tree=rb_root{"; | ||
432 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
433 | indent_level++; | ||
434 | for (node = rb_first(uid_tag_data_tree); | ||
435 | node; | ||
436 | node = rb_next(node)) { | ||
437 | uid_tag_data_entry = rb_entry(node, struct uid_tag_data, | ||
438 | node); | ||
439 | str = pp_uid_tag_data(uid_tag_data_entry); | ||
440 | pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); | ||
441 | kfree(str); | ||
442 | if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) { | ||
443 | indent_level++; | ||
444 | prdebug_tag_ref_tree(indent_level, | ||
445 | &uid_tag_data_entry->tag_ref_tree); | ||
446 | indent_level--; | ||
447 | } | ||
448 | } | ||
449 | indent_level--; | ||
450 | str = "}"; | ||
451 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
452 | } | ||
453 | |||
454 | void prdebug_tag_stat_tree(int indent_level, | ||
455 | struct rb_root *tag_stat_tree) | ||
456 | { | ||
457 | char *str; | ||
458 | struct rb_node *node; | ||
459 | struct tag_stat *ts_entry; | ||
460 | |||
461 | if (likely(!(qtaguid_debug_mask & DDEBUG_MASK))) | ||
462 | return; | ||
463 | |||
464 | if (RB_EMPTY_ROOT(tag_stat_tree)) { | ||
465 | str = "tag_stat_tree{}"; | ||
466 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
467 | return; | ||
468 | } | ||
469 | |||
470 | str = "tag_stat_tree{"; | ||
471 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
472 | indent_level++; | ||
473 | for (node = rb_first(tag_stat_tree); | ||
474 | node; | ||
475 | node = rb_next(node)) { | ||
476 | ts_entry = rb_entry(node, struct tag_stat, tn.node); | ||
477 | str = pp_tag_stat(ts_entry); | ||
478 | pr_debug("%*d: %s\n", indent_level*2, indent_level, | ||
479 | str); | ||
480 | kfree(str); | ||
481 | } | ||
482 | indent_level--; | ||
483 | str = "}"; | ||
484 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
485 | } | ||
486 | |||
487 | void prdebug_iface_stat_list(int indent_level, | ||
488 | struct list_head *iface_stat_list) | ||
489 | { | ||
490 | char *str; | ||
491 | struct iface_stat *iface_entry; | ||
492 | |||
493 | if (likely(!(qtaguid_debug_mask & DDEBUG_MASK))) | ||
494 | return; | ||
495 | |||
496 | if (list_empty(iface_stat_list)) { | ||
497 | str = "iface_stat_list=list_head{}"; | ||
498 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
499 | return; | ||
500 | } | ||
501 | |||
502 | str = "iface_stat_list=list_head{"; | ||
503 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
504 | indent_level++; | ||
505 | list_for_each_entry(iface_entry, iface_stat_list, list) { | ||
506 | str = pp_iface_stat(iface_entry); | ||
507 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
508 | kfree(str); | ||
509 | |||
510 | spin_lock_bh(&iface_entry->tag_stat_list_lock); | ||
511 | if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) { | ||
512 | indent_level++; | ||
513 | prdebug_tag_stat_tree(indent_level, | ||
514 | &iface_entry->tag_stat_tree); | ||
515 | indent_level--; | ||
516 | } | ||
517 | spin_unlock_bh(&iface_entry->tag_stat_list_lock); | ||
518 | } | ||
519 | indent_level--; | ||
520 | str = "}"; | ||
521 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
522 | } | ||
523 | |||
524 | #endif /* ifdef DDEBUG */ | ||
525 | /*------------------------------------------*/ | ||
526 | static const char * const netdev_event_strings[] = { | ||
527 | "netdev_unknown", | ||
528 | "NETDEV_UP", | ||
529 | "NETDEV_DOWN", | ||
530 | "NETDEV_REBOOT", | ||
531 | "NETDEV_CHANGE", | ||
532 | "NETDEV_REGISTER", | ||
533 | "NETDEV_UNREGISTER", | ||
534 | "NETDEV_CHANGEMTU", | ||
535 | "NETDEV_CHANGEADDR", | ||
536 | "NETDEV_GOING_DOWN", | ||
537 | "NETDEV_CHANGENAME", | ||
538 | "NETDEV_FEAT_CHANGE", | ||
539 | "NETDEV_BONDING_FAILOVER", | ||
540 | "NETDEV_PRE_UP", | ||
541 | "NETDEV_PRE_TYPE_CHANGE", | ||
542 | "NETDEV_POST_TYPE_CHANGE", | ||
543 | "NETDEV_POST_INIT", | ||
544 | "NETDEV_UNREGISTER_BATCH", | ||
545 | "NETDEV_RELEASE", | ||
546 | "NETDEV_NOTIFY_PEERS", | ||
547 | "NETDEV_JOIN", | ||
548 | }; | ||
549 | |||
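/*
 * Names for the netdev notifier event codes, for debug output only.
 * Index 0 is a placeholder because the NETDEV_* constants start at 1
 * (NETDEV_UP); anything out of range maps to "bad event num" below.
 */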
550 | const char *netdev_evt_str(int netdev_event) | ||
551 | { | ||
552 | if (netdev_event < 0 | ||
553 | || netdev_event >= ARRAY_SIZE(netdev_event_strings)) | ||
554 | return "bad event num"; | ||
555 | return netdev_event_strings[netdev_event]; | ||
556 | } | ||
diff --git a/net/netfilter/xt_qtaguid_print.h b/net/netfilter/xt_qtaguid_print.h new file mode 100644 index 00000000000..b63871a0be5 --- /dev/null +++ b/net/netfilter/xt_qtaguid_print.h | |||
@@ -0,0 +1,120 @@ | |||
1 | /* | ||
2 | * Pretty printing Support for iptables xt_qtaguid module. | ||
3 | * | ||
4 | * (C) 2011 Google, Inc | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License version 2 as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | #ifndef __XT_QTAGUID_PRINT_H__ | ||
11 | #define __XT_QTAGUID_PRINT_H__ | ||
12 | |||
13 | #include "xt_qtaguid_internal.h" | ||
14 | |||
15 | #ifdef DDEBUG | ||
16 | |||
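/*
 * Each pp_*() helper returns a kmalloc()'d string that the caller is
 * expected to kfree(); the !DDEBUG stubs further down return NULL instead.
 */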
17 | char *pp_tag_t(tag_t *tag); | ||
18 | char *pp_data_counters(struct data_counters *dc, bool showValues); | ||
19 | char *pp_tag_node(struct tag_node *tn); | ||
20 | char *pp_tag_ref(struct tag_ref *tr); | ||
21 | char *pp_tag_stat(struct tag_stat *ts); | ||
22 | char *pp_iface_stat(struct iface_stat *is); | ||
23 | char *pp_sock_tag(struct sock_tag *st); | ||
24 | char *pp_uid_tag_data(struct uid_tag_data *qtd); | ||
25 | char *pp_proc_qtu_data(struct proc_qtu_data *pqd); | ||
26 | |||
27 | /*------------------------------------------*/ | ||
28 | void prdebug_sock_tag_list(int indent_level, | ||
29 | struct list_head *sock_tag_list); | ||
30 | void prdebug_sock_tag_tree(int indent_level, | ||
31 | struct rb_root *sock_tag_tree); | ||
32 | void prdebug_proc_qtu_data_tree(int indent_level, | ||
33 | struct rb_root *proc_qtu_data_tree); | ||
34 | void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree); | ||
35 | void prdebug_uid_tag_data_tree(int indent_level, | ||
36 | struct rb_root *uid_tag_data_tree); | ||
37 | void prdebug_tag_stat_tree(int indent_level, | ||
38 | struct rb_root *tag_stat_tree); | ||
39 | void prdebug_iface_stat_list(int indent_level, | ||
40 | struct list_head *iface_stat_list); | ||
41 | |||
42 | #else | ||
43 | |||
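/* With DDEBUG unset, all of these compile down to empty inline stubs. */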
44 | /*------------------------------------------*/ | ||
45 | static inline char *pp_tag_t(tag_t *tag) | ||
46 | { | ||
47 | return NULL; | ||
48 | } | ||
49 | static inline char *pp_data_counters(struct data_counters *dc, bool showValues) | ||
50 | { | ||
51 | return NULL; | ||
52 | } | ||
53 | static inline char *pp_tag_node(struct tag_node *tn) | ||
54 | { | ||
55 | return NULL; | ||
56 | } | ||
57 | static inline char *pp_tag_ref(struct tag_ref *tr) | ||
58 | { | ||
59 | return NULL; | ||
60 | } | ||
61 | static inline char *pp_tag_stat(struct tag_stat *ts) | ||
62 | { | ||
63 | return NULL; | ||
64 | } | ||
65 | static inline char *pp_iface_stat(struct iface_stat *is) | ||
66 | { | ||
67 | return NULL; | ||
68 | } | ||
69 | static inline char *pp_sock_tag(struct sock_tag *st) | ||
70 | { | ||
71 | return NULL; | ||
72 | } | ||
73 | static inline char *pp_uid_tag_data(struct uid_tag_data *qtd) | ||
74 | { | ||
75 | return NULL; | ||
76 | } | ||
77 | static inline char *pp_proc_qtu_data(struct proc_qtu_data *pqd) | ||
78 | { | ||
79 | return NULL; | ||
80 | } | ||
81 | |||
82 | /*------------------------------------------*/ | ||
83 | static inline | ||
84 | void prdebug_sock_tag_list(int indent_level, | ||
85 | struct list_head *sock_tag_list) | ||
86 | { | ||
87 | } | ||
88 | static inline | ||
89 | void prdebug_sock_tag_tree(int indent_level, | ||
90 | struct rb_root *sock_tag_tree) | ||
91 | { | ||
92 | } | ||
93 | static inline | ||
94 | void prdebug_proc_qtu_data_tree(int indent_level, | ||
95 | struct rb_root *proc_qtu_data_tree) | ||
96 | { | ||
97 | } | ||
98 | static inline | ||
99 | void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree) | ||
100 | { | ||
101 | } | ||
102 | static inline | ||
103 | void prdebug_uid_tag_data_tree(int indent_level, | ||
104 | struct rb_root *uid_tag_data_tree) | ||
105 | { | ||
106 | } | ||
107 | static inline | ||
108 | void prdebug_tag_stat_tree(int indent_level, | ||
109 | struct rb_root *tag_stat_tree) | ||
110 | { | ||
111 | } | ||
112 | static inline | ||
113 | void prdebug_iface_stat_list(int indent_level, | ||
114 | struct list_head *iface_stat_list) | ||
115 | { | ||
116 | } | ||
117 | #endif | ||
118 | /*------------------------------------------*/ | ||
119 | const char *netdev_evt_str(int netdev_event); | ||
120 | #endif /* ifndef __XT_QTAGUID_PRINT_H__ */ | ||
diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c new file mode 100644 index 00000000000..3c72bea2dd6 --- /dev/null +++ b/net/netfilter/xt_quota2.c | |||
@@ -0,0 +1,381 @@ | |||
1 | /* | ||
2 | * xt_quota2 - enhanced xt_quota that can count upwards and in packets | ||
3 | * as a minimal accounting match. | ||
4 | * by Jan Engelhardt <jengelh@medozas.de>, 2008 | ||
5 | * | ||
6 | * Originally based on xt_quota.c: | ||
7 | * netfilter module to enforce network quotas | ||
8 | * Sam Johnston <samj@samj.net> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License; either | ||
12 | * version 2 of the License, as published by the Free Software Foundation. | ||
13 | */ | ||
14 | #include <linux/list.h> | ||
15 | #include <linux/proc_fs.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | #include <linux/spinlock.h> | ||
18 | #include <linux/atomic.h> | ||
19 | |||
20 | #include <linux/netfilter/x_tables.h> | ||
21 | #include <linux/netfilter/xt_quota2.h> | ||
22 | #ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG | ||
23 | #include <linux/netfilter_ipv4/ipt_ULOG.h> | ||
24 | #endif | ||
25 | |||
26 | /** | ||
27 | * struct xt_quota_counter - state for one quota counter | ||
28 | * @lock: lock to protect quota writers from each other | ||
29 | */ | ||
29 | struct xt_quota_counter { | ||
30 | u_int64_t quota; | ||
31 | spinlock_t lock; | ||
32 | struct list_head list; | ||
33 | atomic_t ref; | ||
34 | char name[sizeof(((struct xt_quota_mtinfo2 *)NULL)->name)]; | ||
35 | struct proc_dir_entry *procfs_entry; | ||
36 | }; | ||
37 | |||
38 | #ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG | ||
39 | /* Harald's favorite number +1 :D From ipt_ULOG.c */ | ||
40 | static unsigned int qlog_nl_event = 112; | ||
41 | module_param_named(event_num, qlog_nl_event, uint, S_IRUGO | S_IWUSR); | ||
42 | MODULE_PARM_DESC(event_num, | ||
43 | "Event number for NETLINK_NFLOG message. 0 disables log. " | ||
44 | "111 is what ipt_ULOG uses."); | ||
45 | static struct sock *nflognl; | ||
46 | #endif | ||
47 | |||
48 | static LIST_HEAD(counter_list); | ||
49 | static DEFINE_SPINLOCK(counter_list_lock); | ||
50 | |||
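/*
 * The perms/uid/gid module parameters control the mode and ownership
 * applied to the per-counter /proc/net/xt_quota/<name> entries created
 * in q2_get_counter().
 */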
51 | static struct proc_dir_entry *proc_xt_quota; | ||
52 | static unsigned int quota_list_perms = S_IRUGO | S_IWUSR; | ||
53 | static unsigned int quota_list_uid = 0; | ||
54 | static unsigned int quota_list_gid = 0; | ||
55 | module_param_named(perms, quota_list_perms, uint, S_IRUGO | S_IWUSR); | ||
56 | module_param_named(uid, quota_list_uid, uint, S_IRUGO | S_IWUSR); | ||
57 | module_param_named(gid, quota_list_gid, uint, S_IRUGO | S_IWUSR); | ||
58 | |||
59 | |||
60 | #ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG | ||
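/*
 * Broadcast a ULOG-compatible notification (packet metadata only, no
 * payload: data_len stays 0) to NETLINK_NFLOG multicast group 1 when a
 * quota runs out.
 */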
61 | static void quota2_log(unsigned int hooknum, | ||
62 | const struct sk_buff *skb, | ||
63 | const struct net_device *in, | ||
64 | const struct net_device *out, | ||
65 | const char *prefix) | ||
66 | { | ||
67 | ulog_packet_msg_t *pm; | ||
68 | struct sk_buff *log_skb; | ||
69 | size_t size; | ||
70 | struct nlmsghdr *nlh; | ||
71 | |||
72 | if (!qlog_nl_event) | ||
73 | return; | ||
74 | |||
75 | size = NLMSG_SPACE(sizeof(*pm)); | ||
76 | size = max(size, (size_t)NLMSG_GOODSIZE); | ||
77 | log_skb = alloc_skb(size, GFP_ATOMIC); | ||
78 | if (!log_skb) { | ||
79 | pr_err("xt_quota2: cannot alloc skb for logging\n"); | ||
80 | return; | ||
81 | } | ||
82 | |||
83 | /* NLMSG_PUT() uses "goto nlmsg_failure" */ | ||
84 | nlh = NLMSG_PUT(log_skb, /*pid*/0, /*seq*/0, qlog_nl_event, | ||
85 | sizeof(*pm)); | ||
86 | pm = NLMSG_DATA(nlh); | ||
87 | if (skb->tstamp.tv64 == 0) | ||
88 | __net_timestamp((struct sk_buff *)skb); | ||
89 | pm->data_len = 0; | ||
90 | pm->hook = hooknum; | ||
91 | if (prefix != NULL) | ||
92 | strlcpy(pm->prefix, prefix, sizeof(pm->prefix)); | ||
93 | else | ||
94 | *(pm->prefix) = '\0'; | ||
95 | if (in) | ||
96 | strlcpy(pm->indev_name, in->name, sizeof(pm->indev_name)); | ||
97 | else | ||
98 | pm->indev_name[0] = '\0'; | ||
99 | |||
100 | if (out) | ||
101 | strlcpy(pm->outdev_name, out->name, sizeof(pm->outdev_name)); | ||
102 | else | ||
103 | pm->outdev_name[0] = '\0'; | ||
104 | |||
105 | NETLINK_CB(log_skb).dst_group = 1; | ||
106 | pr_debug("throwing 1 packets to netlink group 1\n"); | ||
107 | netlink_broadcast(nflognl, log_skb, 0, 1, GFP_ATOMIC); | ||
108 | return; | ||
109 | | ||
110 | nlmsg_failure: /* Used within NLMSG_PUT() */ | ||
111 | kfree_skb(log_skb); | ||
112 | pr_debug("xt_quota2: error during NLMSG_PUT\n"); | ||
111 | } | ||
112 | #else | ||
113 | static void quota2_log(unsigned int hooknum, | ||
114 | const struct sk_buff *skb, | ||
115 | const struct net_device *in, | ||
116 | const struct net_device *out, | ||
117 | const char *prefix) | ||
118 | { | ||
119 | } | ||
120 | #endif /* if+else CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG */ | ||
121 | |||
122 | static int quota_proc_read(char *page, char **start, off_t offset, | ||
123 | int count, int *eof, void *data) | ||
124 | { | ||
125 | struct xt_quota_counter *e = data; | ||
126 | int ret; | ||
127 | |||
128 | spin_lock_bh(&e->lock); | ||
129 | ret = snprintf(page, PAGE_SIZE, "%llu\n", e->quota); | ||
130 | spin_unlock_bh(&e->lock); | ||
131 | return ret; | ||
132 | } | ||
133 | |||
134 | static int quota_proc_write(struct file *file, const char __user *input, | ||
135 | unsigned long size, void *data) | ||
136 | { | ||
137 | struct xt_quota_counter *e = data; | ||
138 | char buf[sizeof("18446744073709551616")]; | ||
139 | |||
140 | if (size > sizeof(buf) - 1) | ||
141 | size = sizeof(buf) - 1; | ||
142 | if (copy_from_user(buf, input, size) != 0) | ||
143 | return -EFAULT; | ||
144 | buf[size] = '\0'; | ||
145 | |||
146 | spin_lock_bh(&e->lock); | ||
147 | e->quota = simple_strtoull(buf, NULL, 0); | ||
148 | spin_unlock_bh(&e->lock); | ||
149 | return size; | ||
150 | } | ||
151 | |||
152 | static struct xt_quota_counter * | ||
153 | q2_new_counter(const struct xt_quota_mtinfo2 *q, bool anon) | ||
154 | { | ||
155 | struct xt_quota_counter *e; | ||
156 | unsigned int size; | ||
157 | |||
158 | /* Do not need all the procfs things for anonymous counters. */ | ||
159 | size = anon ? offsetof(typeof(*e), list) : sizeof(*e); | ||
160 | e = kmalloc(size, GFP_KERNEL); | ||
161 | if (e == NULL) | ||
162 | return NULL; | ||
163 | |||
164 | e->quota = q->quota; | ||
165 | spin_lock_init(&e->lock); | ||
166 | if (!anon) { | ||
167 | INIT_LIST_HEAD(&e->list); | ||
168 | atomic_set(&e->ref, 1); | ||
169 | strlcpy(e->name, q->name, sizeof(e->name)); | ||
170 | } | ||
171 | return e; | ||
172 | } | ||
173 | |||
174 | /** | ||
175 | * q2_get_counter - take a ref on the named counter, creating it if needed | ||
176 | * @q: quota match info holding the counter name (empty name = anonymous) | ||
177 | */ | ||
178 | static struct xt_quota_counter * | ||
179 | q2_get_counter(const struct xt_quota_mtinfo2 *q) | ||
180 | { | ||
181 | struct proc_dir_entry *p; | ||
182 | struct xt_quota_counter *e = NULL; | ||
183 | struct xt_quota_counter *new_e; | ||
184 | |||
185 | if (*q->name == '\0') | ||
186 | return q2_new_counter(q, true); | ||
187 | |||
188 | /* No need to hold a lock while getting a new counter */ | ||
189 | new_e = q2_new_counter(q, false); | ||
190 | if (new_e == NULL) | ||
191 | goto out; | ||
192 | |||
193 | spin_lock_bh(&counter_list_lock); | ||
194 | list_for_each_entry(e, &counter_list, list) | ||
195 | if (strcmp(e->name, q->name) == 0) { | ||
196 | atomic_inc(&e->ref); | ||
197 | spin_unlock_bh(&counter_list_lock); | ||
198 | kfree(new_e); | ||
199 | pr_debug("xt_quota2: old counter name=%s", e->name); | ||
200 | return e; | ||
201 | } | ||
202 | e = new_e; | ||
203 | pr_debug("xt_quota2: new_counter name=%s", e->name); | ||
204 | list_add_tail(&e->list, &counter_list); | ||
205 | /* An entry with a refcount of 1 cannot yet be destroyed: this | ||
206 | * function has not returned the new entry, so iptables holds no | ||
207 | * reference with which to destroy it. For another rule to destroy | ||
208 | * it, this function would first have to be re-invoked and acquire | ||
209 | * a new ref to the same named quota. Nobody will touch | ||
210 | * e->procfs_entry yet either, so the lock can be released here. */ | ||
212 | spin_unlock_bh(&counter_list_lock); | ||
213 | |||
214 | /* create_proc_entry() may sleep, so call it without the spinlock held */ | ||
215 | p = e->procfs_entry = create_proc_entry(e->name, quota_list_perms, | ||
216 | proc_xt_quota); | ||
217 | |||
218 | if (IS_ERR_OR_NULL(p)) { | ||
219 | spin_lock_bh(&counter_list_lock); | ||
220 | list_del(&e->list); | ||
221 | spin_unlock_bh(&counter_list_lock); | ||
222 | goto out; | ||
223 | } | ||
224 | p->data = e; | ||
225 | p->read_proc = quota_proc_read; | ||
226 | p->write_proc = quota_proc_write; | ||
227 | p->uid = quota_list_uid; | ||
228 | p->gid = quota_list_gid; | ||
229 | return e; | ||
230 | |||
231 | out: | ||
232 | kfree(e); | ||
233 | return NULL; | ||
234 | } | ||
235 | |||
236 | static int quota_mt2_check(const struct xt_mtchk_param *par) | ||
237 | { | ||
238 | struct xt_quota_mtinfo2 *q = par->matchinfo; | ||
239 | |||
240 | pr_debug("xt_quota2: check() flags=0x%04x", q->flags); | ||
241 | |||
242 | if (q->flags & ~XT_QUOTA_MASK) | ||
243 | return -EINVAL; | ||
244 | |||
245 | q->name[sizeof(q->name)-1] = '\0'; | ||
246 | if (*q->name == '.' || strchr(q->name, '/') != NULL) { | ||
247 | printk(KERN_ERR "xt_quota.3: illegal name\n"); | ||
248 | return -EINVAL; | ||
249 | } | ||
250 | |||
251 | q->master = q2_get_counter(q); | ||
252 | if (q->master == NULL) { | ||
253 | printk(KERN_ERR "xt_quota.3: memory alloc failure\n"); | ||
254 | return -ENOMEM; | ||
255 | } | ||
256 | |||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | static void quota_mt2_destroy(const struct xt_mtdtor_param *par) | ||
261 | { | ||
262 | struct xt_quota_mtinfo2 *q = par->matchinfo; | ||
263 | struct xt_quota_counter *e = q->master; | ||
264 | |||
265 | if (*q->name == '\0') { | ||
266 | kfree(e); | ||
267 | return; | ||
268 | } | ||
269 | |||
270 | spin_lock_bh(&counter_list_lock); | ||
271 | if (!atomic_dec_and_test(&e->ref)) { | ||
272 | spin_unlock_bh(&counter_list_lock); | ||
273 | return; | ||
274 | } | ||
275 | |||
276 | list_del(&e->list); | ||
277 | remove_proc_entry(e->name, proc_xt_quota); | ||
278 | spin_unlock_bh(&counter_list_lock); | ||
279 | kfree(e); | ||
280 | } | ||
281 | |||
282 | static bool | ||
283 | quota_mt2(const struct sk_buff *skb, struct xt_action_param *par) | ||
284 | { | ||
285 | struct xt_quota_mtinfo2 *q = (void *)par->matchinfo; | ||
286 | struct xt_quota_counter *e = q->master; | ||
287 | bool ret = q->flags & XT_QUOTA_INVERT; | ||
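/*
 * XT_QUOTA_INVERT flips the match result: ret starts out true when the
 * flag is set, and the in-quota path below negates it, so the rule then
 * matches only once the quota is exhausted.
 */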
288 | |||
289 | spin_lock_bh(&e->lock); | ||
290 | if (q->flags & XT_QUOTA_GROW) { | ||
291 | /* | ||
292 | * While no_change is pointless in "grow" mode, we honor it | ||
293 | * here anyway to keep the behavior consistent. | ||
294 | */ | ||
295 | if (!(q->flags & XT_QUOTA_NO_CHANGE)) { | ||
296 | e->quota += (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len; | ||
297 | } | ||
298 | ret = true; | ||
299 | } else { | ||
300 | if (e->quota >= skb->len) { | ||
301 | if (!(q->flags & XT_QUOTA_NO_CHANGE)) | ||
302 | e->quota -= (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len; | ||
303 | ret = !ret; | ||
304 | } else { | ||
305 | /* We are transitioning, log that fact. */ | ||
306 | if (e->quota) { | ||
307 | quota2_log(par->hooknum, | ||
308 | skb, | ||
309 | par->in, | ||
310 | par->out, | ||
311 | q->name); | ||
312 | } | ||
313 | /* we do not allow even small packets from now on */ | ||
314 | e->quota = 0; | ||
315 | } | ||
316 | } | ||
317 | spin_unlock_bh(&e->lock); | ||
318 | return ret; | ||
319 | } | ||
320 | |||
321 | static struct xt_match quota_mt2_reg[] __read_mostly = { | ||
322 | { | ||
323 | .name = "quota2", | ||
324 | .revision = 3, | ||
325 | .family = NFPROTO_IPV4, | ||
326 | .checkentry = quota_mt2_check, | ||
327 | .match = quota_mt2, | ||
328 | .destroy = quota_mt2_destroy, | ||
329 | .matchsize = sizeof(struct xt_quota_mtinfo2), | ||
330 | .me = THIS_MODULE, | ||
331 | }, | ||
332 | { | ||
333 | .name = "quota2", | ||
334 | .revision = 3, | ||
335 | .family = NFPROTO_IPV6, | ||
336 | .checkentry = quota_mt2_check, | ||
337 | .match = quota_mt2, | ||
338 | .destroy = quota_mt2_destroy, | ||
339 | .matchsize = sizeof(struct xt_quota_mtinfo2), | ||
340 | .me = THIS_MODULE, | ||
341 | }, | ||
342 | }; | ||
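/*
 * The match is registered for both IPv4 and IPv6. Counters are keyed by
 * name in the global counter_list, so a named quota is shared between
 * the two families.
 */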
343 | |||
344 | static int __init quota_mt2_init(void) | ||
345 | { | ||
346 | int ret; | ||
347 | pr_debug("xt_quota2: init()"); | ||
348 | |||
349 | #ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG | ||
350 | nflognl = netlink_kernel_create(&init_net, | ||
351 | NETLINK_NFLOG, 1, NULL, | ||
352 | NULL, THIS_MODULE); | ||
353 | if (!nflognl) | ||
354 | return -ENOMEM; | ||
355 | #endif | ||
356 | |||
357 | proc_xt_quota = proc_mkdir("xt_quota", init_net.proc_net); | ||
358 | if (proc_xt_quota == NULL) { | ||
359 | ret = -EACCES; | ||
360 | goto out_nflog; | ||
361 | } | ||
362 | ret = xt_register_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg)); | ||
363 | if (ret < 0) { | ||
364 | remove_proc_entry("xt_quota", init_net.proc_net); | ||
365 | goto out_nflog; | ||
366 | } | ||
367 | pr_debug("xt_quota2: init() %d\n", ret); | ||
368 | return 0; | ||
369 | out_nflog: | ||
370 | #ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG | ||
371 | netlink_kernel_release(nflognl); | ||
372 | #endif | ||
373 | return ret; | ||
366 | } | ||
367 | |||
368 | static void __exit quota_mt2_exit(void) | ||
369 | { | ||
370 | xt_unregister_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg)); | ||
371 | remove_proc_entry("xt_quota", init_net.proc_net); | ||
372 | #ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG | ||
373 | netlink_kernel_release(nflognl); | ||
374 | #endif | ||
375 | } | ||
373 | |||
374 | module_init(quota_mt2_init); | ||
375 | module_exit(quota_mt2_exit); | ||
376 | MODULE_DESCRIPTION("Xtables: countdown quota match; up counter"); | ||
377 | MODULE_AUTHOR("Sam Johnston <samj@samj.net>"); | ||
378 | MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); | ||
379 | MODULE_LICENSE("GPL"); | ||
380 | MODULE_ALIAS("ipt_quota2"); | ||
381 | MODULE_ALIAS("ip6t_quota2"); | ||
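/*
 * Example usage, assuming the matching userspace extension from
 * xtables-addons (the --name/--quota option names come from
 * libxt_quota2 and are not defined in this file):
 *
 *   iptables -A OUTPUT -m quota2 --name mydata --quota 1048576 -j ACCEPT
 *
 * The remaining byte count then appears in /proc/net/xt_quota/mydata,
 * which can also be written to reset or adjust the quota.
 */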