Diffstat (limited to 'net/netfilter')
| -rw-r--r-- | net/netfilter/nfnetlink_queue.c | 1028 |
| -rw-r--r-- | net/netfilter/xt_NOTRACK.c | 53 |
| -rw-r--r-- | net/netfilter/xt_qtaguid.c | 2785 |
| -rw-r--r-- | net/netfilter/xt_qtaguid_internal.h | 330 |
| -rw-r--r-- | net/netfilter/xt_qtaguid_print.c | 556 |
| -rw-r--r-- | net/netfilter/xt_qtaguid_print.h | 120 |
| -rw-r--r-- | net/netfilter/xt_quota2.c | 381 |
7 files changed, 5253 insertions, 0 deletions
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
new file mode 100644
index 00000000000..a80b0cb03f1
--- /dev/null
+++ b/net/netfilter/nfnetlink_queue.c
| @@ -0,0 +1,1028 @@ | |||
| 1 | /* | ||
| 2 | * This is a module which is used for queueing packets and communicating with | ||
| 3 | * userspace via nfnetlink. | ||
| 4 | * | ||
| 5 | * (C) 2005 by Harald Welte <laforge@netfilter.org> | ||
| 6 | * (C) 2007 by Patrick McHardy <kaber@trash.net> | ||
| 7 | * | ||
| 8 | * Based on the old ipv4-only ip_queue.c: | ||
| 9 | * (C) 2000-2002 James Morris <jmorris@intercode.com.au> | ||
| 10 | * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org> | ||
| 11 | * | ||
| 12 | * This program is free software; you can redistribute it and/or modify | ||
| 13 | * it under the terms of the GNU General Public License version 2 as | ||
| 14 | * published by the Free Software Foundation. | ||
| 15 | * | ||
| 16 | */ | ||
| 17 | #include <linux/module.h> | ||
| 18 | #include <linux/skbuff.h> | ||
| 19 | #include <linux/init.h> | ||
| 20 | #include <linux/spinlock.h> | ||
| 21 | #include <linux/slab.h> | ||
| 22 | #include <linux/notifier.h> | ||
| 23 | #include <linux/netdevice.h> | ||
| 24 | #include <linux/netfilter.h> | ||
| 25 | #include <linux/proc_fs.h> | ||
| 26 | #include <linux/netfilter_ipv4.h> | ||
| 27 | #include <linux/netfilter_ipv6.h> | ||
| 28 | #include <linux/netfilter/nfnetlink.h> | ||
| 29 | #include <linux/netfilter/nfnetlink_queue.h> | ||
| 30 | #include <linux/list.h> | ||
| 31 | #include <net/sock.h> | ||
| 32 | #include <net/netfilter/nf_queue.h> | ||
| 33 | |||
| 34 | #include <linux/atomic.h> | ||
| 35 | |||
| 36 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
| 37 | #include "../bridge/br_private.h" | ||
| 38 | #endif | ||
| 39 | |||
| 40 | #define NFQNL_QMAX_DEFAULT 1024 | ||
| 41 | |||
| 42 | struct nfqnl_instance { | ||
| 43 | struct hlist_node hlist; /* global list of queues */ | ||
| 44 | struct rcu_head rcu; | ||
| 45 | |||
| 46 | int peer_pid; | ||
| 47 | unsigned int queue_maxlen; | ||
| 48 | unsigned int copy_range; | ||
| 49 | unsigned int queue_dropped; | ||
| 50 | unsigned int queue_user_dropped; | ||
| 51 | |||
| 52 | |||
| 53 | u_int16_t queue_num; /* number of this queue */ | ||
| 54 | u_int8_t copy_mode; | ||
| 55 | /* | ||
| 56 | * Following fields are dirtied for each queued packet, | ||
| 57 | * keep them in same cache line if possible. | ||
| 58 | */ | ||
| 59 | spinlock_t lock; | ||
| 60 | unsigned int queue_total; | ||
| 61 | unsigned int id_sequence; /* 'sequence' of pkt ids */ | ||
| 62 | struct list_head queue_list; /* packets in queue */ | ||
| 63 | }; | ||
| 64 | |||
| 65 | typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); | ||
| 66 | |||
| 67 | static DEFINE_SPINLOCK(instances_lock); | ||
| 68 | |||
| 69 | #define INSTANCE_BUCKETS 16 | ||
| 70 | static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly; | ||
| 71 | |||
| 72 | static inline u_int8_t instance_hashfn(u_int16_t queue_num) | ||
| 73 | { | ||
| 74 | return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; | ||
| 75 | } | ||
| 76 | |||
| 77 | static struct nfqnl_instance * | ||
| 78 | instance_lookup(u_int16_t queue_num) | ||
| 79 | { | ||
| 80 | struct hlist_head *head; | ||
| 81 | struct hlist_node *pos; | ||
| 82 | struct nfqnl_instance *inst; | ||
| 83 | |||
| 84 | head = &instance_table[instance_hashfn(queue_num)]; | ||
| 85 | hlist_for_each_entry_rcu(inst, pos, head, hlist) { | ||
| 86 | if (inst->queue_num == queue_num) | ||
| 87 | return inst; | ||
| 88 | } | ||
| 89 | return NULL; | ||
| 90 | } | ||
| 91 | |||
| 92 | static struct nfqnl_instance * | ||
| 93 | instance_create(u_int16_t queue_num, int pid) | ||
| 94 | { | ||
| 95 | struct nfqnl_instance *inst; | ||
| 96 | unsigned int h; | ||
| 97 | int err; | ||
| 98 | |||
| 99 | spin_lock(&instances_lock); | ||
| 100 | if (instance_lookup(queue_num)) { | ||
| 101 | err = -EEXIST; | ||
| 102 | goto out_unlock; | ||
| 103 | } | ||
| 104 | |||
| 105 | inst = kzalloc(sizeof(*inst), GFP_ATOMIC); | ||
| 106 | if (!inst) { | ||
| 107 | err = -ENOMEM; | ||
| 108 | goto out_unlock; | ||
| 109 | } | ||
| 110 | |||
| 111 | inst->queue_num = queue_num; | ||
| 112 | inst->peer_pid = pid; | ||
| 113 | inst->queue_maxlen = NFQNL_QMAX_DEFAULT; | ||
| 114 | inst->copy_range = 0xfffff; | ||
| 115 | inst->copy_mode = NFQNL_COPY_NONE; | ||
| 116 | spin_lock_init(&inst->lock); | ||
| 117 | INIT_LIST_HEAD(&inst->queue_list); | ||
| 118 | |||
| 119 | if (!try_module_get(THIS_MODULE)) { | ||
| 120 | err = -EAGAIN; | ||
| 121 | goto out_free; | ||
| 122 | } | ||
| 123 | |||
| 124 | h = instance_hashfn(queue_num); | ||
| 125 | hlist_add_head_rcu(&inst->hlist, &instance_table[h]); | ||
| 126 | |||
| 127 | spin_unlock(&instances_lock); | ||
| 128 | |||
| 129 | return inst; | ||
| 130 | |||
| 131 | out_free: | ||
| 132 | kfree(inst); | ||
| 133 | out_unlock: | ||
| 134 | spin_unlock(&instances_lock); | ||
| 135 | return ERR_PTR(err); | ||
| 136 | } | ||
| 137 | |||
| 138 | static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, | ||
| 139 | unsigned long data); | ||
| 140 | |||
| 141 | static void | ||
| 142 | instance_destroy_rcu(struct rcu_head *head) | ||
| 143 | { | ||
| 144 | struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance, | ||
| 145 | rcu); | ||
| 146 | |||
| 147 | nfqnl_flush(inst, NULL, 0); | ||
| 148 | kfree(inst); | ||
| 149 | module_put(THIS_MODULE); | ||
| 150 | } | ||
| 151 | |||
| 152 | static void | ||
| 153 | __instance_destroy(struct nfqnl_instance *inst) | ||
| 154 | { | ||
| 155 | hlist_del_rcu(&inst->hlist); | ||
| 156 | call_rcu(&inst->rcu, instance_destroy_rcu); | ||
| 157 | } | ||
| 158 | |||
| 159 | static void | ||
| 160 | instance_destroy(struct nfqnl_instance *inst) | ||
| 161 | { | ||
| 162 | spin_lock(&instances_lock); | ||
| 163 | __instance_destroy(inst); | ||
| 164 | spin_unlock(&instances_lock); | ||
| 165 | } | ||
| 166 | |||
| 167 | static inline void | ||
| 168 | __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) | ||
| 169 | { | ||
| 170 | list_add_tail(&entry->list, &queue->queue_list); | ||
| 171 | queue->queue_total++; | ||
| 172 | } | ||
| 173 | |||
| 174 | static void | ||
| 175 | __dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry) | ||
| 176 | { | ||
| 177 | list_del(&entry->list); | ||
| 178 | queue->queue_total--; | ||
| 179 | } | ||
| 180 | |||
| 181 | static struct nf_queue_entry * | ||
| 182 | find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) | ||
| 183 | { | ||
| 184 | struct nf_queue_entry *entry = NULL, *i; | ||
| 185 | |||
| 186 | spin_lock_bh(&queue->lock); | ||
| 187 | |||
| 188 | list_for_each_entry(i, &queue->queue_list, list) { | ||
| 189 | if (i->id == id) { | ||
| 190 | entry = i; | ||
| 191 | break; | ||
| 192 | } | ||
| 193 | } | ||
| 194 | |||
| 195 | if (entry) | ||
| 196 | __dequeue_entry(queue, entry); | ||
| 197 | |||
| 198 | spin_unlock_bh(&queue->lock); | ||
| 199 | |||
| 200 | return entry; | ||
| 201 | } | ||
| 202 | |||
| 203 | static void | ||
| 204 | nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) | ||
| 205 | { | ||
| 206 | struct nf_queue_entry *entry, *next; | ||
| 207 | |||
| 208 | spin_lock_bh(&queue->lock); | ||
| 209 | list_for_each_entry_safe(entry, next, &queue->queue_list, list) { | ||
| 210 | if (!cmpfn || cmpfn(entry, data)) { | ||
| 211 | list_del(&entry->list); | ||
| 212 | queue->queue_total--; | ||
| 213 | nf_reinject(entry, NF_DROP); | ||
| 214 | } | ||
| 215 | } | ||
| 216 | spin_unlock_bh(&queue->lock); | ||
| 217 | } | ||
| 218 | |||
| 219 | static struct sk_buff * | ||
| 220 | nfqnl_build_packet_message(struct nfqnl_instance *queue, | ||
| 221 | struct nf_queue_entry *entry, | ||
| 222 | __be32 **packet_id_ptr) | ||
| 223 | { | ||
| 224 | sk_buff_data_t old_tail; | ||
| 225 | size_t size; | ||
| 226 | size_t data_len = 0; | ||
| 227 | struct sk_buff *skb; | ||
| 228 | struct nlattr *nla; | ||
| 229 | struct nfqnl_msg_packet_hdr *pmsg; | ||
| 230 | struct nlmsghdr *nlh; | ||
| 231 | struct nfgenmsg *nfmsg; | ||
| 232 | struct sk_buff *entskb = entry->skb; | ||
| 233 | struct net_device *indev; | ||
| 234 | struct net_device *outdev; | ||
| 235 | |||
| 236 | size = NLMSG_SPACE(sizeof(struct nfgenmsg)) | ||
| 237 | + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) | ||
| 238 | + nla_total_size(sizeof(u_int32_t)) /* ifindex */ | ||
| 239 | + nla_total_size(sizeof(u_int32_t)) /* ifindex */ | ||
| 240 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
| 241 | + nla_total_size(sizeof(u_int32_t)) /* ifindex */ | ||
| 242 | + nla_total_size(sizeof(u_int32_t)) /* ifindex */ | ||
| 243 | #endif | ||
| 244 | + nla_total_size(sizeof(u_int32_t)) /* mark */ | ||
| 245 | + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) | ||
| 246 | + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); | ||
| 247 | |||
| 248 | outdev = entry->outdev; | ||
| 249 | |||
| 250 | switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { | ||
| 251 | case NFQNL_COPY_META: | ||
| 252 | case NFQNL_COPY_NONE: | ||
| 253 | break; | ||
| 254 | |||
| 255 | case NFQNL_COPY_PACKET: | ||
| 256 | if (entskb->ip_summed == CHECKSUM_PARTIAL && | ||
| 257 | skb_checksum_help(entskb)) | ||
| 258 | return NULL; | ||
| 259 | |||
| 260 | data_len = ACCESS_ONCE(queue->copy_range); | ||
| 261 | if (data_len == 0 || data_len > entskb->len) | ||
| 262 | data_len = entskb->len; | ||
| 263 | |||
| 264 | size += nla_total_size(data_len); | ||
| 265 | break; | ||
| 266 | } | ||
| 267 | |||
| 268 | |||
| 269 | skb = alloc_skb(size, GFP_ATOMIC); | ||
| 270 | if (!skb) | ||
| 271 | goto nlmsg_failure; | ||
| 272 | |||
| 273 | old_tail = skb->tail; | ||
| 274 | nlh = NLMSG_PUT(skb, 0, 0, | ||
| 275 | NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, | ||
| 276 | sizeof(struct nfgenmsg)); | ||
| 277 | nfmsg = NLMSG_DATA(nlh); | ||
| 278 | nfmsg->nfgen_family = entry->pf; | ||
| 279 | nfmsg->version = NFNETLINK_V0; | ||
| 280 | nfmsg->res_id = htons(queue->queue_num); | ||
| 281 | |||
| 282 | nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg)); | ||
| 283 | pmsg = nla_data(nla); | ||
| 284 | pmsg->hw_protocol = entskb->protocol; | ||
| 285 | pmsg->hook = entry->hook; | ||
| 286 | *packet_id_ptr = &pmsg->packet_id; | ||
| 287 | |||
| 288 | indev = entry->indev; | ||
| 289 | if (indev) { | ||
| 290 | #ifndef CONFIG_BRIDGE_NETFILTER | ||
| 291 | NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex)); | ||
| 292 | #else | ||
| 293 | if (entry->pf == PF_BRIDGE) { | ||
| 294 | /* Case 1: indev is physical input device, we need to | ||
| 295 | * look for bridge group (when called from | ||
| 296 | * netfilter_bridge) */ | ||
| 297 | NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV, | ||
| 298 | htonl(indev->ifindex)); | ||
| 299 | /* this is the bridge group "brX" */ | ||
| 300 | /* rcu_read_lock()ed by __nf_queue */ | ||
| 301 | NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, | ||
| 302 | htonl(br_port_get_rcu(indev)->br->dev->ifindex)); | ||
| 303 | } else { | ||
| 304 | /* Case 2: indev is bridge group, we need to look for | ||
| 305 | * physical device (when called from ipv4) */ | ||
| 306 | NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, | ||
| 307 | htonl(indev->ifindex)); | ||
| 308 | if (entskb->nf_bridge && entskb->nf_bridge->physindev) | ||
| 309 | NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV, | ||
| 310 | htonl(entskb->nf_bridge->physindev->ifindex)); | ||
| 311 | } | ||
| 312 | #endif | ||
| 313 | } | ||
| 314 | |||
| 315 | if (outdev) { | ||
| 316 | #ifndef CONFIG_BRIDGE_NETFILTER | ||
| 317 | NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex)); | ||
| 318 | #else | ||
| 319 | if (entry->pf == PF_BRIDGE) { | ||
| 320 | /* Case 1: outdev is physical output device, we need to | ||
| 321 | * look for bridge group (when called from | ||
| 322 | * netfilter_bridge) */ | ||
| 323 | NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV, | ||
| 324 | htonl(outdev->ifindex)); | ||
| 325 | /* this is the bridge group "brX" */ | ||
| 326 | /* rcu_read_lock()ed by __nf_queue */ | ||
| 327 | NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, | ||
| 328 | htonl(br_port_get_rcu(outdev)->br->dev->ifindex)); | ||
| 329 | } else { | ||
| 330 | /* Case 2: outdev is bridge group, we need to look for | ||
| 331 | * physical output device (when called from ipv4) */ | ||
| 332 | NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, | ||
| 333 | htonl(outdev->ifindex)); | ||
| 334 | if (entskb->nf_bridge && entskb->nf_bridge->physoutdev) | ||
| 335 | NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV, | ||
| 336 | htonl(entskb->nf_bridge->physoutdev->ifindex)); | ||
| 337 | } | ||
| 338 | #endif | ||
| 339 | } | ||
| 340 | |||
| 341 | if (entskb->mark) | ||
| 342 | NLA_PUT_BE32(skb, NFQA_MARK, htonl(entskb->mark)); | ||
| 343 | |||
| 344 | if (indev && entskb->dev && | ||
| 345 | entskb->mac_header != entskb->network_header) { | ||
| 346 | struct nfqnl_msg_packet_hw phw; | ||
| 347 | int len = dev_parse_header(entskb, phw.hw_addr); | ||
| 348 | if (len) { | ||
| 349 | phw.hw_addrlen = htons(len); | ||
| 350 | NLA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); | ||
| 351 | } | ||
| 352 | } | ||
| 353 | |||
| 354 | if (entskb->tstamp.tv64) { | ||
| 355 | struct nfqnl_msg_packet_timestamp ts; | ||
| 356 | struct timeval tv = ktime_to_timeval(entskb->tstamp); | ||
| 357 | ts.sec = cpu_to_be64(tv.tv_sec); | ||
| 358 | ts.usec = cpu_to_be64(tv.tv_usec); | ||
| 359 | |||
| 360 | NLA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); | ||
| 361 | } | ||
| 362 | |||
| 363 | if (data_len) { | ||
| 364 | struct nlattr *nla; | ||
| 365 | int sz = nla_attr_size(data_len); | ||
| 366 | |||
| 367 | if (skb_tailroom(skb) < nla_total_size(data_len)) { | ||
| 368 | printk(KERN_WARNING "nf_queue: no tailroom!\n"); | ||
| 369 | goto nlmsg_failure; | ||
| 370 | } | ||
| 371 | |||
| 372 | nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); | ||
| 373 | nla->nla_type = NFQA_PAYLOAD; | ||
| 374 | nla->nla_len = sz; | ||
| 375 | |||
| 376 | if (skb_copy_bits(entskb, 0, nla_data(nla), data_len)) | ||
| 377 | BUG(); | ||
| 378 | } | ||
| 379 | |||
| 380 | nlh->nlmsg_len = skb->tail - old_tail; | ||
| 381 | return skb; | ||
| 382 | |||
| 383 | nlmsg_failure: | ||
| 384 | nla_put_failure: | ||
| 385 | if (skb) | ||
| 386 | kfree_skb(skb); | ||
| 387 | if (net_ratelimit()) | ||
| 388 | printk(KERN_ERR "nf_queue: error creating packet message\n"); | ||
| 389 | return NULL; | ||
| 390 | } | ||
| 391 | |||
| 392 | static int | ||
| 393 | nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) | ||
| 394 | { | ||
| 395 | struct sk_buff *nskb; | ||
| 396 | struct nfqnl_instance *queue; | ||
| 397 | int err = -ENOBUFS; | ||
| 398 | __be32 *packet_id_ptr; | ||
| 399 | |||
| 400 | /* rcu_read_lock()ed by nf_hook_slow() */ | ||
| 401 | queue = instance_lookup(queuenum); | ||
| 402 | if (!queue) { | ||
| 403 | err = -ESRCH; | ||
| 404 | goto err_out; | ||
| 405 | } | ||
| 406 | |||
| 407 | if (queue->copy_mode == NFQNL_COPY_NONE) { | ||
| 408 | err = -EINVAL; | ||
| 409 | goto err_out; | ||
| 410 | } | ||
| 411 | |||
| 412 | nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr); | ||
| 413 | if (nskb == NULL) { | ||
| 414 | err = -ENOMEM; | ||
| 415 | goto err_out; | ||
| 416 | } | ||
| 417 | spin_lock_bh(&queue->lock); | ||
| 418 | |||
| 419 | if (!queue->peer_pid) { | ||
| 420 | err = -EINVAL; | ||
| 421 | goto err_out_free_nskb; | ||
| 422 | } | ||
| 423 | if (queue->queue_total >= queue->queue_maxlen) { | ||
| 424 | queue->queue_dropped++; | ||
| 425 | if (net_ratelimit()) | ||
| 426 | printk(KERN_WARNING "nf_queue: full at %d entries, " | ||
| 427 | "dropping packet(s).\n", | ||
| 428 | queue->queue_total); | ||
| 429 | goto err_out_free_nskb; | ||
| 430 | } | ||
| 431 | entry->id = ++queue->id_sequence; | ||
| 432 | *packet_id_ptr = htonl(entry->id); | ||
| 433 | |||
| 434 | /* nfnetlink_unicast will either free the nskb or add it to a socket */ | ||
| 435 | err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); | ||
| 436 | if (err < 0) { | ||
| 437 | queue->queue_user_dropped++; | ||
| 438 | goto err_out_unlock; | ||
| 439 | } | ||
| 440 | |||
| 441 | __enqueue_entry(queue, entry); | ||
| 442 | |||
| 443 | spin_unlock_bh(&queue->lock); | ||
| 444 | return 0; | ||
| 445 | |||
| 446 | err_out_free_nskb: | ||
| 447 | kfree_skb(nskb); | ||
| 448 | err_out_unlock: | ||
| 449 | spin_unlock_bh(&queue->lock); | ||
| 450 | err_out: | ||
| 451 | return err; | ||
| 452 | } | ||
| 453 | |||
| 454 | static int | ||
| 455 | nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e) | ||
| 456 | { | ||
| 457 | struct sk_buff *nskb; | ||
| 458 | int diff; | ||
| 459 | |||
| 460 | diff = data_len - e->skb->len; | ||
| 461 | if (diff < 0) { | ||
| 462 | if (pskb_trim(e->skb, data_len)) | ||
| 463 | return -ENOMEM; | ||
| 464 | } else if (diff > 0) { | ||
| 465 | if (data_len > 0xFFFF) | ||
| 466 | return -EINVAL; | ||
| 467 | if (diff > skb_tailroom(e->skb)) { | ||
| 468 | nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), | ||
| 469 | diff, GFP_ATOMIC); | ||
| 470 | if (!nskb) { | ||
| 471 | printk(KERN_WARNING "nf_queue: OOM " | ||
| 472 | "in mangle, dropping packet\n"); | ||
| 473 | return -ENOMEM; | ||
| 474 | } | ||
| 475 | kfree_skb(e->skb); | ||
| 476 | e->skb = nskb; | ||
| 477 | } | ||
| 478 | skb_put(e->skb, diff); | ||
| 479 | } | ||
| 480 | if (!skb_make_writable(e->skb, data_len)) | ||
| 481 | return -ENOMEM; | ||
| 482 | skb_copy_to_linear_data(e->skb, data, data_len); | ||
| 483 | e->skb->ip_summed = CHECKSUM_NONE; | ||
| 484 | return 0; | ||
| 485 | } | ||
| 486 | |||
| 487 | static int | ||
| 488 | nfqnl_set_mode(struct nfqnl_instance *queue, | ||
| 489 | unsigned char mode, unsigned int range) | ||
| 490 | { | ||
| 491 | int status = 0; | ||
| 492 | |||
| 493 | spin_lock_bh(&queue->lock); | ||
| 494 | switch (mode) { | ||
| 495 | case NFQNL_COPY_NONE: | ||
| 496 | case NFQNL_COPY_META: | ||
| 497 | queue->copy_mode = mode; | ||
| 498 | queue->copy_range = 0; | ||
| 499 | break; | ||
| 500 | |||
| 501 | case NFQNL_COPY_PACKET: | ||
| 502 | queue->copy_mode = mode; | ||
| 503 | /* we're using struct nlattr which has 16bit nla_len */ | ||
| 504 | if (range > 0xffff) | ||
| 505 | queue->copy_range = 0xffff; | ||
| 506 | else | ||
| 507 | queue->copy_range = range; | ||
| 508 | break; | ||
| 509 | |||
| 510 | default: | ||
| 511 | status = -EINVAL; | ||
| 512 | |||
| 513 | } | ||
| 514 | spin_unlock_bh(&queue->lock); | ||
| 515 | |||
| 516 | return status; | ||
| 517 | } | ||
| 518 | |||
| 519 | static int | ||
| 520 | dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) | ||
| 521 | { | ||
| 522 | if (entry->indev) | ||
| 523 | if (entry->indev->ifindex == ifindex) | ||
| 524 | return 1; | ||
| 525 | if (entry->outdev) | ||
| 526 | if (entry->outdev->ifindex == ifindex) | ||
| 527 | return 1; | ||
| 528 | #ifdef CONFIG_BRIDGE_NETFILTER | ||
| 529 | if (entry->skb->nf_bridge) { | ||
| 530 | if (entry->skb->nf_bridge->physindev && | ||
| 531 | entry->skb->nf_bridge->physindev->ifindex == ifindex) | ||
| 532 | return 1; | ||
| 533 | if (entry->skb->nf_bridge->physoutdev && | ||
| 534 | entry->skb->nf_bridge->physoutdev->ifindex == ifindex) | ||
| 535 | return 1; | ||
| 536 | } | ||
| 537 | #endif | ||
| 538 | return 0; | ||
| 539 | } | ||
| 540 | |||
| 541 | /* drop all packets with either indev or outdev == ifindex from all queue | ||
| 542 | * instances */ | ||
| 543 | static void | ||
| 544 | nfqnl_dev_drop(int ifindex) | ||
| 545 | { | ||
| 546 | int i; | ||
| 547 | |||
| 548 | rcu_read_lock(); | ||
| 549 | |||
| 550 | for (i = 0; i < INSTANCE_BUCKETS; i++) { | ||
| 551 | struct hlist_node *tmp; | ||
| 552 | struct nfqnl_instance *inst; | ||
| 553 | struct hlist_head *head = &instance_table[i]; | ||
| 554 | |||
| 555 | hlist_for_each_entry_rcu(inst, tmp, head, hlist) | ||
| 556 | nfqnl_flush(inst, dev_cmp, ifindex); | ||
| 557 | } | ||
| 558 | |||
| 559 | rcu_read_unlock(); | ||
| 560 | } | ||
| 561 | |||
| 562 | #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) | ||
| 563 | |||
| 564 | static int | ||
| 565 | nfqnl_rcv_dev_event(struct notifier_block *this, | ||
| 566 | unsigned long event, void *ptr) | ||
| 567 | { | ||
| 568 | struct net_device *dev = ptr; | ||
| 569 | |||
| 570 | if (!net_eq(dev_net(dev), &init_net)) | ||
| 571 | return NOTIFY_DONE; | ||
| 572 | |||
| 573 | /* Drop any packets associated with the downed device */ | ||
| 574 | if (event == NETDEV_DOWN) | ||
| 575 | nfqnl_dev_drop(dev->ifindex); | ||
| 576 | return NOTIFY_DONE; | ||
| 577 | } | ||
| 578 | |||
| 579 | static struct notifier_block nfqnl_dev_notifier = { | ||
| 580 | .notifier_call = nfqnl_rcv_dev_event, | ||
| 581 | }; | ||
| 582 | |||
| 583 | static int | ||
| 584 | nfqnl_rcv_nl_event(struct notifier_block *this, | ||
| 585 | unsigned long event, void *ptr) | ||
| 586 | { | ||
| 587 | struct netlink_notify *n = ptr; | ||
| 588 | |||
| 589 | if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { | ||
| 590 | int i; | ||
| 591 | |||
| 592 | /* destroy all instances for this pid */ | ||
| 593 | spin_lock(&instances_lock); | ||
| 594 | for (i = 0; i < INSTANCE_BUCKETS; i++) { | ||
| 595 | struct hlist_node *tmp, *t2; | ||
| 596 | struct nfqnl_instance *inst; | ||
| 597 | struct hlist_head *head = &instance_table[i]; | ||
| 598 | |||
| 599 | hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { | ||
| 600 | if ((n->net == &init_net) && | ||
| 601 | (n->pid == inst->peer_pid)) | ||
| 602 | __instance_destroy(inst); | ||
| 603 | } | ||
| 604 | } | ||
| 605 | spin_unlock(&instances_lock); | ||
| 606 | } | ||
| 607 | return NOTIFY_DONE; | ||
| 608 | } | ||
| 609 | |||
| 610 | static struct notifier_block nfqnl_rtnl_notifier = { | ||
| 611 | .notifier_call = nfqnl_rcv_nl_event, | ||
| 612 | }; | ||
| 613 | |||
| 614 | static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { | ||
| 615 | [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, | ||
| 616 | [NFQA_MARK] = { .type = NLA_U32 }, | ||
| 617 | [NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, | ||
| 618 | }; | ||
| 619 | |||
| 620 | static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { | ||
| 621 | [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, | ||
| 622 | [NFQA_MARK] = { .type = NLA_U32 }, | ||
| 623 | }; | ||
| 624 | |||
| 625 | static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) | ||
| 626 | { | ||
| 627 | struct nfqnl_instance *queue; | ||
| 628 | |||
| 629 | queue = instance_lookup(queue_num); | ||
| 630 | if (!queue) | ||
| 631 | return ERR_PTR(-ENODEV); | ||
| 632 | |||
| 633 | if (queue->peer_pid != nlpid) | ||
| 634 | return ERR_PTR(-EPERM); | ||
| 635 | |||
| 636 | return queue; | ||
| 637 | } | ||
| 638 | |||
| 639 | static struct nfqnl_msg_verdict_hdr* | ||
| 640 | verdicthdr_get(const struct nlattr * const nfqa[]) | ||
| 641 | { | ||
| 642 | struct nfqnl_msg_verdict_hdr *vhdr; | ||
| 643 | unsigned int verdict; | ||
| 644 | |||
| 645 | if (!nfqa[NFQA_VERDICT_HDR]) | ||
| 646 | return NULL; | ||
| 647 | |||
| 648 | vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]); | ||
| 649 | verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK; | ||
| 650 | if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN) | ||
| 651 | return NULL; | ||
| 652 | return vhdr; | ||
| 653 | } | ||
| 654 | |||
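| | /* True iff id is logically after max, tolerating 32-bit sequence wrap-around. */ | ||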
| 655 | static int nfq_id_after(unsigned int id, unsigned int max) | ||
| 656 | { | ||
| 657 | return (int)(id - max) > 0; | ||
| 658 | } | ||
| 659 | |||
| 660 | static int | ||
| 661 | nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, | ||
| 662 | const struct nlmsghdr *nlh, | ||
| 663 | const struct nlattr * const nfqa[]) | ||
| 664 | { | ||
| 665 | struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); | ||
| 666 | struct nf_queue_entry *entry, *tmp; | ||
| 667 | unsigned int verdict, maxid; | ||
| 668 | struct nfqnl_msg_verdict_hdr *vhdr; | ||
| 669 | struct nfqnl_instance *queue; | ||
| 670 | LIST_HEAD(batch_list); | ||
| 671 | u16 queue_num = ntohs(nfmsg->res_id); | ||
| 672 | |||
| 673 | queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); | ||
| 674 | if (IS_ERR(queue)) | ||
| 675 | return PTR_ERR(queue); | ||
| 676 | |||
| 677 | vhdr = verdicthdr_get(nfqa); | ||
| 678 | if (!vhdr) | ||
| 679 | return -EINVAL; | ||
| 680 | |||
| 681 | verdict = ntohl(vhdr->verdict); | ||
| 682 | maxid = ntohl(vhdr->id); | ||
| 683 | |||
| 684 | spin_lock_bh(&queue->lock); | ||
| 685 | |||
| 686 | list_for_each_entry_safe(entry, tmp, &queue->queue_list, list) { | ||
| 687 | if (nfq_id_after(entry->id, maxid)) | ||
| 688 | break; | ||
| 689 | __dequeue_entry(queue, entry); | ||
| 690 | list_add_tail(&entry->list, &batch_list); | ||
| 691 | } | ||
| 692 | |||
| 693 | spin_unlock_bh(&queue->lock); | ||
| 694 | |||
| 695 | if (list_empty(&batch_list)) | ||
| 696 | return -ENOENT; | ||
| 697 | |||
| 698 | list_for_each_entry_safe(entry, tmp, &batch_list, list) { | ||
| 699 | if (nfqa[NFQA_MARK]) | ||
| 700 | entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); | ||
| 701 | nf_reinject(entry, verdict); | ||
| 702 | } | ||
| 703 | return 0; | ||
| 704 | } | ||
| 705 | |||
| 706 | static int | ||
| 707 | nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, | ||
| 708 | const struct nlmsghdr *nlh, | ||
| 709 | const struct nlattr * const nfqa[]) | ||
| 710 | { | ||
| 711 | struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); | ||
| 712 | u_int16_t queue_num = ntohs(nfmsg->res_id); | ||
| 713 | |||
| 714 | struct nfqnl_msg_verdict_hdr *vhdr; | ||
| 715 | struct nfqnl_instance *queue; | ||
| 716 | unsigned int verdict; | ||
| 717 | struct nf_queue_entry *entry; | ||
| 718 | |||
| 719 | queue = instance_lookup(queue_num); | ||
| 720 | if (!queue) | ||
| 721 | queue = verdict_instance_lookup(queue_num, | ||
| 722 | NETLINK_CB(skb).pid); | ||
| 723 | if (IS_ERR(queue)) | ||
| 724 | return PTR_ERR(queue); | ||
| 725 | |||
| 726 | vhdr = verdicthdr_get(nfqa); | ||
| 727 | if (!vhdr) | ||
| 728 | return -EINVAL; | ||
| 729 | |||
| 730 | verdict = ntohl(vhdr->verdict); | ||
| 731 | |||
| 732 | entry = find_dequeue_entry(queue, ntohl(vhdr->id)); | ||
| 733 | if (entry == NULL) | ||
| 734 | return -ENOENT; | ||
| 735 | |||
| 736 | if (nfqa[NFQA_PAYLOAD]) { | ||
| 737 | if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), | ||
| 738 | nla_len(nfqa[NFQA_PAYLOAD]), entry) < 0) | ||
| 739 | verdict = NF_DROP; | ||
| 740 | } | ||
| 741 | |||
| 742 | if (nfqa[NFQA_MARK]) | ||
| 743 | entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); | ||
| 744 | |||
| 745 | nf_reinject(entry, verdict); | ||
| 746 | return 0; | ||
| 747 | } | ||
| 748 | |||
| 749 | static int | ||
| 750 | nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, | ||
| 751 | const struct nlmsghdr *nlh, | ||
| 752 | const struct nlattr * const nfqa[]) | ||
| 753 | { | ||
| 754 | return -ENOTSUPP; | ||
| 755 | } | ||
| 756 | |||
| 757 | static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { | ||
| 758 | [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) }, | ||
| 759 | [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, | ||
| 760 | }; | ||
| 761 | |||
| 762 | static const struct nf_queue_handler nfqh = { | ||
| 763 | .name = "nf_queue", | ||
| 764 | .outfn = &nfqnl_enqueue_packet, | ||
| 765 | }; | ||
| 766 | |||
| 767 | static int | ||
| 768 | nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, | ||
| 769 | const struct nlmsghdr *nlh, | ||
| 770 | const struct nlattr * const nfqa[]) | ||
| 771 | { | ||
| 772 | struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); | ||
| 773 | u_int16_t queue_num = ntohs(nfmsg->res_id); | ||
| 774 | struct nfqnl_instance *queue; | ||
| 775 | struct nfqnl_msg_config_cmd *cmd = NULL; | ||
| 776 | int ret = 0; | ||
| 777 | |||
| 778 | if (nfqa[NFQA_CFG_CMD]) { | ||
| 779 | cmd = nla_data(nfqa[NFQA_CFG_CMD]); | ||
| 780 | |||
| 781 | /* Commands without queue context - might sleep */ | ||
| 782 | switch (cmd->command) { | ||
| 783 | case NFQNL_CFG_CMD_PF_BIND: | ||
| 784 | return nf_register_queue_handler(ntohs(cmd->pf), | ||
| 785 | &nfqh); | ||
| 786 | case NFQNL_CFG_CMD_PF_UNBIND: | ||
| 787 | return nf_unregister_queue_handler(ntohs(cmd->pf), | ||
| 788 | &nfqh); | ||
| 789 | } | ||
| 790 | } | ||
| 791 | |||
| 792 | rcu_read_lock(); | ||
| 793 | queue = instance_lookup(queue_num); | ||
| 794 | if (queue && queue->peer_pid != NETLINK_CB(skb).pid) { | ||
| 795 | ret = -EPERM; | ||
| 796 | goto err_out_unlock; | ||
| 797 | } | ||
| 798 | |||
| 799 | if (cmd != NULL) { | ||
| 800 | switch (cmd->command) { | ||
| 801 | case NFQNL_CFG_CMD_BIND: | ||
| 802 | if (queue) { | ||
| 803 | ret = -EBUSY; | ||
| 804 | goto err_out_unlock; | ||
| 805 | } | ||
| 806 | queue = instance_create(queue_num, NETLINK_CB(skb).pid); | ||
| 807 | if (IS_ERR(queue)) { | ||
| 808 | ret = PTR_ERR(queue); | ||
| 809 | goto err_out_unlock; | ||
| 810 | } | ||
| 811 | break; | ||
| 812 | case NFQNL_CFG_CMD_UNBIND: | ||
| 813 | if (!queue) { | ||
| 814 | ret = -ENODEV; | ||
| 815 | goto err_out_unlock; | ||
| 816 | } | ||
| 817 | instance_destroy(queue); | ||
| 818 | break; | ||
| 819 | case NFQNL_CFG_CMD_PF_BIND: | ||
| 820 | case NFQNL_CFG_CMD_PF_UNBIND: | ||
| 821 | break; | ||
| 822 | default: | ||
| 823 | ret = -ENOTSUPP; | ||
| 824 | break; | ||
| 825 | } | ||
| 826 | } | ||
| 827 | |||
| 828 | if (nfqa[NFQA_CFG_PARAMS]) { | ||
| 829 | struct nfqnl_msg_config_params *params; | ||
| 830 | |||
| 831 | if (!queue) { | ||
| 832 | ret = -ENODEV; | ||
| 833 | goto err_out_unlock; | ||
| 834 | } | ||
| 835 | params = nla_data(nfqa[NFQA_CFG_PARAMS]); | ||
| 836 | nfqnl_set_mode(queue, params->copy_mode, | ||
| 837 | ntohl(params->copy_range)); | ||
| 838 | } | ||
| 839 | |||
| 840 | if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) { | ||
| 841 | __be32 *queue_maxlen; | ||
| 842 | |||
| 843 | if (!queue) { | ||
| 844 | ret = -ENODEV; | ||
| 845 | goto err_out_unlock; | ||
| 846 | } | ||
| 847 | queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]); | ||
| 848 | spin_lock_bh(&queue->lock); | ||
| 849 | queue->queue_maxlen = ntohl(*queue_maxlen); | ||
| 850 | spin_unlock_bh(&queue->lock); | ||
| 851 | } | ||
| 852 | |||
| 853 | err_out_unlock: | ||
| 854 | rcu_read_unlock(); | ||
| 855 | return ret; | ||
| 856 | } | ||
| 857 | |||
| 858 | static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { | ||
| 859 | [NFQNL_MSG_PACKET] = { .call_rcu = nfqnl_recv_unsupp, | ||
| 860 | .attr_count = NFQA_MAX, }, | ||
| 861 | [NFQNL_MSG_VERDICT] = { .call_rcu = nfqnl_recv_verdict, | ||
| 862 | .attr_count = NFQA_MAX, | ||
| 863 | .policy = nfqa_verdict_policy }, | ||
| 864 | [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, | ||
| 865 | .attr_count = NFQA_CFG_MAX, | ||
| 866 | .policy = nfqa_cfg_policy }, | ||
| 867 | [NFQNL_MSG_VERDICT_BATCH]={ .call_rcu = nfqnl_recv_verdict_batch, | ||
| 868 | .attr_count = NFQA_MAX, | ||
| 869 | .policy = nfqa_verdict_batch_policy }, | ||
| 870 | }; | ||
| 871 | |||
| 872 | static const struct nfnetlink_subsystem nfqnl_subsys = { | ||
| 873 | .name = "nf_queue", | ||
| 874 | .subsys_id = NFNL_SUBSYS_QUEUE, | ||
| 875 | .cb_count = NFQNL_MSG_MAX, | ||
| 876 | .cb = nfqnl_cb, | ||
| 877 | }; | ||
| 878 | |||
| 879 | #ifdef CONFIG_PROC_FS | ||
| 880 | struct iter_state { | ||
| 881 | unsigned int bucket; | ||
| 882 | }; | ||
| 883 | |||
| 884 | static struct hlist_node *get_first(struct seq_file *seq) | ||
| 885 | { | ||
| 886 | struct iter_state *st = seq->private; | ||
| 887 | |||
| 888 | if (!st) | ||
| 889 | return NULL; | ||
| 890 | |||
| 891 | for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { | ||
| 892 | if (!hlist_empty(&instance_table[st->bucket])) | ||
| 893 | return instance_table[st->bucket].first; | ||
| 894 | } | ||
| 895 | return NULL; | ||
| 896 | } | ||
| 897 | |||
| 898 | static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) | ||
| 899 | { | ||
| 900 | struct iter_state *st = seq->private; | ||
| 901 | |||
| 902 | h = h->next; | ||
| 903 | while (!h) { | ||
| 904 | if (++st->bucket >= INSTANCE_BUCKETS) | ||
| 905 | return NULL; | ||
| 906 | |||
| 907 | h = instance_table[st->bucket].first; | ||
| 908 | } | ||
| 909 | return h; | ||
| 910 | } | ||
| 911 | |||
| 912 | static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) | ||
| 913 | { | ||
| 914 | struct hlist_node *head; | ||
| 915 | head = get_first(seq); | ||
| 916 | |||
| 917 | if (head) | ||
| 918 | while (pos && (head = get_next(seq, head))) | ||
| 919 | pos--; | ||
| 920 | return pos ? NULL : head; | ||
| 921 | } | ||
| 922 | |||
| 923 | static void *seq_start(struct seq_file *seq, loff_t *pos) | ||
| 924 | __acquires(instances_lock) | ||
| 925 | { | ||
| 926 | spin_lock(&instances_lock); | ||
| 927 | return get_idx(seq, *pos); | ||
| 928 | } | ||
| 929 | |||
| 930 | static void *seq_next(struct seq_file *s, void *v, loff_t *pos) | ||
| 931 | { | ||
| 932 | (*pos)++; | ||
| 933 | return get_next(s, v); | ||
| 934 | } | ||
| 935 | |||
| 936 | static void seq_stop(struct seq_file *s, void *v) | ||
| 937 | __releases(instances_lock) | ||
| 938 | { | ||
| 939 | spin_unlock(&instances_lock); | ||
| 940 | } | ||
| 941 | |||
| 942 | static int seq_show(struct seq_file *s, void *v) | ||
| 943 | { | ||
| 944 | const struct nfqnl_instance *inst = v; | ||
| 945 | |||
| 946 | return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", | ||
| 947 | inst->queue_num, | ||
| 948 | inst->peer_pid, inst->queue_total, | ||
| 949 | inst->copy_mode, inst->copy_range, | ||
| 950 | inst->queue_dropped, inst->queue_user_dropped, | ||
| 951 | inst->id_sequence, 1); | ||
| 952 | } | ||
| 953 | |||
| 954 | static const struct seq_operations nfqnl_seq_ops = { | ||
| 955 | .start = seq_start, | ||
| 956 | .next = seq_next, | ||
| 957 | .stop = seq_stop, | ||
| 958 | .show = seq_show, | ||
| 959 | }; | ||
| 960 | |||
| 961 | static int nfqnl_open(struct inode *inode, struct file *file) | ||
| 962 | { | ||
| 963 | return seq_open_private(file, &nfqnl_seq_ops, | ||
| 964 | sizeof(struct iter_state)); | ||
| 965 | } | ||
| 966 | |||
| 967 | static const struct file_operations nfqnl_file_ops = { | ||
| 968 | .owner = THIS_MODULE, | ||
| 969 | .open = nfqnl_open, | ||
| 970 | .read = seq_read, | ||
| 971 | .llseek = seq_lseek, | ||
| 972 | .release = seq_release_private, | ||
| 973 | }; | ||
| 974 | |||
| 975 | #endif /* PROC_FS */ | ||
| 976 | |||
| 977 | static int __init nfnetlink_queue_init(void) | ||
| 978 | { | ||
| 979 | int i, status = -ENOMEM; | ||
| 980 | |||
| 981 | for (i = 0; i < INSTANCE_BUCKETS; i++) | ||
| 982 | INIT_HLIST_HEAD(&instance_table[i]); | ||
| 983 | |||
| 984 | netlink_register_notifier(&nfqnl_rtnl_notifier); | ||
| 985 | status = nfnetlink_subsys_register(&nfqnl_subsys); | ||
| 986 | if (status < 0) { | ||
| 987 | printk(KERN_ERR "nf_queue: failed to create netlink socket\n"); | ||
| 988 | goto cleanup_netlink_notifier; | ||
| 989 | } | ||
| 990 | |||
| 991 | #ifdef CONFIG_PROC_FS | ||
| 992 | if (!proc_create("nfnetlink_queue", 0440, | ||
| 993 | proc_net_netfilter, &nfqnl_file_ops)) | ||
| 994 | goto cleanup_subsys; | ||
| 995 | #endif | ||
| 996 | |||
| 997 | register_netdevice_notifier(&nfqnl_dev_notifier); | ||
| 998 | return status; | ||
| 999 | |||
| 1000 | #ifdef CONFIG_PROC_FS | ||
| 1001 | cleanup_subsys: | ||
| 1002 | nfnetlink_subsys_unregister(&nfqnl_subsys); | ||
| 1003 | #endif | ||
| 1004 | cleanup_netlink_notifier: | ||
| 1005 | netlink_unregister_notifier(&nfqnl_rtnl_notifier); | ||
| 1006 | return status; | ||
| 1007 | } | ||
| 1008 | |||
| 1009 | static void __exit nfnetlink_queue_fini(void) | ||
| 1010 | { | ||
| 1011 | nf_unregister_queue_handlers(&nfqh); | ||
| 1012 | unregister_netdevice_notifier(&nfqnl_dev_notifier); | ||
| 1013 | #ifdef CONFIG_PROC_FS | ||
| 1014 | remove_proc_entry("nfnetlink_queue", proc_net_netfilter); | ||
| 1015 | #endif | ||
| 1016 | nfnetlink_subsys_unregister(&nfqnl_subsys); | ||
| 1017 | netlink_unregister_notifier(&nfqnl_rtnl_notifier); | ||
| 1018 | |||
| 1019 | rcu_barrier(); /* Wait for completion of call_rcu()'s */ | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | MODULE_DESCRIPTION("netfilter packet queue handler"); | ||
| 1023 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | ||
| 1024 | MODULE_LICENSE("GPL"); | ||
| 1025 | MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE); | ||
| 1026 | |||
| 1027 | module_init(nfnetlink_queue_init); | ||
| 1028 | module_exit(nfnetlink_queue_fini); | ||
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
new file mode 100644
index 00000000000..9d782181b6c
--- /dev/null
+++ b/net/netfilter/xt_NOTRACK.c
| @@ -0,0 +1,53 @@ | |||
| 1 | /* This is a module which is used for setting up fake conntracks | ||
| 2 | * on packets so that they are not seen by the conntrack/NAT code. | ||
| 3 | */ | ||
| 4 | #include <linux/module.h> | ||
| 5 | #include <linux/skbuff.h> | ||
| 6 | |||
| 7 | #include <linux/netfilter/x_tables.h> | ||
| 8 | #include <net/netfilter/nf_conntrack.h> | ||
| 9 | |||
| 10 | MODULE_DESCRIPTION("Xtables: Disabling connection tracking for packets"); | ||
| 11 | MODULE_LICENSE("GPL"); | ||
| 12 | MODULE_ALIAS("ipt_NOTRACK"); | ||
| 13 | MODULE_ALIAS("ip6t_NOTRACK"); | ||
| 14 | |||
| 15 | static unsigned int | ||
| 16 | notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) | ||
| 17 | { | ||
| 18 | /* Previously seen (loopback)? Ignore. */ | ||
| 19 | if (skb->nfct != NULL) | ||
| 20 | return XT_CONTINUE; | ||
| 21 | |||
| 22 | /* Attach fake conntrack entry. | ||
| 23 | If there is a real ct entry corresponding to this packet, | ||
| 24 | it'll hang around till timing out. We don't deal with it | ||
| 25 | for performance reasons. JK */ | ||
| 26 | skb->nfct = &nf_ct_untracked_get()->ct_general; | ||
| 27 | skb->nfctinfo = IP_CT_NEW; | ||
| 28 | nf_conntrack_get(skb->nfct); | ||
| 29 | |||
| 30 | return XT_CONTINUE; | ||
| 31 | } | ||
| 32 | |||
| 33 | static struct xt_target notrack_tg_reg __read_mostly = { | ||
| 34 | .name = "NOTRACK", | ||
| 35 | .revision = 0, | ||
| 36 | .family = NFPROTO_UNSPEC, | ||
| 37 | .target = notrack_tg, | ||
| 38 | .table = "raw", | ||
| 39 | .me = THIS_MODULE, | ||
| 40 | }; | ||
| 41 | |||
| 42 | static int __init notrack_tg_init(void) | ||
| 43 | { | ||
| 44 | return xt_register_target(¬rack_tg_reg); | ||
| 45 | } | ||
| 46 | |||
| 47 | static void __exit notrack_tg_exit(void) | ||
| 48 | { | ||
| 49 | xt_unregister_target(¬rack_tg_reg); | ||
| 50 | } | ||
| 51 | |||
| 52 | module_init(notrack_tg_init); | ||
| 53 | module_exit(notrack_tg_exit); | ||
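Usage context (not part of this commit): the target registration above restricts NOTRACK to the raw table, so a typical rule exempting traffic from connection tracking looks like

	iptables -t raw -A PREROUTING -p udp --dport 53 -j NOTRACK

Packets hitting the rule get the untracked conntrack stub attached before the conntrack hooks run, so conntrack and NAT leave the flow alone.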
diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c
new file mode 100644
index 00000000000..08086d680c2
--- /dev/null
+++ b/net/netfilter/xt_qtaguid.c
| @@ -0,0 +1,2785 @@ | |||
| 1 | /* | ||
| 2 | * Kernel iptables module to track stats for packets based on user tags. | ||
| 3 | * | ||
| 4 | * (C) 2011 Google, Inc | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License version 2 as | ||
| 8 | * published by the Free Software Foundation. | ||
| 9 | */ | ||
| 10 | |||
| 11 | /* | ||
| 12 | * Run-time debug flags are enabled via the debug_mask module param, or via | ||
| 13 | * the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h. | ||
| 14 | */ | ||
| 15 | #define DEBUG | ||
| 16 | |||
| 17 | #include <linux/file.h> | ||
| 18 | #include <linux/inetdevice.h> | ||
| 19 | #include <linux/module.h> | ||
| 20 | #include <linux/netfilter/x_tables.h> | ||
| 21 | #include <linux/netfilter/xt_qtaguid.h> | ||
| 22 | #include <linux/skbuff.h> | ||
| 23 | #include <linux/workqueue.h> | ||
| 24 | #include <net/addrconf.h> | ||
| 25 | #include <net/sock.h> | ||
| 26 | #include <net/tcp.h> | ||
| 27 | #include <net/udp.h> | ||
| 28 | |||
| 29 | #include <linux/netfilter/xt_socket.h> | ||
| 30 | #include "xt_qtaguid_internal.h" | ||
| 31 | #include "xt_qtaguid_print.h" | ||
| 32 | |||
| 33 | /* | ||
| 34 | * We only use the xt_socket funcs within a similar context to avoid unexpected | ||
| 35 | * return values. | ||
| 36 | */ | ||
| 37 | #define XT_SOCKET_SUPPORTED_HOOKS \ | ||
| 38 | ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) | ||
| 39 | |||
| 40 | |||
| 41 | static const char *module_procdirname = "xt_qtaguid"; | ||
| 42 | static struct proc_dir_entry *xt_qtaguid_procdir; | ||
| 43 | |||
| 44 | static unsigned int proc_iface_perms = S_IRUGO; | ||
| 45 | module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR); | ||
| 46 | |||
| 47 | static struct proc_dir_entry *xt_qtaguid_stats_file; | ||
| 48 | static unsigned int proc_stats_perms = S_IRUGO; | ||
| 49 | module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR); | ||
| 50 | |||
| 51 | static struct proc_dir_entry *xt_qtaguid_ctrl_file; | ||
| 52 | #ifdef CONFIG_ANDROID_PARANOID_NETWORK | ||
| 53 | static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO; | ||
| 54 | #else | ||
| 55 | static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR; | ||
| 56 | #endif | ||
| 57 | module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR); | ||
| 58 | |||
| 59 | #ifdef CONFIG_ANDROID_PARANOID_NETWORK | ||
| 60 | #include <linux/android_aid.h> | ||
| 61 | static gid_t proc_stats_readall_gid = AID_NET_BW_STATS; | ||
| 62 | static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT; | ||
| 63 | #else | ||
| 64 | /* 0 means don't limit anybody */ | ||
| 65 | static gid_t proc_stats_readall_gid; | ||
| 66 | static gid_t proc_ctrl_write_gid; | ||
| 67 | #endif | ||
| 68 | module_param_named(stats_readall_gid, proc_stats_readall_gid, uint, | ||
| 69 | S_IRUGO | S_IWUSR); | ||
| 70 | module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint, | ||
| 71 | S_IRUGO | S_IWUSR); | ||
| 72 | |||
| 73 | /* | ||
| 74 | * Limit the number of active tags (via socket tags) for a given UID. | ||
| 75 | * Multiple processes could share the UID. | ||
| 76 | */ | ||
| 77 | static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS; | ||
| 78 | module_param(max_sock_tags, int, S_IRUGO | S_IWUSR); | ||
| 79 | |||
| 80 | /* | ||
| 81 | * After the kernel has initialized this module, it is still possible | ||
| 82 | * to make it passive. | ||
| 83 | * Setting passive to Y: | ||
| 84 | * - the iface stats handling will not act on notifications. | ||
| 85 | * - iptables matches will never match. | ||
| 86 | * - ctrl commands silently succeed. | ||
| 87 | * - stats are always empty. | ||
| 88 | * This is mostly useful when a bug is suspected. | ||
| 89 | */ | ||
| 90 | static bool module_passive; | ||
| 91 | module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR); | ||
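| | /* Illustrative note (not in the original source): with the S_IWUSR bit above, | ||
| | * passive mode can be toggled at run time, e.g.: | ||
| | * echo Y > /sys/module/xt_qtaguid/parameters/passive | ||
| | */ | ||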
| 92 | |||
| 93 | /* | ||
| 94 | * Control how qtaguid data is tracked per proc/uid. | ||
| 95 | * Setting tag_tracking_passive to Y: | ||
| 96 | * - don't create proc specific structs to track tags | ||
| 97 | * - don't check that active tag stats exceed some limits. | ||
| 98 | * - don't clean up socket tags on process exits. | ||
| 99 | * This is mostly useful when a bug is suspected. | ||
| 100 | */ | ||
| 101 | static bool qtu_proc_handling_passive; | ||
| 102 | module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool, | ||
| 103 | S_IRUGO | S_IWUSR); | ||
| 104 | |||
| 105 | #define QTU_DEV_NAME "xt_qtaguid" | ||
| 106 | |||
| 107 | uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK; | ||
| 108 | module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR); | ||
| 109 | |||
| 110 | /*---------------------------------------------------------------------------*/ | ||
| 111 | static const char *iface_stat_procdirname = "iface_stat"; | ||
| 112 | static struct proc_dir_entry *iface_stat_procdir; | ||
| 113 | static const char *iface_stat_all_procfilename = "iface_stat_all"; | ||
| 114 | static struct proc_dir_entry *iface_stat_all_procfile; | ||
| 115 | |||
| 116 | /* | ||
| 117 | * Ordering of locks: | ||
| 118 | * outer locks: | ||
| 119 | * iface_stat_list_lock | ||
| 120 | * sock_tag_list_lock | ||
| 121 | * inner locks: | ||
| 122 | * uid_tag_data_tree_lock | ||
| 123 | * tag_counter_set_list_lock | ||
| 124 | * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock | ||
| 125 | * is acquired. | ||
| 126 | * | ||
| 127 | * Call tree with all lock holders as of 2011-09-25: | ||
| 128 | * | ||
| 129 | * iface_stat_all_proc_read() | ||
| 130 | * iface_stat_list_lock | ||
| 131 | * (struct iface_stat) | ||
| 132 | * | ||
| 133 | * qtaguid_ctrl_proc_read() | ||
| 134 | * sock_tag_list_lock | ||
| 135 | * (sock_tag_tree) | ||
| 136 | * (struct proc_qtu_data->sock_tag_list) | ||
| 137 | * prdebug_full_state() | ||
| 138 | * sock_tag_list_lock | ||
| 139 | * (sock_tag_tree) | ||
| 140 | * uid_tag_data_tree_lock | ||
| 141 | * (uid_tag_data_tree) | ||
| 142 | * (proc_qtu_data_tree) | ||
| 143 | * iface_stat_list_lock | ||
| 144 | * | ||
| 145 | * qtaguid_stats_proc_read() | ||
| 146 | * iface_stat_list_lock | ||
| 147 | * struct iface_stat->tag_stat_list_lock | ||
| 148 | * | ||
| 149 | * qtudev_open() | ||
| 150 | * uid_tag_data_tree_lock | ||
| 151 | * | ||
| 152 | * qtudev_release() | ||
| 153 | * sock_tag_data_list_lock | ||
| 154 | * uid_tag_data_tree_lock | ||
| 155 | * prdebug_full_state() | ||
| 156 | * sock_tag_list_lock | ||
| 157 | * uid_tag_data_tree_lock | ||
| 158 | * iface_stat_list_lock | ||
| 159 | * | ||
| 160 | * iface_netdev_event_handler() | ||
| 161 | * iface_stat_create() | ||
| 162 | * iface_stat_list_lock | ||
| 163 | * iface_stat_update() | ||
| 164 | * iface_stat_list_lock | ||
| 165 | * | ||
| 166 | * iface_inetaddr_event_handler() | ||
| 167 | * iface_stat_create() | ||
| 168 | * iface_stat_list_lock | ||
| 169 | * iface_stat_update() | ||
| 170 | * iface_stat_list_lock | ||
| 171 | * | ||
| 172 | * iface_inet6addr_event_handler() | ||
| 173 | * iface_stat_create_ipv6() | ||
| 174 | * iface_stat_list_lock | ||
| 175 | * iface_stat_update() | ||
| 176 | * iface_stat_list_lock | ||
| 177 | * | ||
| 178 | * qtaguid_mt() | ||
| 179 | * account_for_uid() | ||
| 180 | * if_tag_stat_update() | ||
| 181 | * get_sock_stat() | ||
| 182 | * sock_tag_list_lock | ||
| 183 | * struct iface_stat->tag_stat_list_lock | ||
| 184 | * tag_stat_update() | ||
| 185 | * get_active_counter_set() | ||
| 186 | * tag_counter_set_list_lock | ||
| 187 | * tag_stat_update() | ||
| 188 | * get_active_counter_set() | ||
| 189 | * tag_counter_set_list_lock | ||
| 190 | * | ||
| 191 | * | ||
| 192 | * qtaguid_ctrl_parse() | ||
| 193 | * ctrl_cmd_delete() | ||
| 194 | * sock_tag_list_lock | ||
| 195 | * tag_counter_set_list_lock | ||
| 196 | * iface_stat_list_lock | ||
| 197 | * struct iface_stat->tag_stat_list_lock | ||
| 198 | * uid_tag_data_tree_lock | ||
| 199 | * ctrl_cmd_counter_set() | ||
| 200 | * tag_counter_set_list_lock | ||
| 201 | * ctrl_cmd_tag() | ||
| 202 | * sock_tag_list_lock | ||
| 203 | * (sock_tag_tree) | ||
| 204 | * get_tag_ref() | ||
| 205 | * uid_tag_data_tree_lock | ||
| 206 | * (uid_tag_data_tree) | ||
| 207 | * uid_tag_data_tree_lock | ||
| 208 | * (proc_qtu_data_tree) | ||
| 209 | * ctrl_cmd_untag() | ||
| 210 | * sock_tag_list_lock | ||
| 211 | * uid_tag_data_tree_lock | ||
| 212 | * | ||
| 213 | */ | ||
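| | /* Worked example of the ordering above (illustrative; mirrors the | ||
| | * ctrl_cmd_untag() entry in the call tree). The outer lock is taken first: | ||
| | * | ||
| | * spin_lock_bh(&sock_tag_list_lock); outer | ||
| | * spin_lock_bh(&uid_tag_data_tree_lock); inner | ||
| | * ... look up and update the tag data ... | ||
| | * spin_unlock_bh(&uid_tag_data_tree_lock); | ||
| | * spin_unlock_bh(&sock_tag_list_lock); | ||
| | */ | ||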
| 214 | static LIST_HEAD(iface_stat_list); | ||
| 215 | static DEFINE_SPINLOCK(iface_stat_list_lock); | ||
| 216 | |||
| 217 | static struct rb_root sock_tag_tree = RB_ROOT; | ||
| 218 | static DEFINE_SPINLOCK(sock_tag_list_lock); | ||
| 219 | |||
| 220 | static struct rb_root tag_counter_set_tree = RB_ROOT; | ||
| 221 | static DEFINE_SPINLOCK(tag_counter_set_list_lock); | ||
| 222 | |||
| 223 | static struct rb_root uid_tag_data_tree = RB_ROOT; | ||
| 224 | static DEFINE_SPINLOCK(uid_tag_data_tree_lock); | ||
| 225 | |||
| 226 | static struct rb_root proc_qtu_data_tree = RB_ROOT; | ||
| 227 | /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */ | ||
| 228 | |||
| 229 | static struct qtaguid_event_counts qtu_events; | ||
| 230 | /*----------------------------------------------*/ | ||
| 231 | static bool can_manipulate_uids(void) | ||
| 232 | { | ||
| 233 | /* root pwnd */ | ||
| 234 | return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid) | ||
| 235 | || in_egroup_p(proc_ctrl_write_gid); | ||
| 236 | } | ||
| 237 | |||
| 238 | static bool can_impersonate_uid(uid_t uid) | ||
| 239 | { | ||
| 240 | return uid == current_fsuid() || can_manipulate_uids(); | ||
| 241 | } | ||
| 242 | |||
| 243 | static bool can_read_other_uid_stats(uid_t uid) | ||
| 244 | { | ||
| 245 | /* root pwnd */ | ||
| 246 | return unlikely(!current_fsuid()) || uid == current_fsuid() | ||
| 247 | || unlikely(!proc_stats_readall_gid) | ||
| 248 | || in_egroup_p(proc_stats_readall_gid); | ||
| 249 | } | ||
| 250 | |||
| 251 | static inline void dc_add_byte_packets(struct data_counters *counters, int set, | ||
| 252 | enum ifs_tx_rx direction, | ||
| 253 | enum ifs_proto ifs_proto, | ||
| 254 | int bytes, | ||
| 255 | int packets) | ||
| 256 | { | ||
| 257 | counters->bpc[set][direction][ifs_proto].bytes += bytes; | ||
| 258 | counters->bpc[set][direction][ifs_proto].packets += packets; | ||
| 259 | } | ||
| 260 | |||
| 261 | static inline uint64_t dc_sum_bytes(struct data_counters *counters, | ||
| 262 | int set, | ||
| 263 | enum ifs_tx_rx direction) | ||
| 264 | { | ||
| 265 | return counters->bpc[set][direction][IFS_TCP].bytes | ||
| 266 | + counters->bpc[set][direction][IFS_UDP].bytes | ||
| 267 | + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes; | ||
| 268 | } | ||
| 269 | |||
| 270 | static inline uint64_t dc_sum_packets(struct data_counters *counters, | ||
| 271 | int set, | ||
| 272 | enum ifs_tx_rx direction) | ||
| 273 | { | ||
| 274 | return counters->bpc[set][direction][IFS_TCP].packets | ||
| 275 | + counters->bpc[set][direction][IFS_UDP].packets | ||
| 276 | + counters->bpc[set][direction][IFS_PROTO_OTHER].packets; | ||
| 277 | } | ||
| 278 | |||
| 279 | static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag) | ||
| 280 | { | ||
| 281 | struct rb_node *node = root->rb_node; | ||
| 282 | |||
| 283 | while (node) { | ||
| 284 | struct tag_node *data = rb_entry(node, struct tag_node, node); | ||
| 285 | int result; | ||
| 286 | RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " | ||
| 287 | " node=%p data=%p\n", tag, node, data); | ||
| 288 | result = tag_compare(tag, data->tag); | ||
| 289 | RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): " | ||
| 290 | " data.tag=0x%llx (uid=%u) res=%d\n", | ||
| 291 | tag, data->tag, get_uid_from_tag(data->tag), result); | ||
| 292 | if (result < 0) | ||
| 293 | node = node->rb_left; | ||
| 294 | else if (result > 0) | ||
| 295 | node = node->rb_right; | ||
| 296 | else | ||
| 297 | return data; | ||
| 298 | } | ||
| 299 | return NULL; | ||
| 300 | } | ||
| 301 | |||
| 302 | static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root) | ||
| 303 | { | ||
| 304 | struct rb_node **new = &(root->rb_node), *parent = NULL; | ||
| 305 | |||
| 306 | /* Figure out where to put new node */ | ||
| 307 | while (*new) { | ||
| 308 | struct tag_node *this = rb_entry(*new, struct tag_node, | ||
| 309 | node); | ||
| 310 | int result = tag_compare(data->tag, this->tag); | ||
| 311 | RB_DEBUG("qtaguid: %s(): tag=0x%llx" | ||
| 312 | " (uid=%u)\n", __func__, | ||
| 313 | this->tag, | ||
| 314 | get_uid_from_tag(this->tag)); | ||
| 315 | parent = *new; | ||
| 316 | if (result < 0) | ||
| 317 | new = &((*new)->rb_left); | ||
| 318 | else if (result > 0) | ||
| 319 | new = &((*new)->rb_right); | ||
| 320 | else | ||
| 321 | BUG(); | ||
| 322 | } | ||
| 323 | |||
| 324 | /* Add new node and rebalance tree. */ | ||
| 325 | rb_link_node(&data->node, parent, new); | ||
| 326 | rb_insert_color(&data->node, root); | ||
| 327 | } | ||
| 328 | |||
| 329 | static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root) | ||
| 330 | { | ||
| 331 | tag_node_tree_insert(&data->tn, root); | ||
| 332 | } | ||
| 333 | |||
| 334 | static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag) | ||
| 335 | { | ||
| 336 | struct tag_node *node = tag_node_tree_search(root, tag); | ||
| 337 | if (!node) | ||
| 338 | return NULL; | ||
| 339 | return rb_entry(&node->node, struct tag_stat, tn.node); | ||
| 340 | } | ||
| 341 | |||
| 342 | static void tag_counter_set_tree_insert(struct tag_counter_set *data, | ||
| 343 | struct rb_root *root) | ||
| 344 | { | ||
| 345 | tag_node_tree_insert(&data->tn, root); | ||
| 346 | } | ||
| 347 | |||
| 348 | static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root, | ||
| 349 | tag_t tag) | ||
| 350 | { | ||
| 351 | struct tag_node *node = tag_node_tree_search(root, tag); | ||
| 352 | if (!node) | ||
| 353 | return NULL; | ||
| 354 | return rb_entry(&node->node, struct tag_counter_set, tn.node); | ||
| 355 | |||
| 356 | } | ||
| 357 | |||
| 358 | static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root) | ||
| 359 | { | ||
| 360 | tag_node_tree_insert(&data->tn, root); | ||
| 361 | } | ||
| 362 | |||
| 363 | static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag) | ||
| 364 | { | ||
| 365 | struct tag_node *node = tag_node_tree_search(root, tag); | ||
| 366 | if (!node) | ||
| 367 | return NULL; | ||
| 368 | return rb_entry(&node->node, struct tag_ref, tn.node); | ||
| 369 | } | ||
| 370 | |||
| 371 | static struct sock_tag *sock_tag_tree_search(struct rb_root *root, | ||
| 372 | const struct sock *sk) | ||
| 373 | { | ||
| 374 | struct rb_node *node = root->rb_node; | ||
| 375 | |||
| 376 | while (node) { | ||
| 377 | struct sock_tag *data = rb_entry(node, struct sock_tag, | ||
| 378 | sock_node); | ||
| 379 | if (sk < data->sk) | ||
| 380 | node = node->rb_left; | ||
| 381 | else if (sk > data->sk) | ||
| 382 | node = node->rb_right; | ||
| 383 | else | ||
| 384 | return data; | ||
| 385 | } | ||
| 386 | return NULL; | ||
| 387 | } | ||
| 388 | |||
| 389 | static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root) | ||
| 390 | { | ||
| 391 | struct rb_node **new = &(root->rb_node), *parent = NULL; | ||
| 392 | |||
| 393 | /* Figure out where to put new node */ | ||
| 394 | while (*new) { | ||
| 395 | struct sock_tag *this = rb_entry(*new, struct sock_tag, | ||
| 396 | sock_node); | ||
| 397 | parent = *new; | ||
| 398 | if (data->sk < this->sk) | ||
| 399 | new = &((*new)->rb_left); | ||
| 400 | else if (data->sk > this->sk) | ||
| 401 | new = &((*new)->rb_right); | ||
| 402 | else | ||
| 403 | BUG(); | ||
| 404 | } | ||
| 405 | |||
| 406 | /* Add new node and rebalance tree. */ | ||
| 407 | rb_link_node(&data->sock_node, parent, new); | ||
| 408 | rb_insert_color(&data->sock_node, root); | ||
| 409 | } | ||
| 410 | |||
| 411 | static void sock_tag_tree_erase(struct rb_root *st_to_free_tree) | ||
| 412 | { | ||
| 413 | struct rb_node *node; | ||
| 414 | struct sock_tag *st_entry; | ||
| 415 | |||
| 416 | node = rb_first(st_to_free_tree); | ||
| 417 | while (node) { | ||
| 418 | st_entry = rb_entry(node, struct sock_tag, sock_node); | ||
| 419 | node = rb_next(node); | ||
| 420 | CT_DEBUG("qtaguid: %s(): " | ||
| 421 | "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__, | ||
| 422 | st_entry->sk, | ||
| 423 | st_entry->tag, | ||
| 424 | get_uid_from_tag(st_entry->tag)); | ||
| 425 | rb_erase(&st_entry->sock_node, st_to_free_tree); | ||
| 426 | sockfd_put(st_entry->socket); | ||
| 427 | kfree(st_entry); | ||
| 428 | } | ||
| 429 | } | ||
| 430 | |||
| 431 | static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root, | ||
| 432 | const pid_t pid) | ||
| 433 | { | ||
| 434 | struct rb_node *node = root->rb_node; | ||
| 435 | |||
| 436 | while (node) { | ||
| 437 | struct proc_qtu_data *data = rb_entry(node, | ||
| 438 | struct proc_qtu_data, | ||
| 439 | node); | ||
| 440 | if (pid < data->pid) | ||
| 441 | node = node->rb_left; | ||
| 442 | else if (pid > data->pid) | ||
| 443 | node = node->rb_right; | ||
| 444 | else | ||
| 445 | return data; | ||
| 446 | } | ||
| 447 | return NULL; | ||
| 448 | } | ||
| 449 | |||
| 450 | static void proc_qtu_data_tree_insert(struct proc_qtu_data *data, | ||
| 451 | struct rb_root *root) | ||
| 452 | { | ||
| 453 | struct rb_node **new = &(root->rb_node), *parent = NULL; | ||
| 454 | |||
| 455 | /* Figure out where to put new node */ | ||
| 456 | while (*new) { | ||
| 457 | struct proc_qtu_data *this = rb_entry(*new, | ||
| 458 | struct proc_qtu_data, | ||
| 459 | node); | ||
| 460 | parent = *new; | ||
| 461 | if (data->pid < this->pid) | ||
| 462 | new = &((*new)->rb_left); | ||
| 463 | else if (data->pid > this->pid) | ||
| 464 | new = &((*new)->rb_right); | ||
| 465 | else | ||
| 466 | BUG(); | ||
| 467 | } | ||
| 468 | |||
| 469 | /* Add new node and rebalance tree. */ | ||
| 470 | rb_link_node(&data->node, parent, new); | ||
| 471 | rb_insert_color(&data->node, root); | ||
| 472 | } | ||
| 473 | |||
| 474 | static void uid_tag_data_tree_insert(struct uid_tag_data *data, | ||
| 475 | struct rb_root *root) | ||
| 476 | { | ||
| 477 | struct rb_node **new = &(root->rb_node), *parent = NULL; | ||
| 478 | |||
| 479 | /* Figure out where to put new node */ | ||
| 480 | while (*new) { | ||
| 481 | struct uid_tag_data *this = rb_entry(*new, | ||
| 482 | struct uid_tag_data, | ||
| 483 | node); | ||
| 484 | parent = *new; | ||
| 485 | if (data->uid < this->uid) | ||
| 486 | new = &((*new)->rb_left); | ||
| 487 | else if (data->uid > this->uid) | ||
| 488 | new = &((*new)->rb_right); | ||
| 489 | else | ||
| 490 | BUG(); | ||
| 491 | } | ||
| 492 | |||
| 493 | /* Add new node and rebalance tree. */ | ||
| 494 | rb_link_node(&data->node, parent, new); | ||
| 495 | rb_insert_color(&data->node, root); | ||
| 496 | } | ||
| 497 | |||
| 498 | static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root, | ||
| 499 | uid_t uid) | ||
| 500 | { | ||
| 501 | struct rb_node *node = root->rb_node; | ||
| 502 | |||
| 503 | while (node) { | ||
| 504 | struct uid_tag_data *data = rb_entry(node, | ||
| 505 | struct uid_tag_data, | ||
| 506 | node); | ||
| 507 | if (uid < data->uid) | ||
| 508 | node = node->rb_left; | ||
| 509 | else if (uid > data->uid) | ||
| 510 | node = node->rb_right; | ||
| 511 | else | ||
| 512 | return data; | ||
| 513 | } | ||
| 514 | return NULL; | ||
| 515 | } | ||
| 516 | |||
| 517 | /* | ||
| 518 | * Looks up the uid_tag_data for the given uid, allocating a new entry | ||
| 519 | * if needed. Returns the found or allocated entry, or ERR_PTR() on | ||
| 520 | * allocation failure; never NULL. Caller is expected to hold | ||
| 521 | * uid_tag_data_tree_lock, which this function neither takes nor drops. | ||
| 522 | * If found_res is not NULL: sets *found_res to true if an existing | ||
| 523 | * entry was found, false if a new one was allocated. | ||
| 524 | */ | ||
| 525 | struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res) | ||
| 526 | { | ||
| 527 | struct uid_tag_data *utd_entry; | ||
| 528 | |||
| 529 | /* Look for top level uid_tag_data for the UID */ | ||
| 530 | utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid); | ||
| 531 | DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry); | ||
| 532 | |||
| 533 | if (found_res) | ||
| 534 | *found_res = !!utd_entry; | ||
| 535 | if (utd_entry) | ||
| 536 | return utd_entry; | ||
| 537 | |||
| 538 | utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC); | ||
| 539 | if (!utd_entry) { | ||
| 540 | pr_err("qtaguid: get_uid_data(%u): " | ||
| 541 | "tag data alloc failed\n", uid); | ||
| 542 | return ERR_PTR(-ENOMEM); | ||
| 543 | } | ||
| 544 | |||
| 545 | utd_entry->uid = uid; | ||
| 546 | utd_entry->tag_ref_tree = RB_ROOT; | ||
| 547 | uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree); | ||
| 548 | DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry); | ||
| 549 | return utd_entry; | ||
| 550 | } | ||
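A sketch (not part of the patch) of the calling convention get_uid_data() expects, judging from get_tag_ref() below: the lookup runs with uid_tag_data_tree_lock held, and the result is tested with IS_ERR(), never against NULL. The demo_touch_uid() name is hypothetical:

    static int demo_touch_uid(uid_t uid)
    {
            struct uid_tag_data *utd;
            bool found;

            spin_lock_bh(&uid_tag_data_tree_lock);
            utd = get_uid_data(uid, &found);
            if (IS_ERR(utd)) {
                    spin_unlock_bh(&uid_tag_data_tree_lock);
                    return PTR_ERR(utd); /* -ENOMEM from the kzalloc() */
            }
            pr_info("uid=%u: %s entry %p\n", uid,
                    found ? "found existing" : "allocated new", utd);
            spin_unlock_bh(&uid_tag_data_tree_lock);
            return 0;
    }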
| 551 | |||
| 552 | /* Never returns NULL. Either PTR_ERR or a valid ptr. */ | ||
| 553 | static struct tag_ref *new_tag_ref(tag_t new_tag, | ||
| 554 | struct uid_tag_data *utd_entry) | ||
| 555 | { | ||
| 556 | struct tag_ref *tr_entry; | ||
| 557 | int res; | ||
| 558 | |||
| 559 | if (utd_entry->num_active_tags + 1 > max_sock_tags) { | ||
| 560 | pr_info("qtaguid: new_tag_ref(0x%llx): " | ||
| 561 | "tag ref alloc quota exceeded. max=%d\n", | ||
| 562 | new_tag, max_sock_tags); | ||
| 563 | res = -EMFILE; | ||
| 564 | goto err_res; | ||
| 566 | } | ||
| 567 | |||
| 568 | tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC); | ||
| 569 | if (!tr_entry) { | ||
| 570 | pr_err("qtaguid: new_tag_ref(0x%llx): " | ||
| 571 | "tag ref alloc failed\n", | ||
| 572 | new_tag); | ||
| 573 | res = -ENOMEM; | ||
| 574 | goto err_res; | ||
| 575 | } | ||
| 576 | tr_entry->tn.tag = new_tag; | ||
| 577 | /* tr_entry->num_sock_tags handled by caller */ | ||
| 578 | utd_entry->num_active_tags++; | ||
| 579 | tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree); | ||
| 580 | DR_DEBUG("qtaguid: new_tag_ref(0x%llx): " | ||
| 581 | " inserted new tag ref %p\n", | ||
| 582 | new_tag, tr_entry); | ||
| 583 | return tr_entry; | ||
| 584 | |||
| 585 | err_res: | ||
| 586 | return ERR_PTR(res); | ||
| 587 | } | ||
| 588 | |||
| 589 | static struct tag_ref *lookup_tag_ref(tag_t full_tag, | ||
| 590 | struct uid_tag_data **utd_res) | ||
| 591 | { | ||
| 592 | struct uid_tag_data *utd_entry; | ||
| 593 | struct tag_ref *tr_entry; | ||
| 594 | bool found_utd; | ||
| 595 | uid_t uid = get_uid_from_tag(full_tag); | ||
| 596 | |||
| 597 | DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n", | ||
| 598 | full_tag, uid); | ||
| 599 | |||
| 600 | utd_entry = get_uid_data(uid, &found_utd); | ||
| 601 | if (IS_ERR_OR_NULL(utd_entry)) { | ||
| 602 | if (utd_res) | ||
| 603 | *utd_res = utd_entry; | ||
| 604 | return NULL; | ||
| 605 | } | ||
| 606 | |||
| 607 | tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag); | ||
| 608 | if (utd_res) | ||
| 609 | *utd_res = utd_entry; | ||
| 610 | DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n", | ||
| 611 | full_tag, utd_entry, tr_entry); | ||
| 612 | return tr_entry; | ||
| 613 | } | ||
| 614 | |||
| 615 | /* Never returns NULL. Either PTR_ERR or a valid ptr. */ | ||
| 616 | static struct tag_ref *get_tag_ref(tag_t full_tag, | ||
| 617 | struct uid_tag_data **utd_res) | ||
| 618 | { | ||
| 619 | struct uid_tag_data *utd_entry; | ||
| 620 | struct tag_ref *tr_entry; | ||
| 621 | |||
| 622 | DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n", | ||
| 623 | full_tag); | ||
| 624 | spin_lock_bh(&uid_tag_data_tree_lock); | ||
| 625 | tr_entry = lookup_tag_ref(full_tag, &utd_entry); | ||
| 626 | BUG_ON(IS_ERR_OR_NULL(utd_entry)); | ||
| 627 | if (!tr_entry) | ||
| 628 | tr_entry = new_tag_ref(full_tag, utd_entry); | ||
| 629 | |||
| 630 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
| 631 | if (utd_res) | ||
| 632 | *utd_res = utd_entry; | ||
| 633 | DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n", | ||
| 634 | full_tag, utd_entry, tr_entry); | ||
| 635 | return tr_entry; | ||
| 636 | } | ||
| 637 | |||
| 638 | /* Checks and maybe frees the UID Tag Data entry */ | ||
| 639 | static void put_utd_entry(struct uid_tag_data *utd_entry) | ||
| 640 | { | ||
| 641 | /* Are we done with the UID tag data entry? */ | ||
| 642 | if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) && | ||
| 643 | !utd_entry->num_pqd) { | ||
| 644 | DR_DEBUG("qtaguid: %s(): " | ||
| 645 | "erase utd_entry=%p uid=%u " | ||
| 646 | "by pid=%u tgid=%u uid=%u\n", __func__, | ||
| 647 | utd_entry, utd_entry->uid, | ||
| 648 | current->pid, current->tgid, current_fsuid()); | ||
| 649 | BUG_ON(utd_entry->num_active_tags); | ||
| 650 | rb_erase(&utd_entry->node, &uid_tag_data_tree); | ||
| 651 | kfree(utd_entry); | ||
| 652 | } else { | ||
| 653 | DR_DEBUG("qtaguid: %s(): " | ||
| 654 | "utd_entry=%p still has %d tags %d proc_qtu_data\n", | ||
| 655 | __func__, utd_entry, utd_entry->num_active_tags, | ||
| 656 | utd_entry->num_pqd); | ||
| 657 | BUG_ON(!(utd_entry->num_active_tags || | ||
| 658 | utd_entry->num_pqd)); | ||
| 659 | } | ||
| 660 | } | ||
| 661 | |||
| 662 | /* | ||
| 663 | * If no sock_tags are using this tag_ref, decrements the | ||
| 664 | * utd_entry's active-tag count, removes tr_entry from | ||
| 665 | * utd_entry->tag_ref_tree, and frees it. | ||
| 666 | */ | ||
| 667 | static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry, | ||
| 668 | struct uid_tag_data *utd_entry) | ||
| 669 | { | ||
| 670 | DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__, | ||
| 671 | tr_entry, tr_entry->tn.tag, | ||
| 672 | get_uid_from_tag(tr_entry->tn.tag)); | ||
| 673 | if (!tr_entry->num_sock_tags) { | ||
| 674 | BUG_ON(!utd_entry->num_active_tags); | ||
| 675 | utd_entry->num_active_tags--; | ||
| 676 | rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree); | ||
| 677 | DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry); | ||
| 678 | kfree(tr_entry); | ||
| 679 | } | ||
| 680 | } | ||
| 681 | |||
| 682 | static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry) | ||
| 683 | { | ||
| 684 | struct rb_node *node; | ||
| 685 | struct tag_ref *tr_entry; | ||
| 686 | tag_t acct_tag; | ||
| 687 | |||
| 688 | DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__, | ||
| 689 | full_tag, get_uid_from_tag(full_tag)); | ||
| 690 | acct_tag = get_atag_from_tag(full_tag); | ||
| 691 | node = rb_first(&utd_entry->tag_ref_tree); | ||
| 692 | while (node) { | ||
| 693 | tr_entry = rb_entry(node, struct tag_ref, tn.node); | ||
| 694 | node = rb_next(node); | ||
| 695 | if (!acct_tag || tr_entry->tn.tag == full_tag) | ||
| 696 | free_tag_ref_from_utd_entry(tr_entry, utd_entry); | ||
| 697 | } | ||
| 698 | } | ||
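The tag arithmetic used throughout (get_uid_from_tag(), get_atag_from_tag(), combine_atag_with_uid(), ...) relies on tag_t being a u64 that packs the 32-bit accounting tag in the high half and the uid in the low half. The helpers live in xt_qtaguid_internal.h, added by this same patch; they boil down to shifts and masks along these lines:

    typedef uint64_t tag_t; /* acct_tag in bits 63..32, uid in bits 31..0 */

    static inline tag_t make_atag_from_value(uint32_t value)
    {
            return (uint64_t)value << 32;
    }

    static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
    {
            return acct_tag | uid;
    }

    static inline uid_t get_uid_from_tag(tag_t tag)
    {
            return tag & 0xFFFFFFFFULL;
    }

    static inline tag_t get_utag_from_tag(tag_t tag) /* the {0, uid} part */
    {
            return tag & 0xFFFFFFFFULL;
    }

    static inline tag_t get_atag_from_tag(tag_t tag) /* the {acct_tag, 0} part */
    {
            return tag & ~0xFFFFFFFFULL;
    }

This is why a zero acct_tag in put_tag_ref_tree() above matches every tag_ref belonging to the uid.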
| 699 | |||
| 700 | static int read_proc_u64(char *page, char **start, off_t off, | ||
| 701 | int count, int *eof, void *data) | ||
| 702 | { | ||
| 703 | int len; | ||
| 704 | uint64_t value; | ||
| 705 | char *p = page; | ||
| 706 | uint64_t *iface_entry = data; | ||
| 707 | |||
| 708 | if (!data) | ||
| 709 | return 0; | ||
| 710 | |||
| 711 | value = *iface_entry; | ||
| 712 | p += sprintf(p, "%llu\n", value); | ||
| 713 | len = (p - page) - off; | ||
| 714 | *eof = (len <= count) ? 1 : 0; | ||
| 715 | *start = page + off; | ||
| 716 | return len; | ||
| 717 | } | ||
| 718 | |||
| 719 | static int read_proc_bool(char *page, char **start, off_t off, | ||
| 720 | int count, int *eof, void *data) | ||
| 721 | { | ||
| 722 | int len; | ||
| 723 | bool value; | ||
| 724 | char *p = page; | ||
| 725 | bool *bool_entry = data; | ||
| 726 | |||
| 727 | if (!data) | ||
| 728 | return 0; | ||
| 729 | |||
| 730 | value = *bool_entry; | ||
| 731 | p += sprintf(p, "%u\n", value); | ||
| 732 | len = (p - page) - off; | ||
| 733 | *eof = (len <= count) ? 1 : 0; | ||
| 734 | *start = page + off; | ||
| 735 | return len; | ||
| 736 | } | ||
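read_proc_u64() and read_proc_bool() follow the classic pre-seq_file read_proc contract: fill the page buffer, then set *start and *eof so the procfs core knows how much was produced. A sketch of how such a handler is wired up, with a hypothetical demo_val counter:

    static uint64_t demo_val;

    static int __init demo_proc_init(void)
    {
            struct proc_dir_entry *pe;

            /* create_proc_read_entry(name, mode, parent, read_proc, data) */
            pe = create_proc_read_entry("demo_u64", 0444, NULL,
                                        read_proc_u64, &demo_val);
            return pe ? 0 : -ENOMEM;
    }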
| 737 | |||
| 738 | static int get_active_counter_set(tag_t tag) | ||
| 739 | { | ||
| 740 | int active_set = 0; | ||
| 741 | struct tag_counter_set *tcs; | ||
| 742 | |||
| 743 | MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)" | ||
| 744 | " (uid=%u)\n", | ||
| 745 | tag, get_uid_from_tag(tag)); | ||
| 746 | /* For now we only handle UID tags for active sets */ | ||
| 747 | tag = get_utag_from_tag(tag); | ||
| 748 | spin_lock_bh(&tag_counter_set_list_lock); | ||
| 749 | tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); | ||
| 750 | if (tcs) | ||
| 751 | active_set = tcs->active_set; | ||
| 752 | spin_unlock_bh(&tag_counter_set_list_lock); | ||
| 753 | return active_set; | ||
| 754 | } | ||
| 755 | |||
| 756 | /* | ||
| 757 | * Find the entry for tracking the specified interface. | ||
| 758 | * Caller must hold iface_stat_list_lock. | ||
| 759 | */ | ||
| 760 | static struct iface_stat *get_iface_entry(const char *ifname) | ||
| 761 | { | ||
| 762 | struct iface_stat *iface_entry; | ||
| 763 | |||
| 764 | /* A NULL device name would crash the strcmp() in the walk below */ | ||
| 765 | if (ifname == NULL) { | ||
| 766 | pr_info("qtaguid: iface_stat: get() NULL device name\n"); | ||
| 767 | return NULL; | ||
| 768 | } | ||
| 769 | |||
| 770 | /* Iterate over interfaces */ | ||
| 771 | list_for_each_entry(iface_entry, &iface_stat_list, list) { | ||
| 772 | if (!strcmp(ifname, iface_entry->ifname)) | ||
| 773 | goto done; | ||
| 774 | } | ||
| 775 | iface_entry = NULL; | ||
| 776 | done: | ||
| 777 | return iface_entry; | ||
| 778 | } | ||
| 779 | |||
| 780 | static int iface_stat_all_proc_read(char *page, char **num_items_returned, | ||
| 781 | off_t items_to_skip, int char_count, | ||
| 782 | int *eof, void *data) | ||
| 783 | { | ||
| 784 | char *outp = page; | ||
| 785 | int item_index = 0; | ||
| 786 | int len; | ||
| 787 | struct iface_stat *iface_entry; | ||
| 788 | struct rtnl_link_stats64 dev_stats, *stats; | ||
| 789 | struct rtnl_link_stats64 no_dev_stats = {0}; | ||
| 790 | |||
| 791 | if (unlikely(module_passive)) { | ||
| 792 | *eof = 1; | ||
| 793 | return 0; | ||
| 794 | } | ||
| 795 | |||
| 796 | CT_DEBUG("qtaguid:proc iface_stat_all " | ||
| 797 | "page=%p *num_items_returned=%p off=%ld " | ||
| 798 | "char_count=%d *eof=%d\n", page, *num_items_returned, | ||
| 799 | items_to_skip, char_count, *eof); | ||
| 800 | |||
| 801 | if (*eof) | ||
| 802 | return 0; | ||
| 803 | |||
| 804 | /* | ||
| 805 | * This lock will prevent iface_stat_update() from changing active, | ||
| 806 | * and in turn prevent an interface from unregistering itself. | ||
| 807 | */ | ||
| 808 | spin_lock_bh(&iface_stat_list_lock); | ||
| 809 | list_for_each_entry(iface_entry, &iface_stat_list, list) { | ||
| 810 | if (item_index++ < items_to_skip) | ||
| 811 | continue; | ||
| 812 | |||
| 813 | if (iface_entry->active) { | ||
| 814 | stats = dev_get_stats(iface_entry->net_dev, | ||
| 815 | &dev_stats); | ||
| 816 | } else { | ||
| 817 | stats = &no_dev_stats; | ||
| 818 | } | ||
| 819 | len = snprintf(outp, char_count, | ||
| 820 | "%s %d " | ||
| 821 | "%llu %llu %llu %llu " | ||
| 822 | "%llu %llu %llu %llu\n", | ||
| 823 | iface_entry->ifname, | ||
| 824 | iface_entry->active, | ||
| 825 | iface_entry->totals[IFS_RX].bytes, | ||
| 826 | iface_entry->totals[IFS_RX].packets, | ||
| 827 | iface_entry->totals[IFS_TX].bytes, | ||
| 828 | iface_entry->totals[IFS_TX].packets, | ||
| 829 | stats->rx_bytes, stats->rx_packets, | ||
| 830 | stats->tx_bytes, stats->tx_packets); | ||
| 831 | if (len >= char_count) { | ||
| 832 | spin_unlock_bh(&iface_stat_list_lock); | ||
| 833 | *outp = '\0'; | ||
| 834 | return outp - page; | ||
| 835 | } | ||
| 836 | outp += len; | ||
| 837 | char_count -= len; | ||
| 838 | (*num_items_returned)++; | ||
| 839 | } | ||
| 840 | spin_unlock_bh(&iface_stat_list_lock); | ||
| 841 | |||
| 842 | *eof = 1; | ||
| 843 | return outp - page; | ||
| 844 | } | ||
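Each line emitted above is "ifname active" followed by the module's stashed rx/tx byte and packet totals and then the same four fields from the live dev_get_stats() snapshot. A userspace sketch of a consumer; the /proc/net/xt_qtaguid/iface_stat_all path is an assumption based on the procdir names used at init time, and the totals-plus-live summation reflects that totals are only folded in when an interface goes away:

    #include <stdio.h>
    #include <inttypes.h>

    /* Parse one line of the iface_stat_all format emitted above. */
    static int parse_line(const char *line)
    {
            char ifname[64];
            int active;
            uint64_t rx_b, rx_p, tx_b, tx_p;     /* stashed module totals */
            uint64_t drx_b, drx_p, dtx_b, dtx_p; /* live dev_get_stats() */

            if (sscanf(line, "%63s %d %" SCNu64 " %" SCNu64 " %" SCNu64
                       " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
                       " %" SCNu64,
                       ifname, &active, &rx_b, &rx_p, &tx_b, &tx_p,
                       &drx_b, &drx_p, &dtx_b, &dtx_p) != 10)
                    return -1;
            printf("%s: rx=%" PRIu64 " tx=%" PRIu64 " bytes\n",
                   ifname, rx_b + drx_b, tx_b + dtx_b);
            return 0;
    }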
| 845 | |||
| 846 | static void iface_create_proc_worker(struct work_struct *work) | ||
| 847 | { | ||
| 848 | struct proc_dir_entry *proc_entry; | ||
| 849 | struct iface_stat_work *isw = container_of(work, struct iface_stat_work, | ||
| 850 | iface_work); | ||
| 851 | struct iface_stat *new_iface = isw->iface_entry; | ||
| 852 | |||
| 853 | /* iface_entries are not deleted, so safe to manipulate. */ | ||
| 854 | proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir); | ||
| 855 | if (IS_ERR_OR_NULL(proc_entry)) { | ||
| 856 | pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n"); | ||
| 857 | kfree(isw); | ||
| 858 | return; | ||
| 859 | } | ||
| 860 | |||
| 861 | new_iface->proc_ptr = proc_entry; | ||
| 862 | |||
| 863 | create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry, | ||
| 864 | read_proc_u64, &new_iface->totals[IFS_TX].bytes); | ||
| 865 | create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry, | ||
| 866 | read_proc_u64, &new_iface->totals[IFS_RX].bytes); | ||
| 867 | create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry, | ||
| 868 | read_proc_u64, &new_iface->totals[IFS_TX].packets); | ||
| 869 | create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry, | ||
| 870 | read_proc_u64, &new_iface->totals[IFS_RX].packets); | ||
| 871 | create_proc_read_entry("active", proc_iface_perms, proc_entry, | ||
| 872 | read_proc_bool, &new_iface->active); | ||
| 873 | |||
| 874 | IF_DEBUG("qtaguid: iface_stat: create_proc(): done " | ||
| 875 | "entry=%p dev=%s\n", new_iface, new_iface->ifname); | ||
| 876 | kfree(isw); | ||
| 877 | } | ||
| 878 | |||
| 879 | /* | ||
| 880 | * Sets the entry's active state and updates its | ||
| 881 | * net_dev pointer accordingly. | ||
| 882 | */ | ||
| 883 | static void _iface_stat_set_active(struct iface_stat *entry, | ||
| 884 | struct net_device *net_dev, | ||
| 885 | bool activate) | ||
| 886 | { | ||
| 887 | if (activate) { | ||
| 888 | entry->net_dev = net_dev; | ||
| 889 | entry->active = true; | ||
| 890 | IF_DEBUG("qtaguid: %s(%s): " | ||
| 891 | "enable tracking. rfcnt=%d\n", __func__, | ||
| 892 | entry->ifname, | ||
| 893 | percpu_read(*net_dev->pcpu_refcnt)); | ||
| 894 | } else { | ||
| 895 | entry->active = false; | ||
| 896 | entry->net_dev = NULL; | ||
| 897 | IF_DEBUG("qtaguid: %s(%s): " | ||
| 898 | "disable tracking. rfcnt=%d\n", __func__, | ||
| 899 | entry->ifname, | ||
| 900 | percpu_read(*net_dev->pcpu_refcnt)); | ||
| 902 | } | ||
| 903 | } | ||
| 904 | |||
| 905 | /* Caller must hold iface_stat_list_lock */ | ||
| 906 | static struct iface_stat *iface_alloc(struct net_device *net_dev) | ||
| 907 | { | ||
| 908 | struct iface_stat *new_iface; | ||
| 909 | struct iface_stat_work *isw; | ||
| 910 | |||
| 911 | new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC); | ||
| 912 | if (new_iface == NULL) { | ||
| 913 | pr_err("qtaguid: iface_stat: create(%s): " | ||
| 914 | "iface_stat alloc failed\n", net_dev->name); | ||
| 915 | return NULL; | ||
| 916 | } | ||
| 917 | new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC); | ||
| 918 | if (new_iface->ifname == NULL) { | ||
| 919 | pr_err("qtaguid: iface_stat: create(%s): " | ||
| 920 | "ifname alloc failed\n", net_dev->name); | ||
| 921 | kfree(new_iface); | ||
| 922 | return NULL; | ||
| 923 | } | ||
| 924 | spin_lock_init(&new_iface->tag_stat_list_lock); | ||
| 925 | new_iface->tag_stat_tree = RB_ROOT; | ||
| 926 | _iface_stat_set_active(new_iface, net_dev, true); | ||
| 927 | |||
| 928 | /* | ||
| 929 | * ipv6 notifier chains are atomic :( | ||
| 930 | * No create_proc_read_entry() for you! | ||
| 931 | */ | ||
| 932 | isw = kmalloc(sizeof(*isw), GFP_ATOMIC); | ||
| 933 | if (!isw) { | ||
| 934 | pr_err("qtaguid: iface_stat: create(%s): " | ||
| 935 | "work alloc failed\n", new_iface->ifname); | ||
| 936 | _iface_stat_set_active(new_iface, net_dev, false); | ||
| 937 | kfree(new_iface->ifname); | ||
| 938 | kfree(new_iface); | ||
| 939 | return NULL; | ||
| 940 | } | ||
| 941 | isw->iface_entry = new_iface; | ||
| 942 | INIT_WORK(&isw->iface_work, iface_create_proc_worker); | ||
| 943 | schedule_work(&isw->iface_work); | ||
| 944 | list_add(&new_iface->list, &iface_stat_list); | ||
| 945 | return new_iface; | ||
| 946 | } | ||
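iface_alloc() can be reached from atomic notifier context, so it restricts itself to GFP_ATOMIC allocations and defers the sleeping proc_mkdir()/create_proc_read_entry() calls to a workqueue. The same pattern in isolation, with hypothetical names:

    struct demo_work {
            struct work_struct work;
            char name[IFNAMSIZ];
    };

    static void demo_worker(struct work_struct *work)
    {
            struct demo_work *dw = container_of(work, struct demo_work, work);

            proc_mkdir(dw->name, NULL); /* process context: may sleep */
            kfree(dw);
    }

    /* Callable from atomic context, e.g. a notifier chain. */
    static int demo_defer_mkdir(const char *name)
    {
            struct demo_work *dw = kmalloc(sizeof(*dw), GFP_ATOMIC);

            if (!dw)
                    return -ENOMEM;
            strlcpy(dw->name, name, IFNAMSIZ);
            INIT_WORK(&dw->work, demo_worker);
            schedule_work(&dw->work);
            return 0;
    }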
| 947 | |||
| 948 | static void iface_check_stats_reset_and_adjust(struct net_device *net_dev, | ||
| 949 | struct iface_stat *iface) | ||
| 950 | { | ||
| 951 | struct rtnl_link_stats64 dev_stats, *stats; | ||
| 952 | bool stats_rewound; | ||
| 953 | |||
| 954 | stats = dev_get_stats(net_dev, &dev_stats); | ||
| 955 | /* Counters only grow on a live device; a decrease means a stats reset */ | ||
| 956 | stats_rewound = | ||
| 957 | (stats->rx_bytes < iface->last_known[IFS_RX].bytes) | ||
| 958 | || (stats->tx_bytes < iface->last_known[IFS_TX].bytes); | ||
| 959 | |||
| 960 | IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p " | ||
| 961 | "bytes rx/tx=%llu/%llu " | ||
| 962 | "active=%d last_known=%d " | ||
| 963 | "stats_rewound=%d\n", __func__, | ||
| 964 | net_dev ? net_dev->name : "?", | ||
| 965 | iface, net_dev, | ||
| 966 | stats->rx_bytes, stats->tx_bytes, | ||
| 967 | iface->active, iface->last_known_valid, stats_rewound); | ||
| 968 | |||
| 969 | if (iface->active && iface->last_known_valid && stats_rewound) { | ||
| 970 | pr_warn_once("qtaguid: iface_stat: %s(%s): " | ||
| 971 | "iface reset its stats unexpectedly\n", __func__, | ||
| 972 | net_dev->name); | ||
| 973 | |||
| 974 | iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes; | ||
| 975 | iface->totals[IFS_TX].packets += | ||
| 976 | iface->last_known[IFS_TX].packets; | ||
| 977 | iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes; | ||
| 978 | iface->totals[IFS_RX].packets += | ||
| 979 | iface->last_known[IFS_RX].packets; | ||
| 980 | iface->last_known_valid = false; | ||
| 981 | IF_DEBUG("qtaguid: %s(%s): iface=%p " | ||
| 982 | "used last known bytes rx/tx=%llu/%llu\n", __func__, | ||
| 983 | iface->ifname, iface, iface->last_known[IFS_RX].bytes, | ||
| 984 | iface->last_known[IFS_TX].bytes); | ||
| 985 | } | ||
| 986 | } | ||
| 987 | |||
| 988 | /* | ||
| 989 | * Create a new entry for tracking the specified interface. | ||
| 990 | * Do nothing if the entry already exists. | ||
| 991 | * Called when an interface is configured with a valid IP address. | ||
| 992 | */ | ||
| 993 | static void iface_stat_create(struct net_device *net_dev, | ||
| 994 | struct in_ifaddr *ifa) | ||
| 995 | { | ||
| 996 | struct in_device *in_dev = NULL; | ||
| 997 | const char *ifname; | ||
| 998 | struct iface_stat *entry; | ||
| 999 | __be32 ipaddr = 0; | ||
| 1000 | struct iface_stat *new_iface; | ||
| 1001 | |||
| 1002 | IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n", | ||
| 1003 | net_dev ? net_dev->name : "?", | ||
| 1004 | ifa, net_dev); | ||
| 1005 | if (!net_dev) { | ||
| 1006 | pr_err("qtaguid: iface_stat: create(): no net dev\n"); | ||
| 1007 | return; | ||
| 1008 | } | ||
| 1009 | |||
| 1010 | ifname = net_dev->name; | ||
| 1011 | if (!ifa) { | ||
| 1012 | in_dev = in_dev_get(net_dev); | ||
| 1013 | if (!in_dev) { | ||
| 1014 | pr_err("qtaguid: iface_stat: create(%s): no inet dev\n", | ||
| 1015 | ifname); | ||
| 1016 | return; | ||
| 1017 | } | ||
| 1018 | IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n", | ||
| 1019 | ifname, in_dev); | ||
| 1020 | for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { | ||
| 1021 | IF_DEBUG("qtaguid: iface_stat: create(%s): " | ||
| 1022 | "ifa=%p ifa_label=%s\n", | ||
| 1023 | ifname, ifa, | ||
| 1024 | ifa->ifa_label ? ifa->ifa_label : "(null)"); | ||
| 1025 | if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label)) | ||
| 1026 | break; | ||
| 1027 | } | ||
| 1028 | } | ||
| 1029 | |||
| 1030 | if (!ifa) { | ||
| 1031 | IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n", | ||
| 1032 | ifname); | ||
| 1033 | goto done_put; | ||
| 1034 | } | ||
| 1035 | ipaddr = ifa->ifa_local; | ||
| 1036 | |||
| 1037 | spin_lock_bh(&iface_stat_list_lock); | ||
| 1038 | entry = get_iface_entry(ifname); | ||
| 1039 | if (entry != NULL) { | ||
| 1040 | bool activate = !ipv4_is_loopback(ipaddr); | ||
| 1041 | IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n", | ||
| 1042 | ifname, entry); | ||
| 1043 | iface_check_stats_reset_and_adjust(net_dev, entry); | ||
| 1044 | _iface_stat_set_active(entry, net_dev, activate); | ||
| 1045 | IF_DEBUG("qtaguid: %s(%s): " | ||
| 1046 | "tracking now %d on ip=%pI4\n", __func__, | ||
| 1047 | entry->ifname, activate, &ipaddr); | ||
| 1048 | goto done_unlock_put; | ||
| 1049 | } else if (ipv4_is_loopback(ipaddr)) { | ||
| 1050 | IF_DEBUG("qtaguid: iface_stat: create(%s): " | ||
| 1051 | "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr); | ||
| 1052 | goto done_unlock_put; | ||
| 1053 | } | ||
| 1054 | |||
| 1055 | new_iface = iface_alloc(net_dev); | ||
| 1056 | IF_DEBUG("qtaguid: iface_stat: create(%s): done " | ||
| 1057 | "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr); | ||
| 1058 | done_unlock_put: | ||
| 1059 | spin_unlock_bh(&iface_stat_list_lock); | ||
| 1060 | done_put: | ||
| 1061 | if (in_dev) | ||
| 1062 | in_dev_put(in_dev); | ||
| 1063 | } | ||
| 1064 | |||
| 1065 | static void iface_stat_create_ipv6(struct net_device *net_dev, | ||
| 1066 | struct inet6_ifaddr *ifa) | ||
| 1067 | { | ||
| 1068 | struct in_device *in_dev; | ||
| 1069 | const char *ifname; | ||
| 1070 | struct iface_stat *entry; | ||
| 1071 | struct iface_stat *new_iface; | ||
| 1072 | int addr_type; | ||
| 1073 | |||
| 1074 | IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n", | ||
| 1075 | ifa, net_dev, net_dev ? net_dev->name : ""); | ||
| 1076 | if (!net_dev) { | ||
| 1077 | pr_err("qtaguid: iface_stat: create6(): no net dev!\n"); | ||
| 1078 | return; | ||
| 1079 | } | ||
| 1080 | ifname = net_dev->name; | ||
| 1081 | |||
| 1082 | in_dev = in_dev_get(net_dev); | ||
| 1083 | if (!in_dev) { | ||
| 1084 | pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n", | ||
| 1085 | ifname); | ||
| 1086 | return; | ||
| 1087 | } | ||
| 1088 | |||
| 1089 | IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n", | ||
| 1090 | ifname, in_dev); | ||
| 1091 | |||
| 1092 | if (!ifa) { | ||
| 1093 | IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n", | ||
| 1094 | ifname); | ||
| 1095 | goto done_put; | ||
| 1096 | } | ||
| 1097 | addr_type = ipv6_addr_type(&ifa->addr); | ||
| 1098 | |||
| 1099 | spin_lock_bh(&iface_stat_list_lock); | ||
| 1100 | entry = get_iface_entry(ifname); | ||
| 1101 | if (entry != NULL) { | ||
| 1102 | bool activate = !(addr_type & IPV6_ADDR_LOOPBACK); | ||
| 1103 | IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, | ||
| 1104 | ifname, entry); | ||
| 1105 | iface_check_stats_reset_and_adjust(net_dev, entry); | ||
| 1106 | _iface_stat_set_active(entry, net_dev, activate); | ||
| 1107 | IF_DEBUG("qtaguid: %s(%s): " | ||
| 1108 | "tracking now %d on ip=%pI6c\n", __func__, | ||
| 1109 | entry->ifname, activate, &ifa->addr); | ||
| 1110 | goto done_unlock_put; | ||
| 1111 | } else if (addr_type & IPV6_ADDR_LOOPBACK) { | ||
| 1112 | IF_DEBUG("qtaguid: %s(%s): " | ||
| 1113 | "ignore loopback dev. ip=%pI6c\n", __func__, | ||
| 1114 | ifname, &ifa->addr); | ||
| 1115 | goto done_unlock_put; | ||
| 1116 | } | ||
| 1117 | |||
| 1118 | new_iface = iface_alloc(net_dev); | ||
| 1119 | IF_DEBUG("qtaguid: iface_stat: create6(%s): done " | ||
| 1120 | "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr); | ||
| 1121 | |||
| 1122 | done_unlock_put: | ||
| 1123 | spin_unlock_bh(&iface_stat_list_lock); | ||
| 1124 | done_put: | ||
| 1125 | in_dev_put(in_dev); | ||
| 1126 | } | ||
| 1127 | |||
| 1128 | static struct sock_tag *get_sock_stat_nl(const struct sock *sk) | ||
| 1129 | { | ||
| 1130 | MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk); | ||
| 1131 | return sock_tag_tree_search(&sock_tag_tree, sk); | ||
| 1132 | } | ||
| 1133 | |||
| 1134 | static struct sock_tag *get_sock_stat(const struct sock *sk) | ||
| 1135 | { | ||
| 1136 | struct sock_tag *sock_tag_entry; | ||
| 1137 | MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk); | ||
| 1138 | if (!sk) | ||
| 1139 | return NULL; | ||
| 1140 | spin_lock_bh(&sock_tag_list_lock); | ||
| 1141 | sock_tag_entry = get_sock_stat_nl(sk); | ||
| 1142 | spin_unlock_bh(&sock_tag_list_lock); | ||
| 1143 | return sock_tag_entry; | ||
| 1144 | } | ||
| 1145 | |||
| 1146 | static void | ||
| 1147 | data_counters_update(struct data_counters *dc, int set, | ||
| 1148 | enum ifs_tx_rx direction, int proto, int bytes) | ||
| 1149 | { | ||
| 1150 | switch (proto) { | ||
| 1151 | case IPPROTO_TCP: | ||
| 1152 | dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1); | ||
| 1153 | break; | ||
| 1154 | case IPPROTO_UDP: | ||
| 1155 | dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1); | ||
| 1156 | break; | ||
| 1157 | case IPPROTO_IP: | ||
| 1158 | default: | ||
| 1159 | dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes, | ||
| 1160 | 1); | ||
| 1161 | break; | ||
| 1162 | } | ||
| 1163 | } | ||
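dc_add_byte_packets() comes from xt_qtaguid_internal.h (also part of this patch); it is essentially a bump of one cell in a 3-D counter array indexed by counter set, direction and protocol bucket, roughly:

    static inline void dc_add_byte_packets(struct data_counters *counters,
                                           int set, enum ifs_tx_rx direction,
                                           enum ifs_proto ifs_proto,
                                           int bytes, int packets)
    {
            counters->bpc[set][direction][ifs_proto].bytes += bytes;
            counters->bpc[set][direction][ifs_proto].packets += packets;
    }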
| 1164 | |||
| 1165 | /* | ||
| 1166 | * Update stats for the specified interface. Do nothing if the entry | ||
| 1167 | * does not exist (when a device was never configured with an IP address). | ||
| 1168 | * Called when a device goes down or is being unregistered: with | ||
| 1169 | * stash_only the counters are only stashed, not folded into the totals. | ||
| | */ | ||
| 1170 | static void iface_stat_update(struct net_device *net_dev, bool stash_only) | ||
| 1171 | { | ||
| 1172 | struct rtnl_link_stats64 dev_stats, *stats; | ||
| 1173 | struct iface_stat *entry; | ||
| 1174 | |||
| 1175 | stats = dev_get_stats(net_dev, &dev_stats); | ||
| 1176 | spin_lock_bh(&iface_stat_list_lock); | ||
| 1177 | entry = get_iface_entry(net_dev->name); | ||
| 1178 | if (entry == NULL) { | ||
| 1179 | IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n", | ||
| 1180 | net_dev->name); | ||
| 1181 | spin_unlock_bh(&iface_stat_list_lock); | ||
| 1182 | return; | ||
| 1183 | } | ||
| 1184 | |||
| 1185 | IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__, | ||
| 1186 | net_dev->name, entry); | ||
| 1187 | if (!entry->active) { | ||
| 1188 | IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__, | ||
| 1189 | net_dev->name); | ||
| 1190 | spin_unlock_bh(&iface_stat_list_lock); | ||
| 1191 | return; | ||
| 1192 | } | ||
| 1193 | |||
| 1194 | if (stash_only) { | ||
| 1195 | entry->last_known[IFS_TX].bytes = stats->tx_bytes; | ||
| 1196 | entry->last_known[IFS_TX].packets = stats->tx_packets; | ||
| 1197 | entry->last_known[IFS_RX].bytes = stats->rx_bytes; | ||
| 1198 | entry->last_known[IFS_RX].packets = stats->rx_packets; | ||
| 1199 | entry->last_known_valid = true; | ||
| 1200 | IF_DEBUG("qtaguid: %s(%s): " | ||
| 1201 | "dev stats stashed rx/tx=%llu/%llu\n", __func__, | ||
| 1202 | net_dev->name, stats->rx_bytes, stats->tx_bytes); | ||
| 1203 | spin_unlock_bh(&iface_stat_list_lock); | ||
| 1204 | return; | ||
| 1205 | } | ||
| 1206 | entry->totals[IFS_TX].bytes += stats->tx_bytes; | ||
| 1207 | entry->totals[IFS_TX].packets += stats->tx_packets; | ||
| 1208 | entry->totals[IFS_RX].bytes += stats->rx_bytes; | ||
| 1209 | entry->totals[IFS_RX].packets += stats->rx_packets; | ||
| 1210 | /* We don't need the last_known[] anymore */ | ||
| 1211 | entry->last_known_valid = false; | ||
| 1212 | _iface_stat_set_active(entry, net_dev, false); | ||
| 1213 | IF_DEBUG("qtaguid: %s(%s): " | ||
| 1214 | "disable tracking. rx/tx=%llu/%llu\n", __func__, | ||
| 1215 | net_dev->name, stats->rx_bytes, stats->tx_bytes); | ||
| 1216 | spin_unlock_bh(&iface_stat_list_lock); | ||
| 1217 | } | ||
| 1218 | |||
| 1219 | static void tag_stat_update(struct tag_stat *tag_entry, | ||
| 1220 | enum ifs_tx_rx direction, int proto, int bytes) | ||
| 1221 | { | ||
| 1222 | int active_set; | ||
| 1223 | active_set = get_active_counter_set(tag_entry->tn.tag); | ||
| 1224 | MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d " | ||
| 1225 | "dir=%d proto=%d bytes=%d)\n", | ||
| 1226 | tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag), | ||
| 1227 | active_set, direction, proto, bytes); | ||
| 1228 | data_counters_update(&tag_entry->counters, active_set, direction, | ||
| 1229 | proto, bytes); | ||
| 1230 | if (tag_entry->parent_counters) | ||
| 1231 | data_counters_update(tag_entry->parent_counters, active_set, | ||
| 1232 | direction, proto, bytes); | ||
| 1233 | } | ||
| 1234 | |||
| 1235 | /* | ||
| 1236 | * Create a new entry for tracking the specified {acct_tag,uid_tag} within | ||
| 1237 | * the interface. | ||
| 1238 | * iface_entry->tag_stat_list_lock should be held. | ||
| 1239 | */ | ||
| 1240 | static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry, | ||
| 1241 | tag_t tag) | ||
| 1242 | { | ||
| 1243 | struct tag_stat *new_tag_stat_entry = NULL; | ||
| 1244 | IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx" | ||
| 1245 | " (uid=%u)\n", __func__, | ||
| 1246 | iface_entry, tag, get_uid_from_tag(tag)); | ||
| 1247 | new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC); | ||
| 1248 | if (!new_tag_stat_entry) { | ||
| 1249 | pr_err("qtaguid: iface_stat: tag stat alloc failed\n"); | ||
| 1250 | goto done; | ||
| 1251 | } | ||
| 1252 | new_tag_stat_entry->tn.tag = tag; | ||
| 1253 | tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree); | ||
| 1254 | done: | ||
| 1255 | return new_tag_stat_entry; | ||
| 1256 | } | ||
| 1257 | |||
| 1258 | static void if_tag_stat_update(const char *ifname, uid_t uid, | ||
| 1259 | const struct sock *sk, enum ifs_tx_rx direction, | ||
| 1260 | int proto, int bytes) | ||
| 1261 | { | ||
| 1262 | struct tag_stat *tag_stat_entry; | ||
| 1263 | tag_t tag, acct_tag; | ||
| 1264 | tag_t uid_tag; | ||
| 1265 | struct data_counters *uid_tag_counters; | ||
| 1266 | struct sock_tag *sock_tag_entry; | ||
| 1267 | struct iface_stat *iface_entry; | ||
| 1268 | struct tag_stat *new_tag_stat; | ||
| 1269 | MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s " | ||
| 1270 | "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n", | ||
| 1271 | ifname, uid, sk, direction, proto, bytes); | ||
| 1272 | |||
| | /* Entries are never freed; the lock only guards the list walk. */ | ||
| | spin_lock_bh(&iface_stat_list_lock); | ||
| 1274 | iface_entry = get_iface_entry(ifname); | ||
| | spin_unlock_bh(&iface_stat_list_lock); | ||
| 1275 | if (!iface_entry) { | ||
| 1276 | pr_err("qtaguid: iface_stat: stat_update() %s not found\n", | ||
| 1277 | ifname); | ||
| 1278 | return; | ||
| 1279 | } | ||
| 1280 | /* It is ok to process data when an iface_entry is inactive */ | ||
| 1281 | |||
| 1282 | MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n", | ||
| 1283 | ifname, iface_entry); | ||
| 1284 | |||
| 1285 | /* | ||
| 1286 | * Look for a tagged sock. | ||
| 1287 | * It will have an acct_uid. | ||
| 1288 | */ | ||
| 1289 | sock_tag_entry = get_sock_stat(sk); | ||
| 1290 | if (sock_tag_entry) { | ||
| 1291 | tag = sock_tag_entry->tag; | ||
| 1292 | acct_tag = get_atag_from_tag(tag); | ||
| 1293 | uid_tag = get_utag_from_tag(tag); | ||
| 1294 | } else { | ||
| 1295 | acct_tag = make_atag_from_value(0); | ||
| 1296 | tag = combine_atag_with_uid(acct_tag, uid); | ||
| 1297 | uid_tag = make_tag_from_uid(uid); | ||
| 1298 | } | ||
| 1299 | MT_DEBUG("qtaguid: iface_stat: stat_update(): " | ||
| 1300 | " looking for tag=0x%llx (uid=%u) in ife=%p\n", | ||
| 1301 | tag, get_uid_from_tag(tag), iface_entry); | ||
| 1302 | /* Loop over tag list under this interface for {acct_tag,uid_tag} */ | ||
| 1303 | spin_lock_bh(&iface_entry->tag_stat_list_lock); | ||
| 1304 | |||
| 1305 | tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, | ||
| 1306 | tag); | ||
| 1307 | if (tag_stat_entry) { | ||
| 1308 | /* | ||
| 1309 | * Updating the {acct_tag, uid_tag} entry handles both stats: | ||
| 1310 | * {0, uid_tag} will also get updated. | ||
| 1311 | */ | ||
| 1312 | tag_stat_update(tag_stat_entry, direction, proto, bytes); | ||
| 1313 | spin_unlock_bh(&iface_entry->tag_stat_list_lock); | ||
| 1314 | return; | ||
| 1315 | } | ||
| 1316 | |||
| 1317 | /* Loop over tag list under this interface for {0,uid_tag} */ | ||
| 1318 | tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree, | ||
| 1319 | uid_tag); | ||
| 1320 | if (!tag_stat_entry) { | ||
| 1321 | /* Here: the base uid_tag did not exist */ | ||
| 1322 | /* | ||
| 1323 | * No parent counters. So | ||
| 1324 | * - No {0, uid_tag} stats and no {acct_tag, uid_tag} stats. | ||
| 1325 | */ | ||
| 1326 | new_tag_stat = create_if_tag_stat(iface_entry, uid_tag); | ||
| | if (unlikely(!new_tag_stat)) | ||
| | goto unlock; /* alloc failed: drop this sample */ | ||
| 1327 | uid_tag_counters = &new_tag_stat->counters; | ||
| 1328 | } else { | ||
| 1329 | uid_tag_counters = &tag_stat_entry->counters; | ||
| 1330 | } | ||
| 1331 | |||
| 1332 | if (acct_tag) { | ||
| 1333 | new_tag_stat = create_if_tag_stat(iface_entry, tag); | ||
| | if (unlikely(!new_tag_stat)) | ||
| | goto unlock; | ||
| 1334 | new_tag_stat->parent_counters = uid_tag_counters; | ||
| 1335 | } | ||
| 1336 | tag_stat_update(new_tag_stat, direction, proto, bytes); | ||
| | unlock: | ||
| 1337 | spin_unlock_bh(&iface_entry->tag_stat_list_lock); | ||
| 1338 | } | ||
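A toy userspace model (hypothetical names) of the parent/child relationship established above: bytes accounted to an {acct_tag, uid} entry are mirrored into its {0, uid} parent by tag_stat_update(), so the per-uid entry always covers all of that uid's traffic, tagged or not:

    #include <assert.h>
    #include <stdint.h>
    #include <stddef.h>

    struct toy_counters { uint64_t bytes; };

    struct toy_tag_stat {
            struct toy_counters counters;
            struct toy_counters *parent_counters; /* NULL for {0, uid} */
    };

    static void toy_update(struct toy_tag_stat *ts, uint64_t bytes)
    {
            ts->counters.bytes += bytes;
            if (ts->parent_counters)
                    ts->parent_counters->bytes += bytes;
    }

    int main(void)
    {
            struct toy_tag_stat uid_stat = { .parent_counters = NULL };
            struct toy_tag_stat tagged = {
                    .parent_counters = &uid_stat.counters
            };

            toy_update(&uid_stat, 100); /* untagged traffic for the uid */
            toy_update(&tagged, 40);    /* {acct_tag, uid} traffic */
            assert(tagged.counters.bytes == 40);
            assert(uid_stat.counters.bytes == 140);
            return 0;
    }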
| 1339 | |||
| 1340 | static int iface_netdev_event_handler(struct notifier_block *nb, | ||
| 1341 | unsigned long event, void *ptr) | ||
| | { | ||
| 1342 | struct net_device *dev = ptr; | ||
| 1343 | |||
| 1344 | if (unlikely(module_passive)) | ||
| 1345 | return NOTIFY_DONE; | ||
| 1346 | |||
| 1347 | IF_DEBUG("qtaguid: iface_stat: netdev_event(): " | ||
| 1348 | "ev=0x%lx/%s netdev=%p->name=%s\n", | ||
| 1349 | event, netdev_evt_str(event), dev, dev ? dev->name : ""); | ||
| 1350 | |||
| 1351 | switch (event) { | ||
| 1352 | case NETDEV_UP: | ||
| 1353 | iface_stat_create(dev, NULL); | ||
| 1354 | atomic64_inc(&qtu_events.iface_events); | ||
| 1355 | break; | ||
| 1356 | case NETDEV_DOWN: | ||
| 1357 | case NETDEV_UNREGISTER: | ||
| 1358 | iface_stat_update(dev, event == NETDEV_DOWN); | ||
| 1359 | atomic64_inc(&qtu_events.iface_events); | ||
| 1360 | break; | ||
| 1361 | } | ||
| 1362 | return NOTIFY_DONE; | ||
| 1363 | } | ||
| 1364 | |||
| 1365 | static int iface_inet6addr_event_handler(struct notifier_block *nb, | ||
| 1366 | unsigned long event, void *ptr) | ||
| 1367 | { | ||
| 1368 | struct inet6_ifaddr *ifa = ptr; | ||
| 1369 | struct net_device *dev; | ||
| 1370 | |||
| 1371 | if (unlikely(module_passive)) | ||
| 1372 | return NOTIFY_DONE; | ||
| 1373 | |||
| 1374 | IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): " | ||
| 1375 | "ev=0x%lx/%s ifa=%p\n", | ||
| 1376 | event, netdev_evt_str(event), ifa); | ||
| 1377 | |||
| 1378 | switch (event) { | ||
| 1379 | case NETDEV_UP: | ||
| 1380 | BUG_ON(!ifa || !ifa->idev); | ||
| 1381 | dev = (struct net_device *)ifa->idev->dev; | ||
| 1382 | iface_stat_create_ipv6(dev, ifa); | ||
| 1383 | atomic64_inc(&qtu_events.iface_events); | ||
| 1384 | break; | ||
| 1385 | case NETDEV_DOWN: | ||
| 1386 | case NETDEV_UNREGISTER: | ||
| 1387 | BUG_ON(!ifa || !ifa->idev); | ||
| 1388 | dev = (struct net_device *)ifa->idev->dev; | ||
| 1389 | iface_stat_update(dev, event == NETDEV_DOWN); | ||
| 1390 | atomic64_inc(&qtu_events.iface_events); | ||
| 1391 | break; | ||
| 1392 | } | ||
| 1393 | return NOTIFY_DONE; | ||
| 1394 | } | ||
| 1395 | |||
| 1396 | static int iface_inetaddr_event_handler(struct notifier_block *nb, | ||
| 1397 | unsigned long event, void *ptr) | ||
| 1398 | { | ||
| 1399 | struct in_ifaddr *ifa = ptr; | ||
| 1400 | struct net_device *dev; | ||
| 1401 | |||
| 1402 | if (unlikely(module_passive)) | ||
| 1403 | return NOTIFY_DONE; | ||
| 1404 | |||
| 1405 | IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): " | ||
| 1406 | "ev=0x%lx/%s ifa=%p\n", | ||
| 1407 | event, netdev_evt_str(event), ifa); | ||
| 1408 | |||
| 1409 | switch (event) { | ||
| 1410 | case NETDEV_UP: | ||
| 1411 | BUG_ON(!ifa || !ifa->ifa_dev); | ||
| 1412 | dev = ifa->ifa_dev->dev; | ||
| 1413 | iface_stat_create(dev, ifa); | ||
| 1414 | atomic64_inc(&qtu_events.iface_events); | ||
| 1415 | break; | ||
| 1416 | case NETDEV_DOWN: | ||
| 1417 | case NETDEV_UNREGISTER: | ||
| 1418 | BUG_ON(!ifa || !ifa->ifa_dev); | ||
| 1419 | dev = ifa->ifa_dev->dev; | ||
| 1420 | iface_stat_update(dev, event == NETDEV_DOWN); | ||
| 1421 | atomic64_inc(&qtu_events.iface_events); | ||
| 1422 | break; | ||
| 1423 | } | ||
| 1424 | return NOTIFY_DONE; | ||
| 1425 | } | ||
| 1426 | |||
| 1427 | static struct notifier_block iface_netdev_notifier_blk = { | ||
| 1428 | .notifier_call = iface_netdev_event_handler, | ||
| 1429 | }; | ||
| 1430 | |||
| 1431 | static struct notifier_block iface_inetaddr_notifier_blk = { | ||
| 1432 | .notifier_call = iface_inetaddr_event_handler, | ||
| 1433 | }; | ||
| 1434 | |||
| 1435 | static struct notifier_block iface_inet6addr_notifier_blk = { | ||
| 1436 | .notifier_call = iface_inet6addr_event_handler, | ||
| 1437 | }; | ||
| 1438 | |||
| 1439 | static int __init iface_stat_init(struct proc_dir_entry *parent_procdir) | ||
| 1440 | { | ||
| 1441 | int err; | ||
| 1442 | |||
| 1443 | iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir); | ||
| 1444 | if (!iface_stat_procdir) { | ||
| 1445 | pr_err("qtaguid: iface_stat: init failed to create proc entry\n"); | ||
| 1446 | err = -ENOMEM; | ||
| 1447 | goto err; | ||
| 1448 | } | ||
| 1449 | |||
| 1450 | iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename, | ||
| 1451 | proc_iface_perms, | ||
| 1452 | parent_procdir); | ||
| 1453 | if (!iface_stat_all_procfile) { | ||
| 1454 | pr_err("qtaguid: iface_stat: init " | ||
| 1455 | "failed to create stat_all proc entry\n"); | ||
| 1456 | err = -ENOMEM; | ||
| 1457 | goto err_zap_entry; | ||
| 1458 | } | ||
| 1459 | iface_stat_all_procfile->read_proc = iface_stat_all_proc_read; | ||
| 1460 | |||
| 1462 | err = register_netdevice_notifier(&iface_netdev_notifier_blk); | ||
| 1463 | if (err) { | ||
| 1464 | pr_err("qtaguid: iface_stat: init " | ||
| 1465 | "failed to register dev event handler\n"); | ||
| 1466 | goto err_zap_all_stats_entry; | ||
| 1467 | } | ||
| 1468 | err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk); | ||
| 1469 | if (err) { | ||
| 1470 | pr_err("qtaguid: iface_stat: init " | ||
| 1471 | "failed to register ipv4 dev event handler\n"); | ||
| 1472 | goto err_unreg_nd; | ||
| 1473 | } | ||
| 1474 | |||
| 1475 | err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk); | ||
| 1476 | if (err) { | ||
| 1477 | pr_err("qtaguid: iface_stat: init " | ||
| 1478 | "failed to register ipv6 dev event handler\n"); | ||
| 1479 | goto err_unreg_ip4_addr; | ||
| 1480 | } | ||
| 1481 | return 0; | ||
| 1482 | |||
| 1483 | err_unreg_ip4_addr: | ||
| 1484 | unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk); | ||
| 1485 | err_unreg_nd: | ||
| 1486 | unregister_netdevice_notifier(&iface_netdev_notifier_blk); | ||
| 1487 | err_zap_all_stats_entry: | ||
| 1488 | remove_proc_entry(iface_stat_all_procfilename, parent_procdir); | ||
| 1489 | err_zap_entry: | ||
| 1490 | remove_proc_entry(iface_stat_procdirname, parent_procdir); | ||
| 1491 | err: | ||
| 1492 | return err; | ||
| 1493 | } | ||
| 1494 | |||
| 1495 | static struct sock *qtaguid_find_sk(const struct sk_buff *skb, | ||
| 1496 | struct xt_action_param *par) | ||
| 1497 | { | ||
| 1498 | struct sock *sk; | ||
| 1499 | unsigned int hook_mask = (1 << par->hooknum); | ||
| 1500 | |||
| 1501 | MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb, | ||
| 1502 | par->hooknum, par->family); | ||
| 1503 | |||
| 1504 | /* | ||
| 1505 | * Let's not abuse the xt_socket_get*_sk(), or else it will | ||
| 1506 | * return garbage SKs. | ||
| 1507 | */ | ||
| 1508 | if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS)) | ||
| 1509 | return NULL; | ||
| 1510 | |||
| 1511 | switch (par->family) { | ||
| 1512 | case NFPROTO_IPV6: | ||
| 1513 | sk = xt_socket_get6_sk(skb, par); | ||
| 1514 | break; | ||
| 1515 | case NFPROTO_IPV4: | ||
| 1516 | sk = xt_socket_get4_sk(skb, par); | ||
| 1517 | break; | ||
| 1518 | default: | ||
| 1519 | return NULL; | ||
| 1520 | } | ||
| 1521 | |||
| 1522 | /* | ||
| 1523 | * There seem to be issues with the file ptr for TCP_TIME_WAIT SKs. | ||
| 1524 | * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959 | ||
| 1525 | * Not fixed in 3.0-r3 :( | ||
| 1526 | */ | ||
| 1527 | if (sk) { | ||
| 1528 | MT_DEBUG("qtaguid: %p->sk_proto=%u " | ||
| 1529 | "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state); | ||
| 1530 | if (sk->sk_state == TCP_TIME_WAIT) { | ||
| 1531 | xt_socket_put_sk(sk); | ||
| 1532 | sk = NULL; | ||
| 1533 | } | ||
| 1534 | } | ||
| 1535 | return sk; | ||
| 1536 | } | ||
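The hook-mask gate above mirrors where xt_socket itself is allowed to run; XT_SOCKET_SUPPORTED_HOOKS is presumed (from the xt_socket changes this patch set depends on) to be along the lines of:

    #define XT_SOCKET_SUPPORTED_HOOKS \
            ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))

i.e. the TPROXY-style socket lookup is only meaningful on PRE_ROUTING and LOCAL_IN.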
| 1537 | |||
| 1538 | static void account_for_uid(const struct sk_buff *skb, | ||
| 1539 | const struct sock *alternate_sk, uid_t uid, | ||
| 1540 | struct xt_action_param *par) | ||
| 1541 | { | ||
| 1542 | const struct net_device *el_dev; | ||
| 1543 | |||
| 1544 | if (!skb->dev) { | ||
| 1545 | MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum); | ||
| 1546 | el_dev = par->in ? : par->out; | ||
| 1547 | } else { | ||
| 1548 | const struct net_device *other_dev; | ||
| 1549 | el_dev = skb->dev; | ||
| 1550 | other_dev = par->in ? : par->out; | ||
| 1551 | if (el_dev != other_dev) { | ||
| 1552 | MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs " | ||
| 1553 | "par->(in/out)=%p %s\n", | ||
| 1554 | par->hooknum, el_dev, el_dev->name, other_dev, | ||
| 1555 | other_dev->name); | ||
| 1556 | } | ||
| 1557 | } | ||
| 1558 | |||
| 1559 | if (unlikely(!el_dev)) { | ||
| 1560 | pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum); | ||
| 1561 | } else if (unlikely(!el_dev->name)) { | ||
| 1562 | pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum); | ||
| 1563 | } else { | ||
| 1564 | MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n", | ||
| 1565 | par->hooknum, | ||
| 1566 | el_dev->name, | ||
| 1567 | el_dev->type); | ||
| 1568 | |||
| 1569 | if_tag_stat_update(el_dev->name, uid, | ||
| 1570 | skb->sk ? skb->sk : alternate_sk, | ||
| 1571 | par->in ? IFS_RX : IFS_TX, | ||
| 1572 | ip_hdr(skb)->protocol, skb->len); | ||
| 1573 | } | ||
| 1574 | } | ||
| 1575 | |||
| 1576 | static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par) | ||
| 1577 | { | ||
| 1578 | const struct xt_qtaguid_match_info *info = par->matchinfo; | ||
| 1579 | const struct file *filp; | ||
| 1580 | bool got_sock = false; | ||
| 1581 | struct sock *sk; | ||
| 1582 | uid_t sock_uid; | ||
| 1583 | bool res; | ||
| 1584 | |||
| 1585 | if (unlikely(module_passive)) | ||
| 1586 | return (info->match ^ info->invert) == 0; | ||
| 1587 | |||
| 1588 | MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n", | ||
| 1589 | par->hooknum, skb, par->in, par->out, par->family); | ||
| 1590 | |||
| 1591 | atomic64_inc(&qtu_events.match_calls); | ||
| 1592 | if (skb == NULL) { | ||
| 1593 | res = (info->match ^ info->invert) == 0; | ||
| 1594 | goto ret_res; | ||
| 1595 | } | ||
| 1596 | |||
| 1597 | sk = skb->sk; | ||
| 1598 | |||
| 1599 | if (sk == NULL) { | ||
| 1600 | /* | ||
| 1601 | * A NULL skb->sk happens when packets are in-flight | ||
| 1602 | * and the matching socket has already been closed and gone. | ||
| 1603 | */ | ||
| 1604 | sk = qtaguid_find_sk(skb, par); | ||
| 1605 | /* | ||
| 1606 | * If we got the socket from the find_sk(), we will need to put | ||
| 1607 | * it back, as nf_tproxy_get_sock_v4() got it. | ||
| 1608 | */ | ||
| 1609 | got_sock = sk != NULL; | ||
| 1610 | if (sk) | ||
| 1611 | atomic64_inc(&qtu_events.match_found_sk_in_ct); | ||
| 1612 | else | ||
| 1613 | atomic64_inc(&qtu_events.match_found_no_sk_in_ct); | ||
| 1614 | } else { | ||
| 1615 | atomic64_inc(&qtu_events.match_found_sk); | ||
| 1616 | } | ||
| 1617 | MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n", | ||
| 1618 | par->hooknum, sk, got_sock, ip_hdr(skb)->protocol); | ||
| 1619 | if (sk != NULL) { | ||
| 1620 | MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n", | ||
| 1621 | par->hooknum, sk, sk->sk_socket, | ||
| 1622 | sk->sk_socket ? sk->sk_socket->file : (void *)-1LL); | ||
| 1623 | filp = sk->sk_socket ? sk->sk_socket->file : NULL; | ||
| 1624 | MT_DEBUG("qtaguid[%d]: filp...uid=%u\n", | ||
| 1625 | par->hooknum, filp ? filp->f_cred->fsuid : -1); | ||
| 1626 | } | ||
| 1627 | |||
| 1628 | if (sk == NULL || sk->sk_socket == NULL) { | ||
| 1629 | /* | ||
| 1630 | * Here, the qtaguid_find_sk() using connection tracking | ||
| 1631 | * couldn't find the owner, so for now we just count them | ||
| 1632 | * against the system. | ||
| 1633 | */ | ||
| 1634 | /* | ||
| 1635 | * TODO: unhack how to force just accounting. | ||
| 1636 | * For now we only do iface stats when the uid-owner is not | ||
| 1637 | * requested. | ||
| 1638 | */ | ||
| 1639 | if (!(info->match & XT_QTAGUID_UID)) | ||
| 1640 | account_for_uid(skb, sk, 0, par); | ||
| 1641 | MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n", | ||
| 1642 | par->hooknum, | ||
| 1643 | sk ? sk->sk_socket : NULL); | ||
| 1644 | res = (info->match ^ info->invert) == 0; | ||
| 1645 | atomic64_inc(&qtu_events.match_no_sk); | ||
| 1646 | goto put_sock_ret_res; | ||
| 1647 | } else if (info->match & info->invert & XT_QTAGUID_SOCKET) { | ||
| 1648 | res = false; | ||
| 1649 | goto put_sock_ret_res; | ||
| 1650 | } | ||
| 1651 | filp = sk->sk_socket->file; | ||
| 1652 | if (filp == NULL) { | ||
| 1653 | MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum); | ||
| 1654 | account_for_uid(skb, sk, 0, par); | ||
| 1655 | res = ((info->match ^ info->invert) & | ||
| 1656 | (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0; | ||
| 1657 | atomic64_inc(&qtu_events.match_no_sk_file); | ||
| 1658 | goto put_sock_ret_res; | ||
| 1659 | } | ||
| 1660 | sock_uid = filp->f_cred->fsuid; | ||
| 1661 | /* | ||
| 1662 | * TODO: unhack how to force just accounting. | ||
| 1663 | * For now we only do iface stats when the uid-owner is not requested | ||
| 1664 | */ | ||
| 1665 | if (!(info->match & XT_QTAGUID_UID)) | ||
| 1666 | account_for_uid(skb, sk, sock_uid, par); | ||
| 1667 | |||
| 1668 | /* | ||
| 1669 | * The following two tests fail the match when: | ||
| 1670 | * id not in range AND no inverted condition requested | ||
| 1671 | * or id in range AND inverted condition requested | ||
| 1672 | * Thus (!a && b) || (a && !b) == a ^ b | ||
| 1673 | */ | ||
| 1674 | if (info->match & XT_QTAGUID_UID) | ||
| 1675 | if ((filp->f_cred->fsuid >= info->uid_min && | ||
| 1676 | filp->f_cred->fsuid <= info->uid_max) ^ | ||
| 1677 | !(info->invert & XT_QTAGUID_UID)) { | ||
| 1678 | MT_DEBUG("qtaguid[%d]: leaving uid not matching\n", | ||
| 1679 | par->hooknum); | ||
| 1680 | res = false; | ||
| 1681 | goto put_sock_ret_res; | ||
| 1682 | } | ||
| 1683 | if (info->match & XT_QTAGUID_GID) | ||
| 1684 | if ((filp->f_cred->fsgid >= info->gid_min && | ||
| 1685 | filp->f_cred->fsgid <= info->gid_max) ^ | ||
| 1686 | !(info->invert & XT_QTAGUID_GID)) { | ||
| 1687 | MT_DEBUG("qtaguid[%d]: leaving gid not matching\n", | ||
| 1688 | par->hooknum); | ||
| 1689 | res = false; | ||
| 1690 | goto put_sock_ret_res; | ||
| 1691 | } | ||
| 1692 | |||
| 1693 | MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum); | ||
| 1694 | res = true; | ||
| 1695 | |||
| 1696 | put_sock_ret_res: | ||
| 1697 | if (got_sock) | ||
| 1698 | xt_socket_put_sk(sk); | ||
| 1699 | ret_res: | ||
| 1700 | MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res); | ||
| 1701 | return res; | ||
| 1702 | } | ||
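The uid/gid range tests above lean on the identity spelled out in the comment: failing the match on "in range AND inverted" or "not in range AND not inverted" is the same as testing in_range ^ !inverted. A throwaway userspace check of that truth table:

    #include <assert.h>
    #include <stdbool.h>

    int main(void)
    {
            int in_range, inverted;

            for (in_range = 0; in_range < 2; in_range++) {
                    for (inverted = 0; inverted < 2; inverted++) {
                            bool fail = (!in_range && !inverted) ||
                                        (in_range && inverted);
                            /* the form used by qtaguid_mt() above */
                            assert(fail == (bool)(in_range ^ !inverted));
                    }
            }
            return 0;
    }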
| 1703 | |||
| 1704 | #ifdef DDEBUG | ||
| 1705 | /* This function is not in xt_qtaguid_print.c because of locks visibility */ | ||
| 1706 | static void prdebug_full_state(int indent_level, const char *fmt, ...) | ||
| 1707 | { | ||
| 1708 | va_list args; | ||
| 1709 | char *fmt_buff; | ||
| 1710 | char *buff; | ||
| 1711 | |||
| 1712 | if (likely(!(qtaguid_debug_mask & DDEBUG_MASK))) | ||
| 1713 | return; | ||
| 1714 | |||
| 1715 | fmt_buff = kasprintf(GFP_ATOMIC, | ||
| 1716 | "qtaguid: %s(): %s {\n", __func__, fmt); | ||
| 1717 | BUG_ON(!fmt_buff); | ||
| 1718 | va_start(args, fmt); | ||
| 1719 | buff = kvasprintf(GFP_ATOMIC, | ||
| 1720 | fmt_buff, args); | ||
| 1721 | BUG_ON(!buff); | ||
| 1722 | pr_debug("%s", buff); | ||
| 1723 | kfree(fmt_buff); | ||
| 1724 | kfree(buff); | ||
| 1725 | va_end(args); | ||
| 1726 | |||
| 1727 | spin_lock_bh(&sock_tag_list_lock); | ||
| 1728 | prdebug_sock_tag_tree(indent_level, &sock_tag_tree); | ||
| 1729 | spin_unlock_bh(&sock_tag_list_lock); | ||
| 1730 | |||
| 1731 | spin_lock_bh(&sock_tag_list_lock); | ||
| 1732 | spin_lock_bh(&uid_tag_data_tree_lock); | ||
| 1733 | prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree); | ||
| 1734 | prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree); | ||
| 1735 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
| 1736 | spin_unlock_bh(&sock_tag_list_lock); | ||
| 1737 | |||
| 1738 | spin_lock_bh(&iface_stat_list_lock); | ||
| 1739 | prdebug_iface_stat_list(indent_level, &iface_stat_list); | ||
| 1740 | spin_unlock_bh(&iface_stat_list_lock); | ||
| 1741 | |||
| 1742 | pr_debug("qtaguid: %s(): }\n", __func__); | ||
| 1743 | } | ||
| 1744 | #else | ||
| 1745 | static void prdebug_full_state(int indent_level, const char *fmt, ...) {} | ||
| 1746 | #endif | ||
| 1747 | |||
| 1748 | /* | ||
| 1749 | * Procfs reader to get all active socket tags using style "1)" as described in | ||
| 1750 | * fs/proc/generic.c | ||
| 1751 | */ | ||
| 1752 | static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned, | ||
| 1753 | off_t items_to_skip, int char_count, int *eof, | ||
| 1754 | void *data) | ||
| 1755 | { | ||
| 1756 | char *outp = page; | ||
| 1757 | int len; | ||
| 1758 | uid_t uid; | ||
| 1759 | struct rb_node *node; | ||
| 1760 | struct sock_tag *sock_tag_entry; | ||
| 1761 | int item_index = 0; | ||
| 1762 | int indent_level = 0; | ||
| 1763 | long f_count; | ||
| 1764 | |||
| 1765 | if (unlikely(module_passive)) { | ||
| 1766 | *eof = 1; | ||
| 1767 | return 0; | ||
| 1768 | } | ||
| 1769 | |||
| 1770 | if (*eof) | ||
| 1771 | return 0; | ||
| 1772 | |||
| 1773 | CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n", | ||
| 1774 | page, items_to_skip, char_count, *eof); | ||
| 1775 | |||
| 1776 | spin_lock_bh(&sock_tag_list_lock); | ||
| 1777 | for (node = rb_first(&sock_tag_tree); | ||
| 1778 | node; | ||
| 1779 | node = rb_next(node)) { | ||
| 1780 | if (item_index++ < items_to_skip) | ||
| 1781 | continue; | ||
| 1782 | sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); | ||
| 1783 | uid = get_uid_from_tag(sock_tag_entry->tag); | ||
| 1784 | CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) " | ||
| 1785 | "pid=%u\n", | ||
| 1786 | sock_tag_entry->sk, | ||
| 1787 | sock_tag_entry->tag, | ||
| 1788 | uid, | ||
| 1789 | sock_tag_entry->pid | ||
| 1790 | ); | ||
| 1791 | f_count = atomic_long_read( | ||
| 1792 | &sock_tag_entry->socket->file->f_count); | ||
| 1793 | len = snprintf(outp, char_count, | ||
| 1794 | "sock=%p tag=0x%llx (uid=%u) pid=%u " | ||
| 1795 | "f_count=%lu\n", | ||
| 1796 | sock_tag_entry->sk, | ||
| 1797 | sock_tag_entry->tag, uid, | ||
| 1798 | sock_tag_entry->pid, f_count); | ||
| 1799 | if (len >= char_count) { | ||
| 1800 | spin_unlock_bh(&sock_tag_list_lock); | ||
| 1801 | *outp = '\0'; | ||
| 1802 | return outp - page; | ||
| 1803 | } | ||
| 1804 | outp += len; | ||
| 1805 | char_count -= len; | ||
| 1806 | (*num_items_returned)++; | ||
| 1807 | } | ||
| 1808 | spin_unlock_bh(&sock_tag_list_lock); | ||
| 1809 | |||
| 1810 | if (item_index++ >= items_to_skip) { | ||
| 1811 | len = snprintf(outp, char_count, | ||
| 1812 | "events: sockets_tagged=%llu " | ||
| 1813 | "sockets_untagged=%llu " | ||
| 1814 | "counter_set_changes=%llu " | ||
| 1815 | "delete_cmds=%llu " | ||
| 1816 | "iface_events=%llu " | ||
| 1817 | "match_calls=%llu " | ||
| 1818 | "match_found_sk=%llu " | ||
| 1819 | "match_found_sk_in_ct=%llu " | ||
| 1820 | "match_found_no_sk_in_ct=%llu " | ||
| 1821 | "match_no_sk=%llu " | ||
| 1822 | "match_no_sk_file=%llu\n", | ||
| 1823 | atomic64_read(&qtu_events.sockets_tagged), | ||
| 1824 | atomic64_read(&qtu_events.sockets_untagged), | ||
| 1825 | atomic64_read(&qtu_events.counter_set_changes), | ||
| 1826 | atomic64_read(&qtu_events.delete_cmds), | ||
| 1827 | atomic64_read(&qtu_events.iface_events), | ||
| 1828 | atomic64_read(&qtu_events.match_calls), | ||
| 1829 | atomic64_read(&qtu_events.match_found_sk), | ||
| 1830 | atomic64_read(&qtu_events.match_found_sk_in_ct), | ||
| 1831 | atomic64_read( | ||
| 1832 | &qtu_events.match_found_no_sk_in_ct), | ||
| 1833 | atomic64_read(&qtu_events.match_no_sk), | ||
| 1834 | atomic64_read(&qtu_events.match_no_sk_file)); | ||
| 1835 | if (len >= char_count) { | ||
| 1836 | *outp = '\0'; | ||
| 1837 | return outp - page; | ||
| 1838 | } | ||
| 1839 | outp += len; | ||
| 1840 | char_count -= len; | ||
| 1841 | (*num_items_returned)++; | ||
| 1842 | } | ||
| 1843 | |||
| 1844 | /* Count the following as part of the last item_index */ | ||
| 1845 | if (item_index > items_to_skip) { | ||
| 1846 | prdebug_full_state(indent_level, "proc ctrl"); | ||
| 1847 | } | ||
| 1848 | |||
| 1849 | *eof = 1; | ||
| 1850 | return outp - page; | ||
| 1851 | } | ||
| 1852 | |||
| 1853 | /* | ||
| 1854 | * Delete socket tags, and stat tags associated with a given | ||
| 1855 | * accounting tag and uid. | ||
| 1856 | */ | ||
| 1857 | static int ctrl_cmd_delete(const char *input) | ||
| 1858 | { | ||
| 1859 | char cmd; | ||
| 1860 | uid_t uid; | ||
| 1861 | uid_t entry_uid; | ||
| 1862 | tag_t acct_tag; | ||
| 1863 | tag_t tag; | ||
| 1864 | int res, argc; | ||
| 1865 | struct iface_stat *iface_entry; | ||
| 1866 | struct rb_node *node; | ||
| 1867 | struct sock_tag *st_entry; | ||
| 1868 | struct rb_root st_to_free_tree = RB_ROOT; | ||
| 1869 | struct tag_stat *ts_entry; | ||
| 1870 | struct tag_counter_set *tcs_entry; | ||
| 1871 | struct tag_ref *tr_entry; | ||
| 1872 | struct uid_tag_data *utd_entry; | ||
| 1873 | |||
| 1874 | argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid); | ||
| 1875 | CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c " | ||
| 1876 | "user_tag=0x%llx uid=%u\n", input, argc, cmd, | ||
| 1877 | acct_tag, uid); | ||
| 1878 | if (argc < 2) { | ||
| 1879 | res = -EINVAL; | ||
| 1880 | goto err; | ||
| 1881 | } | ||
| 1882 | if (!valid_atag(acct_tag)) { | ||
| 1883 | pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input); | ||
| 1884 | res = -EINVAL; | ||
| 1885 | goto err; | ||
| 1886 | } | ||
| 1887 | if (argc < 3) { | ||
| 1888 | uid = current_fsuid(); | ||
| 1889 | } else if (!can_impersonate_uid(uid)) { | ||
| 1890 | pr_info("qtaguid: ctrl_delete(%s): " | ||
| 1891 | "insufficient priv from pid=%u tgid=%u uid=%u\n", | ||
| 1892 | input, current->pid, current->tgid, current_fsuid()); | ||
| 1893 | res = -EPERM; | ||
| 1894 | goto err; | ||
| 1895 | } | ||
| 1896 | |||
| 1897 | tag = combine_atag_with_uid(acct_tag, uid); | ||
| 1898 | CT_DEBUG("qtaguid: ctrl_delete(%s): " | ||
| 1899 | "looking for tag=0x%llx (uid=%u)\n", | ||
| 1900 | input, tag, uid); | ||
| 1901 | |||
| 1902 | /* Delete socket tags */ | ||
| 1903 | spin_lock_bh(&sock_tag_list_lock); | ||
| 1904 | node = rb_first(&sock_tag_tree); | ||
| 1905 | while (node) { | ||
| 1906 | st_entry = rb_entry(node, struct sock_tag, sock_node); | ||
| 1907 | entry_uid = get_uid_from_tag(st_entry->tag); | ||
| 1908 | node = rb_next(node); | ||
| 1909 | if (entry_uid != uid) | ||
| 1910 | continue; | ||
| 1911 | |||
| 1912 | CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n", | ||
| 1913 | input, st_entry->tag, entry_uid); | ||
| 1914 | |||
| 1915 | if (!acct_tag || st_entry->tag == tag) { | ||
| 1916 | rb_erase(&st_entry->sock_node, &sock_tag_tree); | ||
| 1917 | /* Can't sockfd_put() within spinlock, do it later. */ | ||
| 1918 | sock_tag_tree_insert(st_entry, &st_to_free_tree); | ||
| 1919 | tr_entry = lookup_tag_ref(st_entry->tag, NULL); | ||
| 1920 | BUG_ON(tr_entry->num_sock_tags <= 0); | ||
| 1921 | tr_entry->num_sock_tags--; | ||
| 1922 | /* | ||
| 1923 | * TODO: remove if, and start failing. | ||
| 1924 | * This is a hack: in some places we have | ||
| 1925 | * "if (IS_ERR_OR_NULL(pqd_entry))" to tolerate | ||
| 1926 | * apps that didn't open /dev/xt_qtaguid, so the | ||
| 1927 | * list linkage cannot be trusted here. | ||
| 1928 | */ | ||
| 1929 | if (st_entry->list.next && st_entry->list.prev) | ||
| 1930 | list_del(&st_entry->list); | ||
| 1931 | } | ||
| 1932 | } | ||
| 1933 | spin_unlock_bh(&sock_tag_list_lock); | ||
| 1934 | |||
| 1935 | sock_tag_tree_erase(&st_to_free_tree); | ||
| 1936 | |||
| 1937 | /* Delete tag counter-sets */ | ||
| 1938 | spin_lock_bh(&tag_counter_set_list_lock); | ||
| 1939 | /* Counter sets are only on the uid tag, not full tag */ | ||
| 1940 | tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag); | ||
| 1941 | if (tcs_entry) { | ||
| 1942 | CT_DEBUG("qtaguid: ctrl_delete(%s): " | ||
| 1943 | "erase tcs: tag=0x%llx (uid=%u) set=%d\n", | ||
| 1944 | input, | ||
| 1945 | tcs_entry->tn.tag, | ||
| 1946 | get_uid_from_tag(tcs_entry->tn.tag), | ||
| 1947 | tcs_entry->active_set); | ||
| 1948 | rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree); | ||
| 1949 | kfree(tcs_entry); | ||
| 1950 | } | ||
| 1951 | spin_unlock_bh(&tag_counter_set_list_lock); | ||
| 1952 | |||
| 1953 | /* | ||
| 1954 | * If acct_tag is 0, then all entries belonging to uid are | ||
| 1955 | * erased. | ||
| 1956 | */ | ||
| 1957 | spin_lock_bh(&iface_stat_list_lock); | ||
| 1958 | list_for_each_entry(iface_entry, &iface_stat_list, list) { | ||
| 1959 | spin_lock_bh(&iface_entry->tag_stat_list_lock); | ||
| 1960 | node = rb_first(&iface_entry->tag_stat_tree); | ||
| 1961 | while (node) { | ||
| 1962 | ts_entry = rb_entry(node, struct tag_stat, tn.node); | ||
| 1963 | entry_uid = get_uid_from_tag(ts_entry->tn.tag); | ||
| 1964 | node = rb_next(node); | ||
| 1965 | |||
| 1966 | CT_DEBUG("qtaguid: ctrl_delete(%s): " | ||
| 1967 | "ts tag=0x%llx (uid=%u)\n", | ||
| 1968 | input, ts_entry->tn.tag, entry_uid); | ||
| 1969 | |||
| 1970 | if (entry_uid != uid) | ||
| 1971 | continue; | ||
| 1972 | if (!acct_tag || ts_entry->tn.tag == tag) { | ||
| 1973 | CT_DEBUG("qtaguid: ctrl_delete(%s): " | ||
| 1974 | "erase ts: %s 0x%llx %u\n", | ||
| 1975 | input, iface_entry->ifname, | ||
| 1976 | get_atag_from_tag(ts_entry->tn.tag), | ||
| 1977 | entry_uid); | ||
| 1978 | rb_erase(&ts_entry->tn.node, | ||
| 1979 | &iface_entry->tag_stat_tree); | ||
| 1980 | kfree(ts_entry); | ||
| 1981 | } | ||
| 1982 | } | ||
| 1983 | spin_unlock_bh(&iface_entry->tag_stat_list_lock); | ||
| 1984 | } | ||
| 1985 | spin_unlock_bh(&iface_stat_list_lock); | ||
| 1986 | |||
| 1987 | /* Cleanup the uid_tag_data */ | ||
| 1988 | spin_lock_bh(&uid_tag_data_tree_lock); | ||
| 1989 | node = rb_first(&uid_tag_data_tree); | ||
| 1990 | while (node) { | ||
| 1991 | utd_entry = rb_entry(node, struct uid_tag_data, node); | ||
| 1992 | entry_uid = utd_entry->uid; | ||
| 1993 | node = rb_next(node); | ||
| 1994 | |||
| 1995 | CT_DEBUG("qtaguid: ctrl_delete(%s): " | ||
| 1996 | "utd uid=%u\n", | ||
| 1997 | input, entry_uid); | ||
| 1998 | |||
| 1999 | if (entry_uid != uid) | ||
| 2000 | continue; | ||
| 2001 | /* | ||
| 2002 | * Go over the tag_refs, and those that don't have | ||
| 2003 | * sock_tags using them are freed. | ||
| 2004 | */ | ||
| 2005 | put_tag_ref_tree(tag, utd_entry); | ||
| 2006 | put_utd_entry(utd_entry); | ||
| 2007 | } | ||
| 2008 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
| 2009 | |||
| 2010 | atomic64_inc(&qtu_events.delete_cmds); | ||
| 2011 | res = 0; | ||
| 2012 | |||
| 2013 | err: | ||
| 2014 | return res; | ||
| 2015 | } | ||
| 2016 | |||
| 2017 | static int ctrl_cmd_counter_set(const char *input) | ||
| 2018 | { | ||
| 2019 | char cmd; | ||
| 2020 | uid_t uid = 0; | ||
| 2021 | tag_t tag; | ||
| 2022 | int res, argc; | ||
| 2023 | struct tag_counter_set *tcs; | ||
| 2024 | int counter_set; | ||
| 2025 | |||
| 2026 | argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid); | ||
| 2027 | CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c " | ||
| 2028 | "set=%d uid=%u\n", input, argc, cmd, | ||
| 2029 | counter_set, uid); | ||
| 2030 | if (argc != 3) { | ||
| 2031 | res = -EINVAL; | ||
| 2032 | goto err; | ||
| 2033 | } | ||
| 2034 | if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) { | ||
| 2035 | pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n", | ||
| 2036 | input); | ||
| 2037 | res = -EINVAL; | ||
| 2038 | goto err; | ||
| 2039 | } | ||
| 2040 | if (!can_manipulate_uids()) { | ||
| 2041 | pr_info("qtaguid: ctrl_counterset(%s): " | ||
| 2042 | "insufficient priv from pid=%u tgid=%u uid=%u\n", | ||
| 2043 | input, current->pid, current->tgid, current_fsuid()); | ||
| 2044 | res = -EPERM; | ||
| 2045 | goto err; | ||
| 2046 | } | ||
| 2047 | |||
| 2048 | tag = make_tag_from_uid(uid); | ||
| 2049 | spin_lock_bh(&tag_counter_set_list_lock); | ||
| 2050 | tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag); | ||
| 2051 | if (!tcs) { | ||
| 2052 | tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC); | ||
| 2053 | if (!tcs) { | ||
| 2054 | spin_unlock_bh(&tag_counter_set_list_lock); | ||
| 2055 | pr_err("qtaguid: ctrl_counterset(%s): " | ||
| 2056 | "failed to alloc counter set\n", | ||
| 2057 | input); | ||
| 2058 | res = -ENOMEM; | ||
| 2059 | goto err; | ||
| 2060 | } | ||
| 2061 | tcs->tn.tag = tag; | ||
| 2062 | tag_counter_set_tree_insert(tcs, &tag_counter_set_tree); | ||
| 2063 | CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx " | ||
| 2064 | "(uid=%u) set=%d\n", | ||
| 2065 | input, tag, get_uid_from_tag(tag), counter_set); | ||
| 2066 | } | ||
| 2067 | tcs->active_set = counter_set; | ||
| 2068 | spin_unlock_bh(&tag_counter_set_list_lock); | ||
| 2069 | atomic64_inc(&qtu_events.counter_set_changes); | ||
| 2070 | res = 0; | ||
| 2071 | |||
| 2072 | err: | ||
| 2073 | return res; | ||
| 2074 | } | ||
| 2075 | |||
| 2076 | static int ctrl_cmd_tag(const char *input) | ||
| 2077 | { | ||
| 2078 | char cmd; | ||
| 2079 | int sock_fd = 0; | ||
| 2080 | uid_t uid = 0; | ||
| 2081 | tag_t acct_tag = make_atag_from_value(0); | ||
| 2082 | tag_t full_tag; | ||
| 2083 | struct socket *el_socket; | ||
| 2084 | int res, argc; | ||
| 2085 | struct sock_tag *sock_tag_entry; | ||
| 2086 | struct tag_ref *tag_ref_entry; | ||
| 2087 | struct uid_tag_data *uid_tag_data_entry; | ||
| 2088 | struct proc_qtu_data *pqd_entry; | ||
| 2089 | |||
| 2090 | /* Unassigned args will get defaulted later. */ | ||
| 2091 | argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid); | ||
| 2092 | CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d " | ||
| 2093 | "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd, | ||
| 2094 | acct_tag, uid); | ||
| 2095 | if (argc < 2) { | ||
| 2096 | res = -EINVAL; | ||
| 2097 | goto err; | ||
| 2098 | } | ||
| 2099 | el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ | ||
| 2100 | if (!el_socket) { | ||
| 2101 | pr_info("qtaguid: ctrl_tag(%s): failed to lookup" | ||
| 2102 | " sock_fd=%d err=%d\n", input, sock_fd, res); | ||
| 2103 | goto err; | ||
| 2104 | } | ||
| 2105 | CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n", | ||
| 2106 | input, atomic_long_read(&el_socket->file->f_count), | ||
| 2107 | el_socket->sk); | ||
| 2108 | if (argc < 3) { | ||
| 2109 | acct_tag = make_atag_from_value(0); | ||
| 2110 | } else if (!valid_atag(acct_tag)) { | ||
| 2111 | pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input); | ||
| 2112 | res = -EINVAL; | ||
| 2113 | goto err_put; | ||
| 2114 | } | ||
| 2115 | CT_DEBUG("qtaguid: ctrl_tag(%s): " | ||
| 2116 | "pid=%u tgid=%u uid=%u euid=%u fsuid=%u " | ||
| 2117 | "in_group=%d in_egroup=%d\n", | ||
| 2118 | input, current->pid, current->tgid, current_uid(), | ||
| 2119 | current_euid(), current_fsuid(), | ||
| 2120 | in_group_p(proc_ctrl_write_gid), | ||
| 2121 | in_egroup_p(proc_ctrl_write_gid)); | ||
| 2122 | if (argc < 4) { | ||
| 2123 | uid = current_fsuid(); | ||
| 2124 | } else if (!can_impersonate_uid(uid)) { | ||
| 2125 | pr_info("qtaguid: ctrl_tag(%s): " | ||
| 2126 | "insufficient priv from pid=%u tgid=%u uid=%u\n", | ||
| 2127 | input, current->pid, current->tgid, current_fsuid()); | ||
| 2128 | res = -EPERM; | ||
| 2129 | goto err_put; | ||
| 2130 | } | ||
| 2131 | full_tag = combine_atag_with_uid(acct_tag, uid); | ||
| 2132 | |||
| 2133 | spin_lock_bh(&sock_tag_list_lock); | ||
| 2134 | sock_tag_entry = get_sock_stat_nl(el_socket->sk); | ||
| 2135 | tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry); | ||
| 2136 | if (IS_ERR(tag_ref_entry)) { | ||
| 2137 | res = PTR_ERR(tag_ref_entry); | ||
| 2138 | spin_unlock_bh(&sock_tag_list_lock); | ||
| 2139 | goto err_put; | ||
| 2140 | } | ||
| 2141 | tag_ref_entry->num_sock_tags++; | ||
| 2142 | if (sock_tag_entry) { | ||
| 2143 | struct tag_ref *prev_tag_ref_entry; | ||
| 2144 | |||
| 2145 | CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p " | ||
| 2146 | "st@%p ...->f_count=%ld\n", | ||
| 2147 | input, el_socket->sk, sock_tag_entry, | ||
| 2148 | atomic_long_read(&el_socket->file->f_count)); | ||
| 2149 | /* | ||
| 2150 | * This is a re-tagging, so release the sock_fd that was | ||
| 2151 | * locked at the time of the 1st tagging. | ||
| 2152 | * There is still the ref from this call's sockfd_lookup() so | ||
| 2153 | * it can be done within the spinlock. | ||
| 2154 | */ | ||
| 2155 | sockfd_put(sock_tag_entry->socket); | ||
| 2156 | prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, | ||
| 2157 | &uid_tag_data_entry); | ||
| 2158 | BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry)); | ||
| 2159 | BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0); | ||
| 2160 | prev_tag_ref_entry->num_sock_tags--; | ||
| 2161 | sock_tag_entry->tag = full_tag; | ||
| 2162 | } else { | ||
| 2163 | CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n", | ||
| 2164 | input, el_socket->sk); | ||
| 2165 | sock_tag_entry = kzalloc(sizeof(*sock_tag_entry), | ||
| 2166 | GFP_ATOMIC); | ||
| 2167 | if (!sock_tag_entry) { | ||
| 2168 | pr_err("qtaguid: ctrl_tag(%s): " | ||
| 2169 | "socket tag alloc failed\n", | ||
| 2170 | input); | ||
| 2171 | spin_unlock_bh(&sock_tag_list_lock); | ||
| 2172 | res = -ENOMEM; | ||
| 2173 | goto err_tag_unref_put; | ||
| 2174 | } | ||
| 2175 | sock_tag_entry->sk = el_socket->sk; | ||
| 2176 | sock_tag_entry->socket = el_socket; | ||
| 2177 | sock_tag_entry->pid = current->tgid; | ||
| 2178 | sock_tag_entry->tag = combine_atag_with_uid(acct_tag, | ||
| 2179 | uid); | ||
| 2180 | spin_lock_bh(&uid_tag_data_tree_lock); | ||
| 2181 | pqd_entry = proc_qtu_data_tree_search( | ||
| 2182 | &proc_qtu_data_tree, current->tgid); | ||
| 2183 | /* | ||
| 2184 | * TODO: remove if, and start failing. | ||
| 2185 | * At first, we want to catch user-space code that is not | ||
| 2186 | * opening the /dev/xt_qtaguid. | ||
| 2187 | */ | ||
| 2188 | if (IS_ERR_OR_NULL(pqd_entry)) | ||
| 2189 | pr_warn_once( | ||
| 2190 | "qtaguid: %s(): " | ||
| 2191 | "User space forgot to open /dev/xt_qtaguid? " | ||
| 2192 | "pid=%u tgid=%u uid=%u\n", __func__, | ||
| 2193 | current->pid, current->tgid, | ||
| 2194 | current_fsuid()); | ||
| 2195 | else | ||
| 2196 | list_add(&sock_tag_entry->list, | ||
| 2197 | &pqd_entry->sock_tag_list); | ||
| 2198 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
| 2199 | |||
| 2200 | sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree); | ||
| 2201 | atomic64_inc(&qtu_events.sockets_tagged); | ||
| 2202 | } | ||
| 2203 | spin_unlock_bh(&sock_tag_list_lock); | ||
| 2204 | /* We keep the ref to the socket (file) until it is untagged */ | ||
| 2205 | CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n", | ||
| 2206 | input, sock_tag_entry, | ||
| 2207 | atomic_long_read(&el_socket->file->f_count)); | ||
| 2208 | return 0; | ||
| 2209 | |||
| 2210 | err_tag_unref_put: | ||
| 2211 | BUG_ON(tag_ref_entry->num_sock_tags <= 0); | ||
| 2212 | tag_ref_entry->num_sock_tags--; | ||
| 2213 | free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry); | ||
| 2214 | err_put: | ||
| 2215 | CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n", | ||
| 2216 | input, atomic_long_read(&el_socket->file->f_count) - 1); | ||
| 2217 | /* Release the sock_fd that was grabbed by sockfd_lookup(). */ | ||
| 2218 | sockfd_put(el_socket); | ||
| 2219 | return res; | ||
| 2220 | |||
| 2221 | err: | ||
| 2222 | CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input); | ||
| 2223 | return res; | ||
| 2224 | } | ||
| 2225 | |||
| 2226 | static int ctrl_cmd_untag(const char *input) | ||
| 2227 | { | ||
| 2228 | char cmd; | ||
| 2229 | int sock_fd = 0; | ||
| 2230 | struct socket *el_socket; | ||
| 2231 | int res, argc; | ||
| 2232 | struct sock_tag *sock_tag_entry; | ||
| 2233 | struct tag_ref *tag_ref_entry; | ||
| 2234 | struct uid_tag_data *utd_entry; | ||
| 2235 | struct proc_qtu_data *pqd_entry; | ||
| 2236 | |||
| 2237 | argc = sscanf(input, "%c %d", &cmd, &sock_fd); | ||
| 2238 | CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n", | ||
| 2239 | input, argc, cmd, sock_fd); | ||
| 2240 | if (argc < 2) { | ||
| 2241 | res = -EINVAL; | ||
| 2242 | goto err; | ||
| 2243 | } | ||
| 2244 | el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */ | ||
| 2245 | if (!el_socket) { | ||
| 2246 | pr_info("qtaguid: ctrl_untag(%s): failed to lookup" | ||
| 2247 | " sock_fd=%d err=%d\n", input, sock_fd, res); | ||
| 2248 | goto err; | ||
| 2249 | } | ||
| 2250 | CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n", | ||
| 2251 | input, atomic_long_read(&el_socket->file->f_count), | ||
| 2252 | el_socket->sk); | ||
| 2253 | spin_lock_bh(&sock_tag_list_lock); | ||
| 2254 | sock_tag_entry = get_sock_stat_nl(el_socket->sk); | ||
| 2255 | if (!sock_tag_entry) { | ||
| 2256 | spin_unlock_bh(&sock_tag_list_lock); | ||
| 2257 | res = -EINVAL; | ||
| 2258 | goto err_put; | ||
| 2259 | } | ||
| 2260 | /* | ||
| 2261 | * The socket already belongs to the current process | ||
| 2262 | * so it can do whatever it wants to it. | ||
| 2263 | */ | ||
| 2264 | rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree); | ||
| 2265 | |||
| 2266 | tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry); | ||
| 2267 | BUG_ON(!tag_ref_entry); | ||
| 2268 | BUG_ON(tag_ref_entry->num_sock_tags <= 0); | ||
| 2269 | spin_lock_bh(&uid_tag_data_tree_lock); | ||
| 2270 | pqd_entry = proc_qtu_data_tree_search( | ||
| 2271 | &proc_qtu_data_tree, current->tgid); | ||
| 2272 | /* | ||
| 2273 | * TODO: remove if, and start failing. | ||
| 2274 | * At first, we want to catch user-space code that is not | ||
| 2275 | * opening the /dev/xt_qtaguid. | ||
| 2276 | */ | ||
| 2277 | if (IS_ERR_OR_NULL(pqd_entry)) | ||
| 2278 | pr_warn_once("qtaguid: %s(): " | ||
| 2279 | "User space forgot to open /dev/xt_qtaguid? " | ||
| 2280 | "pid=%u tgid=%u uid=%u\n", __func__, | ||
| 2281 | current->pid, current->tgid, current_fsuid()); | ||
| 2282 | else | ||
| 2283 | list_del(&sock_tag_entry->list); | ||
| 2284 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
| 2285 | /* | ||
| 2286 | * We don't free tag_ref from the utd_entry here, | ||
| 2287 | * only during a cmd_delete(). | ||
| 2288 | */ | ||
| 2289 | tag_ref_entry->num_sock_tags--; | ||
| 2290 | spin_unlock_bh(&sock_tag_list_lock); | ||
| 2291 | /* | ||
| 2292 | * Release the sock_fd that was grabbed at tag time, | ||
| 2293 | * and once more for the sockfd_lookup() here. | ||
| 2294 | */ | ||
| 2295 | sockfd_put(sock_tag_entry->socket); | ||
| 2296 | CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n", | ||
| 2297 | input, sock_tag_entry, | ||
| 2298 | atomic_long_read(&el_socket->file->f_count) - 1); | ||
| 2299 | sockfd_put(el_socket); | ||
| 2300 | |||
| 2301 | kfree(sock_tag_entry); | ||
| 2302 | atomic64_inc(&qtu_events.sockets_untagged); | ||
| 2303 | |||
| 2304 | return 0; | ||
| 2305 | |||
| 2306 | err_put: | ||
| 2307 | CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n", | ||
| 2308 | input, atomic_long_read(&el_socket->file->f_count) - 1); | ||
| 2309 | /* Release the sock_fd that was grabbed by sockfd_lookup(). */ | ||
| 2310 | sockfd_put(el_socket); | ||
| 2311 | return res; | ||
| 2312 | |||
| 2313 | err: | ||
| 2314 | CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input); | ||
| 2315 | return res; | ||
| 2316 | } | ||
| 2317 | |||
| 2318 | static int qtaguid_ctrl_parse(const char *input, int count) | ||
| 2319 | { | ||
| 2320 | char cmd; | ||
| 2321 | int res; | ||
| 2322 | |||
| 2323 | cmd = input[0]; | ||
| 2324 | /* Collect params for commands */ | ||
| 2325 | switch (cmd) { | ||
| 2326 | case 'd': | ||
| 2327 | res = ctrl_cmd_delete(input); | ||
| 2328 | break; | ||
| 2329 | |||
| 2330 | case 's': | ||
| 2331 | res = ctrl_cmd_counter_set(input); | ||
| 2332 | break; | ||
| 2333 | |||
| 2334 | case 't': | ||
| 2335 | res = ctrl_cmd_tag(input); | ||
| 2336 | break; | ||
| 2337 | |||
| 2338 | case 'u': | ||
| 2339 | res = ctrl_cmd_untag(input); | ||
| 2340 | break; | ||
| 2341 | |||
| 2342 | default: | ||
| 2343 | res = -EINVAL; | ||
| 2344 | goto err; | ||
| 2345 | } | ||
| 2346 | if (!res) | ||
| 2347 | res = count; | ||
| 2348 | err: | ||
| 2349 | CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res); | ||
| 2350 | return res; | ||
| 2351 | } | ||
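The four commands dispatched above form the entire ctrl grammar: "d <acct_tag> [<uid>]", "s <counter_set> <uid>", "t <sock_fd> [<acct_tag> [<uid>]]" and "u <sock_fd>". A hedged userspace sketch of the tag/untag round trip (the /proc/net/xt_qtaguid/ctrl path follows from qtaguid_proc_register() further down; the helper names and tag value are illustrative, and the tag is written in decimal since the kernel parses it with "%llu"):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int qtaguid_write_ctrl(const char *cmd)
{
	int fd = open("/proc/net/xt_qtaguid/ctrl", O_WRONLY);
	ssize_t res;

	if (fd < 0)
		return -1;
	res = write(fd, cmd, strlen(cmd));
	close(fd);
	return res < 0 ? -1 : 0;
}

int tag_untag_example(int sock_fd)
{
	char buf[64];

	/* "t <sock_fd> <acct_tag> [<uid>]"; uid defaults to the caller's.
	 * An acct value of 2 becomes 2ULL << 32 on the wire (uid bits clear). */
	snprintf(buf, sizeof(buf), "t %d %llu",
		 sock_fd, (unsigned long long)2 << 32);
	if (qtaguid_write_ctrl(buf))
		return -1;
	/* ... traffic on sock_fd is now billed to acct_tag 2 ... */
	snprintf(buf, sizeof(buf), "u %d", sock_fd);	/* "u <sock_fd>" */
	return qtaguid_write_ctrl(buf);
}

On success the write returns the byte count, mirroring qtaguid_ctrl_parse() returning count.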
| 2352 | |||
| 2353 | #define MAX_QTAGUID_CTRL_INPUT_LEN 255 | ||
| 2354 | static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer, | ||
| 2355 | unsigned long count, void *data) | ||
| 2356 | { | ||
| 2357 | char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN]; | ||
| 2358 | |||
| 2359 | if (unlikely(module_passive)) | ||
| 2360 | return count; | ||
| 2361 | |||
| 2362 | if (count >= MAX_QTAGUID_CTRL_INPUT_LEN) | ||
| 2363 | return -EINVAL; | ||
| 2364 | |||
| 2365 | if (copy_from_user(input_buf, buffer, count)) | ||
| 2366 | return -EFAULT; | ||
| 2367 | |||
| 2368 | input_buf[count] = '\0'; | ||
| 2369 | return qtaguid_ctrl_parse(input_buf, count); | ||
| 2370 | } | ||
| 2371 | |||
| 2372 | struct proc_print_info { | ||
| 2373 | char *outp; | ||
| 2374 | char **num_items_returned; | ||
| 2375 | struct iface_stat *iface_entry; | ||
| 2376 | struct tag_stat *ts_entry; | ||
| 2377 | int item_index; | ||
| 2378 | int items_to_skip; | ||
| 2379 | int char_count; | ||
| 2380 | }; | ||
| 2381 | |||
| 2382 | static int pp_stats_line(struct proc_print_info *ppi, int cnt_set) | ||
| 2383 | { | ||
| 2384 | int len; | ||
| 2385 | struct data_counters *cnts; | ||
| 2386 | |||
| 2387 | if (!ppi->item_index) { | ||
| 2388 | if (ppi->item_index++ < ppi->items_to_skip) | ||
| 2389 | return 0; | ||
| 2390 | len = snprintf(ppi->outp, ppi->char_count, | ||
| 2391 | "idx iface acct_tag_hex uid_tag_int cnt_set " | ||
| 2392 | "rx_bytes rx_packets " | ||
| 2393 | "tx_bytes tx_packets " | ||
| 2394 | "rx_tcp_bytes rx_tcp_packets " | ||
| 2395 | "rx_udp_bytes rx_udp_packets " | ||
| 2396 | "rx_other_bytes rx_other_packets " | ||
| 2397 | "tx_tcp_bytes tx_tcp_packets " | ||
| 2398 | "tx_udp_bytes tx_udp_packets " | ||
| 2399 | "tx_other_bytes tx_other_packets\n"); | ||
| 2400 | } else { | ||
| 2401 | tag_t tag = ppi->ts_entry->tn.tag; | ||
| 2402 | uid_t stat_uid = get_uid_from_tag(tag); | ||
| 2403 | |||
| 2404 | if (!can_read_other_uid_stats(stat_uid)) { | ||
| 2405 | CT_DEBUG("qtaguid: stats line: " | ||
| 2406 | "%s 0x%llx %u: insufficient priv " | ||
| 2407 | "from pid=%u tgid=%u uid=%u\n", | ||
| 2408 | ppi->iface_entry->ifname, | ||
| 2409 | get_atag_from_tag(tag), stat_uid, | ||
| 2410 | current->pid, current->tgid, current_fsuid()); | ||
| 2411 | return 0; | ||
| 2412 | } | ||
| 2413 | if (ppi->item_index++ < ppi->items_to_skip) | ||
| 2414 | return 0; | ||
| 2415 | cnts = &ppi->ts_entry->counters; | ||
| 2416 | len = snprintf( | ||
| 2417 | ppi->outp, ppi->char_count, | ||
| 2418 | "%d %s 0x%llx %u %u " | ||
| 2419 | "%llu %llu " | ||
| 2420 | "%llu %llu " | ||
| 2421 | "%llu %llu " | ||
| 2422 | "%llu %llu " | ||
| 2423 | "%llu %llu " | ||
| 2424 | "%llu %llu " | ||
| 2425 | "%llu %llu " | ||
| 2426 | "%llu %llu\n", | ||
| 2427 | ppi->item_index, | ||
| 2428 | ppi->iface_entry->ifname, | ||
| 2429 | get_atag_from_tag(tag), | ||
| 2430 | stat_uid, | ||
| 2431 | cnt_set, | ||
| 2432 | dc_sum_bytes(cnts, cnt_set, IFS_RX), | ||
| 2433 | dc_sum_packets(cnts, cnt_set, IFS_RX), | ||
| 2434 | dc_sum_bytes(cnts, cnt_set, IFS_TX), | ||
| 2435 | dc_sum_packets(cnts, cnt_set, IFS_TX), | ||
| 2436 | cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes, | ||
| 2437 | cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets, | ||
| 2438 | cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes, | ||
| 2439 | cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets, | ||
| 2440 | cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes, | ||
| 2441 | cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets, | ||
| 2442 | cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes, | ||
| 2443 | cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets, | ||
| 2444 | cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes, | ||
| 2445 | cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets, | ||
| 2446 | cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes, | ||
| 2447 | cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets); | ||
| 2448 | } | ||
| 2449 | return len; | ||
| 2450 | } | ||
| 2451 | |||
| 2452 | static bool pp_sets(struct proc_print_info *ppi) | ||
| 2453 | { | ||
| 2454 | int len; | ||
| 2455 | int counter_set; | ||
| 2456 | for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS; | ||
| 2457 | counter_set++) { | ||
| 2458 | len = pp_stats_line(ppi, counter_set); | ||
| 2459 | if (len >= ppi->char_count) { | ||
| 2460 | *ppi->outp = '\0'; | ||
| 2461 | return false; | ||
| 2462 | } | ||
| 2463 | if (len) { | ||
| 2464 | ppi->outp += len; | ||
| 2465 | ppi->char_count -= len; | ||
| 2466 | (*ppi->num_items_returned)++; | ||
| 2467 | } | ||
| 2468 | } | ||
| 2469 | return true; | ||
| 2470 | } | ||
| 2471 | |||
| 2472 | /* | ||
| 2473 | * Procfs reader to get all tag stats using style "1)" as described in | ||
| 2474 | * fs/proc/generic.c | ||
| 2475 | * Groups all protocols tx/rx bytes. | ||
| 2476 | */ | ||
| 2477 | static int qtaguid_stats_proc_read(char *page, char **num_items_returned, | ||
| 2478 | off_t items_to_skip, int char_count, int *eof, | ||
| 2479 | void *data) | ||
| 2480 | { | ||
| 2481 | struct proc_print_info ppi; | ||
| 2482 | int len; | ||
| 2483 | |||
| 2484 | ppi.outp = page; | ||
| 2485 | ppi.item_index = 0; | ||
| 2486 | ppi.char_count = char_count; | ||
| 2487 | ppi.num_items_returned = num_items_returned; | ||
| 2488 | ppi.items_to_skip = items_to_skip; | ||
| 2489 | |||
| 2490 | if (unlikely(module_passive)) { | ||
| 2491 | len = pp_stats_line(&ppi, 0); | ||
| 2492 | /* The header should always be shorter than the buffer. */ | ||
| 2493 | BUG_ON(len >= ppi.char_count); | ||
| 2494 | (*num_items_returned)++; | ||
| 2495 | *eof = 1; | ||
| 2496 | return len; | ||
| 2497 | } | ||
| 2498 | |||
| 2499 | CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld " | ||
| 2500 | "char_count=%d *eof=%d\n", page, *num_items_returned, | ||
| 2501 | items_to_skip, char_count, *eof); | ||
| 2502 | |||
| 2503 | if (*eof) | ||
| 2504 | return 0; | ||
| 2505 | |||
| 2506 | /* The idx is there to help debug when things go belly up. */ | ||
| 2507 | len = pp_stats_line(&ppi, 0); | ||
| 2508 | /* Don't advance the outp unless the whole line was printed */ | ||
| 2509 | if (len >= ppi.char_count) { | ||
| 2510 | *ppi.outp = '\0'; | ||
| 2511 | return ppi.outp - page; | ||
| 2512 | } | ||
| 2513 | if (len) { | ||
| 2514 | ppi.outp += len; | ||
| 2515 | ppi.char_count -= len; | ||
| 2516 | (*num_items_returned)++; | ||
| 2517 | } | ||
| 2518 | |||
| 2519 | spin_lock_bh(&iface_stat_list_lock); | ||
| 2520 | list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) { | ||
| 2521 | struct rb_node *node; | ||
| 2522 | spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock); | ||
| 2523 | for (node = rb_first(&ppi.iface_entry->tag_stat_tree); | ||
| 2524 | node; | ||
| 2525 | node = rb_next(node)) { | ||
| 2526 | ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node); | ||
| 2527 | if (!pp_sets(&ppi)) { | ||
| 2528 | spin_unlock_bh( | ||
| 2529 | &ppi.iface_entry->tag_stat_list_lock); | ||
| 2530 | spin_unlock_bh(&iface_stat_list_lock); | ||
| 2531 | return ppi.outp - page; | ||
| 2532 | } | ||
| 2533 | } | ||
| 2534 | spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock); | ||
| 2535 | } | ||
| 2536 | spin_unlock_bh(&iface_stat_list_lock); | ||
| 2537 | |||
| 2538 | *eof = 1; | ||
| 2539 | return ppi.outp - page; | ||
| 2540 | } | ||
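A hedged sketch of consuming the stats rows emitted above, for reference: the sscanf format mirrors the snprintf format in pp_stats_line(), and only the leading nine fields are decoded here (the twelve per-protocol counters follow on the same line):

#include <stdio.h>

struct qtaguid_stats_row {
	int idx;
	char iface[17];
	unsigned long long acct_tag;
	unsigned int uid;
	unsigned int cnt_set;
	unsigned long long rx_bytes, rx_packets, tx_bytes, tx_packets;
};

/* Returns 0 if one data row (not the header line) was parsed. */
static int parse_stats_row(const char *line, struct qtaguid_stats_row *r)
{
	int n = sscanf(line, "%d %16s 0x%llx %u %u %llu %llu %llu %llu",
		       &r->idx, r->iface, &r->acct_tag, &r->uid, &r->cnt_set,
		       &r->rx_bytes, &r->rx_packets,
		       &r->tx_bytes, &r->tx_packets);
	return n == 9 ? 0 : -1;
}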
| 2541 | |||
| 2542 | /*------------------------------------------*/ | ||
| 2543 | static int qtudev_open(struct inode *inode, struct file *file) | ||
| 2544 | { | ||
| 2545 | struct uid_tag_data *utd_entry; | ||
| 2546 | struct proc_qtu_data *pqd_entry; | ||
| 2547 | struct proc_qtu_data *new_pqd_entry; | ||
| 2548 | int res; | ||
| 2549 | bool utd_entry_found; | ||
| 2550 | |||
| 2551 | if (unlikely(qtu_proc_handling_passive)) | ||
| 2552 | return 0; | ||
| 2553 | |||
| 2554 | DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n", | ||
| 2555 | current->pid, current->tgid, current_fsuid()); | ||
| 2556 | |||
| 2557 | spin_lock_bh(&uid_tag_data_tree_lock); | ||
| 2558 | |||
| 2559 | /* Look for existing uid data, or alloc one. */ | ||
| 2560 | utd_entry = get_uid_data(current_fsuid(), &utd_entry_found); | ||
| 2561 | if (IS_ERR_OR_NULL(utd_entry)) { | ||
| 2562 | res = PTR_ERR(utd_entry); | ||
| 2563 | goto err; | ||
| 2564 | } | ||
| 2565 | |||
| 2566 | /* Look for existing PID based proc_data */ | ||
| 2567 | pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree, | ||
| 2568 | current->tgid); | ||
| 2569 | if (pqd_entry) { | ||
| 2570 | pr_err("qtaguid: qtudev_open(): %u/%u %u " | ||
| 2571 | "%s already opened\n", | ||
| 2572 | current->pid, current->tgid, current_fsuid(), | ||
| 2573 | QTU_DEV_NAME); | ||
| 2574 | res = -EBUSY; | ||
| 2575 | goto err_unlock_free_utd; | ||
| 2576 | } | ||
| 2577 | |||
| 2578 | new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC); | ||
| 2579 | if (!new_pqd_entry) { | ||
| 2580 | pr_err("qtaguid: qtudev_open(): %u/%u %u: " | ||
| 2581 | "proc data alloc failed\n", | ||
| 2582 | current->pid, current->tgid, current_fsuid()); | ||
| 2583 | res = -ENOMEM; | ||
| 2584 | goto err_unlock_free_utd; | ||
| 2585 | } | ||
| 2586 | new_pqd_entry->pid = current->tgid; | ||
| 2587 | INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list); | ||
| 2588 | new_pqd_entry->parent_tag_data = utd_entry; | ||
| 2589 | utd_entry->num_pqd++; | ||
| 2590 | |||
| 2591 | proc_qtu_data_tree_insert(new_pqd_entry, | ||
| 2592 | &proc_qtu_data_tree); | ||
| 2593 | |||
| 2594 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
| 2595 | DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n", | ||
| 2596 | current_fsuid(), new_pqd_entry); | ||
| 2597 | file->private_data = new_pqd_entry; | ||
| 2598 | return 0; | ||
| 2599 | |||
| 2600 | err_unlock_free_utd: | ||
| 2601 | if (!utd_entry_found) { | ||
| 2602 | rb_erase(&utd_entry->node, &uid_tag_data_tree); | ||
| 2603 | kfree(utd_entry); | ||
| 2604 | } | ||
| 2605 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
| 2606 | err: | ||
| 2607 | return res; | ||
| 2608 | } | ||
| 2609 | |||
| 2610 | static int qtudev_release(struct inode *inode, struct file *file) | ||
| 2611 | { | ||
| 2612 | struct proc_qtu_data *pqd_entry = file->private_data; | ||
| 2613 | struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data; | ||
| 2614 | struct sock_tag *st_entry; | ||
| 2615 | struct rb_root st_to_free_tree = RB_ROOT; | ||
| 2616 | struct list_head *entry, *next; | ||
| 2617 | struct tag_ref *tr; | ||
| 2618 | |||
| 2619 | if (unlikely(qtu_proc_handling_passive)) | ||
| 2620 | return 0; | ||
| 2621 | |||
| 2622 | /* | ||
| 2623 | * Do not trust the current->pid, it might just be a kworker cleaning | ||
| 2624 | * up after a dead proc. | ||
| 2625 | */ | ||
| 2626 | DR_DEBUG("qtaguid: qtudev_release(): " | ||
| 2627 | "pid=%u tgid=%u uid=%u " | ||
| 2628 | "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n", | ||
| 2629 | current->pid, current->tgid, pqd_entry->parent_tag_data->uid, | ||
| 2630 | pqd_entry, pqd_entry->pid, utd_entry, | ||
| 2631 | utd_entry->num_active_tags); | ||
| 2632 | |||
| 2633 | spin_lock_bh(&sock_tag_list_lock); | ||
| 2634 | spin_lock_bh(&uid_tag_data_tree_lock); | ||
| 2635 | |||
| 2636 | list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) { | ||
| 2637 | st_entry = list_entry(entry, struct sock_tag, list); | ||
| 2638 | DR_DEBUG("qtaguid: %s(): " | ||
| 2639 | "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n", | ||
| 2640 | __func__, | ||
| 2641 | st_entry, st_entry->sk, | ||
| 2642 | current->pid, current->tgid, | ||
| 2643 | pqd_entry->parent_tag_data->uid); | ||
| 2644 | |||
| 2645 | utd_entry = uid_tag_data_tree_search( | ||
| 2646 | &uid_tag_data_tree, | ||
| 2647 | get_uid_from_tag(st_entry->tag)); | ||
| 2648 | BUG_ON(IS_ERR_OR_NULL(utd_entry)); | ||
| 2649 | DR_DEBUG("qtaguid: %s(): " | ||
| 2650 | "looking for tag=0x%llx in utd_entry=%p\n", __func__, | ||
| 2651 | st_entry->tag, utd_entry); | ||
| 2652 | tr = tag_ref_tree_search(&utd_entry->tag_ref_tree, | ||
| 2653 | st_entry->tag); | ||
| 2654 | BUG_ON(!tr); | ||
| 2655 | BUG_ON(tr->num_sock_tags <= 0); | ||
| 2656 | tr->num_sock_tags--; | ||
| 2657 | free_tag_ref_from_utd_entry(tr, utd_entry); | ||
| 2658 | |||
| 2659 | rb_erase(&st_entry->sock_node, &sock_tag_tree); | ||
| 2660 | list_del(&st_entry->list); | ||
| 2661 | /* Can't sockfd_put() within spinlock, do it later. */ | ||
| 2662 | sock_tag_tree_insert(st_entry, &st_to_free_tree); | ||
| 2663 | |||
| 2664 | /* | ||
| 2665 | * Try to free the utd_entry if no other proc_qtu_data is | ||
| 2666 | * using it (num_pqd is 0) and it doesn't have active tags | ||
| 2667 | * (num_active_tags is 0). | ||
| 2668 | */ | ||
| 2669 | put_utd_entry(utd_entry); | ||
| 2670 | } | ||
| 2671 | |||
| 2672 | rb_erase(&pqd_entry->node, &proc_qtu_data_tree); | ||
| 2673 | BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1); | ||
| 2674 | pqd_entry->parent_tag_data->num_pqd--; | ||
| 2675 | put_utd_entry(pqd_entry->parent_tag_data); | ||
| 2676 | kfree(pqd_entry); | ||
| 2677 | file->private_data = NULL; | ||
| 2678 | |||
| 2679 | spin_unlock_bh(&uid_tag_data_tree_lock); | ||
| 2680 | spin_unlock_bh(&sock_tag_list_lock); | ||
| 2681 | |||
| 2682 | |||
| 2683 | sock_tag_tree_erase(&st_to_free_tree); | ||
| 2684 | |||
| 2685 | prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__, | ||
| 2686 | current->pid, current->tgid); | ||
| 2687 | return 0; | ||
| 2688 | } | ||
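The open()/release() pair above is why a tagging process is expected to hold the misc device open for its entire lifetime: release() is what erases the sock_tags a dying process leaves behind. A minimal userspace sketch, assuming /dev/xt_qtaguid is the node created for QTU_DEV_NAME:

#include <fcntl.h>
#include <unistd.h>

static int qtu_fd = -1;

/* Open once, early, and keep the fd for the life of the process. */
int qtaguid_attach(void)
{
	if (qtu_fd >= 0)
		return 0;	/* qtudev_open() would return -EBUSY anyway */
	qtu_fd = open("/dev/xt_qtaguid", O_RDONLY | O_CLOEXEC);
	return qtu_fd < 0 ? -1 : 0;
}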
| 2689 | |||
| 2690 | /*------------------------------------------*/ | ||
| 2691 | static const struct file_operations qtudev_fops = { | ||
| 2692 | .owner = THIS_MODULE, | ||
| 2693 | .open = qtudev_open, | ||
| 2694 | .release = qtudev_release, | ||
| 2695 | }; | ||
| 2696 | |||
| 2697 | static struct miscdevice qtu_device = { | ||
| 2698 | .minor = MISC_DYNAMIC_MINOR, | ||
| 2699 | .name = QTU_DEV_NAME, | ||
| 2700 | .fops = &qtudev_fops, | ||
| 2701 | /* Sadly there is no way to set defaults here: .mode = S_IRUGO | S_IWUSR */ | ||
| 2702 | }; | ||
| 2703 | |||
| 2704 | /*------------------------------------------*/ | ||
| 2705 | static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir) | ||
| 2706 | { | ||
| 2707 | int ret; | ||
| 2708 | *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net); | ||
| 2709 | if (!*res_procdir) { | ||
| 2710 | pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n"); | ||
| 2711 | ret = -ENOMEM; | ||
| 2712 | goto no_dir; | ||
| 2713 | } | ||
| 2714 | |||
| 2715 | xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms, | ||
| 2716 | *res_procdir); | ||
| 2717 | if (!xt_qtaguid_ctrl_file) { | ||
| 2718 | pr_err("qtaguid: failed to create xt_qtaguid/ctrl " | ||
| 2719 | "file\n"); | ||
| 2720 | ret = -ENOMEM; | ||
| 2721 | goto no_ctrl_entry; | ||
| 2722 | } | ||
| 2723 | xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read; | ||
| 2724 | xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write; | ||
| 2725 | |||
| 2726 | xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms, | ||
| 2727 | *res_procdir); | ||
| 2728 | if (!xt_qtaguid_stats_file) { | ||
| 2729 | pr_err("qtaguid: failed to create xt_qtaguid/stats " | ||
| 2730 | "file\n"); | ||
| 2731 | ret = -ENOMEM; | ||
| 2732 | goto no_stats_entry; | ||
| 2733 | } | ||
| 2734 | xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read; | ||
| 2735 | /* | ||
| 2736 | * TODO: add support counter hacking | ||
| 2737 | * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write; | ||
| 2738 | */ | ||
| 2739 | return 0; | ||
| 2740 | |||
| 2741 | no_stats_entry: | ||
| 2742 | remove_proc_entry("ctrl", *res_procdir); | ||
| 2743 | no_ctrl_entry: | ||
| 2744 | remove_proc_entry("xt_qtaguid", NULL); | ||
| 2745 | no_dir: | ||
| 2746 | return ret; | ||
| 2747 | } | ||
| 2748 | |||
| 2749 | static struct xt_match qtaguid_mt_reg __read_mostly = { | ||
| 2750 | /* | ||
| 2751 | * This module masquerades as the "owner" module so that iptables | ||
| 2752 | * tools can deal with it. | ||
| 2753 | */ | ||
| 2754 | .name = "owner", | ||
| 2755 | .revision = 1, | ||
| 2756 | .family = NFPROTO_UNSPEC, | ||
| 2757 | .match = qtaguid_mt, | ||
| 2758 | .matchsize = sizeof(struct xt_qtaguid_match_info), | ||
| 2759 | .me = THIS_MODULE, | ||
| 2760 | }; | ||
| 2761 | |||
| 2762 | static int __init qtaguid_mt_init(void) | ||
| 2763 | { | ||
| 2764 | if (qtaguid_proc_register(&xt_qtaguid_procdir) | ||
| 2765 | || iface_stat_init(xt_qtaguid_procdir) | ||
| 2766 | || xt_register_match(&qtaguid_mt_reg) | ||
| 2767 | || misc_register(&qtu_device)) | ||
| 2768 | return -1; | ||
| 2769 | return 0; | ||
| 2770 | } | ||
| 2771 | |||
| 2772 | /* | ||
| 2773 | * TODO: allow unloading of the module. | ||
| 2774 | * For now stats are permanent. | ||
| 2775 | * Kconfig forces 'y/n' and never an 'm'. | ||
| 2776 | */ | ||
| 2777 | |||
| 2778 | module_init(qtaguid_mt_init); | ||
| 2779 | MODULE_AUTHOR("jpa <jpa@google.com>"); | ||
| 2780 | MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats"); | ||
| 2781 | MODULE_LICENSE("GPL"); | ||
| 2782 | MODULE_ALIAS("ipt_owner"); | ||
| 2783 | MODULE_ALIAS("ip6t_owner"); | ||
| 2784 | MODULE_ALIAS("ipt_qtaguid"); | ||
| 2785 | MODULE_ALIAS("ip6t_qtaguid"); | ||
diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h new file mode 100644 index 00000000000..02479d6d317 --- /dev/null +++ b/net/netfilter/xt_qtaguid_internal.h | |||
| @@ -0,0 +1,330 @@ | |||
| 1 | /* | ||
| 2 | * Kernel iptables module to track stats for packets based on user tags. | ||
| 3 | * | ||
| 4 | * (C) 2011 Google, Inc | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License version 2 as | ||
| 8 | * published by the Free Software Foundation. | ||
| 9 | */ | ||
| 10 | #ifndef __XT_QTAGUID_INTERNAL_H__ | ||
| 11 | #define __XT_QTAGUID_INTERNAL_H__ | ||
| 12 | |||
| 13 | #include <linux/types.h> | ||
| 14 | #include <linux/rbtree.h> | ||
| 15 | #include <linux/spinlock_types.h> | ||
| 16 | #include <linux/workqueue.h> | ||
| 17 | |||
| 18 | /* Iface handling */ | ||
| 19 | #define IDEBUG_MASK (1<<0) | ||
| 20 | /* Iptable Matching. Per packet. */ | ||
| 21 | #define MDEBUG_MASK (1<<1) | ||
| 22 | /* Red-black tree handling. Per packet. */ | ||
| 23 | #define RDEBUG_MASK (1<<2) | ||
| 24 | /* procfs ctrl/stats handling */ | ||
| 25 | #define CDEBUG_MASK (1<<3) | ||
| 26 | /* dev and resource tracking */ | ||
| 27 | #define DDEBUG_MASK (1<<4) | ||
| 28 | |||
| 29 | /* E.g (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */ | ||
| 30 | #define DEFAULT_DEBUG_MASK 0 | ||
| 31 | |||
| 32 | /* | ||
| 33 | * (Un)Define these *DEBUG to compile out/in the pr_debug calls. | ||
| 34 | * All undef: text size ~ 0x3030; all def: ~ 0x4404. | ||
| 35 | */ | ||
| 36 | #define IDEBUG | ||
| 37 | #define MDEBUG | ||
| 38 | #define RDEBUG | ||
| 39 | #define CDEBUG | ||
| 40 | #define DDEBUG | ||
| 41 | |||
| 42 | #define MSK_DEBUG(mask, ...) do { \ | ||
| 43 | if (unlikely(qtaguid_debug_mask & (mask))) \ | ||
| 44 | pr_debug(__VA_ARGS__); \ | ||
| 45 | } while (0) | ||
| 46 | #ifdef IDEBUG | ||
| 47 | #define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__) | ||
| 48 | #else | ||
| 49 | #define IF_DEBUG(...) no_printk(__VA_ARGS__) | ||
| 50 | #endif | ||
| 51 | #ifdef MDEBUG | ||
| 52 | #define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__) | ||
| 53 | #else | ||
| 54 | #define MT_DEBUG(...) no_printk(__VA_ARGS__) | ||
| 55 | #endif | ||
| 56 | #ifdef RDEBUG | ||
| 57 | #define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__) | ||
| 58 | #else | ||
| 59 | #define RB_DEBUG(...) no_printk(__VA_ARGS__) | ||
| 60 | #endif | ||
| 61 | #ifdef CDEBUG | ||
| 62 | #define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__) | ||
| 63 | #else | ||
| 64 | #define CT_DEBUG(...) no_printk(__VA_ARGS__) | ||
| 65 | #endif | ||
| 66 | #ifdef DDEBUG | ||
| 67 | #define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__) | ||
| 68 | #else | ||
| 69 | #define DR_DEBUG(...) no_printk(__VA_ARGS__) | ||
| 70 | #endif | ||
| 71 | |||
| 72 | extern uint qtaguid_debug_mask; | ||
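Assuming the .c file exposes qtaguid_debug_mask as a module parameter (the "debug_mask" name below is an assumption, not something this header declares), the per-category bits above can be combined and poked at runtime, e.g. from a test harness:

#include <stdio.h>

/* Hedged sketch: enable ctrl/stats (CDEBUG_MASK = 1<<3) and
 * dev/resource (DDEBUG_MASK = 1<<4) tracing, i.e. mask 0x18. */
static int qtaguid_set_debug_mask(unsigned int mask)
{
	FILE *f = fopen("/sys/module/xt_qtaguid/parameters/debug_mask", "w");

	if (!f)
		return -1;
	fprintf(f, "%u\n", mask);
	return fclose(f);
}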
| 73 | |||
| 74 | /*---------------------------------------------------------------------------*/ | ||
| 75 | /* | ||
| 76 | * Tags: | ||
| 77 | * | ||
| 78 | * They represent what the data usage counters will be tracked against. | ||
| 79 | * By default a tag is just based on the UID. | ||
| 80 | * The UID is used as the base for policing, and can not be ignored. | ||
| 81 | * So a tag will always at least represent a UID (uid_tag). | ||
| 82 | * | ||
| 83 | * A tag can be augmented with an "accounting tag" which is associated | ||
| 84 | * with a UID. | ||
| 85 | * User space can set the acct_tag portion of the tag which is then used | ||
| 86 | * with sockets: all data belonging to that socket will be counted against the | ||
| 87 | * tag. The policing is then based on the tag's uid_tag portion, | ||
| 88 | * and stats are collected for the acct_tag portion separately. | ||
| 89 | * | ||
| 90 | * There could be | ||
| 91 | * a: {acct_tag=1, uid_tag=10003} | ||
| 92 | * b: {acct_tag=2, uid_tag=10003} | ||
| 93 | * c: {acct_tag=3, uid_tag=10003} | ||
| 94 | * d: {acct_tag=0, uid_tag=10003} | ||
| 95 | * a, b, and c represent tags associated with specific sockets. | ||
| 96 | * d is for the totals for that uid, including all untagged traffic. | ||
| 97 | * Typically d is used with policing/quota rules. | ||
| 98 | * | ||
| 99 | * We want tag_t big enough to distinguish uid_t and acct_tag. | ||
| 100 | * It might become a struct if needed. | ||
| 101 | * Nothing should be using it as an int. | ||
| 102 | */ | ||
| 103 | typedef uint64_t tag_t; /* Only used via accessors */ | ||
| 104 | |||
| 105 | #define TAG_UID_MASK 0xFFFFFFFFULL | ||
| 106 | #define TAG_ACCT_MASK (~0xFFFFFFFFULL) | ||
| 107 | |||
| 108 | static inline int tag_compare(tag_t t1, tag_t t2) | ||
| 109 | { | ||
| 110 | return t1 < t2 ? -1 : t1 == t2 ? 0 : 1; | ||
| 111 | } | ||
| 112 | |||
| 113 | static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid) | ||
| 114 | { | ||
| 115 | return acct_tag | uid; | ||
| 116 | } | ||
| 117 | static inline tag_t make_tag_from_uid(uid_t uid) | ||
| 118 | { | ||
| 119 | return uid; | ||
| 120 | } | ||
| 121 | static inline uid_t get_uid_from_tag(tag_t tag) | ||
| 122 | { | ||
| 123 | return tag & TAG_UID_MASK; | ||
| 124 | } | ||
| 125 | static inline tag_t get_utag_from_tag(tag_t tag) | ||
| 126 | { | ||
| 127 | return tag & TAG_UID_MASK; | ||
| 128 | } | ||
| 129 | static inline tag_t get_atag_from_tag(tag_t tag) | ||
| 130 | { | ||
| 131 | return tag & TAG_ACCT_MASK; | ||
| 132 | } | ||
| 133 | |||
| 134 | static inline bool valid_atag(tag_t tag) | ||
| 135 | { | ||
| 136 | return !(tag & TAG_UID_MASK); | ||
| 137 | } | ||
| 138 | static inline tag_t make_atag_from_value(uint32_t value) | ||
| 139 | { | ||
| 140 | return (uint64_t)value << 32; | ||
| 141 | } | ||
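A worked example of the layout these accessors imply: the accounting tag occupies the high 32 bits and the uid the low 32. A sketch assuming kernel context (BUG_ON() available); uid 10003 and acct value 2 are illustrative:

static inline void tag_layout_example(void)
{
	tag_t atag = make_atag_from_value(2);            /* 0x0000000200000000 */
	tag_t tag = combine_atag_with_uid(atag, 10003);  /* uid 10003 = 0x2713 */

	BUG_ON(get_uid_from_tag(tag) != 10003);
	BUG_ON(get_atag_from_tag(tag) != atag);
	BUG_ON(!valid_atag(atag));	/* uid bits clear: a pure acct tag */
	BUG_ON(valid_atag(tag));	/* combined tag has uid bits set */
}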
| 142 | /*---------------------------------------------------------------------------*/ | ||
| 143 | |||
| 144 | /* | ||
| 145 | * Maximum number of socket tags that a UID is allowed to have active. | ||
| 146 | * Multiple processes belonging to the same UID contribute towards this limit. | ||
| 147 | * Special UIDs that can impersonate a UID also contribute (e.g. download | ||
| 148 | * manager, ...) | ||
| 149 | */ | ||
| 150 | #define DEFAULT_MAX_SOCK_TAGS 1024 | ||
| 151 | |||
| 152 | /* | ||
| 153 | * For now we only track 2 sets of counters. | ||
| 154 | * The default set is 0. | ||
| 155 | * Userspace can activate another set for a given uid being tracked. | ||
| 156 | */ | ||
| 157 | #define IFS_MAX_COUNTER_SETS 2 | ||
| 158 | |||
| 159 | enum ifs_tx_rx { | ||
| 160 | IFS_TX, | ||
| 161 | IFS_RX, | ||
| 162 | IFS_MAX_DIRECTIONS | ||
| 163 | }; | ||
| 164 | |||
| 165 | /* For now, TCP, UDP, the rest */ | ||
| 166 | enum ifs_proto { | ||
| 167 | IFS_TCP, | ||
| 168 | IFS_UDP, | ||
| 169 | IFS_PROTO_OTHER, | ||
| 170 | IFS_MAX_PROTOS | ||
| 171 | }; | ||
| 172 | |||
| 173 | struct byte_packet_counters { | ||
| 174 | uint64_t bytes; | ||
| 175 | uint64_t packets; | ||
| 176 | }; | ||
| 177 | |||
| 178 | struct data_counters { | ||
| 179 | struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS]; | ||
| 180 | }; | ||
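So bpc is indexed as [counter_set][direction][protocol]. The dc_sum_bytes() helper used by the stats printer in the .c file presumably aggregates across protocols along these lines (a sketch, not the actual helper):

static inline uint64_t dc_sum_bytes_sketch(struct data_counters *dc,
					   int set, enum ifs_tx_rx direction)
{
	return dc->bpc[set][direction][IFS_TCP].bytes
		+ dc->bpc[set][direction][IFS_UDP].bytes
		+ dc->bpc[set][direction][IFS_PROTO_OTHER].bytes;
}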
| 181 | |||
| 182 | /* Generic tag-based nodes used as a base for rb_tree ops */ | ||
| 183 | struct tag_node { | ||
| 184 | struct rb_node node; | ||
| 185 | tag_t tag; | ||
| 186 | }; | ||
| 187 | |||
| 188 | struct tag_stat { | ||
| 189 | struct tag_node tn; | ||
| 190 | struct data_counters counters; | ||
| 191 | /* | ||
| 192 | * If this tag is acct_tag based, we need to count against the | ||
| 193 | * matching parent uid_tag. | ||
| 194 | */ | ||
| 195 | struct data_counters *parent_counters; | ||
| 196 | }; | ||
| 197 | |||
| 198 | struct iface_stat { | ||
| 199 | struct list_head list; /* in iface_stat_list */ | ||
| 200 | char *ifname; | ||
| 201 | bool active; | ||
| 202 | /* net_dev is only valid for active iface_stat */ | ||
| 203 | struct net_device *net_dev; | ||
| 204 | |||
| 205 | struct byte_packet_counters totals[IFS_MAX_DIRECTIONS]; | ||
| 206 | /* | ||
| 207 | * We keep the last_known, because some devices reset their counters | ||
| 208 | * just before NETDEV_UP, while some will reset just before | ||
| 209 | * NETDEV_REGISTER (which is more normal). | ||
| 210 | * So now, if the device didn't do a NETDEV_UNREGISTER and we see | ||
| 211 | * its current dev stats smaller than what was previously known, we | ||
| 212 | * assume an UNREGISTER and just use the last_known. | ||
| 213 | */ | ||
| 214 | struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS]; | ||
| 215 | /* last_known is usable when last_known_valid is true */ | ||
| 216 | bool last_known_valid; | ||
| 217 | |||
| 218 | struct proc_dir_entry *proc_ptr; | ||
| 219 | |||
| 220 | struct rb_root tag_stat_tree; | ||
| 221 | spinlock_t tag_stat_list_lock; | ||
| 222 | }; | ||
| 223 | |||
| 224 | /* This is needed to create proc_dir_entries from atomic context. */ | ||
| 225 | struct iface_stat_work { | ||
| 226 | struct work_struct iface_work; | ||
| 227 | struct iface_stat *iface_entry; | ||
| 228 | }; | ||
| 229 | |||
| 230 | /* | ||
| 231 | * Track the tag that this socket is transferring data for, which is not | ||
| 232 | * necessarily the uid that owns the socket. | ||
| 233 | * This is the tag against which tag_stat.counters will be billed. | ||
| 234 | * These structs need to be looked up by sock and pid. | ||
| 235 | */ | ||
| 236 | struct sock_tag { | ||
| 237 | struct rb_node sock_node; | ||
| 238 | struct sock *sk; /* Only used as a number, never dereferenced */ | ||
| 239 | /* The socket is needed for sockfd_put() */ | ||
| 240 | struct socket *socket; | ||
| 241 | /* Used to associate with a given pid */ | ||
| 242 | struct list_head list; /* in proc_qtu_data.sock_tag_list */ | ||
| 243 | pid_t pid; | ||
| 244 | |||
| 245 | tag_t tag; | ||
| 246 | }; | ||
| 247 | |||
| 248 | struct qtaguid_event_counts { | ||
| 249 | /* Various successful events */ | ||
| 250 | atomic64_t sockets_tagged; | ||
| 251 | atomic64_t sockets_untagged; | ||
| 252 | atomic64_t counter_set_changes; | ||
| 253 | atomic64_t delete_cmds; | ||
| 254 | atomic64_t iface_events; /* Number of NETDEV_* events handled */ | ||
| 255 | |||
| 256 | atomic64_t match_calls; /* Number of times iptables called mt */ | ||
| 257 | /* | ||
| 258 | * match_found_sk_*: numbers related to the netfilter matching | ||
| 259 | * function finding a sock for the sk_buff. | ||
| 260 | * Total skbs processed is sum(match_found*). | ||
| 261 | */ | ||
| 262 | atomic64_t match_found_sk; /* An sk was already in the sk_buff. */ | ||
| 263 | /* The connection tracker had or didn't have the sk. */ | ||
| 264 | atomic64_t match_found_sk_in_ct; | ||
| 265 | atomic64_t match_found_no_sk_in_ct; | ||
| 266 | /* | ||
| 267 | * No sk could be found. No apparent owner. Could happen with | ||
| 268 | * unsolicited traffic. | ||
| 269 | */ | ||
| 270 | atomic64_t match_no_sk; | ||
| 271 | /* | ||
| 272 | * The file ptr in the sk_socket wasn't there. | ||
| 273 | * This might happen for traffic while the socket is being closed. | ||
| 274 | */ | ||
| 275 | atomic64_t match_no_sk_file; | ||
| 276 | }; | ||
| 277 | |||
| 278 | /* Track the set active_set for the given tag. */ | ||
| 279 | struct tag_counter_set { | ||
| 280 | struct tag_node tn; | ||
| 281 | int active_set; | ||
| 282 | }; | ||
| 283 | |||
| 284 | /*----------------------------------------------*/ | ||
| 285 | /* | ||
| 286 | * The qtu uid data is used to track resources that are created directly or | ||
| 287 | * indirectly by processes (uid tracked). | ||
| 288 | * It is shared by the processes with the same uid. | ||
| 289 | * Some of the resources are counted to prevent further rogue allocations, | ||
| 290 | * some will need freeing once the owner process (uid) exits. | ||
| 291 | */ | ||
| 292 | struct uid_tag_data { | ||
| 293 | struct rb_node node; | ||
| 294 | uid_t uid; | ||
| 295 | |||
| 296 | /* | ||
| 297 | * For the uid, how many accounting tags have been set. | ||
| 298 | */ | ||
| 299 | int num_active_tags; | ||
| 300 | /* Track the number of proc_qtu_data that reference it */ | ||
| 301 | int num_pqd; | ||
| 302 | struct rb_root tag_ref_tree; | ||
| 303 | /* No tag_node_tree_lock; use uid_tag_data_tree_lock */ | ||
| 304 | }; | ||
| 305 | |||
| 306 | struct tag_ref { | ||
| 307 | struct tag_node tn; | ||
| 308 | |||
| 309 | /* | ||
| 310 | * This tracks the number of active sockets that have a tag on them | ||
| 311 | * which matches this tag_ref.tn.tag. | ||
| 312 | * A tag ref can live on after the sockets are untagged. | ||
| 313 | * A tag ref can only be removed during a tag delete command. | ||
| 314 | */ | ||
| 315 | int num_sock_tags; | ||
| 316 | }; | ||
| 317 | |||
| 318 | struct proc_qtu_data { | ||
| 319 | struct rb_node node; | ||
| 320 | pid_t pid; | ||
| 321 | |||
| 322 | struct uid_tag_data *parent_tag_data; | ||
| 323 | |||
| 324 | /* Tracks the sock_tags that need freeing upon this proc's death */ | ||
| 325 | struct list_head sock_tag_list; | ||
| 326 | /* No spinlock_t sock_tag_list_lock; use the global one. */ | ||
| 327 | }; | ||
| 328 | |||
| 329 | /*----------------------------------------------*/ | ||
| 330 | #endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */ | ||
diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c new file mode 100644 index 00000000000..39176785c91 --- /dev/null +++ b/net/netfilter/xt_qtaguid_print.c | |||
| @@ -0,0 +1,556 @@ | |||
| 1 | /* | ||
| 2 | * Pretty printing Support for iptables xt_qtaguid module. | ||
| 3 | * | ||
| 4 | * (C) 2011 Google, Inc | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License version 2 as | ||
| 8 | * published by the Free Software Foundation. | ||
| 9 | */ | ||
| 10 | |||
| 11 | /* | ||
| 12 | * Most of the functions in this file just waste time if DEBUG is not defined. | ||
| 13 | * The matching xt_qtaguid_print.h will static-inline empty funcs if the needed | ||
| 14 | * debug flags are not defined. | ||
| 15 | * Those funcs that fail to allocate memory will panic as there is no need to | ||
| 16 | * hobble along just pretending to do the requested work. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #define DEBUG | ||
| 20 | |||
| 21 | #include <linux/fs.h> | ||
| 22 | #include <linux/gfp.h> | ||
| 23 | #include <linux/net.h> | ||
| 24 | #include <linux/rbtree.h> | ||
| 25 | #include <linux/slab.h> | ||
| 26 | #include <linux/spinlock_types.h> | ||
| 27 | |||
| 28 | |||
| 29 | #include "xt_qtaguid_internal.h" | ||
| 30 | #include "xt_qtaguid_print.h" | ||
| 31 | |||
| 32 | #ifdef DDEBUG | ||
| 33 | |||
| 34 | static void _bug_on_err_or_null(void *ptr) | ||
| 35 | { | ||
| 36 | if (IS_ERR_OR_NULL(ptr)) { | ||
| 37 | pr_err("qtaguid: kmalloc failed\n"); | ||
| 38 | BUG(); | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 42 | char *pp_tag_t(tag_t *tag) | ||
| 43 | { | ||
| 44 | char *res; | ||
| 45 | |||
| 46 | if (!tag) | ||
| 47 | res = kasprintf(GFP_ATOMIC, "tag_t@null{}"); | ||
| 48 | else | ||
| 49 | res = kasprintf(GFP_ATOMIC, | ||
| 50 | "tag_t@%p{tag=0x%llx, uid=%u}", | ||
| 51 | tag, *tag, get_uid_from_tag(*tag)); | ||
| 52 | _bug_on_err_or_null(res); | ||
| 53 | return res; | ||
| 54 | } | ||
| 55 | |||
| 56 | char *pp_data_counters(struct data_counters *dc, bool showValues) | ||
| 57 | { | ||
| 58 | char *res; | ||
| 59 | |||
| 60 | if (!dc) | ||
| 61 | res = kasprintf(GFP_ATOMIC, "data_counters@null{}"); | ||
| 62 | else if (showValues) | ||
| 63 | res = kasprintf( | ||
| 64 | GFP_ATOMIC, "data_counters@%p{" | ||
| 65 | "set0{" | ||
| 66 | "rx{" | ||
| 67 | "tcp{b=%llu, p=%llu}, " | ||
| 68 | "udp{b=%llu, p=%llu}," | ||
| 69 | "other{b=%llu, p=%llu}}, " | ||
| 70 | "tx{" | ||
| 71 | "tcp{b=%llu, p=%llu}, " | ||
| 72 | "udp{b=%llu, p=%llu}," | ||
| 73 | "other{b=%llu, p=%llu}}}, " | ||
| 74 | "set1{" | ||
| 75 | "rx{" | ||
| 76 | "tcp{b=%llu, p=%llu}, " | ||
| 77 | "udp{b=%llu, p=%llu}," | ||
| 78 | "other{b=%llu, p=%llu}}, " | ||
| 79 | "tx{" | ||
| 80 | "tcp{b=%llu, p=%llu}, " | ||
| 81 | "udp{b=%llu, p=%llu}," | ||
| 82 | "other{b=%llu, p=%llu}}}}", | ||
| 83 | dc, | ||
| 84 | dc->bpc[0][IFS_RX][IFS_TCP].bytes, | ||
| 85 | dc->bpc[0][IFS_RX][IFS_TCP].packets, | ||
| 86 | dc->bpc[0][IFS_RX][IFS_UDP].bytes, | ||
| 87 | dc->bpc[0][IFS_RX][IFS_UDP].packets, | ||
| 88 | dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes, | ||
| 89 | dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets, | ||
| 90 | dc->bpc[0][IFS_TX][IFS_TCP].bytes, | ||
| 91 | dc->bpc[0][IFS_TX][IFS_TCP].packets, | ||
| 92 | dc->bpc[0][IFS_TX][IFS_UDP].bytes, | ||
| 93 | dc->bpc[0][IFS_TX][IFS_UDP].packets, | ||
| 94 | dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes, | ||
| 95 | dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets, | ||
| 96 | dc->bpc[1][IFS_RX][IFS_TCP].bytes, | ||
| 97 | dc->bpc[1][IFS_RX][IFS_TCP].packets, | ||
| 98 | dc->bpc[1][IFS_RX][IFS_UDP].bytes, | ||
| 99 | dc->bpc[1][IFS_RX][IFS_UDP].packets, | ||
| 100 | dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes, | ||
| 101 | dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets, | ||
| 102 | dc->bpc[1][IFS_TX][IFS_TCP].bytes, | ||
| 103 | dc->bpc[1][IFS_TX][IFS_TCP].packets, | ||
| 104 | dc->bpc[1][IFS_TX][IFS_UDP].bytes, | ||
| 105 | dc->bpc[1][IFS_TX][IFS_UDP].packets, | ||
| 106 | dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes, | ||
| 107 | dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets); | ||
| 108 | else | ||
| 109 | res = kasprintf(GFP_ATOMIC, "data_counters@%p{...}", dc); | ||
| 110 | _bug_on_err_or_null(res); | ||
| 111 | return res; | ||
| 112 | } | ||
| 113 | |||
| 114 | char *pp_tag_node(struct tag_node *tn) | ||
| 115 | { | ||
| 116 | char *tag_str; | ||
| 117 | char *res; | ||
| 118 | |||
| 119 | if (!tn) { | ||
| 120 | res = kasprintf(GFP_ATOMIC, "tag_node@null{}"); | ||
| 121 | _bug_on_err_or_null(res); | ||
| 122 | return res; | ||
| 123 | } | ||
| 124 | tag_str = pp_tag_t(&tn->tag); | ||
| 125 | res = kasprintf(GFP_ATOMIC, | ||
| 126 | "tag_node@%p{tag=%s}", | ||
| 127 | tn, tag_str); | ||
| 128 | _bug_on_err_or_null(res); | ||
| 129 | kfree(tag_str); | ||
| 130 | return res; | ||
| 131 | } | ||
| 132 | |||
| 133 | char *pp_tag_ref(struct tag_ref *tr) | ||
| 134 | { | ||
| 135 | char *tn_str; | ||
| 136 | char *res; | ||
| 137 | |||
| 138 | if (!tr) { | ||
| 139 | res = kasprintf(GFP_ATOMIC, "tag_ref@null{}"); | ||
| 140 | _bug_on_err_or_null(res); | ||
| 141 | return res; | ||
| 142 | } | ||
| 143 | tn_str = pp_tag_node(&tr->tn); | ||
| 144 | res = kasprintf(GFP_ATOMIC, | ||
| 145 | "tag_ref@%p{%s, num_sock_tags=%d}", | ||
| 146 | tr, tn_str, tr->num_sock_tags); | ||
| 147 | _bug_on_err_or_null(res); | ||
| 148 | kfree(tn_str); | ||
| 149 | return res; | ||
| 150 | } | ||
| 151 | |||
| 152 | char *pp_tag_stat(struct tag_stat *ts) | ||
| 153 | { | ||
| 154 | char *tn_str; | ||
| 155 | char *counters_str; | ||
| 156 | char *parent_counters_str; | ||
| 157 | char *res; | ||
| 158 | |||
| 159 | if (!ts) { | ||
| 160 | res = kasprintf(GFP_ATOMIC, "tag_stat@null{}"); | ||
| 161 | _bug_on_err_or_null(res); | ||
| 162 | return res; | ||
| 163 | } | ||
| 164 | tn_str = pp_tag_node(&ts->tn); | ||
| 165 | counters_str = pp_data_counters(&ts->counters, true); | ||
| 166 | parent_counters_str = pp_data_counters(ts->parent_counters, false); | ||
| 167 | res = kasprintf(GFP_ATOMIC, | ||
| 168 | "tag_stat@%p{%s, counters=%s, parent_counters=%s}", | ||
| 169 | ts, tn_str, counters_str, parent_counters_str); | ||
| 170 | _bug_on_err_or_null(res); | ||
| 171 | kfree(tn_str); | ||
| 172 | kfree(counters_str); | ||
| 173 | kfree(parent_counters_str); | ||
| 174 | return res; | ||
| 175 | } | ||
| 176 | |||
| 177 | char *pp_iface_stat(struct iface_stat *is) | ||
| 178 | { | ||
| 179 | char *res; | ||
| 180 | if (!is) | ||
| 181 | res = kasprintf(GFP_ATOMIC, "iface_stat@null{}"); | ||
| 182 | else | ||
| 183 | res = kasprintf(GFP_ATOMIC, "iface_stat@%p{" | ||
| 184 | "list=list_head{...}, " | ||
| 185 | "ifname=%s, " | ||
| 186 | "total={rx={bytes=%llu, " | ||
| 187 | "packets=%llu}, " | ||
| 188 | "tx={bytes=%llu, " | ||
| 189 | "packets=%llu}}, " | ||
| 190 | "last_known_valid=%d, " | ||
| 191 | "last_known={rx={bytes=%llu, " | ||
| 192 | "packets=%llu}, " | ||
| 193 | "tx={bytes=%llu, " | ||
| 194 | "packets=%llu}}, " | ||
| 195 | "active=%d, " | ||
| 196 | "net_dev=%p, " | ||
| 197 | "proc_ptr=%p, " | ||
| 198 | "tag_stat_tree=rb_root{...}}", | ||
| 199 | is, | ||
| 200 | is->ifname, | ||
| 201 | is->totals[IFS_RX].bytes, | ||
| 202 | is->totals[IFS_RX].packets, | ||
| 203 | is->totals[IFS_TX].bytes, | ||
| 204 | is->totals[IFS_TX].packets, | ||
| 205 | is->last_known_valid, | ||
| 206 | is->last_known[IFS_RX].bytes, | ||
| 207 | is->last_known[IFS_RX].packets, | ||
| 208 | is->last_known[IFS_TX].bytes, | ||
| 209 | is->last_known[IFS_TX].packets, | ||
| 210 | is->active, | ||
| 211 | is->net_dev, | ||
| 212 | is->proc_ptr); | ||
| 213 | _bug_on_err_or_null(res); | ||
| 214 | return res; | ||
| 215 | } | ||
| 216 | |||
| 217 | char *pp_sock_tag(struct sock_tag *st) | ||
| 218 | { | ||
| 219 | char *tag_str; | ||
| 220 | char *res; | ||
| 221 | |||
| 222 | if (!st) { | ||
| 223 | res = kasprintf(GFP_ATOMIC, "sock_tag@null{}"); | ||
| 224 | _bug_on_err_or_null(res); | ||
| 225 | return res; | ||
| 226 | } | ||
| 227 | tag_str = pp_tag_t(&st->tag); | ||
| 228 | res = kasprintf(GFP_ATOMIC, "sock_tag@%p{" | ||
| 229 | "sock_node=rb_node{...}, " | ||
| 230 | "sk=%p socket=%p (f_count=%lu), list=list_head{...}, " | ||
| 231 | "pid=%u, tag=%s}", | ||
| 232 | st, st->sk, st->socket, atomic_long_read( | ||
| 233 | &st->socket->file->f_count), | ||
| 234 | st->pid, tag_str); | ||
| 235 | _bug_on_err_or_null(res); | ||
| 236 | kfree(tag_str); | ||
| 237 | return res; | ||
| 238 | } | ||
| 239 | |||
| 240 | char *pp_uid_tag_data(struct uid_tag_data *utd) | ||
| 241 | { | ||
| 242 | char *res; | ||
| 243 | |||
| 244 | if (!utd) | ||
| 245 | res = kasprintf(GFP_ATOMIC, "uid_tag_data@null{}"); | ||
| 246 | else | ||
| 247 | res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{" | ||
| 248 | "uid=%u, num_active_acct_tags=%d, " | ||
| 249 | "num_pqd=%d, " | ||
| 250 | "tag_node_tree=rb_root{...}, " | ||
| 251 | "proc_qtu_data_tree=rb_root{...}}", | ||
| 252 | utd, utd->uid, | ||
| 253 | utd->num_active_tags, utd->num_pqd); | ||
| 254 | _bug_on_err_or_null(res); | ||
| 255 | return res; | ||
| 256 | } | ||
| 257 | |||
| 258 | char *pp_proc_qtu_data(struct proc_qtu_data *pqd) | ||
| 259 | { | ||
| 260 | char *parent_tag_data_str; | ||
| 261 | char *res; | ||
| 262 | |||
| 263 | if (!pqd) { | ||
| 264 | res = kasprintf(GFP_ATOMIC, "proc_qtu_data@null{}"); | ||
| 265 | _bug_on_err_or_null(res); | ||
| 266 | return res; | ||
| 267 | } | ||
| 268 | parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data); | ||
| 269 | res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{" | ||
| 270 | "node=rb_node{...}, pid=%u, " | ||
| 271 | "parent_tag_data=%s, " | ||
| 272 | "sock_tag_list=list_head{...}}", | ||
| 273 | pqd, pqd->pid, parent_tag_data_str | ||
| 274 | ); | ||
| 275 | _bug_on_err_or_null(res); | ||
| 276 | kfree(parent_tag_data_str); | ||
| 277 | return res; | ||
| 278 | } | ||
| 279 | |||
| 280 | /*------------------------------------------*/ | ||
| 281 | void prdebug_sock_tag_tree(int indent_level, | ||
| 282 | struct rb_root *sock_tag_tree) | ||
| 283 | { | ||
| 284 | struct rb_node *node; | ||
| 285 | struct sock_tag *sock_tag_entry; | ||
| 286 | char *str; | ||
| 287 | |||
| 288 | if (likely(!(qtaguid_debug_mask & DDEBUG_MASK))) | ||
| 289 | return; | ||
| 290 | |||
| 291 | if (RB_EMPTY_ROOT(sock_tag_tree)) { | ||
| 292 | str = "sock_tag_tree=rb_root{}"; | ||
| 293 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 294 | return; | ||
| 295 | } | ||
| 296 | |||
| 297 | str = "sock_tag_tree=rb_root{"; | ||
| 298 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 299 | indent_level++; | ||
| 300 | for (node = rb_first(sock_tag_tree); | ||
| 301 | node; | ||
| 302 | node = rb_next(node)) { | ||
| 303 | sock_tag_entry = rb_entry(node, struct sock_tag, sock_node); | ||
| 304 | str = pp_sock_tag(sock_tag_entry); | ||
| 305 | pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); | ||
| 306 | kfree(str); | ||
| 307 | } | ||
| 308 | indent_level--; | ||
| 309 | str = "}"; | ||
| 310 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 311 | } | ||
| 312 | |||
| 313 | void prdebug_sock_tag_list(int indent_level, | ||
| 314 | struct list_head *sock_tag_list) | ||
| 315 | { | ||
| 316 | struct sock_tag *sock_tag_entry; | ||
| 317 | char *str; | ||
| 318 | |||
| 319 | if (likely(!(qtaguid_debug_mask & DDEBUG_MASK))) | ||
| 320 | return; | ||
| 321 | |||
| 322 | if (list_empty(sock_tag_list)) { | ||
| 323 | str = "sock_tag_list=list_head{}"; | ||
| 324 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 325 | return; | ||
| 326 | } | ||
| 327 | |||
| 328 | str = "sock_tag_list=list_head{"; | ||
| 329 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 330 | indent_level++; | ||
| 331 | list_for_each_entry(sock_tag_entry, sock_tag_list, list) { | ||
| 332 | str = pp_sock_tag(sock_tag_entry); | ||
| 333 | pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); | ||
| 334 | kfree(str); | ||
| 335 | } | ||
| 336 | indent_level--; | ||
| 337 | str = "}"; | ||
| 338 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 339 | } | ||
| 340 | |||
| 341 | void prdebug_proc_qtu_data_tree(int indent_level, | ||
| 342 | struct rb_root *proc_qtu_data_tree) | ||
| 343 | { | ||
| 344 | char *str; | ||
| 345 | struct rb_node *node; | ||
| 346 | struct proc_qtu_data *proc_qtu_data_entry; | ||
| 347 | |||
| 348 | if (likely(!(qtaguid_debug_mask & DDEBUG_MASK))) | ||
| 349 | return; | ||
| 350 | |||
| 351 | if (RB_EMPTY_ROOT(proc_qtu_data_tree)) { | ||
| 352 | str = "proc_qtu_data_tree=rb_root{}"; | ||
| 353 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 354 | return; | ||
| 355 | } | ||
| 356 | |||
| 357 | str = "proc_qtu_data_tree=rb_root{"; | ||
| 358 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 359 | indent_level++; | ||
| 360 | for (node = rb_first(proc_qtu_data_tree); | ||
| 361 | node; | ||
| 362 | node = rb_next(node)) { | ||
| 363 | proc_qtu_data_entry = rb_entry(node, | ||
| 364 | struct proc_qtu_data, | ||
| 365 | node); | ||
| 366 | str = pp_proc_qtu_data(proc_qtu_data_entry); | ||
| 367 | pr_debug("%*d: %s,\n", indent_level*2, indent_level, | ||
| 368 | str); | ||
| 369 | kfree(str); | ||
| 370 | indent_level++; | ||
| 371 | prdebug_sock_tag_list(indent_level, | ||
| 372 | &proc_qtu_data_entry->sock_tag_list); | ||
| 373 | indent_level--; | ||
| 374 | |||
| 375 | } | ||
| 376 | indent_level--; | ||
| 377 | str = "}"; | ||
| 378 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 379 | } | ||
| 380 | |||
| 381 | void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree) | ||
| 382 | { | ||
| 383 | char *str; | ||
| 384 | struct rb_node *node; | ||
| 385 | struct tag_ref *tag_ref_entry; | ||
| 386 | |||
| 387 | if (likely(!(qtaguid_debug_mask & DDEBUG_MASK))) | ||
| 388 | return; | ||
| 389 | |||
| 390 | if (RB_EMPTY_ROOT(tag_ref_tree)) { | ||
| 391 | str = "tag_ref_tree{}"; | ||
| 392 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 393 | return; | ||
| 394 | } | ||
| 395 | |||
| 396 | str = "tag_ref_tree{"; | ||
| 397 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 398 | indent_level++; | ||
| 399 | for (node = rb_first(tag_ref_tree); | ||
| 400 | node; | ||
| 401 | node = rb_next(node)) { | ||
| 402 | tag_ref_entry = rb_entry(node, | ||
| 403 | struct tag_ref, | ||
| 404 | tn.node); | ||
| 405 | str = pp_tag_ref(tag_ref_entry); | ||
| 406 | pr_debug("%*d: %s,\n", indent_level*2, indent_level, | ||
| 407 | str); | ||
| 408 | kfree(str); | ||
| 409 | } | ||
| 410 | indent_level--; | ||
| 411 | str = "}"; | ||
| 412 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 413 | } | ||
| 414 | |||
| 415 | void prdebug_uid_tag_data_tree(int indent_level, | ||
| 416 | struct rb_root *uid_tag_data_tree) | ||
| 417 | { | ||
| 418 | char *str; | ||
| 419 | struct rb_node *node; | ||
| 420 | struct uid_tag_data *uid_tag_data_entry; | ||
| 421 | |||
| 422 | if (likely(!(qtaguid_debug_mask & DDEBUG_MASK))) | ||
| 423 | return; | ||
| 424 | |||
| 425 | if (RB_EMPTY_ROOT(uid_tag_data_tree)) { | ||
| 426 | str = "uid_tag_data_tree=rb_root{}"; | ||
| 427 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 428 | return; | ||
| 429 | } | ||
| 430 | |||
| 431 | str = "uid_tag_data_tree=rb_root{"; | ||
| 432 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 433 | indent_level++; | ||
| 434 | for (node = rb_first(uid_tag_data_tree); | ||
| 435 | node; | ||
| 436 | node = rb_next(node)) { | ||
| 437 | uid_tag_data_entry = rb_entry(node, struct uid_tag_data, | ||
| 438 | node); | ||
| 439 | str = pp_uid_tag_data(uid_tag_data_entry); | ||
| 440 | pr_debug("%*d: %s,\n", indent_level*2, indent_level, str); | ||
| 441 | kfree(str); | ||
| 442 | if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) { | ||
| 443 | indent_level++; | ||
| 444 | prdebug_tag_ref_tree(indent_level, | ||
| 445 | &uid_tag_data_entry->tag_ref_tree); | ||
| 446 | indent_level--; | ||
| 447 | } | ||
| 448 | } | ||
| 449 | indent_level--; | ||
| 450 | str = "}"; | ||
| 451 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 452 | } | ||
| 453 | |||
| 454 | void prdebug_tag_stat_tree(int indent_level, | ||
| 455 | struct rb_root *tag_stat_tree) | ||
| 456 | { | ||
| 457 | char *str; | ||
| 458 | struct rb_node *node; | ||
| 459 | struct tag_stat *ts_entry; | ||
| 460 | |||
| 461 | if (likely(!(qtaguid_debug_mask & DDEBUG_MASK))) | ||
| 462 | return; | ||
| 463 | |||
| 464 | if (RB_EMPTY_ROOT(tag_stat_tree)) { | ||
| 465 | str = "tag_stat_tree{}"; | ||
| 466 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 467 | return; | ||
| 468 | } | ||
| 469 | |||
| 470 | str = "tag_stat_tree{"; | ||
| 471 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 472 | indent_level++; | ||
| 473 | for (node = rb_first(tag_stat_tree); | ||
| 474 | node; | ||
| 475 | node = rb_next(node)) { | ||
| 476 | ts_entry = rb_entry(node, struct tag_stat, tn.node); | ||
| 477 | str = pp_tag_stat(ts_entry); | ||
| 478 | pr_debug("%*d: %s\n", indent_level*2, indent_level, | ||
| 479 | str); | ||
| 480 | kfree(str); | ||
| 481 | } | ||
| 482 | indent_level--; | ||
| 483 | str = "}"; | ||
| 484 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 485 | } | ||
| 486 | |||
| 487 | void prdebug_iface_stat_list(int indent_level, | ||
| 488 | struct list_head *iface_stat_list) | ||
| 489 | { | ||
| 490 | char *str; | ||
| 491 | struct iface_stat *iface_entry; | ||
| 492 | |||
| 493 | if (likely(!(qtaguid_debug_mask & DDEBUG_MASK))) | ||
| 494 | return; | ||
| 495 | |||
| 496 | if (list_empty(iface_stat_list)) { | ||
| 497 | str = "iface_stat_list=list_head{}"; | ||
| 498 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 499 | return; | ||
| 500 | } | ||
| 501 | |||
| 502 | str = "iface_stat_list=list_head{"; | ||
| 503 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 504 | indent_level++; | ||
| 505 | list_for_each_entry(iface_entry, iface_stat_list, list) { | ||
| 506 | str = pp_iface_stat(iface_entry); | ||
| 507 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 508 | kfree(str); | ||
| 509 | |||
| 510 | spin_lock_bh(&iface_entry->tag_stat_list_lock); | ||
| 511 | if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) { | ||
| 512 | indent_level++; | ||
| 513 | prdebug_tag_stat_tree(indent_level, | ||
| 514 | &iface_entry->tag_stat_tree); | ||
| 515 | indent_level--; | ||
| 516 | } | ||
| 517 | spin_unlock_bh(&iface_entry->tag_stat_list_lock); | ||
| 518 | } | ||
| 519 | indent_level--; | ||
| 520 | str = "}"; | ||
| 521 | pr_debug("%*d: %s\n", indent_level*2, indent_level, str); | ||
| 522 | } | ||
| 523 | |||
| 524 | #endif /* ifdef DDEBUG */ | ||
| 525 | /*------------------------------------------*/ | ||
| 526 | static const char * const netdev_event_strings[] = { | ||
| 527 | "netdev_unknown", | ||
| 528 | "NETDEV_UP", | ||
| 529 | "NETDEV_DOWN", | ||
| 530 | "NETDEV_REBOOT", | ||
| 531 | "NETDEV_CHANGE", | ||
| 532 | "NETDEV_REGISTER", | ||
| 533 | "NETDEV_UNREGISTER", | ||
| 534 | "NETDEV_CHANGEMTU", | ||
| 535 | "NETDEV_CHANGEADDR", | ||
| 536 | "NETDEV_GOING_DOWN", | ||
| 537 | "NETDEV_CHANGENAME", | ||
| 538 | "NETDEV_FEAT_CHANGE", | ||
| 539 | "NETDEV_BONDING_FAILOVER", | ||
| 540 | "NETDEV_PRE_UP", | ||
| 541 | "NETDEV_PRE_TYPE_CHANGE", | ||
| 542 | "NETDEV_POST_TYPE_CHANGE", | ||
| 543 | "NETDEV_POST_INIT", | ||
| 544 | "NETDEV_UNREGISTER_BATCH", | ||
| 545 | "NETDEV_RELEASE", | ||
| 546 | "NETDEV_NOTIFY_PEERS", | ||
| 547 | "NETDEV_JOIN", | ||
| 548 | }; | ||
| 549 | |||
| 550 | const char *netdev_evt_str(int netdev_event) | ||
| 551 | { | ||
| 552 | if (netdev_event < 0 | ||
| 553 | || netdev_event >= ARRAY_SIZE(netdev_event_strings)) | ||
| 554 | return "bad event num"; | ||
| 555 | return netdev_event_strings[netdev_event]; | ||
| 556 | } | ||
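
netdev_evt_str() above maps a netdev notifier event number to its name for debug output. A minimal caller sketch (hypothetical, not part of this patch), assuming the pre-3.11 notifier convention where the callback's data pointer is the struct net_device itself:

/* Hypothetical caller, for illustration only. */
static int example_netdev_event(struct notifier_block *unused,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	pr_debug("example: event %s (%lu) on dev %s\n",
		 netdev_evt_str(event), event, dev->name);
	return NOTIFY_DONE;
}
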
diff --git a/net/netfilter/xt_qtaguid_print.h b/net/netfilter/xt_qtaguid_print.h new file mode 100644 index 00000000000..b63871a0be5 --- /dev/null +++ b/net/netfilter/xt_qtaguid_print.h | |||
| @@ -0,0 +1,120 @@ | |||
| 1 | /* | ||
| 2 | * Pretty printing Support for iptables xt_qtaguid module. | ||
| 3 | * | ||
| 4 | * (C) 2011 Google, Inc | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License version 2 as | ||
| 8 | * published by the Free Software Foundation. | ||
| 9 | */ | ||
| 10 | #ifndef __XT_QTAGUID_PRINT_H__ | ||
| 11 | #define __XT_QTAGUID_PRINT_H__ | ||
| 12 | |||
| 13 | #include "xt_qtaguid_internal.h" | ||
| 14 | |||
| 15 | #ifdef DDEBUG | ||
| 16 | |||
| 17 | char *pp_tag_t(tag_t *tag); | ||
| 18 | char *pp_data_counters(struct data_counters *dc, bool showValues); | ||
| 19 | char *pp_tag_node(struct tag_node *tn); | ||
| 20 | char *pp_tag_ref(struct tag_ref *tr); | ||
| 21 | char *pp_tag_stat(struct tag_stat *ts); | ||
| 22 | char *pp_iface_stat(struct iface_stat *is); | ||
| 23 | char *pp_sock_tag(struct sock_tag *st); | ||
| 24 | char *pp_uid_tag_data(struct uid_tag_data *qtd); | ||
| 25 | char *pp_proc_qtu_data(struct proc_qtu_data *pqd); | ||
| 26 | |||
| 27 | /*------------------------------------------*/ | ||
| 28 | void prdebug_sock_tag_list(int indent_level, | ||
| 29 | struct list_head *sock_tag_list); | ||
| 30 | void prdebug_sock_tag_tree(int indent_level, | ||
| 31 | struct rb_root *sock_tag_tree); | ||
| 32 | void prdebug_proc_qtu_data_tree(int indent_level, | ||
| 33 | struct rb_root *proc_qtu_data_tree); | ||
| 34 | void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree); | ||
| 35 | void prdebug_uid_tag_data_tree(int indent_level, | ||
| 36 | struct rb_root *uid_tag_data_tree); | ||
| 37 | void prdebug_tag_stat_tree(int indent_level, | ||
| 38 | struct rb_root *tag_stat_tree); | ||
| 39 | void prdebug_iface_stat_list(int indent_level, | ||
| 40 | struct list_head *iface_stat_list); | ||
| 41 | |||
| 42 | #else | ||
| 43 | |||
| 44 | /*------------------------------------------*/ | ||
| 45 | static inline char *pp_tag_t(tag_t *tag) | ||
| 46 | { | ||
| 47 | return NULL; | ||
| 48 | } | ||
| 49 | static inline char *pp_data_counters(struct data_counters *dc, bool showValues) | ||
| 50 | { | ||
| 51 | return NULL; | ||
| 52 | } | ||
| 53 | static inline char *pp_tag_node(struct tag_node *tn) | ||
| 54 | { | ||
| 55 | return NULL; | ||
| 56 | } | ||
| 57 | static inline char *pp_tag_ref(struct tag_ref *tr) | ||
| 58 | { | ||
| 59 | return NULL; | ||
| 60 | } | ||
| 61 | static inline char *pp_tag_stat(struct tag_stat *ts) | ||
| 62 | { | ||
| 63 | return NULL; | ||
| 64 | } | ||
| 65 | static inline char *pp_iface_stat(struct iface_stat *is) | ||
| 66 | { | ||
| 67 | return NULL; | ||
| 68 | } | ||
| 69 | static inline char *pp_sock_tag(struct sock_tag *st) | ||
| 70 | { | ||
| 71 | return NULL; | ||
| 72 | } | ||
| 73 | static inline char *pp_uid_tag_data(struct uid_tag_data *qtd) | ||
| 74 | { | ||
| 75 | return NULL; | ||
| 76 | } | ||
| 77 | static inline char *pp_proc_qtu_data(struct proc_qtu_data *pqd) | ||
| 78 | { | ||
| 79 | return NULL; | ||
| 80 | } | ||
| 81 | |||
| 82 | /*------------------------------------------*/ | ||
| 83 | static inline | ||
| 84 | void prdebug_sock_tag_list(int indent_level, | ||
| 85 | struct list_head *sock_tag_list) | ||
| 86 | { | ||
| 87 | } | ||
| 88 | static inline | ||
| 89 | void prdebug_sock_tag_tree(int indent_level, | ||
| 90 | struct rb_root *sock_tag_tree) | ||
| 91 | { | ||
| 92 | } | ||
| 93 | static inline | ||
| 94 | void prdebug_proc_qtu_data_tree(int indent_level, | ||
| 95 | struct rb_root *proc_qtu_data_tree) | ||
| 96 | { | ||
| 97 | } | ||
| 98 | static inline | ||
| 99 | void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree) | ||
| 100 | { | ||
| 101 | } | ||
| 102 | static inline | ||
| 103 | void prdebug_uid_tag_data_tree(int indent_level, | ||
| 104 | struct rb_root *uid_tag_data_tree) | ||
| 105 | { | ||
| 106 | } | ||
| 107 | static inline | ||
| 108 | void prdebug_tag_stat_tree(int indent_level, | ||
| 109 | struct rb_root *tag_stat_tree) | ||
| 110 | { | ||
| 111 | } | ||
| 112 | static inline | ||
| 113 | void prdebug_iface_stat_list(int indent_level, | ||
| 114 | struct list_head *iface_stat_list) | ||
| 115 | { | ||
| 116 | } | ||
| 117 | #endif | ||
| 118 | /*------------------------------------------*/ | ||
| 119 | const char *netdev_evt_str(int netdev_event); | ||
| 120 | #endif /* ifndef __XT_QTAGUID_PRINT_H__ */ | ||
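
Note that when DDEBUG is unset, the pp_*() stubs above return NULL rather than an allocated string. This keeps call sites free of #ifdefs: kfree(NULL) is a no-op, and printk's "%s" renders a NULL pointer as "(null)". A sketch of the intended call-site pattern (the identifiers here are illustrative, not from this patch):

	char *str = pp_sock_tag(st);	/* NULL when DDEBUG is unset */

	pr_debug("qtaguid: sock tag: %s\n", str);
	kfree(str);			/* safe: kfree(NULL) does nothing */
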
diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c new file mode 100644 index 00000000000..3c72bea2dd6 --- /dev/null +++ b/net/netfilter/xt_quota2.c | |||
| @@ -0,0 +1,381 @@ | |||
| 1 | /* | ||
| 2 | * xt_quota2 - enhanced xt_quota that can count upwards and in packets | ||
| 3 | * as a minimal accounting match. | ||
| 4 | * by Jan Engelhardt <jengelh@medozas.de>, 2008 | ||
| 5 | * | ||
| 6 | * Originally based on xt_quota.c: | ||
| 7 | * netfilter module to enforce network quotas | ||
| 8 | * Sam Johnston <samj@samj.net> | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or modify | ||
| 11 | * it under the terms of the GNU General Public License; either | ||
| 12 | * version 2 of the License, as published by the Free Software Foundation. | ||
| 13 | */ | ||
| 14 | #include <linux/list.h> | ||
| 15 | #include <linux/proc_fs.h> | ||
| 16 | #include <linux/skbuff.h> | ||
| 17 | #include <linux/spinlock.h> | ||
| 18 | #include <linux/atomic.h> | ||
| 19 | |||
| 20 | #include <linux/netfilter/x_tables.h> | ||
| 21 | #include <linux/netfilter/xt_quota2.h> | ||
| 22 | #ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG | ||
| 23 | #include <linux/netfilter_ipv4/ipt_ULOG.h> | ||
| 24 | #endif | ||
| 25 | |||
| 26 | /** | ||
| 27 | * @lock: lock to protect quota writers from each other | ||
| 28 | */ | ||
| 29 | struct xt_quota_counter { | ||
| 30 | u_int64_t quota; | ||
| 31 | spinlock_t lock; | ||
| 32 | struct list_head list; | ||
| 33 | atomic_t ref; | ||
| 34 | char name[sizeof(((struct xt_quota_mtinfo2 *)NULL)->name)]; | ||
| 35 | struct proc_dir_entry *procfs_entry; | ||
| 36 | }; | ||
| 37 | |||
| 38 | #ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG | ||
| 39 | /* Harald's favorite number +1 :D From ipt_ULOG.c */ | ||
| 40 | static unsigned int qlog_nl_event = 112; | ||
| 41 | module_param_named(event_num, qlog_nl_event, uint, S_IRUGO | S_IWUSR); | ||
| 42 | MODULE_PARM_DESC(event_num, | ||
| 43 | "Event number for NETLINK_NFLOG message. 0 disables log." | ||
| 44 | "111 is what ipt_ULOG uses."); | ||
| 45 | static struct sock *nflognl; | ||
| 46 | #endif | ||
| 47 | |||
| 48 | static LIST_HEAD(counter_list); | ||
| 49 | static DEFINE_SPINLOCK(counter_list_lock); | ||
| 50 | |||
| 51 | static struct proc_dir_entry *proc_xt_quota; | ||
| 52 | static unsigned int quota_list_perms = S_IRUGO | S_IWUSR; | ||
| 53 | static unsigned int quota_list_uid = 0; | ||
| 54 | static unsigned int quota_list_gid = 0; | ||
| 55 | module_param_named(perms, quota_list_perms, uint, S_IRUGO | S_IWUSR); | ||
| 56 | module_param_named(uid, quota_list_uid, uint, S_IRUGO | S_IWUSR); | ||
| 57 | module_param_named(gid, quota_list_gid, uint, S_IRUGO | S_IWUSR); | ||
| 58 | |||
| 59 | |||
| 60 | #ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG | ||
| 61 | static void quota2_log(unsigned int hooknum, | ||
| 62 | const struct sk_buff *skb, | ||
| 63 | const struct net_device *in, | ||
| 64 | const struct net_device *out, | ||
| 65 | const char *prefix) | ||
| 66 | { | ||
| 67 | ulog_packet_msg_t *pm; | ||
| 68 | struct sk_buff *log_skb; | ||
| 69 | size_t size; | ||
| 70 | struct nlmsghdr *nlh; | ||
| 71 | |||
| 72 | if (!qlog_nl_event) | ||
| 73 | return; | ||
| 74 | |||
| 75 | size = NLMSG_SPACE(sizeof(*pm)); | ||
| 76 | size = max(size, (size_t)NLMSG_GOODSIZE); | ||
| 77 | log_skb = alloc_skb(size, GFP_ATOMIC); | ||
| 78 | if (!log_skb) { | ||
| 79 | pr_err("xt_quota2: cannot alloc skb for logging\n"); | ||
| 80 | return; | ||
| 81 | } | ||
| 82 | |||
| 83 | /* NLMSG_PUT() uses "goto nlmsg_failure" */ | ||
| 84 | nlh = NLMSG_PUT(log_skb, /*pid*/0, /*seq*/0, qlog_nl_event, | ||
| 85 | sizeof(*pm)); | ||
| 86 | pm = NLMSG_DATA(nlh); | ||
| 87 | if (skb->tstamp.tv64 == 0) | ||
| 88 | __net_timestamp((struct sk_buff *)skb); | ||
| 89 | pm->data_len = 0; | ||
| 90 | pm->hook = hooknum; | ||
| 91 | if (prefix != NULL) | ||
| 92 | strlcpy(pm->prefix, prefix, sizeof(pm->prefix)); | ||
| 93 | else | ||
| 94 | *(pm->prefix) = '\0'; | ||
| 95 | if (in) | ||
| 96 | strlcpy(pm->indev_name, in->name, sizeof(pm->indev_name)); | ||
| 97 | else | ||
| 98 | pm->indev_name[0] = '\0'; | ||
| 99 | |||
| 100 | if (out) | ||
| 101 | strlcpy(pm->outdev_name, out->name, sizeof(pm->outdev_name)); | ||
| 102 | else | ||
| 103 | pm->outdev_name[0] = '\0'; | ||
| 104 | |||
| 105 | NETLINK_CB(log_skb).dst_group = 1; | ||
| 106 | pr_debug("throwing 1 packets to netlink group 1\n"); | ||
| 107 | netlink_broadcast(nflognl, log_skb, 0, 1, GFP_ATOMIC); | ||
| 108 | |||
| 109 | nlmsg_failure: /* Used within NLMSG_PUT() */ | ||
| 110 | pr_debug("xt_quota2: error during NLMSG_PUT\n"); | ||
| 111 | } | ||
| 112 | #else | ||
| 113 | static void quota2_log(unsigned int hooknum, | ||
| 114 | const struct sk_buff *skb, | ||
| 115 | const struct net_device *in, | ||
| 116 | const struct net_device *out, | ||
| 117 | const char *prefix) | ||
| 118 | { | ||
| 119 | } | ||
| 120 | #endif /* if+else CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG */ | ||
| 121 | |||
| 122 | static int quota_proc_read(char *page, char **start, off_t offset, | ||
| 123 | int count, int *eof, void *data) | ||
| 124 | { | ||
| 125 | struct xt_quota_counter *e = data; | ||
| 126 | int ret; | ||
| 127 | |||
| 128 | spin_lock_bh(&e->lock); | ||
| 129 | ret = snprintf(page, PAGE_SIZE, "%llu\n", e->quota); | ||
| 130 | spin_unlock_bh(&e->lock); | ||
| 131 | return ret; | ||
| 132 | } | ||
| 133 | |||
| 134 | static int quota_proc_write(struct file *file, const char __user *input, | ||
| 135 | unsigned long size, void *data) | ||
| 136 | { | ||
| 137 | struct xt_quota_counter *e = data; | ||
| 138 | char buf[sizeof("18446744073709551615")]; | ||
| 139 | |||
| 140 | if (size > sizeof(buf) - 1) | ||
| 141 | size = sizeof(buf) - 1; | ||
| 142 | if (copy_from_user(buf, input, size) != 0) | ||
| 143 | return -EFAULT; | ||
| 144 | buf[size] = '\0'; | ||
| 145 | |||
| 146 | spin_lock_bh(&e->lock); | ||
| 147 | e->quota = simple_strtoull(buf, NULL, 0); | ||
| 148 | spin_unlock_bh(&e->lock); | ||
| 149 | return size; | ||
| 150 | } | ||
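
Together these two handlers expose each named counter through procfs: reading the file returns the remaining quota, and writing replaces it. Assuming a counter named "q1" and the "xt_quota" directory created in quota_mt2_init() below, "cat /proc/net/xt_quota/q1" reads the current value and "echo 5000000 > /proc/net/xt_quota/q1" resets it.
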
| 151 | |||
| 152 | static struct xt_quota_counter * | ||
| 153 | q2_new_counter(const struct xt_quota_mtinfo2 *q, bool anon) | ||
| 154 | { | ||
| 155 | struct xt_quota_counter *e; | ||
| 156 | unsigned int size; | ||
| 157 | |||
| 158 | /* Do not need all the procfs things for anonymous counters. */ | ||
| 159 | size = anon ? offsetof(typeof(*e), list) : sizeof(*e); | ||
| 160 | e = kmalloc(size, GFP_KERNEL); | ||
| 161 | if (e == NULL) | ||
| 162 | return NULL; | ||
| 163 | |||
| 164 | e->quota = q->quota; | ||
| 165 | spin_lock_init(&e->lock); | ||
| 166 | if (!anon) { | ||
| 167 | INIT_LIST_HEAD(&e->list); | ||
| 168 | atomic_set(&e->ref, 1); | ||
| 169 | strlcpy(e->name, q->name, sizeof(e->name)); | ||
| 170 | } | ||
| 171 | return e; | ||
| 172 | } | ||
| 173 | |||
| 174 | /** | ||
| 175 | * q2_get_counter - get ref to counter or create new | ||
| 176 | * @name: name of counter | ||
| 177 | */ | ||
| 178 | static struct xt_quota_counter * | ||
| 179 | q2_get_counter(const struct xt_quota_mtinfo2 *q) | ||
| 180 | { | ||
| 181 | struct proc_dir_entry *p; | ||
| 182 | struct xt_quota_counter *e = NULL; | ||
| 183 | struct xt_quota_counter *new_e; | ||
| 184 | |||
| 185 | if (*q->name == '\0') | ||
| 186 | return q2_new_counter(q, true); | ||
| 187 | |||
| 188 | /* No need to hold a lock while getting a new counter */ | ||
| 189 | new_e = q2_new_counter(q, false); | ||
| 190 | if (new_e == NULL) | ||
| 191 | goto out; | ||
| 192 | |||
| 193 | spin_lock_bh(&counter_list_lock); | ||
| 194 | list_for_each_entry(e, &counter_list, list) | ||
| 195 | if (strcmp(e->name, q->name) == 0) { | ||
| 196 | atomic_inc(&e->ref); | ||
| 197 | spin_unlock_bh(&counter_list_lock); | ||
| 198 | kfree(new_e); | ||
| 199 | pr_debug("xt_quota2: old counter name=%s", e->name); | ||
| 200 | return e; | ||
| 201 | } | ||
| 202 | e = new_e; | ||
| 203 | pr_debug("xt_quota2: new_counter name=%s", e->name); | ||
| 204 | list_add_tail(&e->list, &counter_list); | ||
| 205 | /* An entry with a refcount of 1 cannot be destroyed directly. | ||
| 206 | * This function has not yet returned the new entry, so iptables | ||
| 207 | * holds no reference with which to destroy it. | ||
| 208 | * For another rule to destroy it, this function would first have | ||
| 209 | * to be re-invoked and acquire a new ref to the same named quota. | ||
| 210 | * Nobody will access e->procfs_entry either. | ||
| 211 | * So it is safe to release the lock. */ | ||
| 212 | spin_unlock_bh(&counter_list_lock); | ||
| 213 | |||
| 214 | /* create_proc_entry() is not spin_lock happy */ | ||
| 215 | p = e->procfs_entry = create_proc_entry(e->name, quota_list_perms, | ||
| 216 | proc_xt_quota); | ||
| 217 | |||
| 218 | if (IS_ERR_OR_NULL(p)) { | ||
| 219 | spin_lock_bh(&counter_list_lock); | ||
| 220 | list_del(&e->list); | ||
| 221 | spin_unlock_bh(&counter_list_lock); | ||
| 222 | goto out; | ||
| 223 | } | ||
| 224 | p->data = e; | ||
| 225 | p->read_proc = quota_proc_read; | ||
| 226 | p->write_proc = quota_proc_write; | ||
| 227 | p->uid = quota_list_uid; | ||
| 228 | p->gid = quota_list_gid; | ||
| 229 | return e; | ||
| 230 | |||
| 231 | out: | ||
| 232 | kfree(e); | ||
| 233 | return NULL; | ||
| 234 | } | ||
| 235 | |||
| 236 | static int quota_mt2_check(const struct xt_mtchk_param *par) | ||
| 237 | { | ||
| 238 | struct xt_quota_mtinfo2 *q = par->matchinfo; | ||
| 239 | |||
| 240 | pr_debug("xt_quota2: check() flags=0x%04x", q->flags); | ||
| 241 | |||
| 242 | if (q->flags & ~XT_QUOTA_MASK) | ||
| 243 | return -EINVAL; | ||
| 244 | |||
| 245 | q->name[sizeof(q->name)-1] = '\0'; | ||
| 246 | if (*q->name == '.' || strchr(q->name, '/') != NULL) { | ||
| 247 | printk(KERN_ERR "xt_quota.3: illegal name\n"); | ||
| 248 | return -EINVAL; | ||
| 249 | } | ||
| 250 | |||
| 251 | q->master = q2_get_counter(q); | ||
| 252 | if (q->master == NULL) { | ||
| 253 | printk(KERN_ERR "xt_quota.3: memory alloc failure\n"); | ||
| 254 | return -ENOMEM; | ||
| 255 | } | ||
| 256 | |||
| 257 | return 0; | ||
| 258 | } | ||
| 259 | |||
| 260 | static void quota_mt2_destroy(const struct xt_mtdtor_param *par) | ||
| 261 | { | ||
| 262 | struct xt_quota_mtinfo2 *q = par->matchinfo; | ||
| 263 | struct xt_quota_counter *e = q->master; | ||
| 264 | |||
| 265 | if (*q->name == '\0') { | ||
| 266 | kfree(e); | ||
| 267 | return; | ||
| 268 | } | ||
| 269 | |||
| 270 | spin_lock_bh(&counter_list_lock); | ||
| 271 | if (!atomic_dec_and_test(&e->ref)) { | ||
| 272 | spin_unlock_bh(&counter_list_lock); | ||
| 273 | return; | ||
| 274 | } | ||
| 275 | |||
| 276 | list_del(&e->list); | ||
| 277 | remove_proc_entry(e->name, proc_xt_quota); | ||
| 278 | spin_unlock_bh(&counter_list_lock); | ||
| 279 | kfree(e); | ||
| 280 | } | ||
| 281 | |||
| 282 | static bool | ||
| 283 | quota_mt2(const struct sk_buff *skb, struct xt_action_param *par) | ||
| 284 | { | ||
| 285 | struct xt_quota_mtinfo2 *q = (void *)par->matchinfo; | ||
| 286 | struct xt_quota_counter *e = q->master; | ||
| 287 | bool ret = q->flags & XT_QUOTA_INVERT; | ||
| 288 | |||
| 289 | spin_lock_bh(&e->lock); | ||
| 290 | if (q->flags & XT_QUOTA_GROW) { | ||
| 291 | /* | ||
| 292 | * While no_change is pointless in "grow" mode, we will | ||
| 293 | * implement it here simply to have a consistent behavior. | ||
| 294 | */ | ||
| 295 | if (!(q->flags & XT_QUOTA_NO_CHANGE)) { | ||
| 296 | e->quota += (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len; | ||
| 297 | } | ||
| 298 | ret = true; | ||
| 299 | } else { | ||
| 300 | if (e->quota >= skb->len) { | ||
| 301 | if (!(q->flags & XT_QUOTA_NO_CHANGE)) | ||
| 302 | e->quota -= (q->flags & XT_QUOTA_PACKET) ? 1 : skb->len; | ||
| 303 | ret = !ret; | ||
| 304 | } else { | ||
| 305 | /* We are transitioning, log that fact. */ | ||
| 306 | if (e->quota) { | ||
| 307 | quota2_log(par->hooknum, | ||
| 308 | skb, | ||
| 309 | par->in, | ||
| 310 | par->out, | ||
| 311 | q->name); | ||
| 312 | } | ||
| 313 | /* we do not allow even small packets from now on */ | ||
| 314 | e->quota = 0; | ||
| 315 | } | ||
| 316 | } | ||
| 317 | spin_unlock_bh(&e->lock); | ||
| 318 | return ret; | ||
| 319 | } | ||
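
To make the countdown arithmetic concrete, a worked example in byte mode with an initial quota of 1500 and none of the INVERT/GROW/NO_CHANGE flags set:

/*
 * 1400-byte skb:  1500 >= 1400 -> match; quota drops to 100.
 *  200-byte skb:   100 <   200 -> no match; the transition is logged
 *                                 once and quota is forced to 0.
 * any later skb:     0 <   len -> no match, no further logging.
 *
 * XT_QUOTA_INVERT flips the match results above; XT_QUOTA_GROW
 * instead counts upward and always matches.
 */
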
| 320 | |||
| 321 | static struct xt_match quota_mt2_reg[] __read_mostly = { | ||
| 322 | { | ||
| 323 | .name = "quota2", | ||
| 324 | .revision = 3, | ||
| 325 | .family = NFPROTO_IPV4, | ||
| 326 | .checkentry = quota_mt2_check, | ||
| 327 | .match = quota_mt2, | ||
| 328 | .destroy = quota_mt2_destroy, | ||
| 329 | .matchsize = sizeof(struct xt_quota_mtinfo2), | ||
| 330 | .me = THIS_MODULE, | ||
| 331 | }, | ||
| 332 | { | ||
| 333 | .name = "quota2", | ||
| 334 | .revision = 3, | ||
| 335 | .family = NFPROTO_IPV6, | ||
| 336 | .checkentry = quota_mt2_check, | ||
| 337 | .match = quota_mt2, | ||
| 338 | .destroy = quota_mt2_destroy, | ||
| 339 | .matchsize = sizeof(struct xt_quota_mtinfo2), | ||
| 340 | .me = THIS_MODULE, | ||
| 341 | }, | ||
| 342 | }; | ||
| 343 | |||
| 344 | static int __init quota_mt2_init(void) | ||
| 345 | { | ||
| 346 | int ret; | ||
| 347 | pr_debug("xt_quota2: init()\n"); | ||
| 348 | |||
| 349 | #ifdef CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG | ||
| 350 | nflognl = netlink_kernel_create(&init_net, | ||
| 351 | NETLINK_NFLOG, 1, NULL, | ||
| 352 | NULL, THIS_MODULE); | ||
| 353 | if (!nflognl) | ||
| 354 | return -ENOMEM; | ||
| 355 | #endif | ||
| 356 | |||
| 357 | proc_xt_quota = proc_mkdir("xt_quota", init_net.proc_net); | ||
| 358 | if (proc_xt_quota == NULL) | ||
| 359 | return -EACCES; | ||
| 360 | |||
| 361 | ret = xt_register_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg)); | ||
| 362 | if (ret < 0) | ||
| 363 | remove_proc_entry("xt_quota", init_net.proc_net); | ||
| 364 | pr_debug("xt_quota2: init() %d\n", ret); | ||
| 365 | return ret; | ||
| 366 | } | ||
| 367 | |||
| 368 | static void __exit quota_mt2_exit(void) | ||
| 369 | { | ||
| 370 | xt_unregister_matches(quota_mt2_reg, ARRAY_SIZE(quota_mt2_reg)); | ||
| 371 | remove_proc_entry("xt_quota", init_net.proc_net); | ||
| 372 | } | ||
| 373 | |||
| 374 | module_init(quota_mt2_init); | ||
| 375 | module_exit(quota_mt2_exit); | ||
| 376 | MODULE_DESCRIPTION("Xtables: countdown quota match; up counter"); | ||
| 377 | MODULE_AUTHOR("Sam Johnston <samj@samj.net>"); | ||
| 378 | MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); | ||
| 379 | MODULE_LICENSE("GPL"); | ||
| 380 | MODULE_ALIAS("ipt_quota2"); | ||
| 381 | MODULE_ALIAS("ip6t_quota2"); | ||
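
For completeness, typical userspace usage, assuming the quota2 match extension from Xtables-addons is installed (option names are taken from that package and may differ by version):

# Pass at most ~1 GiB through the named counter "q1"; the counter is
# then readable and resettable via /proc/net/xt_quota/q1.
iptables -A OUTPUT -m quota2 --name q1 --quota 1073741824 -j ACCEPT

# Pure accounting: count packets upward without ever failing the match.
iptables -A INPUT -m quota2 --name acct_in --grow --packets
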
