aboutsummaryrefslogtreecommitdiffstats
path: root/net/dccp/ackvec.h
blob: 0adf4b56c34cfea4df0c9e57eed67e6682f05e9f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#ifndef _ACKVEC_H
#define _ACKVEC_H
/*
 *  net/dccp/ackvec.h
 *
 *  An implementation of the DCCP protocol
 *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/time.h>
#include <linux/types.h>

/* Read about the ECN nonce to see why it is 253 */
#define DCCP_MAX_ACKVEC_LEN 253

#define DCCP_ACKVEC_STATE_RECEIVED	0
#define DCCP_ACKVEC_STATE_ECN_MARKED	(1 << 6)
#define DCCP_ACKVEC_STATE_NOT_RECEIVED	(3 << 6)

#define DCCP_ACKVEC_STATE_MASK		0xC0 /* 11000000 */
#define DCCP_ACKVEC_LEN_MASK		0x3F /* 00111111 */

/** struct dccp_ackvec - ack vector
 *
 * This data structure is the one defined in the DCCP draft
 * Appendix A.
 *
 * @dccpav_buf_head - circular buffer head
 * @dccpav_buf_tail - circular buffer tail
 * @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the
 * 		       buffer (i.e. %dccpav_buf_head)
 * @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
 * 		       by the buffer with State 0
 *
 * Additionally, the HC-Receiver must keep some information about the
 * Ack Vectors it has recently sent. For each packet sent carrying an
 * Ack Vector, it remembers four variables:
 *
 * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement.
 * @dccpav_records - list of dccp_ackvec_record
 * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
 *
 * @dccpav_time		- the time in usecs
 * @dccpav_buf - circular buffer of acknowledgeable packets
 */
struct dccp_ackvec {
	u64		dccpav_buf_ackno;
	struct list_head dccpav_records;
	struct timeval	dccpav_time;
	u8		dccpav_buf_head;
	u8		dccpav_buf_tail;
	u8		dccpav_ack_ptr;
	u8		dccpav_sent_len;
	u8		dccpav_vec_len;
	u8		dccpav_buf_nonce;
	u8		dccpav_ack_nonce;
	u8		dccpav_buf[DCCP_MAX_ACKVEC_LEN];
};

/** struct dccp_ackvec_record - ack vector record
 *
 * ACK vector record as defined in Appendix A of spec.
 *
 * The list is sorted by dccpavr_ack_seqno
 *
 * @dccpavr_node - node in dccpav_records
 * @dccpavr_ack_seqno - sequence number of the packet this record was sent on
 * @dccpavr_ack_ackno - sequence number being acknowledged
 * @dccpavr_ack_ptr - pointer into dccpav_buf where this record starts
 * @dccpavr_ack_nonce - dccpav_ack_nonce at the time this record was sent
 * @dccpavr_sent_len - lenght of the record in dccpav_buf
 */
struct dccp_ackvec_record {
	struct list_head dccpavr_node;
	u64		 dccpavr_ack_seqno;
	u64		 dccpavr_ack_ackno;
	u8		 dccpavr_ack_ptr;
	u8		 dccpavr_ack_nonce;
	u8		 dccpavr_sent_len;
};

struct sock;
struct sk_buff;

#ifdef CONFIG_IP_DCCP_ACKVEC
extern int dccp_ackvec_init(void);
extern void dccp_ackvec_exit(void);

extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
extern void dccp_ackvec_free(struct dccp_ackvec *av);

extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
			   const u64 ackno, const u8 state);

extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
					struct sock *sk, const u64 ackno);
extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
			     const u8 opt, const u8 *value, const u8 len);

extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb);

static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
{
	return av->dccpav_sent_len != av->dccpav_vec_len;
}
#else /* CONFIG_IP_DCCP_ACKVEC */
static inline int dccp_ackvec_init(void)
{
	return 0;
}

static inline void dccp_ackvec_exit(void)
{
}

static inline struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
{
	return NULL;
}

static inline void dccp_ackvec_free(struct dccp_ackvec *av)
{
}

static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
				  const u64 ackno, const u8 state)
{
	return -1;
}

static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av,
					       struct sock *sk, const u64 ackno)
{
}

static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
				    const u8 opt, const u8 *value, const u8 len)
{
	return -1;
}

static inline int dccp_insert_option_ackvec(const struct sock *sk,
					    const struct sk_buff *skb)
{
	return -1;
}

static inline int dccp_ackvec_pending(const struct dccp_ackvec *av)
{
	return 0;
}
#endif /* CONFIG_IP_DCCP_ACKVEC */
#endif /* _ACKVEC_H */
_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; static struct nla_policy cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] __read_mostly = { [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 }, }; struct listener { struct list_head list; pid_t pid; char valid; }; struct listener_list { struct rw_semaphore sem; struct list_head list; }; static DEFINE_PER_CPU(struct listener_list, listener_array); enum actions { REGISTER, DEREGISTER, CPU_DONT_CARE }; static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, size_t size) { struct sk_buff *skb; void *reply; /* * If new attributes are added, please revisit this allocation */ skb = genlmsg_new(size, GFP_KERNEL); if (!skb) return -ENOMEM; if (!info) { int seq = get_cpu_var(taskstats_seqnum)++; put_cpu_var(taskstats_seqnum); reply = genlmsg_put(skb, 0, seq, &family, 0, cmd); } else reply = genlmsg_put_reply(skb, info, &family, 0, cmd); if (reply == NULL) { nlmsg_free(skb); return -EINVAL; } *skbp = skb; return 0; } /* * Send taskstats data in @skb to listener with nl_pid @pid */ static int send_reply(struct sk_buff *skb, pid_t pid) { struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); void *reply = genlmsg_data(genlhdr); int rc; rc = genlmsg_end(skb, reply); if (rc < 0) { nlmsg_free(skb); return rc; } return genlmsg_unicast(skb, pid); } /* * Send taskstats data in @skb to listeners registered for @cpu's exit data */ static void send_cpu_listeners(struct sk_buff *skb, struct listener_list *listeners) { struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); struct listener *s, *tmp; struct sk_buff *skb_next, *skb_cur = skb; void *reply = genlmsg_data(genlhdr); int rc, delcount = 0; rc = genlmsg_end(skb, reply); if (rc < 0) { nlmsg_free(skb); return; } rc = 0; down_read(&listeners->sem); list_for_each_entry(s, &listeners->list, list) { skb_next = NULL; if (!list_is_last(&s->list, &listeners->list)) { skb_next = skb_clone(skb_cur, GFP_KERNEL); if (!skb_next) break; } rc = genlmsg_unicast(skb_cur, s->pid); if (rc == -ECONNREFUSED) { s->valid = 0; delcount++; } skb_cur = skb_next; } up_read(&listeners->sem); if (skb_cur) nlmsg_free(skb_cur); if (!delcount) return; /* Delete invalidated entries */ down_write(&listeners->sem); list_for_each_entry_safe(s, tmp, &listeners->list, list) { if (!s->valid) { list_del(&s->list); kfree(s); } } up_write(&listeners->sem); } static int fill_pid(pid_t pid, struct task_struct *tsk, struct taskstats *stats) { int rc = 0; if (!tsk) { rcu_read_lock(); tsk = find_task_by_vpid(pid); if (tsk) get_task_struct(tsk); rcu_read_unlock(); if (!tsk) return -ESRCH; } else get_task_struct(tsk); memset(stats, 0, sizeof(*stats)); /* * Each accounting subsystem adds calls to its functions to * fill in relevant parts of struct taskstsats as follows * * per-task-foo(stats, tsk); */ delayacct_add_tsk(stats, tsk); /* fill in basic acct fields */ stats->version = TASKSTATS_VERSION; stats->nvcsw = tsk->nvcsw; stats->nivcsw = tsk->nivcsw; bacct_add_tsk(stats, tsk); /* fill in extended acct fields */ xacct_add_tsk(stats, tsk); /* Define err: label here if needed */ put_task_struct(tsk); return rc; } static int fill_tgid(pid_t tgid, struct task_struct *first, struct taskstats *stats) { struct task_struct *tsk; unsigned long flags; int rc = -ESRCH; /* * Add additional stats from live tasks except zombie thread group * leaders who are already counted with the dead tasks */ rcu_read_lock(); if (!first) first = find_task_by_vpid(tgid); if (!first || !lock_task_sighand(first, &flags)) goto out; if (first->signal->stats) memcpy(stats, first->signal->stats, sizeof(*stats)); else memset(stats, 0, sizeof(*stats)); tsk = first; do { if (tsk->exit_state) continue; /* * Accounting subsystem can call its functions here to * fill in relevant parts of struct taskstsats as follows * * per-task-foo(stats, tsk); */ delayacct_add_tsk(stats, tsk); stats->nvcsw += tsk->nvcsw; stats->nivcsw += tsk->nivcsw; } while_each_thread(first, tsk); unlock_task_sighand(first, &flags); rc = 0; out: rcu_read_unlock(); stats->version = TASKSTATS_VERSION; /* * Accounting subsystems can also add calls here to modify * fields of taskstats. */ return rc; } static void fill_tgid_exit(struct task_struct *tsk) { unsigned long flags; spin_lock_irqsave(&tsk->sighand->siglock, flags); if (!tsk->signal->stats) goto ret; /* * Each accounting subsystem calls its functions here to * accumalate its per-task stats for tsk, into the per-tgid structure * * per-task-foo(tsk->signal->stats, tsk); */ delayacct_add_tsk(tsk->signal->stats, tsk); ret: spin_unlock_irqrestore(&tsk->sighand->siglock, flags); return; } static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd) { struct listener_list *listeners; struct listener *s, *tmp; unsigned int cpu; cpumask_t mask = *maskp; if (!cpus_subset(mask, cpu_possible_map)) return -EINVAL; if (isadd == REGISTER) { for_each_cpu_mask_nr(cpu, mask) { s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, cpu_to_node(cpu)); if (!s) goto cleanup; s->pid = pid; INIT_LIST_HEAD(&s->list); s->valid = 1; listeners = &per_cpu(listener_array, cpu); down_write(&listeners->sem); list_add(&s->list, &listeners->list); up_write(&listeners->sem); } return 0; } /* Deregister or cleanup */ cleanup: for_each_cpu_mask_nr(cpu, mask) { listeners = &per_cpu(listener_array, cpu); down_write(&listeners->sem); list_for_each_entry_safe(s, tmp, &listeners->list, list) { if (s->pid == pid) { list_del(&s->list); kfree(s); break; } } up_write(&listeners->sem); } return 0; } static int parse(struct nlattr *na, cpumask_t *mask) { char *data; int len; int ret; if (na == NULL) return 1; len = nla_len(na); if (len > TASKSTATS_CPUMASK_MAXLEN) return -E2BIG; if (len < 1) return -EINVAL; data = kmalloc(len, GFP_KERNEL); if (!data) return -ENOMEM; nla_strlcpy(data, na, len); ret = cpulist_parse(data, *mask); kfree(data); return ret; } static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) { struct nlattr *na, *ret; int aggr; aggr = (type == TASKSTATS_TYPE_PID) ? TASKSTATS_TYPE_AGGR_PID : TASKSTATS_TYPE_AGGR_TGID; na = nla_nest_start(skb, aggr); if (!na) goto err; if (nla_put(skb, type, sizeof(pid), &pid) < 0) goto err; ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); if (!ret) goto err; nla_nest_end(skb, na); return nla_data(ret); err: return NULL; } static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) { int rc = 0; struct sk_buff *rep_skb; struct cgroupstats *stats; struct nlattr *na; size_t size; u32 fd; struct file *file; int fput_needed; na = info->attrs[CGROUPSTATS_CMD_ATTR_FD]; if (!na) return -EINVAL; fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); file = fget_light(fd, &fput_needed); if (!file) return 0; size = nla_total_size(sizeof(struct cgroupstats)); rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) goto err; na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, sizeof(struct cgroupstats)); stats = nla_data(na); memset(stats, 0, sizeof(*stats)); rc = cgroupstats_build(stats, file->f_dentry); if (rc < 0) { nlmsg_free(rep_skb); goto err; } rc = send_reply(rep_skb, info->snd_pid); err: fput_light(file, fput_needed); return rc; } static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) { int rc = 0; struct sk_buff *rep_skb; struct taskstats *stats; size_t size; cpumask_t mask; rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask); if (rc < 0) return rc; if (rc == 0) return add_del_listener(info->snd_pid, &mask, REGISTER); rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask); if (rc < 0) return rc; if (rc == 0) return add_del_listener(info->snd_pid, &mask, DEREGISTER); /* * Size includes space for nested attributes */ size = nla_total_size(sizeof(u32)) + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) return rc; rc = -EINVAL; if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); if (!stats) goto err;