aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorJiri Pirko <jpirko@redhat.com>2011-11-11 17:16:48 -0500
committerDavid S. Miller <davem@davemloft.net>2011-11-13 16:10:10 -0500
commit3d249d4ca7d0ed6629a135ea1ea21c72286c0d80 (patch)
treee89c8b6bbf4f8d0e4a3428304c26305eca76e4b8 /include/linux
parent5d70b88cd41ef0f2ac0caaab4fd492dd686feee6 (diff)
net: introduce ethernet teaming device
This patch introduces new network device called team. It supposes to be very fast, simple, userspace-driven alternative to existing bonding driver. Userspace library called libteam with couple of demo apps is available here: https://github.com/jpirko/libteam Note it's still in its dipers atm. team<->libteam use generic netlink for communication. That and rtnl suppose to be the only way to configure team device, no sysfs etc. Python binding of libteam was recently introduced. Daemon providing arpmon/miimon active-backup functionality will be introduced shortly. All what's necessary is already implemented in kernel team driver. v7->v8: - check ndo_ndo_vlan_rx_[add/kill]_vid functions before calling them. - use dev_kfree_skb_any() instead of dev_kfree_skb() v6->v7: - transmit and receive functions are not checked in hot paths. That also resolves memory leak on transmit when no port is present v5->v6: - changed couple of _rcu calls to non _rcu ones in non-readers v4->v5: - team_change_mtu() uses team->lock while travesing though port list - mac address changes are moved completely to jurisdiction of userspace daemon. This way the daemon can do FOM1, FOM2 and possibly other weird things with mac addresses. Only round-robin mode sets up all ports to bond's address then enslaved. - Extended Kconfig text v3->v4: - remove redundant synchronize_rcu from __team_change_mode() - revert "set and clear of mode_ops happens per pointer, not per byte" - extend comment of function __team_change_mode() v2->v3: - team_change_mtu() uses rcu version of list traversal to unwind - set and clear of mode_ops happens per pointer, not per byte - port hashlist changed to be embedded into team structure - error branch in team_port_enter() does cleanup now - fixed rtln->rtnl v1->v2: - modes are made as modules. Makes team more modular and extendable. - several commenters' nitpicks found on v1 were fixed - several other bugs were fixed. - note I ignored Eric's comment about roundrobin port selector as Eric's way may be easily implemented as another mode (mode "random") in future. Signed-off-by: Jiri Pirko <jpirko@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/Kbuild1
-rw-r--r--include/linux/if.h1
-rw-r--r--include/linux/if_team.h242
3 files changed, 244 insertions, 0 deletions
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 619b5657af77..0b091b32267d 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -185,6 +185,7 @@ header-y += if_pppol2tp.h
185header-y += if_pppox.h 185header-y += if_pppox.h
186header-y += if_slip.h 186header-y += if_slip.h
187header-y += if_strip.h 187header-y += if_strip.h
188header-y += if_team.h
188header-y += if_tr.h 189header-y += if_tr.h
189header-y += if_tun.h 190header-y += if_tun.h
190header-y += if_tunnel.h 191header-y += if_tunnel.h
diff --git a/include/linux/if.h b/include/linux/if.h
index db20bd4fd16b..06b6ef60c821 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -79,6 +79,7 @@
79#define IFF_TX_SKB_SHARING 0x10000 /* The interface supports sharing 79#define IFF_TX_SKB_SHARING 0x10000 /* The interface supports sharing
80 * skbs on transmit */ 80 * skbs on transmit */
81#define IFF_UNICAST_FLT 0x20000 /* Supports unicast filtering */ 81#define IFF_UNICAST_FLT 0x20000 /* Supports unicast filtering */
82#define IFF_TEAM_PORT 0x40000 /* device used as team port */
82 83
83#define IF_GET_IFACE 0x0001 /* for querying only */ 84#define IF_GET_IFACE 0x0001 /* for querying only */
84#define IF_GET_PROTO 0x0002 85#define IF_GET_PROTO 0x0002
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
new file mode 100644
index 000000000000..14f6388f5460
--- /dev/null
+++ b/include/linux/if_team.h
@@ -0,0 +1,242 @@
1/*
2 * include/linux/if_team.h - Network team device driver header
3 * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 */
10
11#ifndef _LINUX_IF_TEAM_H_
12#define _LINUX_IF_TEAM_H_
13
14#ifdef __KERNEL__
15
16struct team_pcpu_stats {
17 u64 rx_packets;
18 u64 rx_bytes;
19 u64 rx_multicast;
20 u64 tx_packets;
21 u64 tx_bytes;
22 struct u64_stats_sync syncp;
23 u32 rx_dropped;
24 u32 tx_dropped;
25};
26
27struct team;
28
29struct team_port {
30 struct net_device *dev;
31 struct hlist_node hlist; /* node in hash list */
32 struct list_head list; /* node in ordinary list */
33 struct team *team;
34 int index;
35
36 /*
37 * A place for storing original values of the device before it
38 * become a port.
39 */
40 struct {
41 unsigned char dev_addr[MAX_ADDR_LEN];
42 unsigned int mtu;
43 } orig;
44
45 bool linkup;
46 u32 speed;
47 u8 duplex;
48
49 struct rcu_head rcu;
50};
51
52struct team_mode_ops {
53 int (*init)(struct team *team);
54 void (*exit)(struct team *team);
55 rx_handler_result_t (*receive)(struct team *team,
56 struct team_port *port,
57 struct sk_buff *skb);
58 bool (*transmit)(struct team *team, struct sk_buff *skb);
59 int (*port_enter)(struct team *team, struct team_port *port);
60 void (*port_leave)(struct team *team, struct team_port *port);
61 void (*port_change_mac)(struct team *team, struct team_port *port);
62};
63
64enum team_option_type {
65 TEAM_OPTION_TYPE_U32,
66 TEAM_OPTION_TYPE_STRING,
67};
68
69struct team_option {
70 struct list_head list;
71 const char *name;
72 enum team_option_type type;
73 int (*getter)(struct team *team, void *arg);
74 int (*setter)(struct team *team, void *arg);
75};
76
77struct team_mode {
78 struct list_head list;
79 const char *kind;
80 struct module *owner;
81 size_t priv_size;
82 const struct team_mode_ops *ops;
83};
84
85#define TEAM_PORT_HASHBITS 4
86#define TEAM_PORT_HASHENTRIES (1 << TEAM_PORT_HASHBITS)
87
88#define TEAM_MODE_PRIV_LONGS 4
89#define TEAM_MODE_PRIV_SIZE (sizeof(long) * TEAM_MODE_PRIV_LONGS)
90
91struct team {
92 struct net_device *dev; /* associated netdevice */
93 struct team_pcpu_stats __percpu *pcpu_stats;
94
95 spinlock_t lock; /* used for overall locking, e.g. port lists write */
96
97 /*
98 * port lists with port count
99 */
100 int port_count;
101 struct hlist_head port_hlist[TEAM_PORT_HASHENTRIES];
102 struct list_head port_list;
103
104 struct list_head option_list;
105
106 const struct team_mode *mode;
107 struct team_mode_ops ops;
108 long mode_priv[TEAM_MODE_PRIV_LONGS];
109};
110
111static inline struct hlist_head *team_port_index_hash(struct team *team,
112 int port_index)
113{
114 return &team->port_hlist[port_index & (TEAM_PORT_HASHENTRIES - 1)];
115}
116
117static inline struct team_port *team_get_port_by_index(struct team *team,
118 int port_index)
119{
120 struct hlist_node *p;
121 struct team_port *port;
122 struct hlist_head *head = team_port_index_hash(team, port_index);
123
124 hlist_for_each_entry(port, p, head, hlist)
125 if (port->index == port_index)
126 return port;
127 return NULL;
128}
129static inline struct team_port *team_get_port_by_index_rcu(struct team *team,
130 int port_index)
131{
132 struct hlist_node *p;
133 struct team_port *port;
134 struct hlist_head *head = team_port_index_hash(team, port_index);
135
136 hlist_for_each_entry_rcu(port, p, head, hlist)
137 if (port->index == port_index)
138 return port;
139 return NULL;
140}
141
142extern int team_port_set_team_mac(struct team_port *port);
143extern void team_options_register(struct team *team,
144 struct team_option *option,
145 size_t option_count);
146extern void team_options_unregister(struct team *team,
147 struct team_option *option,
148 size_t option_count);
149extern int team_mode_register(struct team_mode *mode);
150extern int team_mode_unregister(struct team_mode *mode);
151
152#endif /* __KERNEL__ */
153
154#define TEAM_STRING_MAX_LEN 32
155
156/**********************************
157 * NETLINK_GENERIC netlink family.
158 **********************************/
159
160enum {
161 TEAM_CMD_NOOP,
162 TEAM_CMD_OPTIONS_SET,
163 TEAM_CMD_OPTIONS_GET,
164 TEAM_CMD_PORT_LIST_GET,
165
166 __TEAM_CMD_MAX,
167 TEAM_CMD_MAX = (__TEAM_CMD_MAX - 1),
168};
169
170enum {
171 TEAM_ATTR_UNSPEC,
172 TEAM_ATTR_TEAM_IFINDEX, /* u32 */
173 TEAM_ATTR_LIST_OPTION, /* nest */
174 TEAM_ATTR_LIST_PORT, /* nest */
175
176 __TEAM_ATTR_MAX,
177 TEAM_ATTR_MAX = __TEAM_ATTR_MAX - 1,
178};
179
180/* Nested layout of get/set msg:
181 *
182 * [TEAM_ATTR_LIST_OPTION]
183 * [TEAM_ATTR_ITEM_OPTION]
184 * [TEAM_ATTR_OPTION_*], ...
185 * [TEAM_ATTR_ITEM_OPTION]
186 * [TEAM_ATTR_OPTION_*], ...
187 * ...
188 * [TEAM_ATTR_LIST_PORT]
189 * [TEAM_ATTR_ITEM_PORT]
190 * [TEAM_ATTR_PORT_*], ...
191 * [TEAM_ATTR_ITEM_PORT]
192 * [TEAM_ATTR_PORT_*], ...
193 * ...
194 */
195
196enum {
197 TEAM_ATTR_ITEM_OPTION_UNSPEC,
198 TEAM_ATTR_ITEM_OPTION, /* nest */
199
200 __TEAM_ATTR_ITEM_OPTION_MAX,
201 TEAM_ATTR_ITEM_OPTION_MAX = __TEAM_ATTR_ITEM_OPTION_MAX - 1,
202};
203
204enum {
205 TEAM_ATTR_OPTION_UNSPEC,
206 TEAM_ATTR_OPTION_NAME, /* string */
207 TEAM_ATTR_OPTION_CHANGED, /* flag */
208 TEAM_ATTR_OPTION_TYPE, /* u8 */
209 TEAM_ATTR_OPTION_DATA, /* dynamic */
210
211 __TEAM_ATTR_OPTION_MAX,
212 TEAM_ATTR_OPTION_MAX = __TEAM_ATTR_OPTION_MAX - 1,
213};
214
215enum {
216 TEAM_ATTR_ITEM_PORT_UNSPEC,
217 TEAM_ATTR_ITEM_PORT, /* nest */
218
219 __TEAM_ATTR_ITEM_PORT_MAX,
220 TEAM_ATTR_ITEM_PORT_MAX = __TEAM_ATTR_ITEM_PORT_MAX - 1,
221};
222
223enum {
224 TEAM_ATTR_PORT_UNSPEC,
225 TEAM_ATTR_PORT_IFINDEX, /* u32 */
226 TEAM_ATTR_PORT_CHANGED, /* flag */
227 TEAM_ATTR_PORT_LINKUP, /* flag */
228 TEAM_ATTR_PORT_SPEED, /* u32 */
229 TEAM_ATTR_PORT_DUPLEX, /* u8 */
230
231 __TEAM_ATTR_PORT_MAX,
232 TEAM_ATTR_PORT_MAX = __TEAM_ATTR_PORT_MAX - 1,
233};
234
235/*
236 * NETLINK_GENERIC related info
237 */
238#define TEAM_GENL_NAME "team"
239#define TEAM_GENL_VERSION 0x1
240#define TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME "change_event"
241
242#endif /* _LINUX_IF_TEAM_H_ */