diff options
Diffstat (limited to 'include/net/sock.h')
-rw-r--r-- | include/net/sock.h | 113 |
1 files changed, 90 insertions, 23 deletions
diff --git a/include/net/sock.h b/include/net/sock.h index e9b1dbab90d..312cb25cbd1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -88,6 +88,7 @@ do { spin_lock_init(&((__sk)->sk_lock.slock)); \ | |||
88 | } while(0) | 88 | } while(0) |
89 | 89 | ||
90 | struct sock; | 90 | struct sock; |
91 | struct proto; | ||
91 | 92 | ||
92 | /** | 93 | /** |
93 | * struct sock_common - minimal network layer representation of sockets | 94 | * struct sock_common - minimal network layer representation of sockets |
@@ -98,10 +99,11 @@ struct sock; | |||
98 | * @skc_node: main hash linkage for various protocol lookup tables | 99 | * @skc_node: main hash linkage for various protocol lookup tables |
99 | * @skc_bind_node: bind hash linkage for various protocol lookup tables | 100 | * @skc_bind_node: bind hash linkage for various protocol lookup tables |
100 | * @skc_refcnt: reference count | 101 | * @skc_refcnt: reference count |
102 | * @skc_prot: protocol handlers inside a network family | ||
101 | * | 103 | * |
102 | * This is the minimal network layer representation of sockets, the header | 104 | * This is the minimal network layer representation of sockets, the header |
103 | * for struct sock and struct tcp_tw_bucket. | 105 | * for struct sock and struct inet_timewait_sock. |
104 | */ | 106 | */ |
105 | struct sock_common { | 107 | struct sock_common { |
106 | unsigned short skc_family; | 108 | unsigned short skc_family; |
107 | volatile unsigned char skc_state; | 109 | volatile unsigned char skc_state; |
@@ -110,11 +112,12 @@ struct sock_common { | |||
110 | struct hlist_node skc_node; | 112 | struct hlist_node skc_node; |
111 | struct hlist_node skc_bind_node; | 113 | struct hlist_node skc_bind_node; |
112 | atomic_t skc_refcnt; | 114 | atomic_t skc_refcnt; |
115 | struct proto *skc_prot; | ||
113 | }; | 116 | }; |
114 | 117 | ||
115 | /** | 118 | /** |
116 | * struct sock - network layer representation of sockets | 119 | * struct sock - network layer representation of sockets |
117 | * @__sk_common: shared layout with tcp_tw_bucket | 120 | * @__sk_common: shared layout with inet_timewait_sock |
118 | * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN | 121 | * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN |
119 | * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings | 122 | * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings |
120 | * @sk_lock: synchronizer | 123 | * @sk_lock: synchronizer |
@@ -136,11 +139,10 @@ struct sock_common { | |||
136 | * @sk_no_check: %SO_NO_CHECK setting, whether or not checkup packets | 139 | * @sk_no_check: %SO_NO_CHECK setting, whether or not checkup packets |
137 | * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) | 140 | * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) |
138 | * @sk_lingertime: %SO_LINGER l_linger setting | 141 | * @sk_lingertime: %SO_LINGER l_linger setting |
139 | * @sk_hashent: hash entry in several tables (e.g. tcp_ehash) | 142 | * @sk_hashent: hash entry in several tables (e.g. inet_hashinfo.ehash) |
140 | * @sk_backlog: always used with the per-socket spinlock held | 143 | * @sk_backlog: always used with the per-socket spinlock held |
141 | * @sk_callback_lock: used with the callbacks in the end of this struct | 144 | * @sk_callback_lock: used with the callbacks in the end of this struct |
142 | * @sk_error_queue: rarely used | 145 | * @sk_error_queue: rarely used |
143 | * @sk_prot: protocol handlers inside a network family | ||
144 | * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance) | 146 | * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance) |
145 | * @sk_err: last error | 147 | * @sk_err: last error |
146 | * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out' | 148 | * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out' |
@@ -173,7 +175,7 @@ struct sock_common { | |||
173 | */ | 175 | */ |
174 | struct sock { | 176 | struct sock { |
175 | /* | 177 | /* |
176 | * Now struct tcp_tw_bucket also uses sock_common, so please just | 178 | * Now struct inet_timewait_sock also uses sock_common, so please just |
177 | * don't add anything before this first member (__sk_common) --acme | 179 | * don't add anything before this first member (__sk_common) --acme |
178 | */ | 180 | */ |
179 | struct sock_common __sk_common; | 181 | struct sock_common __sk_common; |
@@ -184,6 +186,7 @@ struct sock { | |||
184 | #define sk_node __sk_common.skc_node | 186 | #define sk_node __sk_common.skc_node |
185 | #define sk_bind_node __sk_common.skc_bind_node | 187 | #define sk_bind_node __sk_common.skc_bind_node |
186 | #define sk_refcnt __sk_common.skc_refcnt | 188 | #define sk_refcnt __sk_common.skc_refcnt |
189 | #define sk_prot __sk_common.skc_prot | ||
187 | unsigned char sk_shutdown : 2, | 190 | unsigned char sk_shutdown : 2, |
188 | sk_no_check : 2, | 191 | sk_no_check : 2, |
189 | sk_userlocks : 4; | 192 | sk_userlocks : 4; |
@@ -218,7 +221,6 @@ struct sock { | |||
218 | struct sk_buff *tail; | 221 | struct sk_buff *tail; |
219 | } sk_backlog; | 222 | } sk_backlog; |
220 | struct sk_buff_head sk_error_queue; | 223 | struct sk_buff_head sk_error_queue; |
221 | struct proto *sk_prot; | ||
222 | struct proto *sk_prot_creator; | 224 | struct proto *sk_prot_creator; |
223 | rwlock_t sk_callback_lock; | 225 | rwlock_t sk_callback_lock; |
224 | int sk_err, | 226 | int sk_err, |
@@ -253,28 +255,28 @@ struct sock { | |||
253 | /* | 255 | /* |
254 | * Hashed lists helper routines | 256 | * Hashed lists helper routines |
255 | */ | 257 | */ |
256 | static inline struct sock *__sk_head(struct hlist_head *head) | 258 | static inline struct sock *__sk_head(const struct hlist_head *head) |
257 | { | 259 | { |
258 | return hlist_entry(head->first, struct sock, sk_node); | 260 | return hlist_entry(head->first, struct sock, sk_node); |
259 | } | 261 | } |
260 | 262 | ||
261 | static inline struct sock *sk_head(struct hlist_head *head) | 263 | static inline struct sock *sk_head(const struct hlist_head *head) |
262 | { | 264 | { |
263 | return hlist_empty(head) ? NULL : __sk_head(head); | 265 | return hlist_empty(head) ? NULL : __sk_head(head); |
264 | } | 266 | } |
265 | 267 | ||
266 | static inline struct sock *sk_next(struct sock *sk) | 268 | static inline struct sock *sk_next(const struct sock *sk) |
267 | { | 269 | { |
268 | return sk->sk_node.next ? | 270 | return sk->sk_node.next ? |
269 | hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; | 271 | hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; |
270 | } | 272 | } |
271 | 273 | ||
272 | static inline int sk_unhashed(struct sock *sk) | 274 | static inline int sk_unhashed(const struct sock *sk) |
273 | { | 275 | { |
274 | return hlist_unhashed(&sk->sk_node); | 276 | return hlist_unhashed(&sk->sk_node); |
275 | } | 277 | } |
276 | 278 | ||
277 | static inline int sk_hashed(struct sock *sk) | 279 | static inline int sk_hashed(const struct sock *sk) |
278 | { | 280 | { |
279 | return sk->sk_node.pprev != NULL; | 281 | return sk->sk_node.pprev != NULL; |
280 | } | 282 | } |
@@ -554,6 +556,10 @@ struct proto { | |||
554 | kmem_cache_t *slab; | 556 | kmem_cache_t *slab; |
555 | unsigned int obj_size; | 557 | unsigned int obj_size; |
556 | 558 | ||
559 | kmem_cache_t *twsk_slab; | ||
560 | unsigned int twsk_obj_size; | ||
561 | atomic_t *orphan_count; | ||
562 | |||
557 | struct request_sock_ops *rsk_prot; | 563 | struct request_sock_ops *rsk_prot; |
558 | 564 | ||
559 | struct module *owner; | 565 | struct module *owner; |
@@ -561,7 +567,9 @@ struct proto { | |||
561 | char name[32]; | 567 | char name[32]; |
562 | 568 | ||
563 | struct list_head node; | 569 | struct list_head node; |
564 | 570 | #ifdef SOCK_REFCNT_DEBUG | |
571 | atomic_t socks; | ||
572 | #endif | ||
565 | struct { | 573 | struct { |
566 | int inuse; | 574 | int inuse; |
567 | u8 __pad[SMP_CACHE_BYTES - sizeof(int)]; | 575 | u8 __pad[SMP_CACHE_BYTES - sizeof(int)]; |
@@ -571,6 +579,31 @@ struct proto { | |||
571 | extern int proto_register(struct proto *prot, int alloc_slab); | 579 | extern int proto_register(struct proto *prot, int alloc_slab); |
572 | extern void proto_unregister(struct proto *prot); | 580 | extern void proto_unregister(struct proto *prot); |
573 | 581 | ||
582 | #ifdef SOCK_REFCNT_DEBUG | ||
583 | static inline void sk_refcnt_debug_inc(struct sock *sk) | ||
584 | { | ||
585 | atomic_inc(&sk->sk_prot->socks); | ||
586 | } | ||
587 | |||
588 | static inline void sk_refcnt_debug_dec(struct sock *sk) | ||
589 | { | ||
590 | atomic_dec(&sk->sk_prot->socks); | ||
591 | printk(KERN_DEBUG "%s socket %p released, %d are still alive\n", | ||
592 | sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks)); | ||
593 | } | ||
594 | |||
595 | static inline void sk_refcnt_debug_release(const struct sock *sk) | ||
596 | { | ||
597 | if (atomic_read(&sk->sk_refcnt) != 1) | ||
598 | printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", | ||
599 | sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt)); | ||
600 | } | ||
601 | #else /* SOCK_REFCNT_DEBUG */ | ||
602 | #define sk_refcnt_debug_inc(sk) do { } while (0) | ||
603 | #define sk_refcnt_debug_dec(sk) do { } while (0) | ||
604 | #define sk_refcnt_debug_release(sk) do { } while (0) | ||
605 | #endif /* SOCK_REFCNT_DEBUG */ | ||
606 | |||
574 | /* Called with local bh disabled */ | 607 | /* Called with local bh disabled */ |
575 | static __inline__ void sock_prot_inc_use(struct proto *prot) | 608 | static __inline__ void sock_prot_inc_use(struct proto *prot) |
576 | { | 609 | { |
@@ -582,6 +615,15 @@ static __inline__ void sock_prot_dec_use(struct proto *prot) | |||
582 | prot->stats[smp_processor_id()].inuse--; | 615 | prot->stats[smp_processor_id()].inuse--; |
583 | } | 616 | } |
584 | 617 | ||
618 | /* With per-bucket locks this operation is not atomic, so | ||
619 | * this version is no worse. | ||
620 | */ | ||
621 | static inline void __sk_prot_rehash(struct sock *sk) | ||
622 | { | ||
623 | sk->sk_prot->unhash(sk); | ||
624 | sk->sk_prot->hash(sk); | ||
625 | } | ||
626 | |||
585 | /* About 10 seconds */ | 627 | /* About 10 seconds */ |
586 | #define SOCK_DESTROY_TIME (10*HZ) | 628 | #define SOCK_DESTROY_TIME (10*HZ) |
587 | 629 | ||
@@ -693,6 +735,8 @@ extern struct sock *sk_alloc(int family, | |||
693 | unsigned int __nocast priority, | 735 | unsigned int __nocast priority, |
694 | struct proto *prot, int zero_it); | 736 | struct proto *prot, int zero_it); |
695 | extern void sk_free(struct sock *sk); | 737 | extern void sk_free(struct sock *sk); |
738 | extern struct sock *sk_clone(const struct sock *sk, | ||
739 | const unsigned int __nocast priority); | ||
696 | 740 | ||
697 | extern struct sk_buff *sock_wmalloc(struct sock *sk, | 741 | extern struct sk_buff *sock_wmalloc(struct sock *sk, |
698 | unsigned long size, int force, | 742 | unsigned long size, int force, |
@@ -986,6 +1030,16 @@ sk_dst_check(struct sock *sk, u32 cookie) | |||
986 | return dst; | 1030 | return dst; |
987 | } | 1031 | } |
988 | 1032 | ||
1033 | static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst) | ||
1034 | { | ||
1035 | __sk_dst_set(sk, dst); | ||
1036 | sk->sk_route_caps = dst->dev->features; | ||
1037 | if (sk->sk_route_caps & NETIF_F_TSO) { | ||
1038 | if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) | ||
1039 | sk->sk_route_caps &= ~NETIF_F_TSO; | ||
1040 | } | ||
1041 | } | ||
1042 | |||
989 | static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb) | 1043 | static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb) |
990 | { | 1044 | { |
991 | sk->sk_wmem_queued += skb->truesize; | 1045 | sk->sk_wmem_queued += skb->truesize; |
@@ -1146,7 +1200,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, | |||
1146 | int hdr_len; | 1200 | int hdr_len; |
1147 | 1201 | ||
1148 | hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header); | 1202 | hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header); |
1149 | skb = alloc_skb(size + hdr_len, gfp); | 1203 | skb = alloc_skb_fclone(size + hdr_len, gfp); |
1150 | if (skb) { | 1204 | if (skb) { |
1151 | skb->truesize += mem; | 1205 | skb->truesize += mem; |
1152 | if (sk->sk_forward_alloc >= (int)skb->truesize || | 1206 | if (sk->sk_forward_alloc >= (int)skb->truesize || |
@@ -1228,16 +1282,19 @@ static inline int sock_intr_errno(long timeo) | |||
1228 | static __inline__ void | 1282 | static __inline__ void |
1229 | sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) | 1283 | sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) |
1230 | { | 1284 | { |
1231 | struct timeval *stamp = &skb->stamp; | 1285 | struct timeval stamp; |
1286 | |||
1287 | skb_get_timestamp(skb, &stamp); | ||
1232 | if (sock_flag(sk, SOCK_RCVTSTAMP)) { | 1288 | if (sock_flag(sk, SOCK_RCVTSTAMP)) { |
1233 | /* Race occurred between timestamp enabling and packet | 1289 | /* Race occurred between timestamp enabling and packet |
1234 | receiving. Fill in the current time for now. */ | 1290 | receiving. Fill in the current time for now. */ |
1235 | if (stamp->tv_sec == 0) | 1291 | if (stamp.tv_sec == 0) |
1236 | do_gettimeofday(stamp); | 1292 | do_gettimeofday(&stamp); |
1293 | skb_set_timestamp(skb, &stamp); | ||
1237 | put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), | 1294 | put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), |
1238 | stamp); | 1295 | &stamp); |
1239 | } else | 1296 | } else |
1240 | sk->sk_stamp = *stamp; | 1297 | sk->sk_stamp = stamp; |
1241 | } | 1298 | } |
1242 | 1299 | ||
1243 | /** | 1300 | /** |
@@ -1262,11 +1319,11 @@ extern int sock_get_timestamp(struct sock *, struct timeval __user *); | |||
1262 | */ | 1319 | */ |
1263 | 1320 | ||
1264 | #if 0 | 1321 | #if 0 |
1265 | #define NETDEBUG(x) do { } while (0) | 1322 | #define NETDEBUG(fmt, args...) do { } while (0) |
1266 | #define LIMIT_NETDEBUG(x) do {} while(0) | 1323 | #define LIMIT_NETDEBUG(fmt, args...) do { } while(0) |
1267 | #else | 1324 | #else |
1268 | #define NETDEBUG(x) do { x; } while (0) | 1325 | #define NETDEBUG(fmt, args...) printk(fmt,##args) |
1269 | #define LIMIT_NETDEBUG(x) do { if (net_ratelimit()) { x; } } while(0) | 1326 | #define LIMIT_NETDEBUG(fmt, args...) do { if (net_ratelimit()) printk(fmt,##args); } while(0) |
1270 | #endif | 1327 | #endif |
1271 | 1328 | ||
1272 | /* | 1329 | /* |
@@ -1313,4 +1370,14 @@ static inline int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsign | |||
1313 | } | 1370 | } |
1314 | #endif | 1371 | #endif |
1315 | 1372 | ||
1373 | extern void sk_init(void); | ||
1374 | |||
1375 | #ifdef CONFIG_SYSCTL | ||
1376 | extern struct ctl_table core_table[]; | ||
1377 | extern int sysctl_optmem_max; | ||
1378 | #endif | ||
1379 | |||
1380 | extern __u32 sysctl_wmem_default; | ||
1381 | extern __u32 sysctl_rmem_default; | ||
1382 | |||
1316 | #endif /* _SOCK_H */ | 1383 | #endif /* _SOCK_H */ |