aboutsummaryrefslogtreecommitdiffstats
path: root/net/tipc
diff options
context:
space:
mode:
Diffstat (limited to 'net/tipc')
-rw-r--r--net/tipc/core.c11
-rw-r--r--net/tipc/core.h23
-rw-r--r--net/tipc/eth_media.c4
-rw-r--r--net/tipc/link.c111
-rw-r--r--net/tipc/msg.c16
-rw-r--r--net/tipc/msg.h50
-rw-r--r--net/tipc/port.c43
-rw-r--r--net/tipc/ref.c211
-rw-r--r--net/tipc/ref.h89
-rw-r--r--net/tipc/socket.c1188
10 files changed, 993 insertions, 753 deletions
diff --git a/net/tipc/core.c b/net/tipc/core.c
index d2d7d32c02c7..740aac5cdfb6 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -48,16 +48,8 @@
48#include "subscr.h" 48#include "subscr.h"
49#include "config.h" 49#include "config.h"
50 50
51int tipc_eth_media_start(void);
52void tipc_eth_media_stop(void);
53int tipc_handler_start(void);
54void tipc_handler_stop(void);
55int tipc_socket_init(void);
56void tipc_socket_stop(void);
57int tipc_netlink_start(void);
58void tipc_netlink_stop(void);
59 51
60#define TIPC_MOD_VER "1.6.2" 52#define TIPC_MOD_VER "1.6.3"
61 53
62#ifndef CONFIG_TIPC_ZONES 54#ifndef CONFIG_TIPC_ZONES
63#define CONFIG_TIPC_ZONES 3 55#define CONFIG_TIPC_ZONES 3
@@ -277,7 +269,6 @@ EXPORT_SYMBOL(tipc_register_media);
277/* TIPC API for external APIs (see tipc_port.h) */ 269/* TIPC API for external APIs (see tipc_port.h) */
278 270
279EXPORT_SYMBOL(tipc_createport_raw); 271EXPORT_SYMBOL(tipc_createport_raw);
280EXPORT_SYMBOL(tipc_set_msg_option);
281EXPORT_SYMBOL(tipc_reject_msg); 272EXPORT_SYMBOL(tipc_reject_msg);
282EXPORT_SYMBOL(tipc_send_buf_fast); 273EXPORT_SYMBOL(tipc_send_buf_fast);
283EXPORT_SYMBOL(tipc_acknowledge); 274EXPORT_SYMBOL(tipc_acknowledge);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index feabca580820..325404fd4eb5 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -180,6 +180,12 @@ extern int tipc_core_start(void);
180extern void tipc_core_stop(void); 180extern void tipc_core_stop(void);
181extern int tipc_core_start_net(void); 181extern int tipc_core_start_net(void);
182extern void tipc_core_stop_net(void); 182extern void tipc_core_stop_net(void);
183extern int tipc_handler_start(void);
184extern void tipc_handler_stop(void);
185extern int tipc_netlink_start(void);
186extern void tipc_netlink_stop(void);
187extern int tipc_socket_init(void);
188extern void tipc_socket_stop(void);
183 189
184static inline int delimit(int val, int min, int max) 190static inline int delimit(int val, int min, int max)
185{ 191{
@@ -310,7 +316,7 @@ static inline struct sk_buff *buf_acquire(u32 size)
310 struct sk_buff *skb; 316 struct sk_buff *skb;
311 unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; 317 unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
312 318
313 skb = alloc_skb(buf_size, GFP_ATOMIC); 319 skb = alloc_skb_fclone(buf_size, GFP_ATOMIC);
314 if (skb) { 320 if (skb) {
315 skb_reserve(skb, BUF_HEADROOM); 321 skb_reserve(skb, BUF_HEADROOM);
316 skb_put(skb, size); 322 skb_put(skb, size);
@@ -328,8 +334,19 @@ static inline struct sk_buff *buf_acquire(u32 size)
328 334
329static inline void buf_discard(struct sk_buff *skb) 335static inline void buf_discard(struct sk_buff *skb)
330{ 336{
331 if (likely(skb != NULL)) 337 kfree_skb(skb);
332 kfree_skb(skb); 338}
339
340/**
341 * buf_linearize - convert a TIPC message buffer into a single contiguous piece
342 * @skb: message buffer
343 *
344 * Returns 0 on success.
345 */
346
347static inline int buf_linearize(struct sk_buff *skb)
348{
349 return skb_linearize(skb);
333} 350}
334 351
335#endif 352#endif
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 3bbef2ab22ae..9cd35eec3e7f 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -101,7 +101,7 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
101 struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv; 101 struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv;
102 u32 size; 102 u32 size;
103 103
104 if (dev->nd_net != &init_net) { 104 if (dev_net(dev) != &init_net) {
105 kfree_skb(buf); 105 kfree_skb(buf);
106 return 0; 106 return 0;
107 } 107 }
@@ -198,7 +198,7 @@ static int recv_notification(struct notifier_block *nb, unsigned long evt,
198 struct eth_bearer *eb_ptr = &eth_bearers[0]; 198 struct eth_bearer *eb_ptr = &eth_bearers[0];
199 struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; 199 struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
200 200
201 if (dev->nd_net != &init_net) 201 if (dev_net(dev) != &init_net)
202 return NOTIFY_DONE; 202 return NOTIFY_DONE;
203 203
204 while ((eb_ptr->dev != dev)) { 204 while ((eb_ptr->dev != dev)) {
diff --git a/net/tipc/link.c b/net/tipc/link.c
index cefa99824c58..2a26a16e269f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1785,6 +1785,56 @@ static struct sk_buff *link_insert_deferred_queue(struct link *l_ptr,
1785 return buf; 1785 return buf;
1786} 1786}
1787 1787
1788/**
1789 * link_recv_buf_validate - validate basic format of received message
1790 *
1791 * This routine ensures a TIPC message has an acceptable header, and at least
1792 * as much data as the header indicates it should. The routine also ensures
1793 * that the entire message header is stored in the main fragment of the message
1794 * buffer, to simplify future access to message header fields.
1795 *
1796 * Note: Having extra info present in the message header or data areas is OK.
1797 * TIPC will ignore the excess, under the assumption that it is optional info
1798 * introduced by a later release of the protocol.
1799 */
1800
1801static int link_recv_buf_validate(struct sk_buff *buf)
1802{
1803 static u32 min_data_hdr_size[8] = {
1804 SHORT_H_SIZE, MCAST_H_SIZE, LONG_H_SIZE, DIR_MSG_H_SIZE,
1805 MAX_H_SIZE, MAX_H_SIZE, MAX_H_SIZE, MAX_H_SIZE
1806 };
1807
1808 struct tipc_msg *msg;
1809 u32 tipc_hdr[2];
1810 u32 size;
1811 u32 hdr_size;
1812 u32 min_hdr_size;
1813
1814 if (unlikely(buf->len < MIN_H_SIZE))
1815 return 0;
1816
1817 msg = skb_header_pointer(buf, 0, sizeof(tipc_hdr), tipc_hdr);
1818 if (msg == NULL)
1819 return 0;
1820
1821 if (unlikely(msg_version(msg) != TIPC_VERSION))
1822 return 0;
1823
1824 size = msg_size(msg);
1825 hdr_size = msg_hdr_sz(msg);
1826 min_hdr_size = msg_isdata(msg) ?
1827 min_data_hdr_size[msg_type(msg)] : INT_H_SIZE;
1828
1829 if (unlikely((hdr_size < min_hdr_size) ||
1830 (size < hdr_size) ||
1831 (buf->len < size) ||
1832 (size - hdr_size > TIPC_MAX_USER_MSG_SIZE)))
1833 return 0;
1834
1835 return pskb_may_pull(buf, hdr_size);
1836}
1837
1788void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) 1838void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1789{ 1839{
1790 read_lock_bh(&tipc_net_lock); 1840 read_lock_bh(&tipc_net_lock);
@@ -1794,9 +1844,9 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1794 struct link *l_ptr; 1844 struct link *l_ptr;
1795 struct sk_buff *crs; 1845 struct sk_buff *crs;
1796 struct sk_buff *buf = head; 1846 struct sk_buff *buf = head;
1797 struct tipc_msg *msg = buf_msg(buf); 1847 struct tipc_msg *msg;
1798 u32 seq_no = msg_seqno(msg); 1848 u32 seq_no;
1799 u32 ackd = msg_ack(msg); 1849 u32 ackd;
1800 u32 released = 0; 1850 u32 released = 0;
1801 int type; 1851 int type;
1802 1852
@@ -1804,12 +1854,21 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1804 TIPC_SKB_CB(buf)->handle = b_ptr; 1854 TIPC_SKB_CB(buf)->handle = b_ptr;
1805 1855
1806 head = head->next; 1856 head = head->next;
1807 if (unlikely(msg_version(msg) != TIPC_VERSION)) 1857
1858 /* Ensure message is well-formed */
1859
1860 if (unlikely(!link_recv_buf_validate(buf)))
1808 goto cont; 1861 goto cont;
1809#if 0 1862
1810 if (msg_user(msg) != LINK_PROTOCOL) 1863 /* Ensure message data is a single contiguous unit */
1811#endif 1864
1812 msg_dbg(msg,"<REC<"); 1865 if (unlikely(buf_linearize(buf))) {
1866 goto cont;
1867 }
1868
1869 /* Handle arrival of a non-unicast link message */
1870
1871 msg = buf_msg(buf);
1813 1872
1814 if (unlikely(msg_non_seq(msg))) { 1873 if (unlikely(msg_non_seq(msg))) {
1815 link_recv_non_seq(buf); 1874 link_recv_non_seq(buf);
@@ -1820,19 +1879,26 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1820 (msg_destnode(msg) != tipc_own_addr))) 1879 (msg_destnode(msg) != tipc_own_addr)))
1821 goto cont; 1880 goto cont;
1822 1881
1882 /* Locate unicast link endpoint that should handle message */
1883
1823 n_ptr = tipc_node_find(msg_prevnode(msg)); 1884 n_ptr = tipc_node_find(msg_prevnode(msg));
1824 if (unlikely(!n_ptr)) 1885 if (unlikely(!n_ptr))
1825 goto cont; 1886 goto cont;
1826
1827 tipc_node_lock(n_ptr); 1887 tipc_node_lock(n_ptr);
1888
1828 l_ptr = n_ptr->links[b_ptr->identity]; 1889 l_ptr = n_ptr->links[b_ptr->identity];
1829 if (unlikely(!l_ptr)) { 1890 if (unlikely(!l_ptr)) {
1830 tipc_node_unlock(n_ptr); 1891 tipc_node_unlock(n_ptr);
1831 goto cont; 1892 goto cont;
1832 } 1893 }
1833 /* 1894
1834 * Release acked messages 1895 /* Validate message sequence number info */
1835 */ 1896
1897 seq_no = msg_seqno(msg);
1898 ackd = msg_ack(msg);
1899
1900 /* Release acked messages */
1901
1836 if (less(n_ptr->bclink.acked, msg_bcast_ack(msg))) { 1902 if (less(n_ptr->bclink.acked, msg_bcast_ack(msg))) {
1837 if (tipc_node_is_up(n_ptr) && n_ptr->bclink.supported) 1903 if (tipc_node_is_up(n_ptr) && n_ptr->bclink.supported)
1838 tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg)); 1904 tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg));
@@ -1851,6 +1917,9 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1851 l_ptr->first_out = crs; 1917 l_ptr->first_out = crs;
1852 l_ptr->out_queue_size -= released; 1918 l_ptr->out_queue_size -= released;
1853 } 1919 }
1920
1921 /* Try sending any messages link endpoint has pending */
1922
1854 if (unlikely(l_ptr->next_out)) 1923 if (unlikely(l_ptr->next_out))
1855 tipc_link_push_queue(l_ptr); 1924 tipc_link_push_queue(l_ptr);
1856 if (unlikely(!list_empty(&l_ptr->waiting_ports))) 1925 if (unlikely(!list_empty(&l_ptr->waiting_ports)))
@@ -1860,6 +1929,8 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1860 tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); 1929 tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
1861 } 1930 }
1862 1931
1932 /* Now (finally!) process the incoming message */
1933
1863protocol_check: 1934protocol_check:
1864 if (likely(link_working_working(l_ptr))) { 1935 if (likely(link_working_working(l_ptr))) {
1865 if (likely(seq_no == mod(l_ptr->next_in_no))) { 1936 if (likely(seq_no == mod(l_ptr->next_in_no))) {
@@ -2832,15 +2903,15 @@ static void link_set_supervision_props(struct link *l_ptr, u32 tolerance)
2832void tipc_link_set_queue_limits(struct link *l_ptr, u32 window) 2903void tipc_link_set_queue_limits(struct link *l_ptr, u32 window)
2833{ 2904{
2834 /* Data messages from this node, inclusive FIRST_FRAGM */ 2905 /* Data messages from this node, inclusive FIRST_FRAGM */
2835 l_ptr->queue_limit[DATA_LOW] = window; 2906 l_ptr->queue_limit[TIPC_LOW_IMPORTANCE] = window;
2836 l_ptr->queue_limit[DATA_MEDIUM] = (window / 3) * 4; 2907 l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE] = (window / 3) * 4;
2837 l_ptr->queue_limit[DATA_HIGH] = (window / 3) * 5; 2908 l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE] = (window / 3) * 5;
2838 l_ptr->queue_limit[DATA_CRITICAL] = (window / 3) * 6; 2909 l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE] = (window / 3) * 6;
2839 /* Transiting data messages,inclusive FIRST_FRAGM */ 2910 /* Transiting data messages,inclusive FIRST_FRAGM */
2840 l_ptr->queue_limit[DATA_LOW + 4] = 300; 2911 l_ptr->queue_limit[TIPC_LOW_IMPORTANCE + 4] = 300;
2841 l_ptr->queue_limit[DATA_MEDIUM + 4] = 600; 2912 l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE + 4] = 600;
2842 l_ptr->queue_limit[DATA_HIGH + 4] = 900; 2913 l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE + 4] = 900;
2843 l_ptr->queue_limit[DATA_CRITICAL + 4] = 1200; 2914 l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE + 4] = 1200;
2844 l_ptr->queue_limit[CONN_MANAGER] = 1200; 2915 l_ptr->queue_limit[CONN_MANAGER] = 1200;
2845 l_ptr->queue_limit[ROUTE_DISTRIBUTOR] = 1200; 2916 l_ptr->queue_limit[ROUTE_DISTRIBUTOR] = 1200;
2846 l_ptr->queue_limit[CHANGEOVER_PROTOCOL] = 2500; 2917 l_ptr->queue_limit[CHANGEOVER_PROTOCOL] = 2500;
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 782485468fb2..696a8633df75 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -73,10 +73,10 @@ void tipc_msg_print(struct print_buf *buf, struct tipc_msg *msg, const char *str
73 tipc_printf(buf, "NO(%u/%u):",msg_long_msgno(msg), 73 tipc_printf(buf, "NO(%u/%u):",msg_long_msgno(msg),
74 msg_fragm_no(msg)); 74 msg_fragm_no(msg));
75 break; 75 break;
76 case DATA_LOW: 76 case TIPC_LOW_IMPORTANCE:
77 case DATA_MEDIUM: 77 case TIPC_MEDIUM_IMPORTANCE:
78 case DATA_HIGH: 78 case TIPC_HIGH_IMPORTANCE:
79 case DATA_CRITICAL: 79 case TIPC_CRITICAL_IMPORTANCE:
80 tipc_printf(buf, "DAT%u:", msg_user(msg)); 80 tipc_printf(buf, "DAT%u:", msg_user(msg));
81 if (msg_short(msg)) { 81 if (msg_short(msg)) {
82 tipc_printf(buf, "CON:"); 82 tipc_printf(buf, "CON:");
@@ -229,10 +229,10 @@ void tipc_msg_print(struct print_buf *buf, struct tipc_msg *msg, const char *str
229 switch (usr) { 229 switch (usr) {
230 case CONN_MANAGER: 230 case CONN_MANAGER:
231 case NAME_DISTRIBUTOR: 231 case NAME_DISTRIBUTOR:
232 case DATA_LOW: 232 case TIPC_LOW_IMPORTANCE:
233 case DATA_MEDIUM: 233 case TIPC_MEDIUM_IMPORTANCE:
234 case DATA_HIGH: 234 case TIPC_HIGH_IMPORTANCE:
235 case DATA_CRITICAL: 235 case TIPC_CRITICAL_IMPORTANCE:
236 if (msg_short(msg)) 236 if (msg_short(msg))
237 break; /* No error */ 237 break; /* No error */
238 switch (msg_errcode(msg)) { 238 switch (msg_errcode(msg)) {
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index e9ef6df26562..6ad070d87702 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -40,18 +40,16 @@
40#include "core.h" 40#include "core.h"
41 41
42#define TIPC_VERSION 2 42#define TIPC_VERSION 2
43#define DATA_LOW TIPC_LOW_IMPORTANCE 43
44#define DATA_MEDIUM TIPC_MEDIUM_IMPORTANCE 44#define SHORT_H_SIZE 24 /* Connected, in-cluster messages */
45#define DATA_HIGH TIPC_HIGH_IMPORTANCE
46#define DATA_CRITICAL TIPC_CRITICAL_IMPORTANCE
47#define SHORT_H_SIZE 24 /* Connected,in cluster */
48#define DIR_MSG_H_SIZE 32 /* Directly addressed messages */ 45#define DIR_MSG_H_SIZE 32 /* Directly addressed messages */
49#define CONN_MSG_H_SIZE 36 /* Routed connected msgs*/ 46#define LONG_H_SIZE 40 /* Named messages */
50#define LONG_H_SIZE 40 /* Named Messages */
51#define MCAST_H_SIZE 44 /* Multicast messages */ 47#define MCAST_H_SIZE 44 /* Multicast messages */
52#define MAX_H_SIZE 60 /* Inclusive full options */ 48#define INT_H_SIZE 40 /* Internal messages */
49#define MIN_H_SIZE 24 /* Smallest legal TIPC header size */
50#define MAX_H_SIZE 60 /* Largest possible TIPC header size */
51
53#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) 52#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
54#define LINK_CONFIG 13
55 53
56 54
57/* 55/*
@@ -72,8 +70,10 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w,
72 u32 pos, u32 mask, u32 val) 70 u32 pos, u32 mask, u32 val)
73{ 71{
74 val = (val & mask) << pos; 72 val = (val & mask) << pos;
75 m->hdr[w] &= ~htonl(mask << pos); 73 val = htonl(val);
76 m->hdr[w] |= htonl(val); 74 mask = htonl(mask << pos);
75 m->hdr[w] &= ~mask;
76 m->hdr[w] |= val;
77} 77}
78 78
79/* 79/*
@@ -87,7 +87,7 @@ static inline u32 msg_version(struct tipc_msg *m)
87 87
88static inline void msg_set_version(struct tipc_msg *m) 88static inline void msg_set_version(struct tipc_msg *m)
89{ 89{
90 msg_set_bits(m, 0, 29, 0xf, TIPC_VERSION); 90 msg_set_bits(m, 0, 29, 7, TIPC_VERSION);
91} 91}
92 92
93static inline u32 msg_user(struct tipc_msg *m) 93static inline u32 msg_user(struct tipc_msg *m)
@@ -97,7 +97,7 @@ static inline u32 msg_user(struct tipc_msg *m)
97 97
98static inline u32 msg_isdata(struct tipc_msg *m) 98static inline u32 msg_isdata(struct tipc_msg *m)
99{ 99{
100 return (msg_user(m) <= DATA_CRITICAL); 100 return (msg_user(m) <= TIPC_CRITICAL_IMPORTANCE);
101} 101}
102 102
103static inline void msg_set_user(struct tipc_msg *m, u32 n) 103static inline void msg_set_user(struct tipc_msg *m, u32 n)
@@ -190,18 +190,6 @@ static inline void msg_set_lookup_scope(struct tipc_msg *m, u32 n)
190 msg_set_bits(m, 1, 19, 0x3, n); 190 msg_set_bits(m, 1, 19, 0x3, n);
191} 191}
192 192
193static inline void msg_set_options(struct tipc_msg *m, const char *opt, u32 sz)
194{
195 u32 hsz = msg_hdr_sz(m);
196 char *to = (char *)&m->hdr[hsz/4];
197
198 if ((hsz < DIR_MSG_H_SIZE) || ((hsz + sz) > MAX_H_SIZE))
199 return;
200 msg_set_bits(m, 1, 16, 0x7, (hsz - 28)/4);
201 msg_set_hdr_sz(m, hsz + sz);
202 memcpy(to, opt, sz);
203}
204
205static inline u32 msg_bcast_ack(struct tipc_msg *m) 193static inline u32 msg_bcast_ack(struct tipc_msg *m)
206{ 194{
207 return msg_bits(m, 1, 0, 0xffff); 195 return msg_bits(m, 1, 0, 0xffff);
@@ -330,17 +318,6 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m)
330 return (struct tipc_msg *)msg_data(m); 318 return (struct tipc_msg *)msg_data(m);
331} 319}
332 320
333static inline void msg_expand(struct tipc_msg *m, u32 destnode)
334{
335 if (!msg_short(m))
336 return;
337 msg_set_hdr_sz(m, LONG_H_SIZE);
338 msg_set_orignode(m, msg_prevnode(m));
339 msg_set_destnode(m, destnode);
340 memset(&m->hdr[8], 0, 12);
341}
342
343
344 321
345/* 322/*
346 TIPC internal message header format, version 2 323 TIPC internal message header format, version 2
@@ -388,7 +365,6 @@ static inline void msg_expand(struct tipc_msg *m, u32 destnode)
388#define NAME_DISTRIBUTOR 11 365#define NAME_DISTRIBUTOR 11
389#define MSG_FRAGMENTER 12 366#define MSG_FRAGMENTER 12
390#define LINK_CONFIG 13 367#define LINK_CONFIG 13
391#define INT_H_SIZE 40
392#define DSC_H_SIZE 40 368#define DSC_H_SIZE 40
393 369
394/* 370/*
diff --git a/net/tipc/port.c b/net/tipc/port.c
index f508614ca59b..2f5806410c64 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -242,7 +242,8 @@ u32 tipc_createport_raw(void *usr_handle,
242 p_ptr->publ.max_pkt = MAX_PKT_DEFAULT; 242 p_ptr->publ.max_pkt = MAX_PKT_DEFAULT;
243 p_ptr->publ.ref = ref; 243 p_ptr->publ.ref = ref;
244 msg = &p_ptr->publ.phdr; 244 msg = &p_ptr->publ.phdr;
245 msg_init(msg, DATA_LOW, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE, 0); 245 msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE,
246 0);
246 msg_set_orignode(msg, tipc_own_addr); 247 msg_set_orignode(msg, tipc_own_addr);
247 msg_set_prevnode(msg, tipc_own_addr); 248 msg_set_prevnode(msg, tipc_own_addr);
248 msg_set_origport(msg, ref); 249 msg_set_origport(msg, ref);
@@ -413,13 +414,6 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode,
413 return buf; 414 return buf;
414} 415}
415 416
416int tipc_set_msg_option(struct tipc_port *tp_ptr, const char *opt, const u32 sz)
417{
418 msg_expand(&tp_ptr->phdr, msg_destnode(&tp_ptr->phdr));
419 msg_set_options(&tp_ptr->phdr, opt, sz);
420 return TIPC_OK;
421}
422
423int tipc_reject_msg(struct sk_buff *buf, u32 err) 417int tipc_reject_msg(struct sk_buff *buf, u32 err)
424{ 418{
425 struct tipc_msg *msg = buf_msg(buf); 419 struct tipc_msg *msg = buf_msg(buf);
@@ -632,7 +626,7 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf)
632 msg_orignode(msg), 626 msg_orignode(msg),
633 msg_destport(msg), 627 msg_destport(msg),
634 tipc_own_addr, 628 tipc_own_addr,
635 DATA_HIGH, 629 TIPC_HIGH_IMPORTANCE,
636 TIPC_CONN_MSG, 630 TIPC_CONN_MSG,
637 err, 631 err,
638 0, 632 0,
@@ -1246,6 +1240,28 @@ exit:
1246 return res; 1240 return res;
1247} 1241}
1248 1242
1243/**
1244 * tipc_disconnect_port - disconnect port from peer
1245 *
1246 * Port must be locked.
1247 */
1248
1249int tipc_disconnect_port(struct tipc_port *tp_ptr)
1250{
1251 int res;
1252
1253 if (tp_ptr->connected) {
1254 tp_ptr->connected = 0;
1255 /* let timer expire on it's own to avoid deadlock! */
1256 tipc_nodesub_unsubscribe(
1257 &((struct port *)tp_ptr)->subscription);
1258 res = TIPC_OK;
1259 } else {
1260 res = -ENOTCONN;
1261 }
1262 return res;
1263}
1264
1249/* 1265/*
1250 * tipc_disconnect(): Disconnect port form peer. 1266 * tipc_disconnect(): Disconnect port form peer.
1251 * This is a node local operation. 1267 * This is a node local operation.
@@ -1254,17 +1270,12 @@ exit:
1254int tipc_disconnect(u32 ref) 1270int tipc_disconnect(u32 ref)
1255{ 1271{
1256 struct port *p_ptr; 1272 struct port *p_ptr;
1257 int res = -ENOTCONN; 1273 int res;
1258 1274
1259 p_ptr = tipc_port_lock(ref); 1275 p_ptr = tipc_port_lock(ref);
1260 if (!p_ptr) 1276 if (!p_ptr)
1261 return -EINVAL; 1277 return -EINVAL;
1262 if (p_ptr->publ.connected) { 1278 res = tipc_disconnect_port((struct tipc_port *)p_ptr);
1263 p_ptr->publ.connected = 0;
1264 /* let timer expire on it's own to avoid deadlock! */
1265 tipc_nodesub_unsubscribe(&p_ptr->subscription);
1266 res = TIPC_OK;
1267 }
1268 tipc_port_unlock(p_ptr); 1279 tipc_port_unlock(p_ptr);
1269 return res; 1280 return res;
1270} 1281}
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
index c38744c96ed1..89cbab24d08f 100644
--- a/net/tipc/ref.c
+++ b/net/tipc/ref.c
@@ -2,7 +2,7 @@
2 * net/tipc/ref.c: TIPC object registry code 2 * net/tipc/ref.c: TIPC object registry code
3 * 3 *
4 * Copyright (c) 1991-2006, Ericsson AB 4 * Copyright (c) 1991-2006, Ericsson AB
5 * Copyright (c) 2004-2005, Wind River Systems 5 * Copyright (c) 2004-2007, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -36,32 +36,60 @@
36 36
37#include "core.h" 37#include "core.h"
38#include "ref.h" 38#include "ref.h"
39#include "port.h" 39
40#include "subscr.h" 40/**
41#include "name_distr.h" 41 * struct reference - TIPC object reference entry
42#include "name_table.h" 42 * @object: pointer to object associated with reference entry
43#include "config.h" 43 * @lock: spinlock controlling access to object
44#include "discover.h" 44 * @ref: reference value for object (combines instance & array index info)
45#include "bearer.h" 45 */
46#include "node.h" 46
47#include "bcast.h" 47struct reference {
48 void *object;
49 spinlock_t lock;
50 u32 ref;
51};
52
53/**
54 * struct tipc_ref_table - table of TIPC object reference entries
55 * @entries: pointer to array of reference entries
56 * @capacity: array index of first unusable entry
57 * @init_point: array index of first uninitialized entry
58 * @first_free: array index of first unused object reference entry
59 * @last_free: array index of last unused object reference entry
60 * @index_mask: bitmask for array index portion of reference values
61 * @start_mask: initial value for instance value portion of reference values
62 */
63
64struct ref_table {
65 struct reference *entries;
66 u32 capacity;
67 u32 init_point;
68 u32 first_free;
69 u32 last_free;
70 u32 index_mask;
71 u32 start_mask;
72};
48 73
49/* 74/*
50 * Object reference table consists of 2**N entries. 75 * Object reference table consists of 2**N entries.
51 * 76 *
52 * A used entry has object ptr != 0, reference == XXXX|own index 77 * State Object ptr Reference
53 * (XXXX changes each time entry is acquired) 78 * ----- ---------- ---------
54 * A free entry has object ptr == 0, reference == YYYY|next free index 79 * In use non-NULL XXXX|own index
55 * (YYYY is one more than last used XXXX) 80 * (XXXX changes each time entry is acquired)
81 * Free NULL YYYY|next free index
82 * (YYYY is one more than last used XXXX)
83 * Uninitialized NULL 0
56 * 84 *
57 * Free list is initially chained from entry (2**N)-1 to entry 1. 85 * Entry 0 is not used; this allows index 0 to denote the end of the free list.
58 * Entry 0 is not used to allow index 0 to indicate the end of the free list.
59 * 86 *
60 * Note: Any accidental reference of the form XXXX|0--0 won't match entry 0 87 * Note that a reference value of 0 does not necessarily indicate that an
61 * because entry 0's reference field has the form XXXX|1--1. 88 * entry is uninitialized, since the last entry in the free list could also
89 * have a reference value of 0 (although this is unlikely).
62 */ 90 */
63 91
64struct ref_table tipc_ref_table = { NULL }; 92static struct ref_table tipc_ref_table = { NULL };
65 93
66static DEFINE_RWLOCK(ref_table_lock); 94static DEFINE_RWLOCK(ref_table_lock);
67 95
@@ -72,29 +100,29 @@ static DEFINE_RWLOCK(ref_table_lock);
72int tipc_ref_table_init(u32 requested_size, u32 start) 100int tipc_ref_table_init(u32 requested_size, u32 start)
73{ 101{
74 struct reference *table; 102 struct reference *table;
75 u32 sz = 1 << 4; 103 u32 actual_size;
76 u32 index_mask;
77 int i;
78 104
79 while (sz < requested_size) { 105 /* account for unused entry, then round up size to a power of 2 */
80 sz <<= 1; 106
81 } 107 requested_size++;
82 table = vmalloc(sz * sizeof(*table)); 108 for (actual_size = 16; actual_size < requested_size; actual_size <<= 1)
109 /* do nothing */ ;
110
111 /* allocate table & mark all entries as uninitialized */
112
113 table = __vmalloc(actual_size * sizeof(struct reference),
114 GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
83 if (table == NULL) 115 if (table == NULL)
84 return -ENOMEM; 116 return -ENOMEM;
85 117
86 write_lock_bh(&ref_table_lock);
87 index_mask = sz - 1;
88 for (i = sz - 1; i >= 0; i--) {
89 table[i].object = NULL;
90 spin_lock_init(&table[i].lock);
91 table[i].data.next_plus_upper = (start & ~index_mask) + i - 1;
92 }
93 tipc_ref_table.entries = table; 118 tipc_ref_table.entries = table;
94 tipc_ref_table.index_mask = index_mask; 119 tipc_ref_table.capacity = requested_size;
95 tipc_ref_table.first_free = sz - 1; 120 tipc_ref_table.init_point = 1;
96 tipc_ref_table.last_free = 1; 121 tipc_ref_table.first_free = 0;
97 write_unlock_bh(&ref_table_lock); 122 tipc_ref_table.last_free = 0;
123 tipc_ref_table.index_mask = actual_size - 1;
124 tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask;
125
98 return TIPC_OK; 126 return TIPC_OK;
99} 127}
100 128
@@ -125,7 +153,7 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock)
125 u32 index; 153 u32 index;
126 u32 index_mask; 154 u32 index_mask;
127 u32 next_plus_upper; 155 u32 next_plus_upper;
128 u32 reference = 0; 156 u32 ref;
129 157
130 if (!object) { 158 if (!object) {
131 err("Attempt to acquire reference to non-existent object\n"); 159 err("Attempt to acquire reference to non-existent object\n");
@@ -136,6 +164,8 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock)
136 return 0; 164 return 0;
137 } 165 }
138 166
167 /* take a free entry, if available; otherwise initialize a new entry */
168
139 write_lock_bh(&ref_table_lock); 169 write_lock_bh(&ref_table_lock);
140 if (tipc_ref_table.first_free) { 170 if (tipc_ref_table.first_free) {
141 index = tipc_ref_table.first_free; 171 index = tipc_ref_table.first_free;
@@ -143,17 +173,29 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock)
143 index_mask = tipc_ref_table.index_mask; 173 index_mask = tipc_ref_table.index_mask;
144 /* take lock in case a previous user of entry still holds it */ 174 /* take lock in case a previous user of entry still holds it */
145 spin_lock_bh(&entry->lock); 175 spin_lock_bh(&entry->lock);
146 next_plus_upper = entry->data.next_plus_upper; 176 next_plus_upper = entry->ref;
147 tipc_ref_table.first_free = next_plus_upper & index_mask; 177 tipc_ref_table.first_free = next_plus_upper & index_mask;
148 reference = (next_plus_upper & ~index_mask) + index; 178 ref = (next_plus_upper & ~index_mask) + index;
149 entry->data.reference = reference; 179 entry->ref = ref;
150 entry->object = object; 180 entry->object = object;
151 if (lock != NULL)
152 *lock = &entry->lock;
153 spin_unlock_bh(&entry->lock); 181 spin_unlock_bh(&entry->lock);
182 *lock = &entry->lock;
183 }
184 else if (tipc_ref_table.init_point < tipc_ref_table.capacity) {
185 index = tipc_ref_table.init_point++;
186 entry = &(tipc_ref_table.entries[index]);
187 spin_lock_init(&entry->lock);
188 ref = tipc_ref_table.start_mask + index;
189 entry->ref = ref;
190 entry->object = object;
191 *lock = &entry->lock;
192 }
193 else {
194 ref = 0;
154 } 195 }
155 write_unlock_bh(&ref_table_lock); 196 write_unlock_bh(&ref_table_lock);
156 return reference; 197
198 return ref;
157} 199}
158 200
159/** 201/**
@@ -169,42 +211,99 @@ void tipc_ref_discard(u32 ref)
169 u32 index; 211 u32 index;
170 u32 index_mask; 212 u32 index_mask;
171 213
172 if (!ref) {
173 err("Attempt to discard reference 0\n");
174 return;
175 }
176 if (!tipc_ref_table.entries) { 214 if (!tipc_ref_table.entries) {
177 err("Reference table not found during discard attempt\n"); 215 err("Reference table not found during discard attempt\n");
178 return; 216 return;
179 } 217 }
180 218
181 write_lock_bh(&ref_table_lock);
182 index_mask = tipc_ref_table.index_mask; 219 index_mask = tipc_ref_table.index_mask;
183 index = ref & index_mask; 220 index = ref & index_mask;
184 entry = &(tipc_ref_table.entries[index]); 221 entry = &(tipc_ref_table.entries[index]);
185 222
223 write_lock_bh(&ref_table_lock);
224
186 if (!entry->object) { 225 if (!entry->object) {
187 err("Attempt to discard reference to non-existent object\n"); 226 err("Attempt to discard reference to non-existent object\n");
188 goto exit; 227 goto exit;
189 } 228 }
190 if (entry->data.reference != ref) { 229 if (entry->ref != ref) {
191 err("Attempt to discard non-existent reference\n"); 230 err("Attempt to discard non-existent reference\n");
192 goto exit; 231 goto exit;
193 } 232 }
194 233
195 /* mark entry as unused */ 234 /*
235 * mark entry as unused; increment instance part of entry's reference
236 * to invalidate any subsequent references
237 */
238
196 entry->object = NULL; 239 entry->object = NULL;
240 entry->ref = (ref & ~index_mask) + (index_mask + 1);
241
242 /* append entry to free entry list */
243
197 if (tipc_ref_table.first_free == 0) 244 if (tipc_ref_table.first_free == 0)
198 tipc_ref_table.first_free = index; 245 tipc_ref_table.first_free = index;
199 else 246 else
200 /* next_plus_upper is always XXXX|0--0 for last free entry */ 247 tipc_ref_table.entries[tipc_ref_table.last_free].ref |= index;
201 tipc_ref_table.entries[tipc_ref_table.last_free].data.next_plus_upper
202 |= index;
203 tipc_ref_table.last_free = index; 248 tipc_ref_table.last_free = index;
204 249
205 /* increment upper bits of entry to invalidate subsequent references */
206 entry->data.next_plus_upper = (ref & ~index_mask) + (index_mask + 1);
207exit: 250exit:
208 write_unlock_bh(&ref_table_lock); 251 write_unlock_bh(&ref_table_lock);
209} 252}
210 253
254/**
255 * tipc_ref_lock - lock referenced object and return pointer to it
256 */
257
258void *tipc_ref_lock(u32 ref)
259{
260 if (likely(tipc_ref_table.entries)) {
261 struct reference *entry;
262
263 entry = &tipc_ref_table.entries[ref &
264 tipc_ref_table.index_mask];
265 if (likely(entry->ref != 0)) {
266 spin_lock_bh(&entry->lock);
267 if (likely((entry->ref == ref) && (entry->object)))
268 return entry->object;
269 spin_unlock_bh(&entry->lock);
270 }
271 }
272 return NULL;
273}
274
275/**
276 * tipc_ref_unlock - unlock referenced object
277 */
278
279void tipc_ref_unlock(u32 ref)
280{
281 if (likely(tipc_ref_table.entries)) {
282 struct reference *entry;
283
284 entry = &tipc_ref_table.entries[ref &
285 tipc_ref_table.index_mask];
286 if (likely((entry->ref == ref) && (entry->object)))
287 spin_unlock_bh(&entry->lock);
288 else
289 err("Attempt to unlock non-existent reference\n");
290 }
291}
292
293/**
294 * tipc_ref_deref - return pointer referenced object (without locking it)
295 */
296
297void *tipc_ref_deref(u32 ref)
298{
299 if (likely(tipc_ref_table.entries)) {
300 struct reference *entry;
301
302 entry = &tipc_ref_table.entries[ref &
303 tipc_ref_table.index_mask];
304 if (likely(entry->ref == ref))
305 return entry->object;
306 }
307 return NULL;
308}
309
diff --git a/net/tipc/ref.h b/net/tipc/ref.h
index 38f3a7f4a78d..7e3798ea93b9 100644
--- a/net/tipc/ref.h
+++ b/net/tipc/ref.h
@@ -2,7 +2,7 @@
2 * net/tipc/ref.h: Include file for TIPC object registry code 2 * net/tipc/ref.h: Include file for TIPC object registry code
3 * 3 *
4 * Copyright (c) 1991-2006, Ericsson AB 4 * Copyright (c) 1991-2006, Ericsson AB
5 * Copyright (c) 2005, Wind River Systems 5 * Copyright (c) 2005-2006, Wind River Systems
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
@@ -37,95 +37,14 @@
37#ifndef _TIPC_REF_H 37#ifndef _TIPC_REF_H
38#define _TIPC_REF_H 38#define _TIPC_REF_H
39 39
40/**
41 * struct reference - TIPC object reference entry
42 * @object: pointer to object associated with reference entry
43 * @lock: spinlock controlling access to object
44 * @data: reference value associated with object (or link to next unused entry)
45 */
46
47struct reference {
48 void *object;
49 spinlock_t lock;
50 union {
51 u32 next_plus_upper;
52 u32 reference;
53 } data;
54};
55
56/**
57 * struct tipc_ref_table - table of TIPC object reference entries
58 * @entries: pointer to array of reference entries
59 * @index_mask: bitmask for array index portion of reference values
60 * @first_free: array index of first unused object reference entry
61 * @last_free: array index of last unused object reference entry
62 */
63
64struct ref_table {
65 struct reference *entries;
66 u32 index_mask;
67 u32 first_free;
68 u32 last_free;
69};
70
71extern struct ref_table tipc_ref_table;
72
73int tipc_ref_table_init(u32 requested_size, u32 start); 40int tipc_ref_table_init(u32 requested_size, u32 start);
74void tipc_ref_table_stop(void); 41void tipc_ref_table_stop(void);
75 42
76u32 tipc_ref_acquire(void *object, spinlock_t **lock); 43u32 tipc_ref_acquire(void *object, spinlock_t **lock);
77void tipc_ref_discard(u32 ref); 44void tipc_ref_discard(u32 ref);
78 45
79 46void *tipc_ref_lock(u32 ref);
80/** 47void tipc_ref_unlock(u32 ref);
81 * tipc_ref_lock - lock referenced object and return pointer to it 48void *tipc_ref_deref(u32 ref);
82 */
83
84static inline void *tipc_ref_lock(u32 ref)
85{
86 if (likely(tipc_ref_table.entries)) {
87 struct reference *r =
88 &tipc_ref_table.entries[ref & tipc_ref_table.index_mask];
89
90 spin_lock_bh(&r->lock);
91 if (likely(r->data.reference == ref))
92 return r->object;
93 spin_unlock_bh(&r->lock);
94 }
95 return NULL;
96}
97
98/**
99 * tipc_ref_unlock - unlock referenced object
100 */
101
102static inline void tipc_ref_unlock(u32 ref)
103{
104 if (likely(tipc_ref_table.entries)) {
105 struct reference *r =
106 &tipc_ref_table.entries[ref & tipc_ref_table.index_mask];
107
108 if (likely(r->data.reference == ref))
109 spin_unlock_bh(&r->lock);
110 else
111 err("tipc_ref_unlock() invoked using obsolete reference\n");
112 }
113}
114
115/**
116 * tipc_ref_deref - return pointer referenced object (without locking it)
117 */
118
119static inline void *tipc_ref_deref(u32 ref)
120{
121 if (likely(tipc_ref_table.entries)) {
122 struct reference *r =
123 &tipc_ref_table.entries[ref & tipc_ref_table.index_mask];
124
125 if (likely(r->data.reference == ref))
126 return r->object;
127 }
128 return NULL;
129}
130 49
131#endif 50#endif
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 22909036b9bc..05853159536a 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -43,7 +43,6 @@
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/poll.h> 44#include <linux/poll.h>
45#include <linux/fcntl.h> 45#include <linux/fcntl.h>
46#include <asm/semaphore.h>
47#include <asm/string.h> 46#include <asm/string.h>
48#include <asm/atomic.h> 47#include <asm/atomic.h>
49#include <net/sock.h> 48#include <net/sock.h>
@@ -58,16 +57,18 @@
58#define SS_LISTENING -1 /* socket is listening */ 57#define SS_LISTENING -1 /* socket is listening */
59#define SS_READY -2 /* socket is connectionless */ 58#define SS_READY -2 /* socket is connectionless */
60 59
61#define OVERLOAD_LIMIT_BASE 5000 60#define OVERLOAD_LIMIT_BASE 5000
61#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
62 62
63struct tipc_sock { 63struct tipc_sock {
64 struct sock sk; 64 struct sock sk;
65 struct tipc_port *p; 65 struct tipc_port *p;
66 struct semaphore sem;
67}; 66};
68 67
69#define tipc_sk(sk) ((struct tipc_sock*)sk) 68#define tipc_sk(sk) ((struct tipc_sock *)(sk))
69#define tipc_sk_port(sk) ((struct tipc_port *)(tipc_sk(sk)->p))
70 70
71static int backlog_rcv(struct sock *sk, struct sk_buff *skb);
71static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf); 72static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf);
72static void wakeupdispatch(struct tipc_port *tport); 73static void wakeupdispatch(struct tipc_port *tport);
73 74
@@ -81,93 +82,115 @@ static int sockets_enabled = 0;
81 82
82static atomic_t tipc_queue_size = ATOMIC_INIT(0); 83static atomic_t tipc_queue_size = ATOMIC_INIT(0);
83 84
84
85/* 85/*
86 * sock_lock(): Lock a port/socket pair. lock_sock() can 86 * Revised TIPC socket locking policy:
87 * not be used here, since the same lock must protect ports 87 *
88 * with non-socket interfaces. 88 * Most socket operations take the standard socket lock when they start
89 * See net.c for description of locking policy. 89 * and hold it until they finish (or until they need to sleep). Acquiring
90 * this lock grants the owner exclusive access to the fields of the socket
91 * data structures, with the exception of the backlog queue. A few socket
92 * operations can be done without taking the socket lock because they only
93 * read socket information that never changes during the life of the socket.
94 *
95 * Socket operations may acquire the lock for the associated TIPC port if they
96 * need to perform an operation on the port. If any routine needs to acquire
97 * both the socket lock and the port lock it must take the socket lock first
98 * to avoid the risk of deadlock.
99 *
100 * The dispatcher handling incoming messages cannot grab the socket lock in
101 * the standard fashion, since invoked it runs at the BH level and cannot block.
102 * Instead, it checks to see if the socket lock is currently owned by someone,
103 * and either handles the message itself or adds it to the socket's backlog
104 * queue; in the latter case the queued message is processed once the process
105 * owning the socket lock releases it.
106 *
107 * NOTE: Releasing the socket lock while an operation is sleeping overcomes
108 * the problem of a blocked socket operation preventing any other operations
109 * from occurring. However, applications must be careful if they have
110 * multiple threads trying to send (or receive) on the same socket, as these
111 * operations might interfere with each other. For example, doing a connect
112 * and a receive at the same time might allow the receive to consume the
113 * ACK message meant for the connect. While additional work could be done
114 * to try and overcome this, it doesn't seem to be worthwhile at the present.
115 *
116 * NOTE: Releasing the socket lock while an operation is sleeping also ensures
117 * that another operation that must be performed in a non-blocking manner is
118 * not delayed for very long because the lock has already been taken.
119 *
120 * NOTE: This code assumes that certain fields of a port/socket pair are
121 * constant over its lifetime; such fields can be examined without taking
122 * the socket lock and/or port lock, and do not need to be re-read even
123 * after resuming processing after waiting. These fields include:
124 * - socket type
125 * - pointer to socket sk structure (aka tipc_sock structure)
126 * - pointer to port structure
127 * - port reference
90 */ 128 */
91static void sock_lock(struct tipc_sock* tsock)
92{
93 spin_lock_bh(tsock->p->lock);
94}
95 129
96/* 130/**
97 * sock_unlock(): Unlock a port/socket pair 131 * advance_rx_queue - discard first buffer in socket receive queue
132 *
133 * Caller must hold socket lock
98 */ 134 */
99static void sock_unlock(struct tipc_sock* tsock) 135
136static void advance_rx_queue(struct sock *sk)
100{ 137{
101 spin_unlock_bh(tsock->p->lock); 138 buf_discard(__skb_dequeue(&sk->sk_receive_queue));
139 atomic_dec(&tipc_queue_size);
102} 140}
103 141
104/** 142/**
105 * pollmask - determine the current set of poll() events for a socket 143 * discard_rx_queue - discard all buffers in socket receive queue
106 * @sock: socket structure
107 *
108 * TIPC sets the returned events as follows:
109 * a) POLLRDNORM and POLLIN are set if the socket's receive queue is non-empty
110 * or if a connection-oriented socket is does not have an active connection
111 * (i.e. a read operation will not block).
112 * b) POLLOUT is set except when a socket's connection has been terminated
113 * (i.e. a write operation will not block).
114 * c) POLLHUP is set when a socket's connection has been terminated.
115 *
116 * IMPORTANT: The fact that a read or write operation will not block does NOT
117 * imply that the operation will succeed!
118 * 144 *
119 * Returns pollmask value 145 * Caller must hold socket lock
120 */ 146 */
121 147
122static u32 pollmask(struct socket *sock) 148static void discard_rx_queue(struct sock *sk)
123{ 149{
124 u32 mask; 150 struct sk_buff *buf;
125
126 if ((skb_queue_len(&sock->sk->sk_receive_queue) != 0) ||
127 (sock->state == SS_UNCONNECTED) ||
128 (sock->state == SS_DISCONNECTING))
129 mask = (POLLRDNORM | POLLIN);
130 else
131 mask = 0;
132
133 if (sock->state == SS_DISCONNECTING)
134 mask |= POLLHUP;
135 else
136 mask |= POLLOUT;
137 151
138 return mask; 152 while ((buf = __skb_dequeue(&sk->sk_receive_queue))) {
153 atomic_dec(&tipc_queue_size);
154 buf_discard(buf);
155 }
139} 156}
140 157
141
142/** 158/**
143 * advance_queue - discard first buffer in queue 159 * reject_rx_queue - reject all buffers in socket receive queue
144 * @tsock: TIPC socket 160 *
161 * Caller must hold socket lock
145 */ 162 */
146 163
147static void advance_queue(struct tipc_sock *tsock) 164static void reject_rx_queue(struct sock *sk)
148{ 165{
149 sock_lock(tsock); 166 struct sk_buff *buf;
150 buf_discard(skb_dequeue(&tsock->sk.sk_receive_queue)); 167
151 sock_unlock(tsock); 168 while ((buf = __skb_dequeue(&sk->sk_receive_queue))) {
152 atomic_dec(&tipc_queue_size); 169 tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
170 atomic_dec(&tipc_queue_size);
171 }
153} 172}
154 173
155/** 174/**
156 * tipc_create - create a TIPC socket 175 * tipc_create - create a TIPC socket
176 * @net: network namespace (must be default network)
157 * @sock: pre-allocated socket structure 177 * @sock: pre-allocated socket structure
158 * @protocol: protocol indicator (must be 0) 178 * @protocol: protocol indicator (must be 0)
159 * 179 *
160 * This routine creates and attaches a 'struct sock' to the 'struct socket', 180 * This routine creates additional data structures used by the TIPC socket,
161 * then create and attaches a TIPC port to the 'struct sock' part. 181 * initializes them, and links them together.
162 * 182 *
163 * Returns 0 on success, errno otherwise 183 * Returns 0 on success, errno otherwise
164 */ 184 */
185
165static int tipc_create(struct net *net, struct socket *sock, int protocol) 186static int tipc_create(struct net *net, struct socket *sock, int protocol)
166{ 187{
167 struct tipc_sock *tsock; 188 const struct proto_ops *ops;
168 struct tipc_port *port; 189 socket_state state;
169 struct sock *sk; 190 struct sock *sk;
170 u32 ref; 191 u32 portref;
192
193 /* Validate arguments */
171 194
172 if (net != &init_net) 195 if (net != &init_net)
173 return -EAFNOSUPPORT; 196 return -EAFNOSUPPORT;
@@ -175,54 +198,56 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol)
175 if (unlikely(protocol != 0)) 198 if (unlikely(protocol != 0))
176 return -EPROTONOSUPPORT; 199 return -EPROTONOSUPPORT;
177 200
178 ref = tipc_createport_raw(NULL, &dispatch, &wakeupdispatch, TIPC_LOW_IMPORTANCE);
179 if (unlikely(!ref))
180 return -ENOMEM;
181
182 sock->state = SS_UNCONNECTED;
183
184 switch (sock->type) { 201 switch (sock->type) {
185 case SOCK_STREAM: 202 case SOCK_STREAM:
186 sock->ops = &stream_ops; 203 ops = &stream_ops;
204 state = SS_UNCONNECTED;
187 break; 205 break;
188 case SOCK_SEQPACKET: 206 case SOCK_SEQPACKET:
189 sock->ops = &packet_ops; 207 ops = &packet_ops;
208 state = SS_UNCONNECTED;
190 break; 209 break;
191 case SOCK_DGRAM: 210 case SOCK_DGRAM:
192 tipc_set_portunreliable(ref, 1);
193 /* fall through */
194 case SOCK_RDM: 211 case SOCK_RDM:
195 tipc_set_portunreturnable(ref, 1); 212 ops = &msg_ops;
196 sock->ops = &msg_ops; 213 state = SS_READY;
197 sock->state = SS_READY;
198 break; 214 break;
199 default: 215 default:
200 tipc_deleteport(ref);
201 return -EPROTOTYPE; 216 return -EPROTOTYPE;
202 } 217 }
203 218
219 /* Allocate socket's protocol area */
220
204 sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); 221 sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto);
205 if (!sk) { 222 if (sk == NULL)
206 tipc_deleteport(ref);
207 return -ENOMEM; 223 return -ENOMEM;
208 }
209 224
210 sock_init_data(sock, sk); 225 /* Allocate TIPC port for socket to use */
211 init_waitqueue_head(sk->sk_sleep);
212 sk->sk_rcvtimeo = 8 * HZ; /* default connect timeout = 8s */
213 226
214 tsock = tipc_sk(sk); 227 portref = tipc_createport_raw(sk, &dispatch, &wakeupdispatch,
215 port = tipc_get_port(ref); 228 TIPC_LOW_IMPORTANCE);
229 if (unlikely(portref == 0)) {
230 sk_free(sk);
231 return -ENOMEM;
232 }
216 233
217 tsock->p = port; 234 /* Finish initializing socket data structures */
218 port->usr_handle = tsock;
219 235
220 init_MUTEX(&tsock->sem); 236 sock->ops = ops;
237 sock->state = state;
221 238
222 dbg("sock_create: %x\n",tsock); 239 sock_init_data(sock, sk);
240 sk->sk_rcvtimeo = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
241 sk->sk_backlog_rcv = backlog_rcv;
242 tipc_sk(sk)->p = tipc_get_port(portref);
223 243
224 atomic_inc(&tipc_user_count); 244 if (sock->state == SS_READY) {
245 tipc_set_portunreturnable(portref, 1);
246 if (sock->type == SOCK_DGRAM)
247 tipc_set_portunreliable(portref, 1);
248 }
225 249
250 atomic_inc(&tipc_user_count);
226 return 0; 251 return 0;
227} 252}
228 253
@@ -245,52 +270,62 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol)
245 270
246static int release(struct socket *sock) 271static int release(struct socket *sock)
247{ 272{
248 struct tipc_sock *tsock = tipc_sk(sock->sk);
249 struct sock *sk = sock->sk; 273 struct sock *sk = sock->sk;
250 int res = TIPC_OK; 274 struct tipc_port *tport;
251 struct sk_buff *buf; 275 struct sk_buff *buf;
276 int res;
252 277
253 dbg("sock_delete: %x\n",tsock); 278 /*
254 if (!tsock) 279 * Exit if socket isn't fully initialized (occurs when a failed accept()
255 return 0; 280 * releases a pre-allocated child socket that was never used)
256 down(&tsock->sem); 281 */
257 if (!sock->sk) { 282
258 up(&tsock->sem); 283 if (sk == NULL)
259 return 0; 284 return 0;
260 }
261 285
262 /* Reject unreceived messages, unless no longer connected */ 286 tport = tipc_sk_port(sk);
287 lock_sock(sk);
288
289 /*
290 * Reject all unreceived messages, except on an active connection
291 * (which disconnects locally & sends a 'FIN+' to peer)
292 */
263 293
264 while (sock->state != SS_DISCONNECTING) { 294 while (sock->state != SS_DISCONNECTING) {
265 sock_lock(tsock); 295 buf = __skb_dequeue(&sk->sk_receive_queue);
266 buf = skb_dequeue(&sk->sk_receive_queue); 296 if (buf == NULL)
267 if (!buf)
268 tsock->p->usr_handle = NULL;
269 sock_unlock(tsock);
270 if (!buf)
271 break; 297 break;
298 atomic_dec(&tipc_queue_size);
272 if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) 299 if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf)))
273 buf_discard(buf); 300 buf_discard(buf);
274 else 301 else {
302 if ((sock->state == SS_CONNECTING) ||
303 (sock->state == SS_CONNECTED)) {
304 sock->state = SS_DISCONNECTING;
305 tipc_disconnect(tport->ref);
306 }
275 tipc_reject_msg(buf, TIPC_ERR_NO_PORT); 307 tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
276 atomic_dec(&tipc_queue_size); 308 }
277 } 309 }
278 310
279 /* Delete TIPC port */ 311 /*
312 * Delete TIPC port; this ensures no more messages are queued
313 * (also disconnects an active connection & sends a 'FIN-' to peer)
314 */
280 315
281 res = tipc_deleteport(tsock->p->ref); 316 res = tipc_deleteport(tport->ref);
282 sock->sk = NULL;
283 317
284 /* Discard any remaining messages */ 318 /* Discard any remaining (connection-based) messages in receive queue */
285 319
286 while ((buf = skb_dequeue(&sk->sk_receive_queue))) { 320 discard_rx_queue(sk);
287 buf_discard(buf); 321
288 atomic_dec(&tipc_queue_size); 322 /* Reject any messages that accumulated in backlog queue */
289 }
290 323
291 up(&tsock->sem); 324 sock->state = SS_DISCONNECTING;
325 release_sock(sk);
292 326
293 sock_put(sk); 327 sock_put(sk);
328 sock->sk = NULL;
294 329
295 atomic_dec(&tipc_user_count); 330 atomic_dec(&tipc_user_count);
296 return res; 331 return res;
@@ -307,47 +342,32 @@ static int release(struct socket *sock)
307 * (i.e. a socket address length of 0) unbinds all names from the socket. 342 * (i.e. a socket address length of 0) unbinds all names from the socket.
308 * 343 *
309 * Returns 0 on success, errno otherwise 344 * Returns 0 on success, errno otherwise
345 *
346 * NOTE: This routine doesn't need to take the socket lock since it doesn't
347 * access any non-constant socket information.
310 */ 348 */
311 349
312static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) 350static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len)
313{ 351{
314 struct tipc_sock *tsock = tipc_sk(sock->sk);
315 struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; 352 struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
316 int res; 353 u32 portref = tipc_sk_port(sock->sk)->ref;
317 354
318 if (down_interruptible(&tsock->sem)) 355 if (unlikely(!uaddr_len))
319 return -ERESTARTSYS; 356 return tipc_withdraw(portref, 0, NULL);
320 357
321 if (unlikely(!uaddr_len)) { 358 if (uaddr_len < sizeof(struct sockaddr_tipc))
322 res = tipc_withdraw(tsock->p->ref, 0, NULL); 359 return -EINVAL;
323 goto exit; 360 if (addr->family != AF_TIPC)
324 } 361 return -EAFNOSUPPORT;
325
326 if (uaddr_len < sizeof(struct sockaddr_tipc)) {
327 res = -EINVAL;
328 goto exit;
329 }
330 362
331 if (addr->family != AF_TIPC) {
332 res = -EAFNOSUPPORT;
333 goto exit;
334 }
335 if (addr->addrtype == TIPC_ADDR_NAME) 363 if (addr->addrtype == TIPC_ADDR_NAME)
336 addr->addr.nameseq.upper = addr->addr.nameseq.lower; 364 addr->addr.nameseq.upper = addr->addr.nameseq.lower;
337 else if (addr->addrtype != TIPC_ADDR_NAMESEQ) { 365 else if (addr->addrtype != TIPC_ADDR_NAMESEQ)
338 res = -EAFNOSUPPORT; 366 return -EAFNOSUPPORT;
339 goto exit;
340 }
341 367
342 if (addr->scope > 0) 368 return (addr->scope > 0) ?
343 res = tipc_publish(tsock->p->ref, addr->scope, 369 tipc_publish(portref, addr->scope, &addr->addr.nameseq) :
344 &addr->addr.nameseq); 370 tipc_withdraw(portref, -addr->scope, &addr->addr.nameseq);
345 else
346 res = tipc_withdraw(tsock->p->ref, -addr->scope,
347 &addr->addr.nameseq);
348exit:
349 up(&tsock->sem);
350 return res;
351} 371}
352 372
353/** 373/**
@@ -358,30 +378,33 @@ exit:
358 * @peer: 0 to obtain socket name, 1 to obtain peer socket name 378 * @peer: 0 to obtain socket name, 1 to obtain peer socket name
359 * 379 *
360 * Returns 0 on success, errno otherwise 380 * Returns 0 on success, errno otherwise
381 *
382 * NOTE: This routine doesn't need to take the socket lock since it doesn't
383 * access any non-constant socket information.
361 */ 384 */
362 385
363static int get_name(struct socket *sock, struct sockaddr *uaddr, 386static int get_name(struct socket *sock, struct sockaddr *uaddr,
364 int *uaddr_len, int peer) 387 int *uaddr_len, int peer)
365{ 388{
366 struct tipc_sock *tsock = tipc_sk(sock->sk);
367 struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; 389 struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
390 u32 portref = tipc_sk_port(sock->sk)->ref;
368 u32 res; 391 u32 res;
369 392
370 if (down_interruptible(&tsock->sem)) 393 if (peer) {
371 return -ERESTARTSYS; 394 res = tipc_peer(portref, &addr->addr.id);
395 if (res)
396 return res;
397 } else {
398 tipc_ownidentity(portref, &addr->addr.id);
399 }
372 400
373 *uaddr_len = sizeof(*addr); 401 *uaddr_len = sizeof(*addr);
374 addr->addrtype = TIPC_ADDR_ID; 402 addr->addrtype = TIPC_ADDR_ID;
375 addr->family = AF_TIPC; 403 addr->family = AF_TIPC;
376 addr->scope = 0; 404 addr->scope = 0;
377 if (peer)
378 res = tipc_peer(tsock->p->ref, &addr->addr.id);
379 else
380 res = tipc_ownidentity(tsock->p->ref, &addr->addr.id);
381 addr->addr.name.domain = 0; 405 addr->addr.name.domain = 0;
382 406
383 up(&tsock->sem); 407 return 0;
384 return res;
385} 408}
386 409
387/** 410/**
@@ -390,15 +413,47 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
390 * @sock: socket for which to calculate the poll bits 413 * @sock: socket for which to calculate the poll bits
391 * @wait: ??? 414 * @wait: ???
392 * 415 *
393 * Returns the pollmask 416 * Returns pollmask value
417 *
418 * COMMENTARY:
419 * It appears that the usual socket locking mechanisms are not useful here
420 * since the pollmask info is potentially out-of-date the moment this routine
421 * exits. TCP and other protocols seem to rely on higher level poll routines
422 * to handle any preventable race conditions, so TIPC will do the same ...
423 *
424 * TIPC sets the returned events as follows:
425 * a) POLLRDNORM and POLLIN are set if the socket's receive queue is non-empty
426 * or if a connection-oriented socket does not have an active connection
427 * (i.e. a read operation will not block).
428 * b) POLLOUT is set except when a socket's connection has been terminated
429 * (i.e. a write operation will not block).
430 * c) POLLHUP is set when a socket's connection has been terminated.
431 *
432 * IMPORTANT: The fact that a read or write operation will not block does NOT
433 * imply that the operation will succeed!
394 */ 434 */
395 435
396static unsigned int poll(struct file *file, struct socket *sock, 436static unsigned int poll(struct file *file, struct socket *sock,
397 poll_table *wait) 437 poll_table *wait)
398{ 438{
399 poll_wait(file, sock->sk->sk_sleep, wait); 439 struct sock *sk = sock->sk;
400 /* NEED LOCK HERE? */ 440 u32 mask;
401 return pollmask(sock); 441
442 poll_wait(file, sk->sk_sleep, wait);
443
444 if (!skb_queue_empty(&sk->sk_receive_queue) ||
445 (sock->state == SS_UNCONNECTED) ||
446 (sock->state == SS_DISCONNECTING))
447 mask = (POLLRDNORM | POLLIN);
448 else
449 mask = 0;
450
451 if (sock->state == SS_DISCONNECTING)
452 mask |= POLLHUP;
453 else
454 mask |= POLLOUT;
455
456 return mask;
402} 457}
403 458
404/** 459/**
@@ -420,7 +475,6 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
420 return 0; 475 return 0;
421 if (likely(dest->addr.name.name.type == TIPC_TOP_SRV)) 476 if (likely(dest->addr.name.name.type == TIPC_TOP_SRV))
422 return 0; 477 return 0;
423
424 if (likely(dest->addr.name.name.type != TIPC_CFG_SRV)) 478 if (likely(dest->addr.name.name.type != TIPC_CFG_SRV))
425 return -EACCES; 479 return -EACCES;
426 480
@@ -434,7 +488,7 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
434 488
435/** 489/**
436 * send_msg - send message in connectionless manner 490 * send_msg - send message in connectionless manner
437 * @iocb: (unused) 491 * @iocb: if NULL, indicates that socket lock is already held
438 * @sock: socket structure 492 * @sock: socket structure
439 * @m: message to send 493 * @m: message to send
440 * @total_len: length of message 494 * @total_len: length of message
@@ -450,9 +504,9 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m)
450static int send_msg(struct kiocb *iocb, struct socket *sock, 504static int send_msg(struct kiocb *iocb, struct socket *sock,
451 struct msghdr *m, size_t total_len) 505 struct msghdr *m, size_t total_len)
452{ 506{
453 struct tipc_sock *tsock = tipc_sk(sock->sk); 507 struct sock *sk = sock->sk;
508 struct tipc_port *tport = tipc_sk_port(sk);
454 struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name; 509 struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
455 struct sk_buff *buf;
456 int needs_conn; 510 int needs_conn;
457 int res = -EINVAL; 511 int res = -EINVAL;
458 512
@@ -462,48 +516,46 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
462 (dest->family != AF_TIPC))) 516 (dest->family != AF_TIPC)))
463 return -EINVAL; 517 return -EINVAL;
464 518
519 if (iocb)
520 lock_sock(sk);
521
465 needs_conn = (sock->state != SS_READY); 522 needs_conn = (sock->state != SS_READY);
466 if (unlikely(needs_conn)) { 523 if (unlikely(needs_conn)) {
467 if (sock->state == SS_LISTENING) 524 if (sock->state == SS_LISTENING) {
468 return -EPIPE; 525 res = -EPIPE;
469 if (sock->state != SS_UNCONNECTED) 526 goto exit;
470 return -EISCONN; 527 }
471 if ((tsock->p->published) || 528 if (sock->state != SS_UNCONNECTED) {
472 ((sock->type == SOCK_STREAM) && (total_len != 0))) 529 res = -EISCONN;
473 return -EOPNOTSUPP; 530 goto exit;
531 }
532 if ((tport->published) ||
533 ((sock->type == SOCK_STREAM) && (total_len != 0))) {
534 res = -EOPNOTSUPP;
535 goto exit;
536 }
474 if (dest->addrtype == TIPC_ADDR_NAME) { 537 if (dest->addrtype == TIPC_ADDR_NAME) {
475 tsock->p->conn_type = dest->addr.name.name.type; 538 tport->conn_type = dest->addr.name.name.type;
476 tsock->p->conn_instance = dest->addr.name.name.instance; 539 tport->conn_instance = dest->addr.name.name.instance;
477 } 540 }
478 }
479
480 if (down_interruptible(&tsock->sem))
481 return -ERESTARTSYS;
482
483 if (needs_conn) {
484 541
485 /* Abort any pending connection attempts (very unlikely) */ 542 /* Abort any pending connection attempts (very unlikely) */
486 543
487 while ((buf = skb_dequeue(&sock->sk->sk_receive_queue))) { 544 reject_rx_queue(sk);
488 tipc_reject_msg(buf, TIPC_ERR_NO_PORT);
489 atomic_dec(&tipc_queue_size);
490 }
491
492 sock->state = SS_CONNECTING;
493 } 545 }
494 546
495 do { 547 do {
496 if (dest->addrtype == TIPC_ADDR_NAME) { 548 if (dest->addrtype == TIPC_ADDR_NAME) {
497 if ((res = dest_name_check(dest, m))) 549 if ((res = dest_name_check(dest, m)))
498 goto exit; 550 break;
499 res = tipc_send2name(tsock->p->ref, 551 res = tipc_send2name(tport->ref,
500 &dest->addr.name.name, 552 &dest->addr.name.name,
501 dest->addr.name.domain, 553 dest->addr.name.domain,
502 m->msg_iovlen, 554 m->msg_iovlen,
503 m->msg_iov); 555 m->msg_iov);
504 } 556 }
505 else if (dest->addrtype == TIPC_ADDR_ID) { 557 else if (dest->addrtype == TIPC_ADDR_ID) {
506 res = tipc_send2port(tsock->p->ref, 558 res = tipc_send2port(tport->ref,
507 &dest->addr.id, 559 &dest->addr.id,
508 m->msg_iovlen, 560 m->msg_iovlen,
509 m->msg_iov); 561 m->msg_iov);
@@ -511,36 +563,43 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
511 else if (dest->addrtype == TIPC_ADDR_MCAST) { 563 else if (dest->addrtype == TIPC_ADDR_MCAST) {
512 if (needs_conn) { 564 if (needs_conn) {
513 res = -EOPNOTSUPP; 565 res = -EOPNOTSUPP;
514 goto exit; 566 break;
515 } 567 }
516 if ((res = dest_name_check(dest, m))) 568 if ((res = dest_name_check(dest, m)))
517 goto exit; 569 break;
518 res = tipc_multicast(tsock->p->ref, 570 res = tipc_multicast(tport->ref,
519 &dest->addr.nameseq, 571 &dest->addr.nameseq,
520 0, 572 0,
521 m->msg_iovlen, 573 m->msg_iovlen,
522 m->msg_iov); 574 m->msg_iov);
523 } 575 }
524 if (likely(res != -ELINKCONG)) { 576 if (likely(res != -ELINKCONG)) {
525exit: 577 if (needs_conn && (res >= 0)) {
526 up(&tsock->sem); 578 sock->state = SS_CONNECTING;
527 return res; 579 }
580 break;
528 } 581 }
529 if (m->msg_flags & MSG_DONTWAIT) { 582 if (m->msg_flags & MSG_DONTWAIT) {
530 res = -EWOULDBLOCK; 583 res = -EWOULDBLOCK;
531 goto exit; 584 break;
532 }
533 if (wait_event_interruptible(*sock->sk->sk_sleep,
534 !tsock->p->congested)) {
535 res = -ERESTARTSYS;
536 goto exit;
537 } 585 }
586 release_sock(sk);
587 res = wait_event_interruptible(*sk->sk_sleep,
588 !tport->congested);
589 lock_sock(sk);
590 if (res)
591 break;
538 } while (1); 592 } while (1);
593
594exit:
595 if (iocb)
596 release_sock(sk);
597 return res;
539} 598}
540 599
541/** 600/**
542 * send_packet - send a connection-oriented message 601 * send_packet - send a connection-oriented message
543 * @iocb: (unused) 602 * @iocb: if NULL, indicates that socket lock is already held
544 * @sock: socket structure 603 * @sock: socket structure
545 * @m: message to send 604 * @m: message to send
546 * @total_len: length of message 605 * @total_len: length of message
@@ -553,7 +612,8 @@ exit:
553static int send_packet(struct kiocb *iocb, struct socket *sock, 612static int send_packet(struct kiocb *iocb, struct socket *sock,
554 struct msghdr *m, size_t total_len) 613 struct msghdr *m, size_t total_len)
555{ 614{
556 struct tipc_sock *tsock = tipc_sk(sock->sk); 615 struct sock *sk = sock->sk;
616 struct tipc_port *tport = tipc_sk_port(sk);
557 struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name; 617 struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name;
558 int res; 618 int res;
559 619
@@ -562,9 +622,8 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
562 if (unlikely(dest)) 622 if (unlikely(dest))
563 return send_msg(iocb, sock, m, total_len); 623 return send_msg(iocb, sock, m, total_len);
564 624
565 if (down_interruptible(&tsock->sem)) { 625 if (iocb)
566 return -ERESTARTSYS; 626 lock_sock(sk);
567 }
568 627
569 do { 628 do {
570 if (unlikely(sock->state != SS_CONNECTED)) { 629 if (unlikely(sock->state != SS_CONNECTED)) {
@@ -572,25 +631,28 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
572 res = -EPIPE; 631 res = -EPIPE;
573 else 632 else
574 res = -ENOTCONN; 633 res = -ENOTCONN;
575 goto exit; 634 break;
576 } 635 }
577 636
578 res = tipc_send(tsock->p->ref, m->msg_iovlen, m->msg_iov); 637 res = tipc_send(tport->ref, m->msg_iovlen, m->msg_iov);
579 if (likely(res != -ELINKCONG)) { 638 if (likely(res != -ELINKCONG)) {
580exit: 639 break;
581 up(&tsock->sem);
582 return res;
583 } 640 }
584 if (m->msg_flags & MSG_DONTWAIT) { 641 if (m->msg_flags & MSG_DONTWAIT) {
585 res = -EWOULDBLOCK; 642 res = -EWOULDBLOCK;
586 goto exit; 643 break;
587 }
588 if (wait_event_interruptible(*sock->sk->sk_sleep,
589 !tsock->p->congested)) {
590 res = -ERESTARTSYS;
591 goto exit;
592 } 644 }
645 release_sock(sk);
646 res = wait_event_interruptible(*sk->sk_sleep,
647 (!tport->congested || !tport->connected));
648 lock_sock(sk);
649 if (res)
650 break;
593 } while (1); 651 } while (1);
652
653 if (iocb)
654 release_sock(sk);
655 return res;
594} 656}
595 657
596/** 658/**
@@ -606,11 +668,11 @@ exit:
606 * or errno if no data sent 668 * or errno if no data sent
607 */ 669 */
608 670
609
610static int send_stream(struct kiocb *iocb, struct socket *sock, 671static int send_stream(struct kiocb *iocb, struct socket *sock,
611 struct msghdr *m, size_t total_len) 672 struct msghdr *m, size_t total_len)
612{ 673{
613 struct tipc_port *tport; 674 struct sock *sk = sock->sk;
675 struct tipc_port *tport = tipc_sk_port(sk);
614 struct msghdr my_msg; 676 struct msghdr my_msg;
615 struct iovec my_iov; 677 struct iovec my_iov;
616 struct iovec *curr_iov; 678 struct iovec *curr_iov;
@@ -622,19 +684,27 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
622 int bytes_sent; 684 int bytes_sent;
623 int res; 685 int res;
624 686
687 lock_sock(sk);
688
625 /* Handle special cases where there is no connection */ 689 /* Handle special cases where there is no connection */
626 690
627 if (unlikely(sock->state != SS_CONNECTED)) { 691 if (unlikely(sock->state != SS_CONNECTED)) {
628 if (sock->state == SS_UNCONNECTED) 692 if (sock->state == SS_UNCONNECTED) {
629 return send_packet(iocb, sock, m, total_len); 693 res = send_packet(NULL, sock, m, total_len);
630 else if (sock->state == SS_DISCONNECTING) 694 goto exit;
631 return -EPIPE; 695 } else if (sock->state == SS_DISCONNECTING) {
632 else 696 res = -EPIPE;
633 return -ENOTCONN; 697 goto exit;
698 } else {
699 res = -ENOTCONN;
700 goto exit;
701 }
634 } 702 }
635 703
636 if (unlikely(m->msg_name)) 704 if (unlikely(m->msg_name)) {
637 return -EISCONN; 705 res = -EISCONN;
706 goto exit;
707 }
638 708
639 /* 709 /*
640 * Send each iovec entry using one or more messages 710 * Send each iovec entry using one or more messages
@@ -652,7 +722,6 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
652 my_msg.msg_name = NULL; 722 my_msg.msg_name = NULL;
653 bytes_sent = 0; 723 bytes_sent = 0;
654 724
655 tport = tipc_sk(sock->sk)->p;
656 hdr_size = msg_hdr_sz(&tport->phdr); 725 hdr_size = msg_hdr_sz(&tport->phdr);
657 726
658 while (curr_iovlen--) { 727 while (curr_iovlen--) {
@@ -667,10 +736,10 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
667 bytes_to_send = curr_left; 736 bytes_to_send = curr_left;
668 my_iov.iov_base = curr_start; 737 my_iov.iov_base = curr_start;
669 my_iov.iov_len = bytes_to_send; 738 my_iov.iov_len = bytes_to_send;
670 if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) { 739 if ((res = send_packet(NULL, sock, &my_msg, 0)) < 0) {
671 if (bytes_sent != 0) 740 if (bytes_sent)
672 res = bytes_sent; 741 res = bytes_sent;
673 return res; 742 goto exit;
674 } 743 }
675 curr_left -= bytes_to_send; 744 curr_left -= bytes_to_send;
676 curr_start += bytes_to_send; 745 curr_start += bytes_to_send;
@@ -679,22 +748,23 @@ static int send_stream(struct kiocb *iocb, struct socket *sock,
679 748
680 curr_iov++; 749 curr_iov++;
681 } 750 }
682 751 res = bytes_sent;
683 return bytes_sent; 752exit:
753 release_sock(sk);
754 return res;
684} 755}
685 756
686/** 757/**
687 * auto_connect - complete connection setup to a remote port 758 * auto_connect - complete connection setup to a remote port
688 * @sock: socket structure 759 * @sock: socket structure
689 * @tsock: TIPC-specific socket structure
690 * @msg: peer's response message 760 * @msg: peer's response message
691 * 761 *
692 * Returns 0 on success, errno otherwise 762 * Returns 0 on success, errno otherwise
693 */ 763 */
694 764
695static int auto_connect(struct socket *sock, struct tipc_sock *tsock, 765static int auto_connect(struct socket *sock, struct tipc_msg *msg)
696 struct tipc_msg *msg)
697{ 766{
767 struct tipc_port *tport = tipc_sk_port(sock->sk);
698 struct tipc_portid peer; 768 struct tipc_portid peer;
699 769
700 if (msg_errcode(msg)) { 770 if (msg_errcode(msg)) {
@@ -704,8 +774,8 @@ static int auto_connect(struct socket *sock, struct tipc_sock *tsock,
704 774
705 peer.ref = msg_origport(msg); 775 peer.ref = msg_origport(msg);
706 peer.node = msg_orignode(msg); 776 peer.node = msg_orignode(msg);
707 tipc_connect2port(tsock->p->ref, &peer); 777 tipc_connect2port(tport->ref, &peer);
708 tipc_set_portimportance(tsock->p->ref, msg_importance(msg)); 778 tipc_set_portimportance(tport->ref, msg_importance(msg));
709 sock->state = SS_CONNECTED; 779 sock->state = SS_CONNECTED;
710 return 0; 780 return 0;
711} 781}
@@ -818,62 +888,54 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
818static int recv_msg(struct kiocb *iocb, struct socket *sock, 888static int recv_msg(struct kiocb *iocb, struct socket *sock,
819 struct msghdr *m, size_t buf_len, int flags) 889 struct msghdr *m, size_t buf_len, int flags)
820{ 890{
821 struct tipc_sock *tsock = tipc_sk(sock->sk); 891 struct sock *sk = sock->sk;
892 struct tipc_port *tport = tipc_sk_port(sk);
822 struct sk_buff *buf; 893 struct sk_buff *buf;
823 struct tipc_msg *msg; 894 struct tipc_msg *msg;
824 unsigned int q_len;
825 unsigned int sz; 895 unsigned int sz;
826 u32 err; 896 u32 err;
827 int res; 897 int res;
828 898
829 /* Currently doesn't support receiving into multiple iovec entries */ 899 /* Catch invalid receive requests */
830 900
831 if (m->msg_iovlen != 1) 901 if (m->msg_iovlen != 1)
832 return -EOPNOTSUPP; 902 return -EOPNOTSUPP; /* Don't do multiple iovec entries yet */
833
834 /* Catch invalid receive attempts */
835 903
836 if (unlikely(!buf_len)) 904 if (unlikely(!buf_len))
837 return -EINVAL; 905 return -EINVAL;
838 906
839 if (sock->type == SOCK_SEQPACKET) { 907 lock_sock(sk);
840 if (unlikely(sock->state == SS_UNCONNECTED))
841 return -ENOTCONN;
842 if (unlikely((sock->state == SS_DISCONNECTING) &&
843 (skb_queue_len(&sock->sk->sk_receive_queue) == 0)))
844 return -ENOTCONN;
845 }
846 908
847 /* Look for a message in receive queue; wait if necessary */ 909 if (unlikely(sock->state == SS_UNCONNECTED)) {
848 910 res = -ENOTCONN;
849 if (unlikely(down_interruptible(&tsock->sem)))
850 return -ERESTARTSYS;
851
852restart:
853 if (unlikely((skb_queue_len(&sock->sk->sk_receive_queue) == 0) &&
854 (flags & MSG_DONTWAIT))) {
855 res = -EWOULDBLOCK;
856 goto exit; 911 goto exit;
857 } 912 }
858 913
859 if ((res = wait_event_interruptible( 914restart:
860 *sock->sk->sk_sleep,
861 ((q_len = skb_queue_len(&sock->sk->sk_receive_queue)) ||
862 (sock->state == SS_DISCONNECTING))) )) {
863 goto exit;
864 }
865 915
866 /* Catch attempt to receive on an already terminated connection */ 916 /* Look for a message in receive queue; wait if necessary */
867 /* [THIS CHECK MAY OVERLAP WITH AN EARLIER CHECK] */
868 917
869 if (!q_len) { 918 while (skb_queue_empty(&sk->sk_receive_queue)) {
870 res = -ENOTCONN; 919 if (sock->state == SS_DISCONNECTING) {
871 goto exit; 920 res = -ENOTCONN;
921 goto exit;
922 }
923 if (flags & MSG_DONTWAIT) {
924 res = -EWOULDBLOCK;
925 goto exit;
926 }
927 release_sock(sk);
928 res = wait_event_interruptible(*sk->sk_sleep,
929 (!skb_queue_empty(&sk->sk_receive_queue) ||
930 (sock->state == SS_DISCONNECTING)));
931 lock_sock(sk);
932 if (res)
933 goto exit;
872 } 934 }
873 935
874 /* Get access to first message in receive queue */ 936 /* Look at first message in receive queue */
875 937
876 buf = skb_peek(&sock->sk->sk_receive_queue); 938 buf = skb_peek(&sk->sk_receive_queue);
877 msg = buf_msg(buf); 939 msg = buf_msg(buf);
878 sz = msg_data_sz(msg); 940 sz = msg_data_sz(msg);
879 err = msg_errcode(msg); 941 err = msg_errcode(msg);
@@ -881,14 +943,15 @@ restart:
881 /* Complete connection setup for an implied connect */ 943 /* Complete connection setup for an implied connect */
882 944
883 if (unlikely(sock->state == SS_CONNECTING)) { 945 if (unlikely(sock->state == SS_CONNECTING)) {
884 if ((res = auto_connect(sock, tsock, msg))) 946 res = auto_connect(sock, msg);
947 if (res)
885 goto exit; 948 goto exit;
886 } 949 }
887 950
888 /* Discard an empty non-errored message & try again */ 951 /* Discard an empty non-errored message & try again */
889 952
890 if ((!sz) && (!err)) { 953 if ((!sz) && (!err)) {
891 advance_queue(tsock); 954 advance_rx_queue(sk);
892 goto restart; 955 goto restart;
893 } 956 }
894 957
@@ -898,7 +961,8 @@ restart:
898 961
899 /* Capture ancillary data (optional) */ 962 /* Capture ancillary data (optional) */
900 963
901 if ((res = anc_data_recv(m, msg, tsock->p))) 964 res = anc_data_recv(m, msg, tport);
965 if (res)
902 goto exit; 966 goto exit;
903 967
904 /* Capture message data (if valid) & compute return value (always) */ 968 /* Capture message data (if valid) & compute return value (always) */
@@ -925,12 +989,13 @@ restart:
925 /* Consume received message (optional) */ 989 /* Consume received message (optional) */
926 990
927 if (likely(!(flags & MSG_PEEK))) { 991 if (likely(!(flags & MSG_PEEK))) {
928 if (unlikely(++tsock->p->conn_unacked >= TIPC_FLOW_CONTROL_WIN)) 992 if ((sock->state != SS_READY) &&
929 tipc_acknowledge(tsock->p->ref, tsock->p->conn_unacked); 993 (++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
930 advance_queue(tsock); 994 tipc_acknowledge(tport->ref, tport->conn_unacked);
995 advance_rx_queue(sk);
931 } 996 }
932exit: 997exit:
933 up(&tsock->sem); 998 release_sock(sk);
934 return res; 999 return res;
935} 1000}
936 1001
@@ -950,10 +1015,10 @@ exit:
950static int recv_stream(struct kiocb *iocb, struct socket *sock, 1015static int recv_stream(struct kiocb *iocb, struct socket *sock,
951 struct msghdr *m, size_t buf_len, int flags) 1016 struct msghdr *m, size_t buf_len, int flags)
952{ 1017{
953 struct tipc_sock *tsock = tipc_sk(sock->sk); 1018 struct sock *sk = sock->sk;
1019 struct tipc_port *tport = tipc_sk_port(sk);
954 struct sk_buff *buf; 1020 struct sk_buff *buf;
955 struct tipc_msg *msg; 1021 struct tipc_msg *msg;
956 unsigned int q_len;
957 unsigned int sz; 1022 unsigned int sz;
958 int sz_to_copy; 1023 int sz_to_copy;
959 int sz_copied = 0; 1024 int sz_copied = 0;
@@ -961,54 +1026,49 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
961 char __user *crs = m->msg_iov->iov_base; 1026 char __user *crs = m->msg_iov->iov_base;
962 unsigned char *buf_crs; 1027 unsigned char *buf_crs;
963 u32 err; 1028 u32 err;
964 int res; 1029 int res = 0;
965 1030
966 /* Currently doesn't support receiving into multiple iovec entries */ 1031 /* Catch invalid receive attempts */
967 1032
968 if (m->msg_iovlen != 1) 1033 if (m->msg_iovlen != 1)
969 return -EOPNOTSUPP; 1034 return -EOPNOTSUPP; /* Don't do multiple iovec entries yet */
970
971 /* Catch invalid receive attempts */
972 1035
973 if (unlikely(!buf_len)) 1036 if (unlikely(!buf_len))
974 return -EINVAL; 1037 return -EINVAL;
975 1038
976 if (unlikely(sock->state == SS_DISCONNECTING)) { 1039 lock_sock(sk);
977 if (skb_queue_len(&sock->sk->sk_receive_queue) == 0)
978 return -ENOTCONN;
979 } else if (unlikely(sock->state != SS_CONNECTED))
980 return -ENOTCONN;
981 1040
982 /* Look for a message in receive queue; wait if necessary */ 1041 if (unlikely((sock->state == SS_UNCONNECTED) ||
983 1042 (sock->state == SS_CONNECTING))) {
984 if (unlikely(down_interruptible(&tsock->sem))) 1043 res = -ENOTCONN;
985 return -ERESTARTSYS;
986
987restart:
988 if (unlikely((skb_queue_len(&sock->sk->sk_receive_queue) == 0) &&
989 (flags & MSG_DONTWAIT))) {
990 res = -EWOULDBLOCK;
991 goto exit; 1044 goto exit;
992 } 1045 }
993 1046
994 if ((res = wait_event_interruptible( 1047restart:
995 *sock->sk->sk_sleep,
996 ((q_len = skb_queue_len(&sock->sk->sk_receive_queue)) ||
997 (sock->state == SS_DISCONNECTING))) )) {
998 goto exit;
999 }
1000 1048
1001 /* Catch attempt to receive on an already terminated connection */ 1049 /* Look for a message in receive queue; wait if necessary */
1002 /* [THIS CHECK MAY OVERLAP WITH AN EARLIER CHECK] */
1003 1050
1004 if (!q_len) { 1051 while (skb_queue_empty(&sk->sk_receive_queue)) {
1005 res = -ENOTCONN; 1052 if (sock->state == SS_DISCONNECTING) {
1006 goto exit; 1053 res = -ENOTCONN;
1054 goto exit;
1055 }
1056 if (flags & MSG_DONTWAIT) {
1057 res = -EWOULDBLOCK;
1058 goto exit;
1059 }
1060 release_sock(sk);
1061 res = wait_event_interruptible(*sk->sk_sleep,
1062 (!skb_queue_empty(&sk->sk_receive_queue) ||
1063 (sock->state == SS_DISCONNECTING)));
1064 lock_sock(sk);
1065 if (res)
1066 goto exit;
1007 } 1067 }
1008 1068
1009 /* Get access to first message in receive queue */ 1069 /* Look at first message in receive queue */
1010 1070
1011 buf = skb_peek(&sock->sk->sk_receive_queue); 1071 buf = skb_peek(&sk->sk_receive_queue);
1012 msg = buf_msg(buf); 1072 msg = buf_msg(buf);
1013 sz = msg_data_sz(msg); 1073 sz = msg_data_sz(msg);
1014 err = msg_errcode(msg); 1074 err = msg_errcode(msg);
@@ -1016,7 +1076,7 @@ restart:
1016 /* Discard an empty non-errored message & try again */ 1076 /* Discard an empty non-errored message & try again */
1017 1077
1018 if ((!sz) && (!err)) { 1078 if ((!sz) && (!err)) {
1019 advance_queue(tsock); 1079 advance_rx_queue(sk);
1020 goto restart; 1080 goto restart;
1021 } 1081 }
1022 1082
@@ -1024,7 +1084,8 @@ restart:
1024 1084
1025 if (sz_copied == 0) { 1085 if (sz_copied == 0) {
1026 set_orig_addr(m, msg); 1086 set_orig_addr(m, msg);
1027 if ((res = anc_data_recv(m, msg, tsock->p))) 1087 res = anc_data_recv(m, msg, tport);
1088 if (res)
1028 goto exit; 1089 goto exit;
1029 } 1090 }
1030 1091
@@ -1032,7 +1093,7 @@ restart:
1032 1093
1033 if (!err) { 1094 if (!err) {
1034 buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle); 1095 buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle);
1035 sz = skb_tail_pointer(buf) - buf_crs; 1096 sz = (unsigned char *)msg + msg_size(msg) - buf_crs;
1036 1097
1037 needed = (buf_len - sz_copied); 1098 needed = (buf_len - sz_copied);
1038 sz_to_copy = (sz <= needed) ? sz : needed; 1099 sz_to_copy = (sz <= needed) ? sz : needed;
@@ -1062,35 +1123,37 @@ restart:
1062 /* Consume received message (optional) */ 1123 /* Consume received message (optional) */
1063 1124
1064 if (likely(!(flags & MSG_PEEK))) { 1125 if (likely(!(flags & MSG_PEEK))) {
1065 if (unlikely(++tsock->p->conn_unacked >= TIPC_FLOW_CONTROL_WIN)) 1126 if (unlikely(++tport->conn_unacked >= TIPC_FLOW_CONTROL_WIN))
1066 tipc_acknowledge(tsock->p->ref, tsock->p->conn_unacked); 1127 tipc_acknowledge(tport->ref, tport->conn_unacked);
1067 advance_queue(tsock); 1128 advance_rx_queue(sk);
1068 } 1129 }
1069 1130
1070 /* Loop around if more data is required */ 1131 /* Loop around if more data is required */
1071 1132
1072 if ((sz_copied < buf_len) /* didn't get all requested data */ 1133 if ((sz_copied < buf_len) /* didn't get all requested data */
1073 && (flags & MSG_WAITALL) /* ... and need to wait for more */ 1134 && (!skb_queue_empty(&sock->sk->sk_receive_queue) ||
1135 (flags & MSG_WAITALL))
1136 /* ... and more is ready or required */
1074 && (!(flags & MSG_PEEK)) /* ... and aren't just peeking at data */ 1137 && (!(flags & MSG_PEEK)) /* ... and aren't just peeking at data */
1075 && (!err) /* ... and haven't reached a FIN */ 1138 && (!err) /* ... and haven't reached a FIN */
1076 ) 1139 )
1077 goto restart; 1140 goto restart;
1078 1141
1079exit: 1142exit:
1080 up(&tsock->sem); 1143 release_sock(sk);
1081 return sz_copied ? sz_copied : res; 1144 return sz_copied ? sz_copied : res;
1082} 1145}
1083 1146
1084/** 1147/**
1085 * queue_overloaded - test if queue overload condition exists 1148 * rx_queue_full - determine if receive queue can accept another message
1149 * @msg: message to be added to queue
1086 * @queue_size: current size of queue 1150 * @queue_size: current size of queue
1087 * @base: nominal maximum size of queue 1151 * @base: nominal maximum size of queue
1088 * @msg: message to be added to queue
1089 * 1152 *
1090 * Returns 1 if queue is currently overloaded, 0 otherwise 1153 * Returns 1 if queue is unable to accept message, 0 otherwise
1091 */ 1154 */
1092 1155
1093static int queue_overloaded(u32 queue_size, u32 base, struct tipc_msg *msg) 1156static int rx_queue_full(struct tipc_msg *msg, u32 queue_size, u32 base)
1094{ 1157{
1095 u32 threshold; 1158 u32 threshold;
1096 u32 imp = msg_importance(msg); 1159 u32 imp = msg_importance(msg);
@@ -1107,41 +1170,28 @@ static int queue_overloaded(u32 queue_size, u32 base, struct tipc_msg *msg)
1107 if (msg_connected(msg)) 1170 if (msg_connected(msg))
1108 threshold *= 4; 1171 threshold *= 4;
1109 1172
1110 return (queue_size > threshold); 1173 return (queue_size >= threshold);
1111}
1112
1113/**
1114 * async_disconnect - wrapper function used to disconnect port
1115 * @portref: TIPC port reference (passed as pointer-sized value)
1116 */
1117
1118static void async_disconnect(unsigned long portref)
1119{
1120 tipc_disconnect((u32)portref);
1121} 1174}
1122 1175
1123/** 1176/**
1124 * dispatch - handle arriving message 1177 * filter_rcv - validate incoming message
1125 * @tport: TIPC port that received message 1178 * @sk: socket
1126 * @buf: message 1179 * @buf: message
1127 * 1180 *
1128 * Called with port locked. Must not take socket lock to avoid deadlock risk. 1181 * Enqueues message on receive queue if acceptable; optionally handles
1182 * disconnect indication for a connected socket.
1183 *
1184 * Called with socket lock already taken; port lock may also be taken.
1129 * 1185 *
1130 * Returns TIPC error status code (TIPC_OK if message is not to be rejected) 1186 * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
1131 */ 1187 */
1132 1188
1133static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) 1189static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
1134{ 1190{
1191 struct socket *sock = sk->sk_socket;
1135 struct tipc_msg *msg = buf_msg(buf); 1192 struct tipc_msg *msg = buf_msg(buf);
1136 struct tipc_sock *tsock = (struct tipc_sock *)tport->usr_handle;
1137 struct socket *sock;
1138 u32 recv_q_len; 1193 u32 recv_q_len;
1139 1194
1140 /* Reject message if socket is closing */
1141
1142 if (!tsock)
1143 return TIPC_ERR_NO_PORT;
1144
1145 /* Reject message if it is wrong sort of message for socket */ 1195 /* Reject message if it is wrong sort of message for socket */
1146 1196
1147 /* 1197 /*
@@ -1149,7 +1199,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
1149 * "NO PORT" ISN'T REALLY THE RIGHT ERROR CODE, AND THERE MAY 1199 * "NO PORT" ISN'T REALLY THE RIGHT ERROR CODE, AND THERE MAY
1150 * BE SECURITY IMPLICATIONS INHERENT IN REJECTING INVALID TRAFFIC 1200 * BE SECURITY IMPLICATIONS INHERENT IN REJECTING INVALID TRAFFIC
1151 */ 1201 */
1152 sock = tsock->sk.sk_socket; 1202
1153 if (sock->state == SS_READY) { 1203 if (sock->state == SS_READY) {
1154 if (msg_connected(msg)) { 1204 if (msg_connected(msg)) {
1155 msg_dbg(msg, "dispatch filter 1\n"); 1205 msg_dbg(msg, "dispatch filter 1\n");
@@ -1192,52 +1242,103 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
1192 1242
1193 /* Reject message if there isn't room to queue it */ 1243 /* Reject message if there isn't room to queue it */
1194 1244
1195 if (unlikely((u32)atomic_read(&tipc_queue_size) > 1245 recv_q_len = (u32)atomic_read(&tipc_queue_size);
1196 OVERLOAD_LIMIT_BASE)) { 1246 if (unlikely(recv_q_len >= OVERLOAD_LIMIT_BASE)) {
1197 if (queue_overloaded(atomic_read(&tipc_queue_size), 1247 if (rx_queue_full(msg, recv_q_len, OVERLOAD_LIMIT_BASE))
1198 OVERLOAD_LIMIT_BASE, msg))
1199 return TIPC_ERR_OVERLOAD; 1248 return TIPC_ERR_OVERLOAD;
1200 } 1249 }
1201 recv_q_len = skb_queue_len(&tsock->sk.sk_receive_queue); 1250 recv_q_len = skb_queue_len(&sk->sk_receive_queue);
1202 if (unlikely(recv_q_len > (OVERLOAD_LIMIT_BASE / 2))) { 1251 if (unlikely(recv_q_len >= (OVERLOAD_LIMIT_BASE / 2))) {
1203 if (queue_overloaded(recv_q_len, 1252 if (rx_queue_full(msg, recv_q_len, OVERLOAD_LIMIT_BASE / 2))
1204 OVERLOAD_LIMIT_BASE / 2, msg))
1205 return TIPC_ERR_OVERLOAD; 1253 return TIPC_ERR_OVERLOAD;
1206 } 1254 }
1207 1255
1256 /* Enqueue message (finally!) */
1257
1258 msg_dbg(msg, "<DISP<: ");
1259 TIPC_SKB_CB(buf)->handle = msg_data(msg);
1260 atomic_inc(&tipc_queue_size);
1261 __skb_queue_tail(&sk->sk_receive_queue, buf);
1262
1208 /* Initiate connection termination for an incoming 'FIN' */ 1263 /* Initiate connection termination for an incoming 'FIN' */
1209 1264
1210 if (unlikely(msg_errcode(msg) && (sock->state == SS_CONNECTED))) { 1265 if (unlikely(msg_errcode(msg) && (sock->state == SS_CONNECTED))) {
1211 sock->state = SS_DISCONNECTING; 1266 sock->state = SS_DISCONNECTING;
1212 /* Note: Use signal since port lock is already taken! */ 1267 tipc_disconnect_port(tipc_sk_port(sk));
1213 tipc_k_signal((Handler)async_disconnect, tport->ref);
1214 } 1268 }
1215 1269
1216 /* Enqueue message (finally!) */ 1270 if (waitqueue_active(sk->sk_sleep))
1271 wake_up_interruptible(sk->sk_sleep);
1272 return TIPC_OK;
1273}
1217 1274
1218 msg_dbg(msg,"<DISP<: "); 1275/**
1219 TIPC_SKB_CB(buf)->handle = msg_data(msg); 1276 * backlog_rcv - handle incoming message from backlog queue
1220 atomic_inc(&tipc_queue_size); 1277 * @sk: socket
1221 skb_queue_tail(&sock->sk->sk_receive_queue, buf); 1278 * @buf: message
1279 *
1280 * Caller must hold socket lock, but not port lock.
1281 *
1282 * Returns 0
1283 */
1222 1284
1223 if (waitqueue_active(sock->sk->sk_sleep)) 1285static int backlog_rcv(struct sock *sk, struct sk_buff *buf)
1224 wake_up_interruptible(sock->sk->sk_sleep); 1286{
1225 return TIPC_OK; 1287 u32 res;
1288
1289 res = filter_rcv(sk, buf);
1290 if (res)
1291 tipc_reject_msg(buf, res);
1292 return 0;
1293}
1294
1295/**
1296 * dispatch - handle incoming message
1297 * @tport: TIPC port that received message
1298 * @buf: message
1299 *
1300 * Called with port lock already taken.
1301 *
1302 * Returns TIPC error status code (TIPC_OK if message is not to be rejected)
1303 */
1304
1305static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf)
1306{
1307 struct sock *sk = (struct sock *)tport->usr_handle;
1308 u32 res;
1309
1310 /*
1311 * Process message if socket is unlocked; otherwise add to backlog queue
1312 *
1313 * This code is based on sk_receive_skb(), but must be distinct from it
1314 * since a TIPC-specific filter/reject mechanism is utilized
1315 */
1316
1317 bh_lock_sock(sk);
1318 if (!sock_owned_by_user(sk)) {
1319 res = filter_rcv(sk, buf);
1320 } else {
1321 sk_add_backlog(sk, buf);
1322 res = TIPC_OK;
1323 }
1324 bh_unlock_sock(sk);
1325
1326 return res;
1226} 1327}
1227 1328
1228/** 1329/**
1229 * wakeupdispatch - wake up port after congestion 1330 * wakeupdispatch - wake up port after congestion
1230 * @tport: port to wakeup 1331 * @tport: port to wakeup
1231 * 1332 *
1232 * Called with port lock on. 1333 * Called with port lock already taken.
1233 */ 1334 */
1234 1335
1235static void wakeupdispatch(struct tipc_port *tport) 1336static void wakeupdispatch(struct tipc_port *tport)
1236{ 1337{
1237 struct tipc_sock *tsock = (struct tipc_sock *)tport->usr_handle; 1338 struct sock *sk = (struct sock *)tport->usr_handle;
1238 1339
1239 if (waitqueue_active(tsock->sk.sk_sleep)) 1340 if (waitqueue_active(sk->sk_sleep))
1240 wake_up_interruptible(tsock->sk.sk_sleep); 1341 wake_up_interruptible(sk->sk_sleep);
1241} 1342}
1242 1343
1243/** 1344/**
@@ -1245,7 +1346,7 @@ static void wakeupdispatch(struct tipc_port *tport)
1245 * @sock: socket structure 1346 * @sock: socket structure
1246 * @dest: socket address for destination port 1347 * @dest: socket address for destination port
1247 * @destlen: size of socket address data structure 1348 * @destlen: size of socket address data structure
1248 * @flags: (unused) 1349 * @flags: file-related flags associated with socket
1249 * 1350 *
1250 * Returns 0 on success, errno otherwise 1351 * Returns 0 on success, errno otherwise
1251 */ 1352 */
@@ -1253,72 +1354,105 @@ static void wakeupdispatch(struct tipc_port *tport)
1253static int connect(struct socket *sock, struct sockaddr *dest, int destlen, 1354static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1254 int flags) 1355 int flags)
1255{ 1356{
1256 struct tipc_sock *tsock = tipc_sk(sock->sk); 1357 struct sock *sk = sock->sk;
1257 struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest; 1358 struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
1258 struct msghdr m = {NULL,}; 1359 struct msghdr m = {NULL,};
1259 struct sk_buff *buf; 1360 struct sk_buff *buf;
1260 struct tipc_msg *msg; 1361 struct tipc_msg *msg;
1261 int res; 1362 int res;
1262 1363
1263 /* For now, TIPC does not allow use of connect() with DGRAM or RDM types */ 1364 lock_sock(sk);
1264 1365
1265 if (sock->state == SS_READY) 1366 /* For now, TIPC does not allow use of connect() with DGRAM/RDM types */
1266 return -EOPNOTSUPP; 1367
1267 1368 if (sock->state == SS_READY) {
1268 /* Issue Posix-compliant error code if socket is in the wrong state */ 1369 res = -EOPNOTSUPP;
1269 1370 goto exit;
1270 if (sock->state == SS_LISTENING) 1371 }
1271 return -EOPNOTSUPP; 1372
1272 if (sock->state == SS_CONNECTING) 1373 /* For now, TIPC does not support the non-blocking form of connect() */
1273 return -EALREADY; 1374
1274 if (sock->state != SS_UNCONNECTED) 1375 if (flags & O_NONBLOCK) {
1275 return -EISCONN; 1376 res = -EWOULDBLOCK;
1276 1377 goto exit;
1277 /* 1378 }
1278 * Reject connection attempt using multicast address 1379
1279 * 1380 /* Issue Posix-compliant error code if socket is in the wrong state */
1280 * Note: send_msg() validates the rest of the address fields, 1381
1281 * so there's no need to do it here 1382 if (sock->state == SS_LISTENING) {
1282 */ 1383 res = -EOPNOTSUPP;
1283 1384 goto exit;
1284 if (dst->addrtype == TIPC_ADDR_MCAST) 1385 }
1285 return -EINVAL; 1386 if (sock->state == SS_CONNECTING) {
1286 1387 res = -EALREADY;
1287 /* Send a 'SYN-' to destination */ 1388 goto exit;
1288 1389 }
1289 m.msg_name = dest; 1390 if (sock->state != SS_UNCONNECTED) {
1290 m.msg_namelen = destlen; 1391 res = -EISCONN;
1291 if ((res = send_msg(NULL, sock, &m, 0)) < 0) { 1392 goto exit;
1292 sock->state = SS_DISCONNECTING; 1393 }
1293 return res; 1394
1294 } 1395 /*
1295 1396 * Reject connection attempt using multicast address
1296 if (down_interruptible(&tsock->sem)) 1397 *
1297 return -ERESTARTSYS; 1398 * Note: send_msg() validates the rest of the address fields,
1298 1399 * so there's no need to do it here
1299 /* Wait for destination's 'ACK' response */ 1400 */
1300 1401
1301 res = wait_event_interruptible_timeout(*sock->sk->sk_sleep, 1402 if (dst->addrtype == TIPC_ADDR_MCAST) {
1302 skb_queue_len(&sock->sk->sk_receive_queue), 1403 res = -EINVAL;
1303 sock->sk->sk_rcvtimeo); 1404 goto exit;
1304 buf = skb_peek(&sock->sk->sk_receive_queue); 1405 }
1305 if (res > 0) { 1406
1306 msg = buf_msg(buf); 1407 /* Reject any messages already in receive queue (very unlikely) */
1307 res = auto_connect(sock, tsock, msg); 1408
1308 if (!res) { 1409 reject_rx_queue(sk);
1309 if (!msg_data_sz(msg)) 1410
1310 advance_queue(tsock); 1411 /* Send a 'SYN-' to destination */
1311 } 1412
1312 } else { 1413 m.msg_name = dest;
1313 if (res == 0) { 1414 m.msg_namelen = destlen;
1314 res = -ETIMEDOUT; 1415 res = send_msg(NULL, sock, &m, 0);
1315 } else 1416 if (res < 0) {
1316 { /* leave "res" unchanged */ } 1417 goto exit;
1317 sock->state = SS_DISCONNECTING; 1418 }
1318 } 1419
1319 1420 /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
1320 up(&tsock->sem); 1421
1321 return res; 1422 release_sock(sk);
1423 res = wait_event_interruptible_timeout(*sk->sk_sleep,
1424 (!skb_queue_empty(&sk->sk_receive_queue) ||
1425 (sock->state != SS_CONNECTING)),
1426 sk->sk_rcvtimeo);
1427 lock_sock(sk);
1428
1429 if (res > 0) {
1430 buf = skb_peek(&sk->sk_receive_queue);
1431 if (buf != NULL) {
1432 msg = buf_msg(buf);
1433 res = auto_connect(sock, msg);
1434 if (!res) {
1435 if (!msg_data_sz(msg))
1436 advance_rx_queue(sk);
1437 }
1438 } else {
1439 if (sock->state == SS_CONNECTED) {
1440 res = -EISCONN;
1441 } else {
1442 res = -ECONNREFUSED;
1443 }
1444 }
1445 } else {
1446 if (res == 0)
1447 res = -ETIMEDOUT;
1448 else
1449 ; /* leave "res" unchanged */
1450 sock->state = SS_DISCONNECTING;
1451 }
1452
1453exit:
1454 release_sock(sk);
1455 return res;
1322} 1456}
1323 1457
1324/** 1458/**
@@ -1331,14 +1465,22 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
1331 1465
1332static int listen(struct socket *sock, int len) 1466static int listen(struct socket *sock, int len)
1333{ 1467{
1334 /* REQUIRES SOCKET LOCKING OF SOME SORT? */ 1468 struct sock *sk = sock->sk;
1469 int res;
1470
1471 lock_sock(sk);
1335 1472
1336 if (sock->state == SS_READY) 1473 if (sock->state == SS_READY)
1337 return -EOPNOTSUPP; 1474 res = -EOPNOTSUPP;
1338 if (sock->state != SS_UNCONNECTED) 1475 else if (sock->state != SS_UNCONNECTED)
1339 return -EINVAL; 1476 res = -EINVAL;
1340 sock->state = SS_LISTENING; 1477 else {
1341 return 0; 1478 sock->state = SS_LISTENING;
1479 res = 0;
1480 }
1481
1482 release_sock(sk);
1483 return res;
1342} 1484}
1343 1485
1344/** 1486/**
@@ -1350,50 +1492,69 @@ static int listen(struct socket *sock, int len)
1350 * Returns 0 on success, errno otherwise 1492 * Returns 0 on success, errno otherwise
1351 */ 1493 */
1352 1494
1353static int accept(struct socket *sock, struct socket *newsock, int flags) 1495static int accept(struct socket *sock, struct socket *new_sock, int flags)
1354{ 1496{
1355 struct tipc_sock *tsock = tipc_sk(sock->sk); 1497 struct sock *sk = sock->sk;
1356 struct sk_buff *buf; 1498 struct sk_buff *buf;
1357 int res = -EFAULT; 1499 int res;
1358
1359 if (sock->state == SS_READY)
1360 return -EOPNOTSUPP;
1361 if (sock->state != SS_LISTENING)
1362 return -EINVAL;
1363
1364 if (unlikely((skb_queue_len(&sock->sk->sk_receive_queue) == 0) &&
1365 (flags & O_NONBLOCK)))
1366 return -EWOULDBLOCK;
1367 1500
1368 if (down_interruptible(&tsock->sem)) 1501 lock_sock(sk);
1369 return -ERESTARTSYS;
1370 1502
1371 if (wait_event_interruptible(*sock->sk->sk_sleep, 1503 if (sock->state == SS_READY) {
1372 skb_queue_len(&sock->sk->sk_receive_queue))) { 1504 res = -EOPNOTSUPP;
1373 res = -ERESTARTSYS; 1505 goto exit;
1506 }
1507 if (sock->state != SS_LISTENING) {
1508 res = -EINVAL;
1374 goto exit; 1509 goto exit;
1375 } 1510 }
1376 buf = skb_peek(&sock->sk->sk_receive_queue);
1377 1511
1378 res = tipc_create(sock->sk->sk_net, newsock, 0); 1512 while (skb_queue_empty(&sk->sk_receive_queue)) {
1513 if (flags & O_NONBLOCK) {
1514 res = -EWOULDBLOCK;
1515 goto exit;
1516 }
1517 release_sock(sk);
1518 res = wait_event_interruptible(*sk->sk_sleep,
1519 (!skb_queue_empty(&sk->sk_receive_queue)));
1520 lock_sock(sk);
1521 if (res)
1522 goto exit;
1523 }
1524
1525 buf = skb_peek(&sk->sk_receive_queue);
1526
1527 res = tipc_create(sock_net(sock->sk), new_sock, 0);
1379 if (!res) { 1528 if (!res) {
1380 struct tipc_sock *new_tsock = tipc_sk(newsock->sk); 1529 struct sock *new_sk = new_sock->sk;
1530 struct tipc_port *new_tport = tipc_sk_port(new_sk);
1531 u32 new_ref = new_tport->ref;
1381 struct tipc_portid id; 1532 struct tipc_portid id;
1382 struct tipc_msg *msg = buf_msg(buf); 1533 struct tipc_msg *msg = buf_msg(buf);
1383 u32 new_ref = new_tsock->p->ref; 1534
1535 lock_sock(new_sk);
1536
1537 /*
1538 * Reject any stray messages received by new socket
1539 * before the socket lock was taken (very, very unlikely)
1540 */
1541
1542 reject_rx_queue(new_sk);
1543
1544 /* Connect new socket to it's peer */
1384 1545
1385 id.ref = msg_origport(msg); 1546 id.ref = msg_origport(msg);
1386 id.node = msg_orignode(msg); 1547 id.node = msg_orignode(msg);
1387 tipc_connect2port(new_ref, &id); 1548 tipc_connect2port(new_ref, &id);
1388 newsock->state = SS_CONNECTED; 1549 new_sock->state = SS_CONNECTED;
1389 1550
1390 tipc_set_portimportance(new_ref, msg_importance(msg)); 1551 tipc_set_portimportance(new_ref, msg_importance(msg));
1391 if (msg_named(msg)) { 1552 if (msg_named(msg)) {
1392 new_tsock->p->conn_type = msg_nametype(msg); 1553 new_tport->conn_type = msg_nametype(msg);
1393 new_tsock->p->conn_instance = msg_nameinst(msg); 1554 new_tport->conn_instance = msg_nameinst(msg);
1394 } 1555 }
1395 1556
1396 /* 1557 /*
1397 * Respond to 'SYN-' by discarding it & returning 'ACK'-. 1558 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
1398 * Respond to 'SYN+' by queuing it on new socket. 1559 * Respond to 'SYN+' by queuing it on new socket.
1399 */ 1560 */
@@ -1402,24 +1563,23 @@ static int accept(struct socket *sock, struct socket *newsock, int flags)
1402 if (!msg_data_sz(msg)) { 1563 if (!msg_data_sz(msg)) {
1403 struct msghdr m = {NULL,}; 1564 struct msghdr m = {NULL,};
1404 1565
1405 send_packet(NULL, newsock, &m, 0); 1566 advance_rx_queue(sk);
1406 advance_queue(tsock); 1567 send_packet(NULL, new_sock, &m, 0);
1407 } else { 1568 } else {
1408 sock_lock(tsock); 1569 __skb_dequeue(&sk->sk_receive_queue);
1409 skb_dequeue(&sock->sk->sk_receive_queue); 1570 __skb_queue_head(&new_sk->sk_receive_queue, buf);
1410 sock_unlock(tsock);
1411 skb_queue_head(&newsock->sk->sk_receive_queue, buf);
1412 } 1571 }
1572 release_sock(new_sk);
1413 } 1573 }
1414exit: 1574exit:
1415 up(&tsock->sem); 1575 release_sock(sk);
1416 return res; 1576 return res;
1417} 1577}
1418 1578
1419/** 1579/**
1420 * shutdown - shutdown socket connection 1580 * shutdown - shutdown socket connection
1421 * @sock: socket structure 1581 * @sock: socket structure
1422 * @how: direction to close (unused; always treated as read + write) 1582 * @how: direction to close (must be SHUT_RDWR)
1423 * 1583 *
1424 * Terminates connection (if necessary), then purges socket's receive queue. 1584 * Terminates connection (if necessary), then purges socket's receive queue.
1425 * 1585 *
@@ -1428,53 +1588,46 @@ exit:
1428 1588
1429static int shutdown(struct socket *sock, int how) 1589static int shutdown(struct socket *sock, int how)
1430{ 1590{
1431 struct tipc_sock* tsock = tipc_sk(sock->sk); 1591 struct sock *sk = sock->sk;
1592 struct tipc_port *tport = tipc_sk_port(sk);
1432 struct sk_buff *buf; 1593 struct sk_buff *buf;
1433 int res; 1594 int res;
1434 1595
1435 /* Could return -EINVAL for an invalid "how", but why bother? */ 1596 if (how != SHUT_RDWR)
1436 1597 return -EINVAL;
1437 if (down_interruptible(&tsock->sem))
1438 return -ERESTARTSYS;
1439 1598
1440 sock_lock(tsock); 1599 lock_sock(sk);
1441 1600
1442 switch (sock->state) { 1601 switch (sock->state) {
1602 case SS_CONNECTING:
1443 case SS_CONNECTED: 1603 case SS_CONNECTED:
1444 1604
1445 /* Send 'FIN+' or 'FIN-' message to peer */ 1605 /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */
1446
1447 sock_unlock(tsock);
1448restart: 1606restart:
1449 if ((buf = skb_dequeue(&sock->sk->sk_receive_queue))) { 1607 buf = __skb_dequeue(&sk->sk_receive_queue);
1608 if (buf) {
1450 atomic_dec(&tipc_queue_size); 1609 atomic_dec(&tipc_queue_size);
1451 if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) { 1610 if (TIPC_SKB_CB(buf)->handle != msg_data(buf_msg(buf))) {
1452 buf_discard(buf); 1611 buf_discard(buf);
1453 goto restart; 1612 goto restart;
1454 } 1613 }
1614 tipc_disconnect(tport->ref);
1455 tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN); 1615 tipc_reject_msg(buf, TIPC_CONN_SHUTDOWN);
1616 } else {
1617 tipc_shutdown(tport->ref);
1456 } 1618 }
1457 else { 1619
1458 tipc_shutdown(tsock->p->ref); 1620 sock->state = SS_DISCONNECTING;
1459 }
1460 sock_lock(tsock);
1461 1621
1462 /* fall through */ 1622 /* fall through */
1463 1623
1464 case SS_DISCONNECTING: 1624 case SS_DISCONNECTING:
1465 1625
1466 /* Discard any unreceived messages */ 1626 /* Discard any unreceived messages; wake up sleeping tasks */
1467 1627
1468 while ((buf = skb_dequeue(&sock->sk->sk_receive_queue))) { 1628 discard_rx_queue(sk);
1469 atomic_dec(&tipc_queue_size); 1629 if (waitqueue_active(sk->sk_sleep))
1470 buf_discard(buf); 1630 wake_up_interruptible(sk->sk_sleep);
1471 }
1472 tsock->p->conn_unacked = 0;
1473
1474 /* fall through */
1475
1476 case SS_CONNECTING:
1477 sock->state = SS_DISCONNECTING;
1478 res = 0; 1631 res = 0;
1479 break; 1632 break;
1480 1633
@@ -1482,9 +1635,7 @@ restart:
1482 res = -ENOTCONN; 1635 res = -ENOTCONN;
1483 } 1636 }
1484 1637
1485 sock_unlock(tsock); 1638 release_sock(sk);
1486
1487 up(&tsock->sem);
1488 return res; 1639 return res;
1489} 1640}
1490 1641
@@ -1505,7 +1656,8 @@ restart:
1505static int setsockopt(struct socket *sock, 1656static int setsockopt(struct socket *sock,
1506 int lvl, int opt, char __user *ov, int ol) 1657 int lvl, int opt, char __user *ov, int ol)
1507{ 1658{
1508 struct tipc_sock *tsock = tipc_sk(sock->sk); 1659 struct sock *sk = sock->sk;
1660 struct tipc_port *tport = tipc_sk_port(sk);
1509 u32 value; 1661 u32 value;
1510 int res; 1662 int res;
1511 1663
@@ -1518,30 +1670,31 @@ static int setsockopt(struct socket *sock,
1518 if ((res = get_user(value, (u32 __user *)ov))) 1670 if ((res = get_user(value, (u32 __user *)ov)))
1519 return res; 1671 return res;
1520 1672
1521 if (down_interruptible(&tsock->sem)) 1673 lock_sock(sk);
1522 return -ERESTARTSYS;
1523 1674
1524 switch (opt) { 1675 switch (opt) {
1525 case TIPC_IMPORTANCE: 1676 case TIPC_IMPORTANCE:
1526 res = tipc_set_portimportance(tsock->p->ref, value); 1677 res = tipc_set_portimportance(tport->ref, value);
1527 break; 1678 break;
1528 case TIPC_SRC_DROPPABLE: 1679 case TIPC_SRC_DROPPABLE:
1529 if (sock->type != SOCK_STREAM) 1680 if (sock->type != SOCK_STREAM)
1530 res = tipc_set_portunreliable(tsock->p->ref, value); 1681 res = tipc_set_portunreliable(tport->ref, value);
1531 else 1682 else
1532 res = -ENOPROTOOPT; 1683 res = -ENOPROTOOPT;
1533 break; 1684 break;
1534 case TIPC_DEST_DROPPABLE: 1685 case TIPC_DEST_DROPPABLE:
1535 res = tipc_set_portunreturnable(tsock->p->ref, value); 1686 res = tipc_set_portunreturnable(tport->ref, value);
1536 break; 1687 break;
1537 case TIPC_CONN_TIMEOUT: 1688 case TIPC_CONN_TIMEOUT:
1538 sock->sk->sk_rcvtimeo = (value * HZ / 1000); 1689 sk->sk_rcvtimeo = msecs_to_jiffies(value);
1690 /* no need to set "res", since already 0 at this point */
1539 break; 1691 break;
1540 default: 1692 default:
1541 res = -EINVAL; 1693 res = -EINVAL;
1542 } 1694 }
1543 1695
1544 up(&tsock->sem); 1696 release_sock(sk);
1697
1545 return res; 1698 return res;
1546} 1699}
1547 1700
@@ -1562,7 +1715,8 @@ static int setsockopt(struct socket *sock,
1562static int getsockopt(struct socket *sock, 1715static int getsockopt(struct socket *sock,
1563 int lvl, int opt, char __user *ov, int __user *ol) 1716 int lvl, int opt, char __user *ov, int __user *ol)
1564{ 1717{
1565 struct tipc_sock *tsock = tipc_sk(sock->sk); 1718 struct sock *sk = sock->sk;
1719 struct tipc_port *tport = tipc_sk_port(sk);
1566 int len; 1720 int len;
1567 u32 value; 1721 u32 value;
1568 int res; 1722 int res;
@@ -1574,26 +1728,28 @@ static int getsockopt(struct socket *sock,
1574 if ((res = get_user(len, ol))) 1728 if ((res = get_user(len, ol)))
1575 return res; 1729 return res;
1576 1730
1577 if (down_interruptible(&tsock->sem)) 1731 lock_sock(sk);
1578 return -ERESTARTSYS;
1579 1732
1580 switch (opt) { 1733 switch (opt) {
1581 case TIPC_IMPORTANCE: 1734 case TIPC_IMPORTANCE:
1582 res = tipc_portimportance(tsock->p->ref, &value); 1735 res = tipc_portimportance(tport->ref, &value);
1583 break; 1736 break;
1584 case TIPC_SRC_DROPPABLE: 1737 case TIPC_SRC_DROPPABLE:
1585 res = tipc_portunreliable(tsock->p->ref, &value); 1738 res = tipc_portunreliable(tport->ref, &value);
1586 break; 1739 break;
1587 case TIPC_DEST_DROPPABLE: 1740 case TIPC_DEST_DROPPABLE:
1588 res = tipc_portunreturnable(tsock->p->ref, &value); 1741 res = tipc_portunreturnable(tport->ref, &value);
1589 break; 1742 break;
1590 case TIPC_CONN_TIMEOUT: 1743 case TIPC_CONN_TIMEOUT:
1591 value = (sock->sk->sk_rcvtimeo * 1000) / HZ; 1744 value = jiffies_to_msecs(sk->sk_rcvtimeo);
1745 /* no need to set "res", since already 0 at this point */
1592 break; 1746 break;
1593 default: 1747 default:
1594 res = -EINVAL; 1748 res = -EINVAL;
1595 } 1749 }
1596 1750
1751 release_sock(sk);
1752
1597 if (res) { 1753 if (res) {
1598 /* "get" failed */ 1754 /* "get" failed */
1599 } 1755 }
@@ -1607,7 +1763,6 @@ static int getsockopt(struct socket *sock,
1607 res = put_user(sizeof(value), ol); 1763 res = put_user(sizeof(value), ol);
1608 } 1764 }
1609 1765
1610 up(&tsock->sem);
1611 return res; 1766 return res;
1612} 1767}
1613 1768
@@ -1720,6 +1875,7 @@ int tipc_socket_init(void)
1720/** 1875/**
1721 * tipc_socket_stop - stop TIPC socket interface 1876 * tipc_socket_stop - stop TIPC socket interface
1722 */ 1877 */
1878
1723void tipc_socket_stop(void) 1879void tipc_socket_stop(void)
1724{ 1880{
1725 if (!sockets_enabled) 1881 if (!sockets_enabled)