aboutsummaryrefslogtreecommitdiffstats
path: root/lib/dma-debug.c
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2013-07-24 21:30:23 -0400
committerJ. Bruce Fields <bfields@redhat.com>2013-08-01 08:39:16 -0400
commit447383d2ba6061bb069da45f95f223a01bba61dd (patch)
treec2bd31576271ae3a1fd6a246745fbab8c5f018e9 /lib/dma-debug.c
parente4daf1ffbe6cc3b12aab4d604e627829e93e9914 (diff)
NFSD/sunrpc: avoid deadlock on TCP connection due to memory pressure.
Since we enabled auto-tuning for sunrpc TCP connections we do not guarantee that there is enough write-space on each connection to queue a reply. If memory pressure causes the window to shrink too small, the request throttling in sunrpc/svc will not accept any requests so no more requests will be handled. Even when pressure decreases the window will not grow again until data is sent on the connection. This means we get a deadlock: no requests will be handled until there is more space, and no space will be allocated until a request is handled. This can be simulated by modifying svc_tcp_has_wspace to inflate the number of byte required and removing the 'svc_sock_setbufsize' calls in svc_setup_socket. I found that multiplying by 16 was enough to make the requirement exceed the default allocation. With this modification in place: mount -o vers=3,proto=tcp 127.0.0.1:/home /mnt would block and eventually time out because the nfs server could not accept any requests. This patch relaxes the request throttling to always allow at least one request through per connection. It does this by checking both sk_stream_min_wspace() and xprt->xpt_reserved are zero. The first is zero when the TCP transmit queue is empty. The second is zero when there are no RPC requests being processed. When both of these are zero the socket is idle and so one more request can safely be allowed through. Applying this patch allows the above mount command to succeed cleanly. Tracing shows that the allocated write buffer space quickly grows and after a few requests are handled, the extra tests are no longer needed to permit further requests to be processed. The main purpose of request throttling is to handle the case when one client is slow at collecting replies and the send queue gets full of replies that the client hasn't acknowledged (at the TCP level) yet. As we only change behaviour when the send queue is empty this main purpose is still preserved. Reported-by: Ben Myers <bpm@sgi.com> Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Diffstat (limited to 'lib/dma-debug.c')
0 files changed, 0 insertions, 0 deletions
channel available */ #define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */ #define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */ #define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */ #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ struct virtio_net_config { /* The config defining mac address (if VIRTIO_NET_F_MAC) */ __u8 mac[6]; /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ __u16 status; } __attribute__((packed)); /* This is the first element of the scatter-gather list. If you don't * specify GSO or CSUM features, you can simply ignore the header. */ struct virtio_net_hdr { #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 // Use csum_start, csum_offset #define VIRTIO_NET_HDR_F_DATA_VALID 2 // Csum is valid __u8 flags; #define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame #define VIRTIO_NET_HDR_GSO_TCPV4 1 // GSO frame, IPv4 TCP (TSO) #define VIRTIO_NET_HDR_GSO_UDP 3 // GSO frame, IPv4 UDP (UFO) #define VIRTIO_NET_HDR_GSO_TCPV6 4 // GSO frame, IPv6 TCP #define VIRTIO_NET_HDR_GSO_ECN 0x80 // TCP has ECN set __u8 gso_type; __u16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */ __u16 gso_size; /* Bytes to append to hdr_len per frame */ __u16 csum_start; /* Position to start checksumming from */ __u16 csum_offset; /* Offset after that to place checksum */ }; /* This is the version of the header to use when the MRG_RXBUF * feature has been negotiated. */ struct virtio_net_hdr_mrg_rxbuf { struct virtio_net_hdr hdr; __u16 num_buffers; /* Number of merged rx buffers */ }; /* * Control virtqueue data structures * * The control virtqueue expects a header in the first sg entry * and an ack/status response in the last entry. Data for the * command goes in between. */ struct virtio_net_ctrl_hdr { __u8 class; __u8 cmd; } __attribute__((packed)); typedef __u8 virtio_net_ctrl_ack; #define VIRTIO_NET_OK 0 #define VIRTIO_NET_ERR 1 /* * Control the RX mode, ie. promisucous, allmulti, etc... * All commands require an "out" sg entry containing a 1 byte * state value, zero = disable, non-zero = enable. Commands * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature. * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA. */ #define VIRTIO_NET_CTRL_RX 0 #define VIRTIO_NET_CTRL_RX_PROMISC 0 #define VIRTIO_NET_CTRL_RX_ALLMULTI 1 #define VIRTIO_NET_CTRL_RX_ALLUNI 2 #define VIRTIO_NET_CTRL_RX_NOMULTI 3 #define VIRTIO_NET_CTRL_RX_NOUNI 4 #define VIRTIO_NET_CTRL_RX_NOBCAST 5 /* * Control the MAC filter table. * * The MAC filter table is managed by the hypervisor, the guest should * assume the size is infinite. Filtering should be considered * non-perfect, ie. based on hypervisor resources, the guest may * received packets from sources not specified in the filter list. * * In addition to the class/cmd header, the TABLE_SET command requires * two out scatterlists. Each contains a 4 byte count of entries followed * by a concatenated byte stream of the ETH_ALEN MAC addresses. The * first sg list contains unicast addresses, the second is for multicast. * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature * is available. */ struct virtio_net_ctrl_mac { __u32 entries; __u8 macs[][ETH_ALEN]; } __attribute__((packed)); #define VIRTIO_NET_CTRL_MAC 1 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 /* * Control VLAN filtering * * The VLAN filter table is controlled via a simple ADD/DEL interface. * VLAN IDs not added may be filterd by the hypervisor. Del is the * opposite of add. Both commands expect an out entry containing a 2 * byte VLAN ID. VLAN filterting is available with the * VIRTIO_NET_F_CTRL_VLAN feature bit. */ #define VIRTIO_NET_CTRL_VLAN 2 #define VIRTIO_NET_CTRL_VLAN_ADD 0 #define VIRTIO_NET_CTRL_VLAN_DEL 1 #endif /* _LINUX_VIRTIO_NET_H */