about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/block
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-12-17 23:58:12 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-12-17 23:58:12 -0500
commit: 848b81415c42ff3dc9a4204749087b015c37ef66 (patch)
tree: 391da3a73aea48632248220d2d6b8d45a88f7eae /drivers/block
parent: 992956189de58cae9f2be40585bc25105cd7c5ad (diff)
parent: 6fd59a83b9261fa53eaf98fb5514abba504a3ea3 (diff)
Merge branch 'akpm' (Andrew's patch-bomb)
Merge misc patches from Andrew Morton:
 "Incoming:

   - lots of misc stuff
   - backlight tree updates
   - lib/ updates
   - Oleg's percpu-rwsem changes
   - checkpatch
   - rtc
   - aoe
   - more checkpoint/restart support

  I still have a pile of MM stuff pending - Pekka should be merging
  later today after which that is good to go. A number of other things
  are twiddling thumbs awaiting maintainer merges."

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (180 commits)
  scatterlist: don't BUG when we can trivially return a proper error.
  docs: update documentation about /proc/<pid>/fdinfo/<fd> fanotify output
  fs, fanotify: add @mflags field to fanotify output
  docs: add documentation about /proc/<pid>/fdinfo/<fd> output
  fs, notify: add procfs fdinfo helper
  fs, exportfs: add exportfs_encode_inode_fh() helper
  fs, exportfs: escape nil dereference if no s_export_op present
  fs, epoll: add procfs fdinfo helper
  fs, eventfd: add procfs fdinfo helper
  procfs: add ability to plug in auxiliary fdinfo providers
  tools/testing/selftests/kcmp/kcmp_test.c: print reason for failure in kcmp_test
  breakpoint selftests: print failure status instead of cause make error
  kcmp selftests: print fail status instead of cause make error
  kcmp selftests: make run_tests fix
  mem-hotplug selftests: print failure status instead of cause make error
  cpu-hotplug selftests: print failure status instead of cause make error
  mqueue selftests: print failure status instead of cause make error
  vm selftests: print failure status instead of cause make error
  ubifs: use prandom_bytes
  mtd: nandsim: use prandom_bytes
  ...
Diffstat (limited to 'drivers/block')
-rw-r--r-- drivers/block/aoe/aoe.h | 57
-rw-r--r-- drivers/block/aoe/aoeblk.c | 104
-rw-r--r-- drivers/block/aoe/aoechr.c | 7
-rw-r--r-- drivers/block/aoe/aoecmd.c | 715
-rw-r--r-- drivers/block/aoe/aoedev.c | 243
-rw-r--r-- drivers/block/aoe/aoemain.c | 2
-rw-r--r-- drivers/block/aoe/aoenet.c | 15
7 files changed, 822 insertions, 321 deletions
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index d2ed7f18d1a..175649468c9 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -1,5 +1,5 @@
1/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ 1/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */
2#define VERSION "50" 2#define VERSION "81"
3#define AOE_MAJOR 152 3#define AOE_MAJOR 152
4#define DEVICE_NAME "aoe" 4#define DEVICE_NAME "aoe"
5 5
@@ -10,7 +10,7 @@
10#define AOE_PARTITIONS (16) 10#define AOE_PARTITIONS (16)
11#endif 11#endif
12 12
13#define WHITESPACE " \t\v\f\n" 13#define WHITESPACE " \t\v\f\n,"
14 14
15enum { 15enum {
16 AOECMD_ATA, 16 AOECMD_ATA,
@@ -73,21 +73,29 @@ enum {
73 DEVFL_TKILL = (1<<1), /* flag for timer to know when to kill self */ 73 DEVFL_TKILL = (1<<1), /* flag for timer to know when to kill self */
74 DEVFL_EXT = (1<<2), /* device accepts lba48 commands */ 74 DEVFL_EXT = (1<<2), /* device accepts lba48 commands */
75 DEVFL_GDALLOC = (1<<3), /* need to alloc gendisk */ 75 DEVFL_GDALLOC = (1<<3), /* need to alloc gendisk */
76 DEVFL_KICKME = (1<<4), /* slow polling network card catch */ 76 DEVFL_GD_NOW = (1<<4), /* allocating gendisk */
77 DEVFL_NEWSIZE = (1<<5), /* need to update dev size in block layer */ 77 DEVFL_KICKME = (1<<5), /* slow polling network card catch */
78 DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */
79 DEVFL_FREEING = (1<<7), /* set when device is being cleaned up */
80 DEVFL_FREED = (1<<8), /* device has been cleaned up */
78}; 81};
79 82
80enum { 83enum {
81 DEFAULTBCNT = 2 * 512, /* 2 sectors */ 84 DEFAULTBCNT = 2 * 512, /* 2 sectors */
82 MIN_BUFS = 16, 85 MIN_BUFS = 16,
83 NTARGETS = 8, 86 NTARGETS = 4,
84 NAOEIFS = 8, 87 NAOEIFS = 8,
85 NSKBPOOLMAX = 256, 88 NSKBPOOLMAX = 256,
86 NFACTIVE = 61, 89 NFACTIVE = 61,
87 90
88 TIMERTICK = HZ / 10, 91 TIMERTICK = HZ / 10,
89 MINTIMER = HZ >> 2, 92 RTTSCALE = 8,
90 MAXTIMER = HZ << 1, 93 RTTDSCALE = 3,
94 RTTAVG_INIT = USEC_PER_SEC / 4 << RTTSCALE,
95 RTTDEV_INIT = RTTAVG_INIT / 4,
96
97 HARD_SCORN_SECS = 10, /* try another remote port after this */
98 MAX_TAINT = 1000, /* cap on aoetgt taint */
91}; 99};
92 100
93struct buf { 101struct buf {
@@ -100,10 +108,17 @@ struct buf {
100 struct request *rq; 108 struct request *rq;
101}; 109};
102 110
111enum frame_flags {
112 FFL_PROBE = 1,
113};
114
103struct frame { 115struct frame {
104 struct list_head head; 116 struct list_head head;
105 u32 tag; 117 u32 tag;
118 struct timeval sent; /* high-res time packet was sent */
119 u32 sent_jiffs; /* low-res jiffies-based sent time */
106 ulong waited; 120 ulong waited;
121 ulong waited_total;
107 struct aoetgt *t; /* parent target I belong to */ 122 struct aoetgt *t; /* parent target I belong to */
108 sector_t lba; 123 sector_t lba;
109 struct sk_buff *skb; /* command skb freed on module exit */ 124 struct sk_buff *skb; /* command skb freed on module exit */
@@ -112,6 +127,7 @@ struct frame {
112 struct bio_vec *bv; 127 struct bio_vec *bv;
113 ulong bcnt; 128 ulong bcnt;
114 ulong bv_off; 129 ulong bv_off;
130 char flags;
115}; 131};
116 132
117struct aoeif { 133struct aoeif {
@@ -122,28 +138,31 @@ struct aoeif {
122 138
123struct aoetgt { 139struct aoetgt {
124 unsigned char addr[6]; 140 unsigned char addr[6];
125 ushort nframes; 141 ushort nframes; /* cap on frames to use */
126 struct aoedev *d; /* parent device I belong to */ 142 struct aoedev *d; /* parent device I belong to */
127 struct list_head ffree; /* list of free frames */ 143 struct list_head ffree; /* list of free frames */
128 struct aoeif ifs[NAOEIFS]; 144 struct aoeif ifs[NAOEIFS];
129 struct aoeif *ifp; /* current aoeif in use */ 145 struct aoeif *ifp; /* current aoeif in use */
130 ushort nout; 146 ushort nout; /* number of AoE commands outstanding */
131 ushort maxout; 147 ushort maxout; /* current value for max outstanding */
132 ulong falloc; 148 ushort next_cwnd; /* incr maxout after decrementing to zero */
133 ulong lastwadj; /* last window adjustment */ 149 ushort ssthresh; /* slow start threshold */
150 ulong falloc; /* number of allocated frames */
151 int taint; /* how much we want to avoid this aoetgt */
134 int minbcnt; 152 int minbcnt;
135 int wpkts, rpkts; 153 int wpkts, rpkts;
154 char nout_probes;
136}; 155};
137 156
138struct aoedev { 157struct aoedev {
139 struct aoedev *next; 158 struct aoedev *next;
140 ulong sysminor; 159 ulong sysminor;
141 ulong aoemajor; 160 ulong aoemajor;
161 u32 rttavg; /* scaled AoE round trip time average */
162 u32 rttdev; /* scaled round trip time mean deviation */
142 u16 aoeminor; 163 u16 aoeminor;
143 u16 flags; 164 u16 flags;
144 u16 nopen; /* (bd_openers isn't available without sleeping) */ 165 u16 nopen; /* (bd_openers isn't available without sleeping) */
145 u16 rttavg; /* round trip average of requests/responses */
146 u16 mintimer;
147 u16 fw_ver; /* version of blade's firmware */ 166 u16 fw_ver; /* version of blade's firmware */
148 u16 lasttag; /* last tag sent */ 167 u16 lasttag; /* last tag sent */
149 u16 useme; 168 u16 useme;
@@ -151,7 +170,7 @@ struct aoedev {
151 struct work_struct work;/* disk create work struct */ 170 struct work_struct work;/* disk create work struct */
152 struct gendisk *gd; 171 struct gendisk *gd;
153 struct request_queue *blkq; 172 struct request_queue *blkq;
154 struct hd_geometry geo; 173 struct hd_geometry geo;
155 sector_t ssize; 174 sector_t ssize;
156 struct timer_list timer; 175 struct timer_list timer;
157 spinlock_t lock; 176 spinlock_t lock;
@@ -164,11 +183,12 @@ struct aoedev {
164 } ip; 183 } ip;
165 ulong maxbcnt; 184 ulong maxbcnt;
166 struct list_head factive[NFACTIVE]; /* hash of active frames */ 185 struct list_head factive[NFACTIVE]; /* hash of active frames */
167 struct aoetgt *targets[NTARGETS]; 186 struct list_head rexmitq; /* deferred retransmissions */
187 struct aoetgt **targets;
188 ulong ntargets; /* number of allocated aoetgt pointers */
168 struct aoetgt **tgt; /* target in use when working */ 189 struct aoetgt **tgt; /* target in use when working */
169 struct aoetgt *htgt; /* target needing rexmit assistance */
170 ulong ntargets;
171 ulong kicked; 190 ulong kicked;
191 char ident[512];
172}; 192};
173 193
174/* kthread tracking */ 194/* kthread tracking */
@@ -195,6 +215,7 @@ void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor);
195struct sk_buff *aoecmd_ata_rsp(struct sk_buff *); 215struct sk_buff *aoecmd_ata_rsp(struct sk_buff *);
196void aoecmd_cfg_rsp(struct sk_buff *); 216void aoecmd_cfg_rsp(struct sk_buff *);
197void aoecmd_sleepwork(struct work_struct *); 217void aoecmd_sleepwork(struct work_struct *);
218void aoecmd_wreset(struct aoetgt *t);
198void aoecmd_cleanslate(struct aoedev *); 219void aoecmd_cleanslate(struct aoedev *);
199void aoecmd_exit(void); 220void aoecmd_exit(void);
200int aoecmd_init(void); 221int aoecmd_init(void);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 00dfc5008ad..a129f8c8073 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -16,11 +16,19 @@
16#include <linux/netdevice.h> 16#include <linux/netdevice.h>
17#include <linux/mutex.h> 17#include <linux/mutex.h>
18#include <linux/export.h> 18#include <linux/export.h>
19#include <linux/moduleparam.h>
20#include <scsi/sg.h>
19#include "aoe.h" 21#include "aoe.h"
20 22
21static DEFINE_MUTEX(aoeblk_mutex); 23static DEFINE_MUTEX(aoeblk_mutex);
22static struct kmem_cache *buf_pool_cache; 24static struct kmem_cache *buf_pool_cache;
23 25
26/* GPFS needs a larger value than the default. */
27static int aoe_maxsectors;
28module_param(aoe_maxsectors, int, 0644);
29MODULE_PARM_DESC(aoe_maxsectors,
30 "When nonzero, set the maximum number of sectors per I/O request");
31
24static ssize_t aoedisk_show_state(struct device *dev, 32static ssize_t aoedisk_show_state(struct device *dev,
25 struct device_attribute *attr, char *page) 33 struct device_attribute *attr, char *page)
26{ 34{
@@ -59,7 +67,7 @@ static ssize_t aoedisk_show_netif(struct device *dev,
59 nd = nds; 67 nd = nds;
60 ne = nd + ARRAY_SIZE(nds); 68 ne = nd + ARRAY_SIZE(nds);
61 t = d->targets; 69 t = d->targets;
62 te = t + NTARGETS; 70 te = t + d->ntargets;
63 for (; t < te && *t; t++) { 71 for (; t < te && *t; t++) {
64 ifp = (*t)->ifs; 72 ifp = (*t)->ifs;
65 e = ifp + NAOEIFS; 73 e = ifp + NAOEIFS;
@@ -91,6 +99,14 @@ static ssize_t aoedisk_show_fwver(struct device *dev,
91 99
92 return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver); 100 return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver);
93} 101}
102static ssize_t aoedisk_show_payload(struct device *dev,
103 struct device_attribute *attr, char *page)
104{
105 struct gendisk *disk = dev_to_disk(dev);
106 struct aoedev *d = disk->private_data;
107
108 return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt);
109}
94 110
95static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL); 111static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
96static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL); 112static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
@@ -99,12 +115,14 @@ static struct device_attribute dev_attr_firmware_version = {
99 .attr = { .name = "firmware-version", .mode = S_IRUGO }, 115 .attr = { .name = "firmware-version", .mode = S_IRUGO },
100 .show = aoedisk_show_fwver, 116 .show = aoedisk_show_fwver,
101}; 117};
118static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL);
102 119
103static struct attribute *aoe_attrs[] = { 120static struct attribute *aoe_attrs[] = {
104 &dev_attr_state.attr, 121 &dev_attr_state.attr,
105 &dev_attr_mac.attr, 122 &dev_attr_mac.attr,
106 &dev_attr_netif.attr, 123 &dev_attr_netif.attr,
107 &dev_attr_firmware_version.attr, 124 &dev_attr_firmware_version.attr,
125 &dev_attr_payload.attr,
108 NULL, 126 NULL,
109}; 127};
110 128
@@ -129,9 +147,18 @@ aoeblk_open(struct block_device *bdev, fmode_t mode)
129 struct aoedev *d = bdev->bd_disk->private_data; 147 struct aoedev *d = bdev->bd_disk->private_data;
130 ulong flags; 148 ulong flags;
131 149
150 if (!virt_addr_valid(d)) {
151 pr_crit("aoe: invalid device pointer in %s\n",
152 __func__);
153 WARN_ON(1);
154 return -ENODEV;
155 }
156 if (!(d->flags & DEVFL_UP) || d->flags & DEVFL_TKILL)
157 return -ENODEV;
158
132 mutex_lock(&aoeblk_mutex); 159 mutex_lock(&aoeblk_mutex);
133 spin_lock_irqsave(&d->lock, flags); 160 spin_lock_irqsave(&d->lock, flags);
134 if (d->flags & DEVFL_UP) { 161 if (d->flags & DEVFL_UP && !(d->flags & DEVFL_TKILL)) {
135 d->nopen++; 162 d->nopen++;
136 spin_unlock_irqrestore(&d->lock, flags); 163 spin_unlock_irqrestore(&d->lock, flags);
137 mutex_unlock(&aoeblk_mutex); 164 mutex_unlock(&aoeblk_mutex);
@@ -195,9 +222,38 @@ aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
195 return 0; 222 return 0;
196} 223}
197 224
225static int
226aoeblk_ioctl(struct block_device *bdev, fmode_t mode, uint cmd, ulong arg)
227{
228 struct aoedev *d;
229
230 if (!arg)
231 return -EINVAL;
232
233 d = bdev->bd_disk->private_data;
234 if ((d->flags & DEVFL_UP) == 0) {
235 pr_err("aoe: disk not up\n");
236 return -ENODEV;
237 }
238
239 if (cmd == HDIO_GET_IDENTITY) {
240 if (!copy_to_user((void __user *) arg, &d->ident,
241 sizeof(d->ident)))
242 return 0;
243 return -EFAULT;
244 }
245
246 /* udev calls scsi_id, which uses SG_IO, resulting in noise */
247 if (cmd != SG_IO)
248 pr_info("aoe: unknown ioctl 0x%x\n", cmd);
249
250 return -ENOTTY;
251}
252
198static const struct block_device_operations aoe_bdops = { 253static const struct block_device_operations aoe_bdops = {
199 .open = aoeblk_open, 254 .open = aoeblk_open,
200 .release = aoeblk_release, 255 .release = aoeblk_release,
256 .ioctl = aoeblk_ioctl,
201 .getgeo = aoeblk_getgeo, 257 .getgeo = aoeblk_getgeo,
202 .owner = THIS_MODULE, 258 .owner = THIS_MODULE,
203}; 259};
@@ -212,6 +268,18 @@ aoeblk_gdalloc(void *vp)
212 struct request_queue *q; 268 struct request_queue *q;
213 enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, }; 269 enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, };
214 ulong flags; 270 ulong flags;
271 int late = 0;
272
273 spin_lock_irqsave(&d->lock, flags);
274 if (d->flags & DEVFL_GDALLOC
275 && !(d->flags & DEVFL_TKILL)
276 && !(d->flags & DEVFL_GD_NOW))
277 d->flags |= DEVFL_GD_NOW;
278 else
279 late = 1;
280 spin_unlock_irqrestore(&d->lock, flags);
281 if (late)
282 return;
215 283
216 gd = alloc_disk(AOE_PARTITIONS); 284 gd = alloc_disk(AOE_PARTITIONS);
217 if (gd == NULL) { 285 if (gd == NULL) {
@@ -231,23 +299,24 @@ aoeblk_gdalloc(void *vp)
231 if (q == NULL) { 299 if (q == NULL) {
232 pr_err("aoe: cannot allocate block queue for %ld.%d\n", 300 pr_err("aoe: cannot allocate block queue for %ld.%d\n",
233 d->aoemajor, d->aoeminor); 301 d->aoemajor, d->aoeminor);
234 mempool_destroy(mp); 302 goto err_mempool;
235 goto err_disk;
236 } 303 }
237 304
238 d->blkq = blk_alloc_queue(GFP_KERNEL);
239 if (!d->blkq)
240 goto err_mempool;
241 d->blkq->backing_dev_info.name = "aoe";
242 if (bdi_init(&d->blkq->backing_dev_info))
243 goto err_blkq;
244 spin_lock_irqsave(&d->lock, flags); 305 spin_lock_irqsave(&d->lock, flags);
245 blk_queue_max_hw_sectors(d->blkq, BLK_DEF_MAX_SECTORS); 306 WARN_ON(!(d->flags & DEVFL_GD_NOW));
307 WARN_ON(!(d->flags & DEVFL_GDALLOC));
308 WARN_ON(d->flags & DEVFL_TKILL);
309 WARN_ON(d->gd);
310 WARN_ON(d->flags & DEVFL_UP);
311 blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
312 q->backing_dev_info.name = "aoe";
246 q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE; 313 q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
247 d->bufpool = mp; 314 d->bufpool = mp;
248 d->blkq = gd->queue = q; 315 d->blkq = gd->queue = q;
249 q->queuedata = d; 316 q->queuedata = d;
250 d->gd = gd; 317 d->gd = gd;
318 if (aoe_maxsectors)
319 blk_queue_max_hw_sectors(q, aoe_maxsectors);
251 gd->major = AOE_MAJOR; 320 gd->major = AOE_MAJOR;
252 gd->first_minor = d->sysminor; 321 gd->first_minor = d->sysminor;
253 gd->fops = &aoe_bdops; 322 gd->fops = &aoe_bdops;
@@ -263,18 +332,21 @@ aoeblk_gdalloc(void *vp)
263 332
264 add_disk(gd); 333 add_disk(gd);
265 aoedisk_add_sysfs(d); 334 aoedisk_add_sysfs(d);
335
336 spin_lock_irqsave(&d->lock, flags);
337 WARN_ON(!(d->flags & DEVFL_GD_NOW));
338 d->flags &= ~DEVFL_GD_NOW;
339 spin_unlock_irqrestore(&d->lock, flags);
266 return; 340 return;
267 341
268err_blkq:
269 blk_cleanup_queue(d->blkq);
270 d->blkq = NULL;
271err_mempool: 342err_mempool:
272 mempool_destroy(d->bufpool); 343 mempool_destroy(mp);
273err_disk: 344err_disk:
274 put_disk(gd); 345 put_disk(gd);
275err: 346err:
276 spin_lock_irqsave(&d->lock, flags); 347 spin_lock_irqsave(&d->lock, flags);
277 d->flags &= ~DEVFL_GDALLOC; 348 d->flags &= ~DEVFL_GD_NOW;
349 schedule_work(&d->work);
278 spin_unlock_irqrestore(&d->lock, flags); 350 spin_unlock_irqrestore(&d->lock, flags);
279} 351}
280 352
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index ed57a890c64..42e67ad6bd2 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -39,6 +39,11 @@ struct ErrMsg {
39}; 39};
40 40
41static DEFINE_MUTEX(aoechr_mutex); 41static DEFINE_MUTEX(aoechr_mutex);
42
43/* A ring buffer of error messages, to be read through
44 * "/dev/etherd/err". When no messages are present,
45 * readers will block waiting for messages to appear.
46 */
42static struct ErrMsg emsgs[NMSG]; 47static struct ErrMsg emsgs[NMSG];
43static int emsgs_head_idx, emsgs_tail_idx; 48static int emsgs_head_idx, emsgs_tail_idx;
44static struct completion emsgs_comp; 49static struct completion emsgs_comp;
@@ -282,7 +287,7 @@ aoechr_init(void)
282 int n, i; 287 int n, i;
283 288
284 n = register_chrdev(AOE_MAJOR, "aoechr", &aoe_fops); 289 n = register_chrdev(AOE_MAJOR, "aoechr", &aoe_fops);
285 if (n < 0) { 290 if (n < 0) {
286 printk(KERN_ERR "aoe: can't register char device\n"); 291 printk(KERN_ERR "aoe: can't register char device\n");
287 return n; 292 return n;
288 } 293 }
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 9fe4f186555..25ef5c014fc 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -22,6 +22,7 @@
22#define MAXIOC (8192) /* default meant to avoid most soft lockups */ 22#define MAXIOC (8192) /* default meant to avoid most soft lockups */
23 23
24static void ktcomplete(struct frame *, struct sk_buff *); 24static void ktcomplete(struct frame *, struct sk_buff *);
25static int count_targets(struct aoedev *d, int *untainted);
25 26
26static struct buf *nextbuf(struct aoedev *); 27static struct buf *nextbuf(struct aoedev *);
27 28
@@ -29,7 +30,7 @@ static int aoe_deadsecs = 60 * 3;
29module_param(aoe_deadsecs, int, 0644); 30module_param(aoe_deadsecs, int, 0644);
30MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev."); 31MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
31 32
32static int aoe_maxout = 16; 33static int aoe_maxout = 64;
33module_param(aoe_maxout, int, 0644); 34module_param(aoe_maxout, int, 0644);
34MODULE_PARM_DESC(aoe_maxout, 35MODULE_PARM_DESC(aoe_maxout,
35 "Only aoe_maxout outstanding packets for every MAC on eX.Y."); 36 "Only aoe_maxout outstanding packets for every MAC on eX.Y.");
@@ -43,6 +44,8 @@ static struct {
43 spinlock_t lock; 44 spinlock_t lock;
44} iocq; 45} iocq;
45 46
47static struct page *empty_page;
48
46static struct sk_buff * 49static struct sk_buff *
47new_skb(ulong len) 50new_skb(ulong len)
48{ 51{
@@ -59,6 +62,23 @@ new_skb(ulong len)
59} 62}
60 63
61static struct frame * 64static struct frame *
65getframe_deferred(struct aoedev *d, u32 tag)
66{
67 struct list_head *head, *pos, *nx;
68 struct frame *f;
69
70 head = &d->rexmitq;
71 list_for_each_safe(pos, nx, head) {
72 f = list_entry(pos, struct frame, head);
73 if (f->tag == tag) {
74 list_del(pos);
75 return f;
76 }
77 }
78 return NULL;
79}
80
81static struct frame *
62getframe(struct aoedev *d, u32 tag) 82getframe(struct aoedev *d, u32 tag)
63{ 83{
64 struct frame *f; 84 struct frame *f;
@@ -162,8 +182,10 @@ aoe_freetframe(struct frame *f)
162 182
163 t = f->t; 183 t = f->t;
164 f->buf = NULL; 184 f->buf = NULL;
185 f->lba = 0;
165 f->bv = NULL; 186 f->bv = NULL;
166 f->r_skb = NULL; 187 f->r_skb = NULL;
188 f->flags = 0;
167 list_add(&f->head, &t->ffree); 189 list_add(&f->head, &t->ffree);
168} 190}
169 191
@@ -217,20 +239,25 @@ newframe(struct aoedev *d)
217 struct frame *f; 239 struct frame *f;
218 struct aoetgt *t, **tt; 240 struct aoetgt *t, **tt;
219 int totout = 0; 241 int totout = 0;
242 int use_tainted;
243 int has_untainted;
220 244
221 if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */ 245 if (!d->targets || !d->targets[0]) {
222 printk(KERN_ERR "aoe: NULL TARGETS!\n"); 246 printk(KERN_ERR "aoe: NULL TARGETS!\n");
223 return NULL; 247 return NULL;
224 } 248 }
225 tt = d->tgt; /* last used target */ 249 tt = d->tgt; /* last used target */
226 for (;;) { 250 for (use_tainted = 0, has_untainted = 0;;) {
227 tt++; 251 tt++;
228 if (tt >= &d->targets[NTARGETS] || !*tt) 252 if (tt >= &d->targets[d->ntargets] || !*tt)
229 tt = d->targets; 253 tt = d->targets;
230 t = *tt; 254 t = *tt;
231 totout += t->nout; 255 if (!t->taint) {
256 has_untainted = 1;
257 totout += t->nout;
258 }
232 if (t->nout < t->maxout 259 if (t->nout < t->maxout
233 && t != d->htgt 260 && (use_tainted || !t->taint)
234 && t->ifp->nd) { 261 && t->ifp->nd) {
235 f = newtframe(d, t); 262 f = newtframe(d, t);
236 if (f) { 263 if (f) {
@@ -239,8 +266,12 @@ newframe(struct aoedev *d)
239 return f; 266 return f;
240 } 267 }
241 } 268 }
242 if (tt == d->tgt) /* we've looped and found nada */ 269 if (tt == d->tgt) { /* we've looped and found nada */
243 break; 270 if (!use_tainted && !has_untainted)
271 use_tainted = 1;
272 else
273 break;
274 }
244 } 275 }
245 if (totout == 0) { 276 if (totout == 0) {
246 d->kicked++; 277 d->kicked++;
@@ -277,21 +308,68 @@ fhash(struct frame *f)
277 list_add_tail(&f->head, &d->factive[n]); 308 list_add_tail(&f->head, &d->factive[n]);
278} 309}
279 310
311static void
312ata_rw_frameinit(struct frame *f)
313{
314 struct aoetgt *t;
315 struct aoe_hdr *h;
316 struct aoe_atahdr *ah;
317 struct sk_buff *skb;
318 char writebit, extbit;
319
320 skb = f->skb;
321 h = (struct aoe_hdr *) skb_mac_header(skb);
322 ah = (struct aoe_atahdr *) (h + 1);
323 skb_put(skb, sizeof(*h) + sizeof(*ah));
324 memset(h, 0, skb->len);
325
326 writebit = 0x10;
327 extbit = 0x4;
328
329 t = f->t;
330 f->tag = aoehdr_atainit(t->d, t, h);
331 fhash(f);
332 t->nout++;
333 f->waited = 0;
334 f->waited_total = 0;
335 if (f->buf)
336 f->lba = f->buf->sector;
337
338 /* set up ata header */
339 ah->scnt = f->bcnt >> 9;
340 put_lba(ah, f->lba);
341 if (t->d->flags & DEVFL_EXT) {
342 ah->aflags |= AOEAFL_EXT;
343 } else {
344 extbit = 0;
345 ah->lba3 &= 0x0f;
346 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
347 }
348 if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
349 skb_fillup(skb, f->bv, f->bv_off, f->bcnt);
350 ah->aflags |= AOEAFL_WRITE;
351 skb->len += f->bcnt;
352 skb->data_len = f->bcnt;
353 skb->truesize += f->bcnt;
354 t->wpkts++;
355 } else {
356 t->rpkts++;
357 writebit = 0;
358 }
359
360 ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
361 skb->dev = t->ifp->nd;
362}
363
280static int 364static int
281aoecmd_ata_rw(struct aoedev *d) 365aoecmd_ata_rw(struct aoedev *d)
282{ 366{
283 struct frame *f; 367 struct frame *f;
284 struct aoe_hdr *h;
285 struct aoe_atahdr *ah;
286 struct buf *buf; 368 struct buf *buf;
287 struct aoetgt *t; 369 struct aoetgt *t;
288 struct sk_buff *skb; 370 struct sk_buff *skb;
289 struct sk_buff_head queue; 371 struct sk_buff_head queue;
290 ulong bcnt, fbcnt; 372 ulong bcnt, fbcnt;
291 char writebit, extbit;
292
293 writebit = 0x10;
294 extbit = 0x4;
295 373
296 buf = nextbuf(d); 374 buf = nextbuf(d);
297 if (buf == NULL) 375 if (buf == NULL)
@@ -326,50 +404,18 @@ aoecmd_ata_rw(struct aoedev *d)
326 } while (fbcnt); 404 } while (fbcnt);
327 405
328 /* initialize the headers & frame */ 406 /* initialize the headers & frame */
329 skb = f->skb;
330 h = (struct aoe_hdr *) skb_mac_header(skb);
331 ah = (struct aoe_atahdr *) (h+1);
332 skb_put(skb, sizeof *h + sizeof *ah);
333 memset(h, 0, skb->len);
334 f->tag = aoehdr_atainit(d, t, h);
335 fhash(f);
336 t->nout++;
337 f->waited = 0;
338 f->buf = buf; 407 f->buf = buf;
339 f->bcnt = bcnt; 408 f->bcnt = bcnt;
340 f->lba = buf->sector; 409 ata_rw_frameinit(f);
341
342 /* set up ata header */
343 ah->scnt = bcnt >> 9;
344 put_lba(ah, buf->sector);
345 if (d->flags & DEVFL_EXT) {
346 ah->aflags |= AOEAFL_EXT;
347 } else {
348 extbit = 0;
349 ah->lba3 &= 0x0f;
350 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
351 }
352 if (bio_data_dir(buf->bio) == WRITE) {
353 skb_fillup(skb, f->bv, f->bv_off, bcnt);
354 ah->aflags |= AOEAFL_WRITE;
355 skb->len += bcnt;
356 skb->data_len = bcnt;
357 skb->truesize += bcnt;
358 t->wpkts++;
359 } else {
360 t->rpkts++;
361 writebit = 0;
362 }
363
364 ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
365 410
366 /* mark all tracking fields and load out */ 411 /* mark all tracking fields and load out */
367 buf->nframesout += 1; 412 buf->nframesout += 1;
368 buf->sector += bcnt >> 9; 413 buf->sector += bcnt >> 9;
369 414
370 skb->dev = t->ifp->nd; 415 skb = skb_clone(f->skb, GFP_ATOMIC);
371 skb = skb_clone(skb, GFP_ATOMIC);
372 if (skb) { 416 if (skb) {
417 do_gettimeofday(&f->sent);
418 f->sent_jiffs = (u32) jiffies;
373 __skb_queue_head_init(&queue); 419 __skb_queue_head_init(&queue);
374 __skb_queue_tail(&queue, skb); 420 __skb_queue_tail(&queue, skb);
375 aoenet_xmit(&queue); 421 aoenet_xmit(&queue);
@@ -442,11 +488,14 @@ resend(struct aoedev *d, struct frame *f)
442 h = (struct aoe_hdr *) skb_mac_header(skb); 488 h = (struct aoe_hdr *) skb_mac_header(skb);
443 ah = (struct aoe_atahdr *) (h+1); 489 ah = (struct aoe_atahdr *) (h+1);
444 490
445 snprintf(buf, sizeof buf, 491 if (!(f->flags & FFL_PROBE)) {
446 "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n", 492 snprintf(buf, sizeof(buf),
447 "retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n, 493 "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
448 h->src, h->dst, t->nout); 494 "retransmit", d->aoemajor, d->aoeminor,
449 aoechr_error(buf); 495 f->tag, jiffies, n,
496 h->src, h->dst, t->nout);
497 aoechr_error(buf);
498 }
450 499
451 f->tag = n; 500 f->tag = n;
452 fhash(f); 501 fhash(f);
@@ -458,12 +507,46 @@ resend(struct aoedev *d, struct frame *f)
458 skb = skb_clone(skb, GFP_ATOMIC); 507 skb = skb_clone(skb, GFP_ATOMIC);
459 if (skb == NULL) 508 if (skb == NULL)
460 return; 509 return;
510 do_gettimeofday(&f->sent);
511 f->sent_jiffs = (u32) jiffies;
461 __skb_queue_head_init(&queue); 512 __skb_queue_head_init(&queue);
462 __skb_queue_tail(&queue, skb); 513 __skb_queue_tail(&queue, skb);
463 aoenet_xmit(&queue); 514 aoenet_xmit(&queue);
464} 515}
465 516
466static int 517static int
518tsince_hr(struct frame *f)
519{
520 struct timeval now;
521 int n;
522
523 do_gettimeofday(&now);
524 n = now.tv_usec - f->sent.tv_usec;
525 n += (now.tv_sec - f->sent.tv_sec) * USEC_PER_SEC;
526
527 if (n < 0)
528 n = -n;
529
530 /* For relatively long periods, use jiffies to avoid
531 * discrepancies caused by updates to the system time.
532 *
533 * On system with HZ of 1000, 32-bits is over 49 days
534 * worth of jiffies, or over 71 minutes worth of usecs.
535 *
536 * Jiffies overflow is handled by subtraction of unsigned ints:
537 * (gdb) print (unsigned) 2 - (unsigned) 0xfffffffe
538 * $3 = 4
539 * (gdb)
540 */
541 if (n > USEC_PER_SEC / 4) {
542 n = ((u32) jiffies) - f->sent_jiffs;
543 n *= USEC_PER_SEC / HZ;
544 }
545
546 return n;
547}
548
549static int
467tsince(u32 tag) 550tsince(u32 tag)
468{ 551{
469 int n; 552 int n;
@@ -472,7 +555,7 @@ tsince(u32 tag)
472 n -= tag & 0xffff; 555 n -= tag & 0xffff;
473 if (n < 0) 556 if (n < 0)
474 n += 1<<16; 557 n += 1<<16;
475 return n; 558 return jiffies_to_usecs(n + 1);
476} 559}
477 560
478static struct aoeif * 561static struct aoeif *
@@ -503,70 +586,189 @@ ejectif(struct aoetgt *t, struct aoeif *ifp)
503 dev_put(nd); 586 dev_put(nd);
504} 587}
505 588
506static int 589static struct frame *
507sthtith(struct aoedev *d) 590reassign_frame(struct frame *f)
508{ 591{
509 struct frame *f, *nf; 592 struct frame *nf;
510 struct list_head *nx, *pos, *head;
511 struct sk_buff *skb; 593 struct sk_buff *skb;
512 struct aoetgt *ht = d->htgt;
513 int i;
514 594
515 for (i = 0; i < NFACTIVE; i++) { 595 nf = newframe(f->t->d);
516 head = &d->factive[i]; 596 if (!nf)
517 list_for_each_safe(pos, nx, head) { 597 return NULL;
518 f = list_entry(pos, struct frame, head); 598 if (nf->t == f->t) {
519 if (f->t != ht) 599 aoe_freetframe(nf);
520 continue; 600 return NULL;
601 }
521 602
522 nf = newframe(d); 603 skb = nf->skb;
523 if (!nf) 604 nf->skb = f->skb;
524 return 0; 605 nf->buf = f->buf;
606 nf->bcnt = f->bcnt;
607 nf->lba = f->lba;
608 nf->bv = f->bv;
609 nf->bv_off = f->bv_off;
610 nf->waited = 0;
611 nf->waited_total = f->waited_total;
612 nf->sent = f->sent;
613 nf->sent_jiffs = f->sent_jiffs;
614 f->skb = skb;
615
616 return nf;
617}
525 618
526 /* remove frame from active list */ 619static void
527 list_del(pos); 620probe(struct aoetgt *t)
621{
622 struct aoedev *d;
623 struct frame *f;
624 struct sk_buff *skb;
625 struct sk_buff_head queue;
626 size_t n, m;
627 int frag;
528 628
529 /* reassign all pertinent bits to new outbound frame */ 629 d = t->d;
530 skb = nf->skb; 630 f = newtframe(d, t);
531 nf->skb = f->skb; 631 if (!f) {
532 nf->buf = f->buf; 632 pr_err("%s %pm for e%ld.%d: %s\n",
533 nf->bcnt = f->bcnt; 633 "aoe: cannot probe remote address",
534 nf->lba = f->lba; 634 t->addr,
535 nf->bv = f->bv; 635 (long) d->aoemajor, d->aoeminor,
536 nf->bv_off = f->bv_off; 636 "no frame available");
537 nf->waited = 0; 637 return;
538 f->skb = skb; 638 }
639 f->flags |= FFL_PROBE;
640 ifrotate(t);
641 f->bcnt = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
642 ata_rw_frameinit(f);
643 skb = f->skb;
644 for (frag = 0, n = f->bcnt; n > 0; ++frag, n -= m) {
645 if (n < PAGE_SIZE)
646 m = n;
647 else
648 m = PAGE_SIZE;
649 skb_fill_page_desc(skb, frag, empty_page, 0, m);
650 }
651 skb->len += f->bcnt;
652 skb->data_len = f->bcnt;
653 skb->truesize += f->bcnt;
654
655 skb = skb_clone(f->skb, GFP_ATOMIC);
656 if (skb) {
657 do_gettimeofday(&f->sent);
658 f->sent_jiffs = (u32) jiffies;
659 __skb_queue_head_init(&queue);
660 __skb_queue_tail(&queue, skb);
661 aoenet_xmit(&queue);
662 }
663}
664
665static long
666rto(struct aoedev *d)
667{
668 long t;
669
670 t = 2 * d->rttavg >> RTTSCALE;
671 t += 8 * d->rttdev >> RTTDSCALE;
672 if (t == 0)
673 t = 1;
674
675 return t;
676}
677
678static void
679rexmit_deferred(struct aoedev *d)
680{
681 struct aoetgt *t;
682 struct frame *f;
683 struct frame *nf;
684 struct list_head *pos, *nx, *head;
685 int since;
686 int untainted;
687
688 count_targets(d, &untainted);
689
690 head = &d->rexmitq;
691 list_for_each_safe(pos, nx, head) {
692 f = list_entry(pos, struct frame, head);
693 t = f->t;
694 if (t->taint) {
695 if (!(f->flags & FFL_PROBE)) {
696 nf = reassign_frame(f);
697 if (nf) {
698 if (t->nout_probes == 0
699 && untainted > 0) {
700 probe(t);
701 t->nout_probes++;
702 }
703 list_replace(&f->head, &nf->head);
704 pos = &nf->head;
705 aoe_freetframe(f);
706 f = nf;
707 t = f->t;
708 }
709 } else if (untainted < 1) {
710 /* don't probe w/o other untainted aoetgts */
711 goto stop_probe;
712 } else if (tsince_hr(f) < t->taint * rto(d)) {
713 /* reprobe slowly when taint is high */
714 continue;
715 }
716 } else if (f->flags & FFL_PROBE) {
717stop_probe: /* don't probe untainted aoetgts */
718 list_del(pos);
539 aoe_freetframe(f); 719 aoe_freetframe(f);
540 ht->nout--; 720 /* leaving d->kicked, because this is routine */
541 nf->t->nout++; 721 f->t->d->flags |= DEVFL_KICKME;
542 resend(d, nf); 722 continue;
543 } 723 }
724 if (t->nout >= t->maxout)
725 continue;
726 list_del(pos);
727 t->nout++;
728 if (f->flags & FFL_PROBE)
729 t->nout_probes++;
730 since = tsince_hr(f);
731 f->waited += since;
732 f->waited_total += since;
733 resend(d, f);
544 } 734 }
545 /* We've cleaned up the outstanding so take away his
546 * interfaces so he won't be used. We should remove him from
547 * the target array here, but cleaning up a target is
548 * involved. PUNT!
549 */
550 memset(ht->ifs, 0, sizeof ht->ifs);
551 d->htgt = NULL;
552 return 1;
553} 735}
554 736
555static inline unsigned char 737/* An aoetgt accumulates demerits quickly, and successful
556ata_scnt(unsigned char *packet) { 738 * probing redeems the aoetgt slowly.
557 struct aoe_hdr *h; 739 */
558 struct aoe_atahdr *ah; 740static void
741scorn(struct aoetgt *t)
742{
743 int n;
559 744
560 h = (struct aoe_hdr *) packet; 745 n = t->taint++;
561 ah = (struct aoe_atahdr *) (h+1); 746 t->taint += t->taint * 2;
562 return ah->scnt; 747 if (n > t->taint)
748 t->taint = n;
749 if (t->taint > MAX_TAINT)
750 t->taint = MAX_TAINT;
751}
752
753static int
754count_targets(struct aoedev *d, int *untainted)
755{
756 int i, good;
757
758 for (i = good = 0; i < d->ntargets && d->targets[i]; ++i)
759 if (d->targets[i]->taint == 0)
760 good++;
761
762 if (untainted)
763 *untainted = good;
764 return i;
563} 765}
564 766
565static void 767static void
566rexmit_timer(ulong vp) 768rexmit_timer(ulong vp)
567{ 769{
568 struct aoedev *d; 770 struct aoedev *d;
569 struct aoetgt *t, **tt, **te; 771 struct aoetgt *t;
570 struct aoeif *ifp; 772 struct aoeif *ifp;
571 struct frame *f; 773 struct frame *f;
572 struct list_head *head, *pos, *nx; 774 struct list_head *head, *pos, *nx;
@@ -574,15 +776,18 @@ rexmit_timer(ulong vp)
574 register long timeout; 776 register long timeout;
575 ulong flags, n; 777 ulong flags, n;
576 int i; 778 int i;
779 int utgts; /* number of aoetgt descriptors (not slots) */
780 int since;
577 781
578 d = (struct aoedev *) vp; 782 d = (struct aoedev *) vp;
579 783
580 /* timeout is always ~150% of the moving average */
581 timeout = d->rttavg;
582 timeout += timeout >> 1;
583
584 spin_lock_irqsave(&d->lock, flags); 784 spin_lock_irqsave(&d->lock, flags);
585 785
786 /* timeout based on observed timings and variations */
787 timeout = rto(d);
788
789 utgts = count_targets(d, NULL);
790
586 if (d->flags & DEVFL_TKILL) { 791 if (d->flags & DEVFL_TKILL) {
587 spin_unlock_irqrestore(&d->lock, flags); 792 spin_unlock_irqrestore(&d->lock, flags);
588 return; 793 return;
@@ -593,67 +798,61 @@ rexmit_timer(ulong vp)
593 head = &d->factive[i]; 798 head = &d->factive[i];
594 list_for_each_safe(pos, nx, head) { 799 list_for_each_safe(pos, nx, head) {
595 f = list_entry(pos, struct frame, head); 800 f = list_entry(pos, struct frame, head);
596 if (tsince(f->tag) < timeout) 801 if (tsince_hr(f) < timeout)
597 break; /* end of expired frames */ 802 break; /* end of expired frames */
598 /* move to flist for later processing */ 803 /* move to flist for later processing */
599 list_move_tail(pos, &flist); 804 list_move_tail(pos, &flist);
600 } 805 }
601 } 806 }
602 /* window check */
603 tt = d->targets;
604 te = tt + d->ntargets;
605 for (; tt < te && (t = *tt); tt++) {
606 if (t->nout == t->maxout
607 && t->maxout < t->nframes
608 && (jiffies - t->lastwadj)/HZ > 10) {
609 t->maxout++;
610 t->lastwadj = jiffies;
611 }
612 }
613
614 if (!list_empty(&flist)) { /* retransmissions necessary */
615 n = d->rttavg <<= 1;
616 if (n > MAXTIMER)
617 d->rttavg = MAXTIMER;
618 }
619 807
620 /* process expired frames */ 808 /* process expired frames */
621 while (!list_empty(&flist)) { 809 while (!list_empty(&flist)) {
622 pos = flist.next; 810 pos = flist.next;
623 f = list_entry(pos, struct frame, head); 811 f = list_entry(pos, struct frame, head);
624 n = f->waited += timeout; 812 since = tsince_hr(f);
625 n /= HZ; 813 n = f->waited_total + since;
626 if (n > aoe_deadsecs) { 814 n /= USEC_PER_SEC;
815 if (aoe_deadsecs
816 && n > aoe_deadsecs
817 && !(f->flags & FFL_PROBE)) {
627 /* Waited too long. Device failure. 818 /* Waited too long. Device failure.
628 * Hang all frames on first hash bucket for downdev 819 * Hang all frames on first hash bucket for downdev
629 * to clean up. 820 * to clean up.
630 */ 821 */
631 list_splice(&flist, &d->factive[0]); 822 list_splice(&flist, &d->factive[0]);
632 aoedev_downdev(d); 823 aoedev_downdev(d);
633 break; 824 goto out;
634 } 825 }
635 list_del(pos);
636 826
637 t = f->t; 827 t = f->t;
638 if (n > aoe_deadsecs/2) 828 n = f->waited + since;
639 d->htgt = t; /* see if another target can help */ 829 n /= USEC_PER_SEC;
640 830 if (aoe_deadsecs && utgts > 0
641 if (t->nout == t->maxout) { 831 && (n > aoe_deadsecs / utgts || n > HARD_SCORN_SECS))
642 if (t->maxout > 1) 832 scorn(t); /* avoid this target */
643 t->maxout--; 833
644 t->lastwadj = jiffies; 834 if (t->maxout != 1) {
835 t->ssthresh = t->maxout / 2;
836 t->maxout = 1;
645 } 837 }
646 838
647 ifp = getif(t, f->skb->dev); 839 if (f->flags & FFL_PROBE) {
648 if (ifp && ++ifp->lost > (t->nframes << 1) 840 t->nout_probes--;
649 && (ifp != t->ifs || t->ifs[1].nd)) { 841 } else {
650 ejectif(t, ifp); 842 ifp = getif(t, f->skb->dev);
651 ifp = NULL; 843 if (ifp && ++ifp->lost > (t->nframes << 1)
844 && (ifp != t->ifs || t->ifs[1].nd)) {
845 ejectif(t, ifp);
846 ifp = NULL;
847 }
652 } 848 }
653 resend(d, f); 849 list_move_tail(pos, &d->rexmitq);
850 t->nout--;
654 } 851 }
852 rexmit_deferred(d);
655 853
656 if ((d->flags & DEVFL_KICKME || d->htgt) && d->blkq) { 854out:
855 if ((d->flags & DEVFL_KICKME) && d->blkq) {
657 d->flags &= ~DEVFL_KICKME; 856 d->flags &= ~DEVFL_KICKME;
658 d->blkq->request_fn(d->blkq); 857 d->blkq->request_fn(d->blkq);
659 } 858 }
@@ -774,8 +973,7 @@ nextbuf(struct aoedev *d)
774void 973void
775aoecmd_work(struct aoedev *d) 974aoecmd_work(struct aoedev *d)
776{ 975{
777 if (d->htgt && !sthtith(d)) 976 rexmit_deferred(d);
778 return;
779 while (aoecmd_ata_rw(d)) 977 while (aoecmd_ata_rw(d))
780 ; 978 ;
781} 979}
@@ -809,6 +1007,17 @@ aoecmd_sleepwork(struct work_struct *work)
809} 1007}
810 1008
811static void 1009static void
1010ata_ident_fixstring(u16 *id, int ns)
1011{
1012 u16 s;
1013
1014 while (ns-- > 0) {
1015 s = *id;
1016 *id++ = s >> 8 | s << 8;
1017 }
1018}
1019
1020static void
812ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) 1021ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
813{ 1022{
814 u64 ssize; 1023 u64 ssize;
@@ -843,6 +1052,11 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
843 d->geo.sectors = get_unaligned_le16(&id[56 << 1]); 1052 d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
844 } 1053 }
845 1054
1055 ata_ident_fixstring((u16 *) &id[10<<1], 10); /* serial */
1056 ata_ident_fixstring((u16 *) &id[23<<1], 4); /* firmware */
1057 ata_ident_fixstring((u16 *) &id[27<<1], 20); /* model */
1058 memcpy(d->ident, id, sizeof(d->ident));
1059
846 if (d->ssize != ssize) 1060 if (d->ssize != ssize)
847 printk(KERN_INFO 1061 printk(KERN_INFO
848 "aoe: %pm e%ld.%d v%04x has %llu sectors\n", 1062 "aoe: %pm e%ld.%d v%04x has %llu sectors\n",
@@ -862,26 +1076,28 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
862} 1076}
863 1077
864static void 1078static void
865calc_rttavg(struct aoedev *d, int rtt) 1079calc_rttavg(struct aoedev *d, struct aoetgt *t, int rtt)
866{ 1080{
867 register long n; 1081 register long n;
868 1082
869 n = rtt; 1083 n = rtt;
870 if (n < 0) { 1084
871 n = -rtt; 1085 /* cf. Congestion Avoidance and Control, Jacobson & Karels, 1988 */
872 if (n < MINTIMER) 1086 n -= d->rttavg >> RTTSCALE;
873 n = MINTIMER; 1087 d->rttavg += n;
874 else if (n > MAXTIMER) 1088 if (n < 0)
875 n = MAXTIMER; 1089 n = -n;
876 d->mintimer += (n - d->mintimer) >> 1; 1090 n -= d->rttdev >> RTTDSCALE;
877 } else if (n < d->mintimer) 1091 d->rttdev += n;
878 n = d->mintimer; 1092
879 else if (n > MAXTIMER) 1093 if (!t || t->maxout >= t->nframes)
880 n = MAXTIMER; 1094 return;
881 1095 if (t->maxout < t->ssthresh)
882 /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */ 1096 t->maxout += 1;
883 n -= d->rttavg; 1097 else if (t->nout == t->maxout && t->next_cwnd-- == 0) {
884 d->rttavg += n >> 2; 1098 t->maxout += 1;
1099 t->next_cwnd = t->maxout;
1100 }
885} 1101}
886 1102
887static struct aoetgt * 1103static struct aoetgt *
@@ -890,7 +1106,7 @@ gettgt(struct aoedev *d, char *addr)
890 struct aoetgt **t, **e; 1106 struct aoetgt **t, **e;
891 1107
892 t = d->targets; 1108 t = d->targets;
893 e = t + NTARGETS; 1109 e = t + d->ntargets;
894 for (; t < e && *t; t++) 1110 for (; t < e && *t; t++)
895 if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0) 1111 if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
896 return *t; 1112 return *t;
@@ -966,19 +1182,22 @@ ktiocomplete(struct frame *f)
966 struct aoeif *ifp; 1182 struct aoeif *ifp;
967 struct aoedev *d; 1183 struct aoedev *d;
968 long n; 1184 long n;
1185 int untainted;
969 1186
970 if (f == NULL) 1187 if (f == NULL)
971 return; 1188 return;
972 1189
973 t = f->t; 1190 t = f->t;
974 d = t->d; 1191 d = t->d;
1192 skb = f->r_skb;
1193 buf = f->buf;
1194 if (f->flags & FFL_PROBE)
1195 goto out;
1196 if (!skb) /* just fail the buf. */
1197 goto noskb;
975 1198
976 hout = (struct aoe_hdr *) skb_mac_header(f->skb); 1199 hout = (struct aoe_hdr *) skb_mac_header(f->skb);
977 ahout = (struct aoe_atahdr *) (hout+1); 1200 ahout = (struct aoe_atahdr *) (hout+1);
978 buf = f->buf;
979 skb = f->r_skb;
980 if (skb == NULL)
981 goto noskb; /* just fail the buf. */
982 1201
983 hin = (struct aoe_hdr *) skb->data; 1202 hin = (struct aoe_hdr *) skb->data;
984 skb_pull(skb, sizeof(*hin)); 1203 skb_pull(skb, sizeof(*hin));
@@ -988,9 +1207,9 @@ ktiocomplete(struct frame *f)
988 pr_err("aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n", 1207 pr_err("aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
989 ahout->cmdstat, ahin->cmdstat, 1208 ahout->cmdstat, ahin->cmdstat,
990 d->aoemajor, d->aoeminor); 1209 d->aoemajor, d->aoeminor);
991noskb: if (buf) 1210noskb: if (buf)
992 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); 1211 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
993 goto badrsp; 1212 goto out;
994 } 1213 }
995 1214
996 n = ahout->scnt << 9; 1215 n = ahout->scnt << 9;
@@ -998,8 +1217,10 @@ noskb: if (buf)
998 case ATA_CMD_PIO_READ: 1217 case ATA_CMD_PIO_READ:
999 case ATA_CMD_PIO_READ_EXT: 1218 case ATA_CMD_PIO_READ_EXT:
1000 if (skb->len < n) { 1219 if (skb->len < n) {
1001 pr_err("aoe: runt data size in read. skb->len=%d need=%ld\n", 1220 pr_err("%s e%ld.%d. skb->len=%d need=%ld\n",
1002 skb->len, n); 1221 "aoe: runt data size in read from",
1222 (long) d->aoemajor, d->aoeminor,
1223 skb->len, n);
1003 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); 1224 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
1004 break; 1225 break;
1005 } 1226 }
@@ -1010,13 +1231,13 @@ noskb: if (buf)
1010 ifp = getif(t, skb->dev); 1231 ifp = getif(t, skb->dev);
1011 if (ifp) 1232 if (ifp)
1012 ifp->lost = 0; 1233 ifp->lost = 0;
1013 if (d->htgt == t) /* I'll help myself, thank you. */
1014 d->htgt = NULL;
1015 spin_unlock_irq(&d->lock); 1234 spin_unlock_irq(&d->lock);
1016 break; 1235 break;
1017 case ATA_CMD_ID_ATA: 1236 case ATA_CMD_ID_ATA:
1018 if (skb->len < 512) { 1237 if (skb->len < 512) {
1019 pr_info("aoe: runt data size in ataid. skb->len=%d\n", 1238 pr_info("%s e%ld.%d. skb->len=%d need=512\n",
1239 "aoe: runt data size in ataid from",
1240 (long) d->aoemajor, d->aoeminor,
1020 skb->len); 1241 skb->len);
1021 break; 1242 break;
1022 } 1243 }
@@ -1032,16 +1253,23 @@ noskb: if (buf)
1032 be16_to_cpu(get_unaligned(&hin->major)), 1253 be16_to_cpu(get_unaligned(&hin->major)),
1033 hin->minor); 1254 hin->minor);
1034 } 1255 }
1035badrsp: 1256out:
1036 spin_lock_irq(&d->lock); 1257 spin_lock_irq(&d->lock);
1258 if (t->taint > 0
1259 && --t->taint > 0
1260 && t->nout_probes == 0) {
1261 count_targets(d, &untainted);
1262 if (untainted > 0) {
1263 probe(t);
1264 t->nout_probes++;
1265 }
1266 }
1037 1267
1038 aoe_freetframe(f); 1268 aoe_freetframe(f);
1039 1269
1040 if (buf && --buf->nframesout == 0 && buf->resid == 0) 1270 if (buf && --buf->nframesout == 0 && buf->resid == 0)
1041 aoe_end_buf(d, buf); 1271 aoe_end_buf(d, buf);
1042 1272
1043 aoecmd_work(d);
1044
1045 spin_unlock_irq(&d->lock); 1273 spin_unlock_irq(&d->lock);
1046 aoedev_put(d); 1274 aoedev_put(d);
1047 dev_kfree_skb(skb); 1275 dev_kfree_skb(skb);
@@ -1141,7 +1369,6 @@ aoecmd_ata_rsp(struct sk_buff *skb)
1141 struct aoedev *d; 1369 struct aoedev *d;
1142 struct aoe_hdr *h; 1370 struct aoe_hdr *h;
1143 struct frame *f; 1371 struct frame *f;
1144 struct aoetgt *t;
1145 u32 n; 1372 u32 n;
1146 ulong flags; 1373 ulong flags;
1147 char ebuf[128]; 1374 char ebuf[128];
@@ -1162,23 +1389,32 @@ aoecmd_ata_rsp(struct sk_buff *skb)
1162 1389
1163 n = be32_to_cpu(get_unaligned(&h->tag)); 1390 n = be32_to_cpu(get_unaligned(&h->tag));
1164 f = getframe(d, n); 1391 f = getframe(d, n);
1165 if (f == NULL) { 1392 if (f) {
1166 calc_rttavg(d, -tsince(n)); 1393 calc_rttavg(d, f->t, tsince_hr(f));
1167 spin_unlock_irqrestore(&d->lock, flags); 1394 f->t->nout--;
1168 aoedev_put(d); 1395 if (f->flags & FFL_PROBE)
1169 snprintf(ebuf, sizeof ebuf, 1396 f->t->nout_probes--;
1170 "%15s e%d.%d tag=%08x@%08lx\n", 1397 } else {
1171 "unexpected rsp", 1398 f = getframe_deferred(d, n);
1172 get_unaligned_be16(&h->major), 1399 if (f) {
1173 h->minor, 1400 calc_rttavg(d, NULL, tsince_hr(f));
1174 get_unaligned_be32(&h->tag), 1401 } else {
1175 jiffies); 1402 calc_rttavg(d, NULL, tsince(n));
1176 aoechr_error(ebuf); 1403 spin_unlock_irqrestore(&d->lock, flags);
1177 return skb; 1404 aoedev_put(d);
1405 snprintf(ebuf, sizeof(ebuf),
1406 "%15s e%d.%d tag=%08x@%08lx s=%pm d=%pm\n",
1407 "unexpected rsp",
1408 get_unaligned_be16(&h->major),
1409 h->minor,
1410 get_unaligned_be32(&h->tag),
1411 jiffies,
1412 h->src,
1413 h->dst);
1414 aoechr_error(ebuf);
1415 return skb;
1416 }
1178 } 1417 }
1179 t = f->t;
1180 calc_rttavg(d, tsince(f->tag));
1181 t->nout--;
1182 aoecmd_work(d); 1418 aoecmd_work(d);
1183 1419
1184 spin_unlock_irqrestore(&d->lock, flags); 1420 spin_unlock_irqrestore(&d->lock, flags);
@@ -1201,7 +1437,7 @@ aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
1201 aoecmd_cfg_pkts(aoemajor, aoeminor, &queue); 1437 aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
1202 aoenet_xmit(&queue); 1438 aoenet_xmit(&queue);
1203} 1439}
1204 1440
1205struct sk_buff * 1441struct sk_buff *
1206aoecmd_ata_id(struct aoedev *d) 1442aoecmd_ata_id(struct aoedev *d)
1207{ 1443{
@@ -1227,6 +1463,7 @@ aoecmd_ata_id(struct aoedev *d)
1227 fhash(f); 1463 fhash(f);
1228 t->nout++; 1464 t->nout++;
1229 f->waited = 0; 1465 f->waited = 0;
1466 f->waited_total = 0;
1230 1467
1231 /* set up ata header */ 1468 /* set up ata header */
1232 ah->scnt = 1; 1469 ah->scnt = 1;
@@ -1235,41 +1472,69 @@ aoecmd_ata_id(struct aoedev *d)
1235 1472
1236 skb->dev = t->ifp->nd; 1473 skb->dev = t->ifp->nd;
1237 1474
1238 d->rttavg = MAXTIMER; 1475 d->rttavg = RTTAVG_INIT;
1476 d->rttdev = RTTDEV_INIT;
1239 d->timer.function = rexmit_timer; 1477 d->timer.function = rexmit_timer;
1240 1478
1241 return skb_clone(skb, GFP_ATOMIC); 1479 skb = skb_clone(skb, GFP_ATOMIC);
1480 if (skb) {
1481 do_gettimeofday(&f->sent);
1482 f->sent_jiffs = (u32) jiffies;
1483 }
1484
1485 return skb;
1242} 1486}
1243 1487
/* Double the capacity of the device's target-slot array.
 *
 * A zeroed array with twice d->ntargets slots is allocated with
 * GFP_ATOMIC, the existing slot pointers are copied into it, d->tgt is
 * rebased to the same index in the new array, and the old array is
 * freed before d->targets and d->ntargets are updated.
 *
 * Returns the address of the first newly added slot (index oldn), or
 * NULL if the allocation fails; on failure d is not modified.
 */
1488static struct aoetgt **
1489grow_targets(struct aoedev *d)
1490{
1491 ulong oldn, newn;
1492 struct aoetgt **tt;
1493
1494 oldn = d->ntargets;
1495 newn = oldn * 2;
1496 tt = kcalloc(newn, sizeof(*d->targets), GFP_ATOMIC);
1497 if (!tt)
1498 return NULL;
1499 memmove(tt, d->targets, sizeof(*d->targets) * oldn);
 /* d->tgt points into the old array: keep its offset, change its base */
1500 d->tgt = tt + (d->tgt - d->targets);
1501 kfree(d->targets);
1502 d->targets = tt;
1503 d->ntargets = newn;
1504
 /* slots [oldn, newn) are the fresh, zeroed ones */
1505 return &d->targets[oldn];
1506}
1507
1244static struct aoetgt * 1508static struct aoetgt *
1245addtgt(struct aoedev *d, char *addr, ulong nframes) 1509addtgt(struct aoedev *d, char *addr, ulong nframes)
1246{ 1510{
1247 struct aoetgt *t, **tt, **te; 1511 struct aoetgt *t, **tt, **te;
1248 1512
1249 tt = d->targets; 1513 tt = d->targets;
1250 te = tt + NTARGETS; 1514 te = tt + d->ntargets;
1251 for (; tt < te && *tt; tt++) 1515 for (; tt < te && *tt; tt++)
1252 ; 1516 ;
1253 1517
1254 if (tt == te) { 1518 if (tt == te) {
1255 printk(KERN_INFO 1519 tt = grow_targets(d);
1256 "aoe: device addtgt failure; too many targets\n"); 1520 if (!tt)
1257 return NULL; 1521 goto nomem;
1258 } 1522 }
1259 t = kzalloc(sizeof(*t), GFP_ATOMIC); 1523 t = kzalloc(sizeof(*t), GFP_ATOMIC);
1260 if (!t) { 1524 if (!t)
1261 printk(KERN_INFO "aoe: cannot allocate memory to add target\n"); 1525 goto nomem;
1262 return NULL;
1263 }
1264
1265 d->ntargets++;
1266 t->nframes = nframes; 1526 t->nframes = nframes;
1267 t->d = d; 1527 t->d = d;
1268 memcpy(t->addr, addr, sizeof t->addr); 1528 memcpy(t->addr, addr, sizeof t->addr);
1269 t->ifp = t->ifs; 1529 t->ifp = t->ifs;
1270 t->maxout = t->nframes; 1530 aoecmd_wreset(t);
1531 t->maxout = t->nframes / 2;
1271 INIT_LIST_HEAD(&t->ffree); 1532 INIT_LIST_HEAD(&t->ffree);
1272 return *tt = t; 1533 return *tt = t;
1534
1535 nomem:
1536 pr_info("aoe: cannot allocate memory to add target\n");
1537 return NULL;
1273} 1538}
1274 1539
1275static void 1540static void
@@ -1279,7 +1544,7 @@ setdbcnt(struct aoedev *d)
1279 int bcnt = 0; 1544 int bcnt = 0;
1280 1545
1281 t = d->targets; 1546 t = d->targets;
1282 e = t + NTARGETS; 1547 e = t + d->ntargets;
1283 for (; t < e && *t; t++) 1548 for (; t < e && *t; t++)
1284 if (bcnt == 0 || bcnt > (*t)->minbcnt) 1549 if (bcnt == 0 || bcnt > (*t)->minbcnt)
1285 bcnt = (*t)->minbcnt; 1550 bcnt = (*t)->minbcnt;
@@ -1373,7 +1638,11 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
1373 spin_lock_irqsave(&d->lock, flags); 1638 spin_lock_irqsave(&d->lock, flags);
1374 1639
1375 t = gettgt(d, h->src); 1640 t = gettgt(d, h->src);
1376 if (!t) { 1641 if (t) {
1642 t->nframes = n;
1643 if (n < t->maxout)
1644 aoecmd_wreset(t);
1645 } else {
1377 t = addtgt(d, h->src, n); 1646 t = addtgt(d, h->src, n);
1378 if (!t) 1647 if (!t)
1379 goto bail; 1648 goto bail;
@@ -1402,17 +1671,26 @@ bail:
1402} 1671}
1403 1672
1404void 1673void
1674aoecmd_wreset(struct aoetgt *t)
1675{
1676 t->maxout = 1;
1677 t->ssthresh = t->nframes / 2;
1678 t->next_cwnd = t->nframes;
1679}
1680
1681void
1405aoecmd_cleanslate(struct aoedev *d) 1682aoecmd_cleanslate(struct aoedev *d)
1406{ 1683{
1407 struct aoetgt **t, **te; 1684 struct aoetgt **t, **te;
1408 1685
1409 d->mintimer = MINTIMER; 1686 d->rttavg = RTTAVG_INIT;
1687 d->rttdev = RTTDEV_INIT;
1410 d->maxbcnt = 0; 1688 d->maxbcnt = 0;
1411 1689
1412 t = d->targets; 1690 t = d->targets;
1413 te = t + NTARGETS; 1691 te = t + d->ntargets;
1414 for (; t < te && *t; t++) 1692 for (; t < te && *t; t++)
1415 (*t)->maxout = (*t)->nframes; 1693 aoecmd_wreset(*t);
1416} 1694}
1417 1695
1418void 1696void
@@ -1460,6 +1738,14 @@ aoe_flush_iocq(void)
1460int __init 1738int __init
1461aoecmd_init(void) 1739aoecmd_init(void)
1462{ 1740{
1741 void *p;
1742
1743 /* get_zeroed_page returns page with ref count 1 */
1744 p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
1745 if (!p)
1746 return -ENOMEM;
1747 empty_page = virt_to_page(p);
1748
1463 INIT_LIST_HEAD(&iocq.head); 1749 INIT_LIST_HEAD(&iocq.head);
1464 spin_lock_init(&iocq.lock); 1750 spin_lock_init(&iocq.lock);
1465 init_waitqueue_head(&ktiowq); 1751 init_waitqueue_head(&ktiowq);
@@ -1475,4 +1761,7 @@ aoecmd_exit(void)
1475{ 1761{
1476 aoe_ktstop(&kts); 1762 aoe_ktstop(&kts);
1477 aoe_flush_iocq(); 1763 aoe_flush_iocq();
1764
1765 free_page((unsigned long) page_address(empty_page));
1766 empty_page = NULL;
1478} 1767}
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 90e5b537f94..98f2965778b 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -15,7 +15,6 @@
15#include "aoe.h" 15#include "aoe.h"
16 16
17static void dummy_timer(ulong); 17static void dummy_timer(ulong);
18static void aoedev_freedev(struct aoedev *);
19static void freetgt(struct aoedev *d, struct aoetgt *t); 18static void freetgt(struct aoedev *d, struct aoetgt *t);
20static void skbpoolfree(struct aoedev *d); 19static void skbpoolfree(struct aoedev *d);
21 20
@@ -69,25 +68,34 @@ minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
69 NPERSHELF = 16, 68 NPERSHELF = 16,
70 }; 69 };
71 70
71 if (aoemin >= NPERSHELF) {
72 pr_err("aoe: %s %d slots per shelf\n",
73 "static minor device numbers support only",
74 NPERSHELF);
75 error = -1;
76 goto out;
77 }
78
72 n = aoemaj * NPERSHELF + aoemin; 79 n = aoemaj * NPERSHELF + aoemin;
73 if (aoemin >= NPERSHELF || n >= N_DEVS) { 80 if (n >= N_DEVS) {
74 pr_err("aoe: %s with e%ld.%d\n", 81 pr_err("aoe: %s with e%ld.%d\n",
75 "cannot use static minor device numbers", 82 "cannot use static minor device numbers",
76 aoemaj, aoemin); 83 aoemaj, aoemin);
77 error = -1; 84 error = -1;
78 } else { 85 goto out;
79 spin_lock_irqsave(&used_minors_lock, flags);
80 if (test_bit(n, used_minors)) {
81 pr_err("aoe: %s %lu\n",
82 "existing device already has static minor number",
83 n);
84 error = -1;
85 } else
86 set_bit(n, used_minors);
87 spin_unlock_irqrestore(&used_minors_lock, flags);
88 } 86 }
89 87
90 *sysminor = n; 88 spin_lock_irqsave(&used_minors_lock, flags);
89 if (test_bit(n, used_minors)) {
90 pr_err("aoe: %s %lu\n",
91 "existing device already has static minor number",
92 n);
93 error = -1;
94 } else
95 set_bit(n, used_minors);
96 spin_unlock_irqrestore(&used_minors_lock, flags);
97 *sysminor = n * AOE_PARTITIONS;
98out:
91 return error; 99 return error;
92} 100}
93 101
@@ -170,41 +178,50 @@ aoe_failip(struct aoedev *d)
170 aoe_end_request(d, rq, 0); 178 aoe_end_request(d, rq, 0);
171} 179}
172 180
/* Fail and release a single frame; used by aoedev_downdev() for each
 * entry on the factive hash lists and on the rexmitq.
 *
 * pos is the frame's list linkage (struct frame, member head). The
 * frame is unlinked from the list it is on. If a buf is attached, its
 * nframesout count is dropped by one and the buf is passed to
 * aoe_failbuf(); the frame itself is then handed to aoe_freetframe().
 */
181static void
182downdev_frame(struct list_head *pos)
183{
184 struct frame *f;
185
186 f = list_entry(pos, struct frame, head);
187 list_del(pos);
188 if (f->buf) {
 /* this frame no longer counts as outstanding for its buf */
189 f->buf->nframesout--;
190 aoe_failbuf(f->t->d, f->buf);
191 }
192 aoe_freetframe(f);
193}
194
173void 195void
174aoedev_downdev(struct aoedev *d) 196aoedev_downdev(struct aoedev *d)
175{ 197{
176 struct aoetgt *t, **tt, **te; 198 struct aoetgt *t, **tt, **te;
177 struct frame *f;
178 struct list_head *head, *pos, *nx; 199 struct list_head *head, *pos, *nx;
179 struct request *rq; 200 struct request *rq;
180 int i; 201 int i;
181 202
182 d->flags &= ~DEVFL_UP; 203 d->flags &= ~DEVFL_UP;
183 204
184 /* clean out active buffers */ 205 /* clean out active and to-be-retransmitted buffers */
185 for (i = 0; i < NFACTIVE; i++) { 206 for (i = 0; i < NFACTIVE; i++) {
186 head = &d->factive[i]; 207 head = &d->factive[i];
187 list_for_each_safe(pos, nx, head) { 208 list_for_each_safe(pos, nx, head)
188 f = list_entry(pos, struct frame, head); 209 downdev_frame(pos);
189 list_del(pos);
190 if (f->buf) {
191 f->buf->nframesout--;
192 aoe_failbuf(d, f->buf);
193 }
194 aoe_freetframe(f);
195 }
196 } 210 }
211 head = &d->rexmitq;
212 list_for_each_safe(pos, nx, head)
213 downdev_frame(pos);
214
197 /* reset window dressings */ 215 /* reset window dressings */
198 tt = d->targets; 216 tt = d->targets;
199 te = tt + NTARGETS; 217 te = tt + d->ntargets;
200 for (; tt < te && (t = *tt); tt++) { 218 for (; tt < te && (t = *tt); tt++) {
201 t->maxout = t->nframes; 219 aoecmd_wreset(t);
202 t->nout = 0; 220 t->nout = 0;
203 } 221 }
204 222
205 /* clean out the in-process request (if any) */ 223 /* clean out the in-process request (if any) */
206 aoe_failip(d); 224 aoe_failip(d);
207 d->htgt = NULL;
208 225
209 /* fast fail all pending I/O */ 226 /* fast fail all pending I/O */
210 if (d->blkq) { 227 if (d->blkq) {
@@ -218,12 +235,48 @@ aoedev_downdev(struct aoedev *d)
218 set_capacity(d->gd, 0); 235 set_capacity(d->gd, 0);
219} 236}
220 237
238/* return whether the user asked for this particular
239 * device to be flushed
240 */
/* s/slen: the name supplied by the user and its length.
 * d: candidate device; a device without a gendisk never matches.
 *
 * The comparison is against the part of d->gd->disk_name that follows
 * the last '/', or the whole name when there is no '/'. At most
 * min(slen, bytes remaining in disk_name from that point) bytes are
 * compared, so s is never read past slen.
 *
 * Returns nonzero when the compared bytes are equal, 0 otherwise.
 */
241static int
242user_req(char *s, size_t slen, struct aoedev *d)
243{
244 char *p;
245 size_t lim;
246
247 if (!d->gd)
248 return 0;
249 p = strrchr(d->gd->disk_name, '/');
250 if (!p)
251 p = d->gd->disk_name;
252 else
253 p += 1;
 /* lim = room left in disk_name starting at p, capped at slen */
254 lim = sizeof(d->gd->disk_name);
255 lim -= p - d->gd->disk_name;
256 if (slen < lim)
257 lim = slen;
258
259 return !strncmp(s, p, lim);
260}
261
221static void 262static void
222aoedev_freedev(struct aoedev *d) 263freedev(struct aoedev *d)
223{ 264{
224 struct aoetgt **t, **e; 265 struct aoetgt **t, **e;
266 int freeing = 0;
267 unsigned long flags;
268
269 spin_lock_irqsave(&d->lock, flags);
270 if (d->flags & DEVFL_TKILL
271 && !(d->flags & DEVFL_FREEING)) {
272 d->flags |= DEVFL_FREEING;
273 freeing = 1;
274 }
275 spin_unlock_irqrestore(&d->lock, flags);
276 if (!freeing)
277 return;
225 278
226 cancel_work_sync(&d->work); 279 del_timer_sync(&d->timer);
227 if (d->gd) { 280 if (d->gd) {
228 aoedisk_rm_sysfs(d); 281 aoedisk_rm_sysfs(d);
229 del_gendisk(d->gd); 282 del_gendisk(d->gd);
@@ -231,61 +284,113 @@ aoedev_freedev(struct aoedev *d)
231 blk_cleanup_queue(d->blkq); 284 blk_cleanup_queue(d->blkq);
232 } 285 }
233 t = d->targets; 286 t = d->targets;
234 e = t + NTARGETS; 287 e = t + d->ntargets;
235 for (; t < e && *t; t++) 288 for (; t < e && *t; t++)
236 freetgt(d, *t); 289 freetgt(d, *t);
237 if (d->bufpool) 290 if (d->bufpool)
238 mempool_destroy(d->bufpool); 291 mempool_destroy(d->bufpool);
239 skbpoolfree(d); 292 skbpoolfree(d);
240 minor_free(d->sysminor); 293 minor_free(d->sysminor);
241 kfree(d); 294
295 spin_lock_irqsave(&d->lock, flags);
296 d->flags |= DEVFL_FREED;
297 spin_unlock_irqrestore(&d->lock, flags);
242} 298}
243 299
244int 300enum flush_parms {
245aoedev_flush(const char __user *str, size_t cnt) 301 NOT_EXITING = 0,
302 EXITING = 1,
303};
304
305static int
306flush(const char __user *str, size_t cnt, int exiting)
246{ 307{
247 ulong flags; 308 ulong flags;
248 struct aoedev *d, **dd; 309 struct aoedev *d, **dd;
249 struct aoedev *rmd = NULL;
250 char buf[16]; 310 char buf[16];
251 int all = 0; 311 int all = 0;
312 int specified = 0; /* flush a specific device */
313 unsigned int skipflags;
314
315 skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;
252 316
253 if (cnt >= 3) { 317 if (!exiting && cnt >= 3) {
254 if (cnt > sizeof buf) 318 if (cnt > sizeof buf)
255 cnt = sizeof buf; 319 cnt = sizeof buf;
256 if (copy_from_user(buf, str, cnt)) 320 if (copy_from_user(buf, str, cnt))
257 return -EFAULT; 321 return -EFAULT;
258 all = !strncmp(buf, "all", 3); 322 all = !strncmp(buf, "all", 3);
323 if (!all)
324 specified = 1;
259 } 325 }
260 326
327 flush_scheduled_work();
328 /* pass one: without sleeping, do aoedev_downdev */
261 spin_lock_irqsave(&devlist_lock, flags); 329 spin_lock_irqsave(&devlist_lock, flags);
262 dd = &devlist; 330 for (d = devlist; d; d = d->next) {
263 while ((d = *dd)) {
264 spin_lock(&d->lock); 331 spin_lock(&d->lock);
265 if ((!all && (d->flags & DEVFL_UP)) 332 if (exiting) {
266 || (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) 333 /* unconditionally take each device down */
334 } else if (specified) {
335 if (!user_req(buf, cnt, d))
336 goto cont;
337 } else if ((!all && (d->flags & DEVFL_UP))
338 || d->flags & skipflags
267 || d->nopen 339 || d->nopen
268 || d->ref) { 340 || d->ref)
269 spin_unlock(&d->lock); 341 goto cont;
270 dd = &d->next; 342
271 continue;
272 }
273 *dd = d->next;
274 aoedev_downdev(d); 343 aoedev_downdev(d);
275 d->flags |= DEVFL_TKILL; 344 d->flags |= DEVFL_TKILL;
345cont:
276 spin_unlock(&d->lock); 346 spin_unlock(&d->lock);
277 d->next = rmd;
278 rmd = d;
279 } 347 }
280 spin_unlock_irqrestore(&devlist_lock, flags); 348 spin_unlock_irqrestore(&devlist_lock, flags);
281 while ((d = rmd)) { 349
282 rmd = d->next; 350 /* pass two: call freedev, which might sleep,
283 del_timer_sync(&d->timer); 351 * for aoedevs marked with DEVFL_TKILL
284 aoedev_freedev(d); /* must be able to sleep */ 352 */
353restart:
354 spin_lock_irqsave(&devlist_lock, flags);
355 for (d = devlist; d; d = d->next) {
356 spin_lock(&d->lock);
357 if (d->flags & DEVFL_TKILL
358 && !(d->flags & DEVFL_FREEING)) {
359 spin_unlock(&d->lock);
360 spin_unlock_irqrestore(&devlist_lock, flags);
361 freedev(d);
362 goto restart;
363 }
364 spin_unlock(&d->lock);
285 } 365 }
366
367 /* pass three: remove aoedevs marked with DEVFL_FREED */
368 for (dd = &devlist, d = *dd; d; d = *dd) {
369 struct aoedev *doomed = NULL;
370
371 spin_lock(&d->lock);
372 if (d->flags & DEVFL_FREED) {
373 *dd = d->next;
374 doomed = d;
375 } else {
376 dd = &d->next;
377 }
378 spin_unlock(&d->lock);
379 if (doomed)
380 kfree(doomed->targets);
381 kfree(doomed);
382 }
383 spin_unlock_irqrestore(&devlist_lock, flags);
384
286 return 0; 385 return 0;
287} 386}
288 387
/* Flush entry point taking a user-space buffer: forwards str and cnt
 * unchanged to flush() with NOT_EXITING and returns its result.
 */
388int
389aoedev_flush(const char __user *str, size_t cnt)
390{
391 return flush(str, cnt, NOT_EXITING);
392}
393
289/* This has been confirmed to occur once with Tms=3*1000 due to the 394/* This has been confirmed to occur once with Tms=3*1000 due to the
290 * driver changing link and not processing its transmit ring. The 395 * driver changing link and not processing its transmit ring. The
291 * problem is hard enough to solve by returning an error that I'm 396 * problem is hard enough to solve by returning an error that I'm
@@ -332,13 +437,20 @@ aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
332 struct aoedev *d; 437 struct aoedev *d;
333 int i; 438 int i;
334 ulong flags; 439 ulong flags;
335 ulong sysminor; 440 ulong sysminor = 0;
336 441
337 spin_lock_irqsave(&devlist_lock, flags); 442 spin_lock_irqsave(&devlist_lock, flags);
338 443
339 for (d=devlist; d; d=d->next) 444 for (d=devlist; d; d=d->next)
340 if (d->aoemajor == maj && d->aoeminor == min) { 445 if (d->aoemajor == maj && d->aoeminor == min) {
446 spin_lock(&d->lock);
447 if (d->flags & DEVFL_TKILL) {
448 spin_unlock(&d->lock);
449 d = NULL;
450 goto out;
451 }
341 d->ref++; 452 d->ref++;
453 spin_unlock(&d->lock);
342 break; 454 break;
343 } 455 }
344 if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0) 456 if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
@@ -346,6 +458,13 @@ aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
346 d = kcalloc(1, sizeof *d, GFP_ATOMIC); 458 d = kcalloc(1, sizeof *d, GFP_ATOMIC);
347 if (!d) 459 if (!d)
348 goto out; 460 goto out;
461 d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC);
462 if (!d->targets) {
463 kfree(d);
464 d = NULL;
465 goto out;
466 }
467 d->ntargets = NTARGETS;
349 INIT_WORK(&d->work, aoecmd_sleepwork); 468 INIT_WORK(&d->work, aoecmd_sleepwork);
350 spin_lock_init(&d->lock); 469 spin_lock_init(&d->lock);
351 skb_queue_head_init(&d->skbpool); 470 skb_queue_head_init(&d->skbpool);
@@ -359,10 +478,12 @@ aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
359 d->ref = 1; 478 d->ref = 1;
360 for (i = 0; i < NFACTIVE; i++) 479 for (i = 0; i < NFACTIVE; i++)
361 INIT_LIST_HEAD(&d->factive[i]); 480 INIT_LIST_HEAD(&d->factive[i]);
481 INIT_LIST_HEAD(&d->rexmitq);
362 d->sysminor = sysminor; 482 d->sysminor = sysminor;
363 d->aoemajor = maj; 483 d->aoemajor = maj;
364 d->aoeminor = min; 484 d->aoeminor = min;
365 d->mintimer = MINTIMER; 485 d->rttavg = RTTAVG_INIT;
486 d->rttdev = RTTDEV_INIT;
366 d->next = devlist; 487 d->next = devlist;
367 devlist = d; 488 devlist = d;
368 out: 489 out:
@@ -396,21 +517,9 @@ freetgt(struct aoedev *d, struct aoetgt *t)
396void 517void
397aoedev_exit(void) 518aoedev_exit(void)
398{ 519{
399 struct aoedev *d; 520 flush_scheduled_work();
400 ulong flags;
401
402 aoe_flush_iocq(); 521 aoe_flush_iocq();
403 while ((d = devlist)) { 522 flush(NULL, 0, EXITING);
404 devlist = d->next;
405
406 spin_lock_irqsave(&d->lock, flags);
407 aoedev_downdev(d);
408 d->flags |= DEVFL_TKILL;
409 spin_unlock_irqrestore(&d->lock, flags);
410
411 del_timer_sync(&d->timer);
412 aoedev_freedev(d);
413 }
414} 523}
415 524
416int __init 525int __init
diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c
index 04793c2c701..4b987c2fefb 100644
--- a/drivers/block/aoe/aoemain.c
+++ b/drivers/block/aoe/aoemain.c
@@ -105,7 +105,7 @@ aoe_init(void)
105 aoechr_exit(); 105 aoechr_exit();
106 chr_fail: 106 chr_fail:
107 aoedev_exit(); 107 aoedev_exit();
108 108
109 printk(KERN_INFO "aoe: initialisation failure.\n"); 109 printk(KERN_INFO "aoe: initialisation failure.\n");
110 return ret; 110 return ret;
111} 111}
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index 162c6471275..71d3ea8d300 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -31,7 +31,7 @@ enum {
31 31
32static char aoe_iflist[IFLISTSZ]; 32static char aoe_iflist[IFLISTSZ];
33module_param_string(aoe_iflist, aoe_iflist, IFLISTSZ, 0600); 33module_param_string(aoe_iflist, aoe_iflist, IFLISTSZ, 0600);
34MODULE_PARM_DESC(aoe_iflist, "aoe_iflist=\"dev1 [dev2 ...]\""); 34MODULE_PARM_DESC(aoe_iflist, "aoe_iflist=dev1[,dev2...]");
35 35
36static wait_queue_head_t txwq; 36static wait_queue_head_t txwq;
37static struct ktstate kts; 37static struct ktstate kts;
@@ -52,13 +52,18 @@ static struct sk_buff_head skbtxq;
52 52
53/* enters with txlock held */ 53/* enters with txlock held */
54static int 54static int
55tx(void) 55tx(void) __must_hold(&txlock)
56{ 56{
57 struct sk_buff *skb; 57 struct sk_buff *skb;
58 struct net_device *ifp;
58 59
59 while ((skb = skb_dequeue(&skbtxq))) { 60 while ((skb = skb_dequeue(&skbtxq))) {
60 spin_unlock_irq(&txlock); 61 spin_unlock_irq(&txlock);
61 dev_queue_xmit(skb); 62 ifp = skb->dev;
63 if (dev_queue_xmit(skb) == NET_XMIT_DROP && net_ratelimit())
64 pr_warn("aoe: packet could not be sent on %s. %s\n",
65 ifp ? ifp->name : "netif",
66 "consider increasing tx_queue_len");
62 spin_lock_irq(&txlock); 67 spin_lock_irq(&txlock);
63 } 68 }
64 return 0; 69 return 0;
@@ -119,8 +124,8 @@ aoenet_xmit(struct sk_buff_head *queue)
119 } 124 }
120} 125}
121 126
122/* 127/*
123 * (1) len doesn't include the header by default. I want this. 128 * (1) len doesn't include the header by default. I want this.
124 */ 129 */
125static int 130static int
126aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev) 131aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev)