]> bbs.cooldavid.org Git - net-next-2.6.git/blame - drivers/block/aoe/aoecmd.c
Linux v2.6.14
[net-next-2.6.git] / drivers / block / aoe / aoecmd.c
CommitLineData
1da177e4
LT
1/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
2/*
3 * aoecmd.c
4 * Filesystem request handling methods
5 */
6
7#include <linux/hdreg.h>
8#include <linux/blkdev.h>
9#include <linux/skbuff.h>
10#include <linux/netdevice.h>
11#include "aoe.h"
12
/* Period, in jiffies, at which the retransmit timer is re-armed. */
#define TIMERTICK	(HZ / 10)
/* Lower clamp applied to a round-trip sample before it is averaged. */
#define MINTIMER	(TIMERTICK * 2)
/* Upper clamp for round-trip samples and for the backed-off average. */
#define MAXTIMER	(HZ * 2)
/* After MAXWAIT seconds of waiting on a frame, give up and fail the device. */
#define MAXWAIT		(3 * 60)
17
18static struct sk_buff *
19new_skb(struct net_device *if_dev, ulong len)
20{
21 struct sk_buff *skb;
22
23 skb = alloc_skb(len, GFP_ATOMIC);
24 if (skb) {
25 skb->nh.raw = skb->mac.raw = skb->data;
26 skb->dev = if_dev;
27 skb->protocol = __constant_htons(ETH_P_AOE);
28 skb->priority = 0;
29 skb_put(skb, len);
30 skb->next = skb->prev = NULL;
31
32 /* tell the network layer not to perform IP checksums
33 * or to get the NIC to do it
34 */
35 skb->ip_summed = CHECKSUM_NONE;
36 }
37 return skb;
38}
39
40static struct sk_buff *
41skb_prepare(struct aoedev *d, struct frame *f)
42{
43 struct sk_buff *skb;
44 char *p;
45
46 skb = new_skb(d->ifp, f->ndata + f->writedatalen);
47 if (!skb) {
48 printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
49 return NULL;
50 }
51
52 p = skb->mac.raw;
53 memcpy(p, f->data, f->ndata);
54
55 if (f->writedatalen) {
56 p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
57 memcpy(p, f->bufaddr, f->writedatalen);
58 }
59
60 return skb;
61}
62
63static struct frame *
64getframe(struct aoedev *d, int tag)
65{
66 struct frame *f, *e;
67
68 f = d->frames;
69 e = f + d->nframes;
70 for (; f<e; f++)
71 if (f->tag == tag)
72 return f;
73 return NULL;
74}
75
76/*
77 * Leave the top bit clear so we have tagspace for userland.
78 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
79 * This driver reserves tag -1 to mean "unused frame."
80 */
81static int
82newtag(struct aoedev *d)
83{
84 register ulong n;
85
86 n = jiffies & 0xffff;
87 return n |= (++d->lasttag & 0x7fff) << 16;
88}
89
90static int
91aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
92{
1da177e4 93 u32 host_tag = newtag(d);
1da177e4
LT
94
95 memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
96 memcpy(h->dst, d->addr, sizeof h->dst);
63e9cc5d 97 h->type = __constant_cpu_to_be16(ETH_P_AOE);
1da177e4 98 h->verfl = AOE_HVER;
63e9cc5d 99 h->major = cpu_to_be16(d->aoemajor);
1da177e4
LT
100 h->minor = d->aoeminor;
101 h->cmd = AOECMD_ATA;
63e9cc5d 102 h->tag = cpu_to_be32(host_tag);
1da177e4
LT
103
104 return host_tag;
105}
106
107static void
108aoecmd_ata_rw(struct aoedev *d, struct frame *f)
109{
110 struct aoe_hdr *h;
111 struct aoe_atahdr *ah;
112 struct buf *buf;
113 struct sk_buff *skb;
114 ulong bcnt;
115 register sector_t sector;
116 char writebit, extbit;
117
118 writebit = 0x10;
119 extbit = 0x4;
120
121 buf = d->inprocess;
122
123 sector = buf->sector;
124 bcnt = buf->bv_resid;
125 if (bcnt > MAXATADATA)
126 bcnt = MAXATADATA;
127
128 /* initialize the headers & frame */
129 h = (struct aoe_hdr *) f->data;
130 ah = (struct aoe_atahdr *) (h+1);
131 f->ndata = sizeof *h + sizeof *ah;
132 memset(h, 0, f->ndata);
133 f->tag = aoehdr_atainit(d, h);
134 f->waited = 0;
135 f->buf = buf;
136 f->bufaddr = buf->bufaddr;
137
138 /* set up ata header */
139 ah->scnt = bcnt >> 9;
140 ah->lba0 = sector;
141 ah->lba1 = sector >>= 8;
142 ah->lba2 = sector >>= 8;
143 ah->lba3 = sector >>= 8;
144 if (d->flags & DEVFL_EXT) {
145 ah->aflags |= AOEAFL_EXT;
146 ah->lba4 = sector >>= 8;
147 ah->lba5 = sector >>= 8;
148 } else {
149 extbit = 0;
150 ah->lba3 &= 0x0f;
151 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
152 }
153
154 if (bio_data_dir(buf->bio) == WRITE) {
155 ah->aflags |= AOEAFL_WRITE;
156 f->writedatalen = bcnt;
157 } else {
158 writebit = 0;
159 f->writedatalen = 0;
160 }
161
162 ah->cmdstat = WIN_READ | writebit | extbit;
163
164 /* mark all tracking fields and load out */
165 buf->nframesout += 1;
166 buf->bufaddr += bcnt;
167 buf->bv_resid -= bcnt;
168/* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
169 buf->resid -= bcnt;
170 buf->sector += bcnt >> 9;
171 if (buf->resid == 0) {
172 d->inprocess = NULL;
173 } else if (buf->bv_resid == 0) {
174 buf->bv++;
175 buf->bv_resid = buf->bv->bv_len;
176 buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
177 }
178
179 skb = skb_prepare(d, f);
180 if (skb) {
a4b38364
EC
181 skb->next = NULL;
182 if (d->sendq_hd)
183 d->sendq_tl->next = skb;
184 else
185 d->sendq_hd = skb;
186 d->sendq_tl = skb;
1da177e4
LT
187 }
188}
189
190/* enters with d->lock held */
191void
192aoecmd_work(struct aoedev *d)
193{
194 struct frame *f;
195 struct buf *buf;
196loop:
197 f = getframe(d, FREETAG);
198 if (f == NULL)
199 return;
200 if (d->inprocess == NULL) {
201 if (list_empty(&d->bufq))
202 return;
203 buf = container_of(d->bufq.next, struct buf, bufs);
204 list_del(d->bufq.next);
205/*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
206 d->inprocess = buf;
207 }
208 aoecmd_ata_rw(d, f);
209 goto loop;
210}
211
212static void
213rexmit(struct aoedev *d, struct frame *f)
214{
215 struct sk_buff *skb;
216 struct aoe_hdr *h;
217 char buf[128];
218 u32 n;
1da177e4
LT
219
220 n = newtag(d);
221
222 snprintf(buf, sizeof buf,
223 "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
224 "retransmit",
225 d->aoemajor, d->aoeminor, f->tag, jiffies, n);
226 aoechr_error(buf);
227
228 h = (struct aoe_hdr *) f->data;
229 f->tag = n;
63e9cc5d 230 h->tag = cpu_to_be32(n);
1da177e4
LT
231
232 skb = skb_prepare(d, f);
233 if (skb) {
a4b38364
EC
234 skb->next = NULL;
235 if (d->sendq_hd)
236 d->sendq_tl->next = skb;
237 else
238 d->sendq_hd = skb;
239 d->sendq_tl = skb;
1da177e4
LT
240 }
241}
242
243static int
244tsince(int tag)
245{
246 int n;
247
248 n = jiffies & 0xffff;
249 n -= tag & 0xffff;
250 if (n < 0)
251 n += 1<<16;
252 return n;
253}
254
255static void
256rexmit_timer(ulong vp)
257{
258 struct aoedev *d;
259 struct frame *f, *e;
260 struct sk_buff *sl;
261 register long timeout;
262 ulong flags, n;
263
264 d = (struct aoedev *) vp;
265 sl = NULL;
266
267 /* timeout is always ~150% of the moving average */
268 timeout = d->rttavg;
269 timeout += timeout >> 1;
270
271 spin_lock_irqsave(&d->lock, flags);
272
273 if (d->flags & DEVFL_TKILL) {
274tdie: spin_unlock_irqrestore(&d->lock, flags);
275 return;
276 }
277 f = d->frames;
278 e = f + d->nframes;
279 for (; f<e; f++) {
280 if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
281 n = f->waited += timeout;
282 n /= HZ;
283 if (n > MAXWAIT) { /* waited too long. device failure. */
284 aoedev_downdev(d);
285 goto tdie;
286 }
287 rexmit(d, f);
288 }
289 }
290
a4b38364
EC
291 sl = d->sendq_hd;
292 d->sendq_hd = d->sendq_tl = NULL;
1da177e4
LT
293 if (sl) {
294 n = d->rttavg <<= 1;
295 if (n > MAXTIMER)
296 d->rttavg = MAXTIMER;
297 }
298
299 d->timer.expires = jiffies + TIMERTICK;
300 add_timer(&d->timer);
301
302 spin_unlock_irqrestore(&d->lock, flags);
303
304 aoenet_xmit(sl);
305}
306
307static void
308ataid_complete(struct aoedev *d, unsigned char *id)
309{
310 u64 ssize;
311 u16 n;
312
313 /* word 83: command set supported */
63e9cc5d 314 n = le16_to_cpup((__le16 *) &id[83<<1]);
1da177e4
LT
315
316 /* word 86: command set/feature enabled */
63e9cc5d 317 n |= le16_to_cpup((__le16 *) &id[86<<1]);
1da177e4
LT
318
319 if (n & (1<<10)) { /* bit 10: LBA 48 */
320 d->flags |= DEVFL_EXT;
321
322 /* word 100: number lba48 sectors */
63e9cc5d 323 ssize = le64_to_cpup((__le64 *) &id[100<<1]);
1da177e4
LT
324
325 /* set as in ide-disk.c:init_idedisk_capacity */
326 d->geo.cylinders = ssize;
327 d->geo.cylinders /= (255 * 63);
328 d->geo.heads = 255;
329 d->geo.sectors = 63;
330 } else {
331 d->flags &= ~DEVFL_EXT;
332
333 /* number lba28 sectors */
63e9cc5d 334 ssize = le32_to_cpup((__le32 *) &id[60<<1]);
1da177e4
LT
335
336 /* NOTE: obsolete in ATA 6 */
63e9cc5d
EC
337 d->geo.cylinders = le16_to_cpup((__le16 *) &id[54<<1]);
338 d->geo.heads = le16_to_cpup((__le16 *) &id[55<<1]);
339 d->geo.sectors = le16_to_cpup((__le16 *) &id[56<<1]);
1da177e4
LT
340 }
341 d->ssize = ssize;
342 d->geo.start = 0;
343 if (d->gd != NULL) {
344 d->gd->capacity = ssize;
345 d->flags |= DEVFL_UP;
346 return;
347 }
348 if (d->flags & DEVFL_WORKON) {
349 printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! "
350 "(This really shouldn't happen).\n");
351 return;
352 }
353 INIT_WORK(&d->work, aoeblk_gdalloc, d);
354 schedule_work(&d->work);
355 d->flags |= DEVFL_WORKON;
356}
357
358static void
359calc_rttavg(struct aoedev *d, int rtt)
360{
361 register long n;
362
363 n = rtt;
364 if (n < MINTIMER)
365 n = MINTIMER;
366 else if (n > MAXTIMER)
367 n = MAXTIMER;
368
369 /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
370 n -= d->rttavg;
371 d->rttavg += n >> 2;
372}
373
374void
375aoecmd_ata_rsp(struct sk_buff *skb)
376{
377 struct aoedev *d;
378 struct aoe_hdr *hin;
379 struct aoe_atahdr *ahin, *ahout;
380 struct frame *f;
381 struct buf *buf;
382 struct sk_buff *sl;
383 register long n;
384 ulong flags;
385 char ebuf[128];
32465c65
EC
386 u16 aoemajor;
387
1da177e4 388 hin = (struct aoe_hdr *) skb->mac.raw;
63e9cc5d 389 aoemajor = be16_to_cpu(hin->major);
32465c65 390 d = aoedev_by_aoeaddr(aoemajor, hin->minor);
1da177e4
LT
391 if (d == NULL) {
392 snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
393 "for unknown device %d.%d\n",
32465c65 394 aoemajor, hin->minor);
1da177e4
LT
395 aoechr_error(ebuf);
396 return;
397 }
398
399 spin_lock_irqsave(&d->lock, flags);
400
63e9cc5d 401 f = getframe(d, be32_to_cpu(hin->tag));
1da177e4
LT
402 if (f == NULL) {
403 spin_unlock_irqrestore(&d->lock, flags);
404 snprintf(ebuf, sizeof ebuf,
405 "%15s e%d.%d tag=%08x@%08lx\n",
406 "unexpected rsp",
63e9cc5d 407 be16_to_cpu(hin->major),
1da177e4 408 hin->minor,
63e9cc5d 409 be32_to_cpu(hin->tag),
1da177e4
LT
410 jiffies);
411 aoechr_error(ebuf);
412 return;
413 }
414
415 calc_rttavg(d, tsince(f->tag));
416
417 ahin = (struct aoe_atahdr *) (hin+1);
418 ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
419 buf = f->buf;
420
421 if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
422 printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
423 "stat=%2.2Xh from e%ld.%ld\n",
424 ahout->cmdstat, ahin->cmdstat,
425 d->aoemajor, d->aoeminor);
426 if (buf)
427 buf->flags |= BUFFL_FAIL;
428 } else {
429 switch (ahout->cmdstat) {
430 case WIN_READ:
431 case WIN_READ_EXT:
432 n = ahout->scnt << 9;
433 if (skb->len - sizeof *hin - sizeof *ahin < n) {
434 printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
435 "ata data size in read. skb->len=%d\n",
436 skb->len);
437 /* fail frame f? just returning will rexmit. */
438 spin_unlock_irqrestore(&d->lock, flags);
439 return;
440 }
441 memcpy(f->bufaddr, ahin+1, n);
442 case WIN_WRITE:
443 case WIN_WRITE_EXT:
444 break;
445 case WIN_IDENTIFY:
446 if (skb->len - sizeof *hin - sizeof *ahin < 512) {
447 printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
448 "in ataid. skb->len=%d\n", skb->len);
449 spin_unlock_irqrestore(&d->lock, flags);
450 return;
451 }
452 ataid_complete(d, (char *) (ahin+1));
453 /* d->flags |= DEVFL_WC_UPDATE; */
454 break;
455 default:
456 printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
457 "outbound ata command %2.2Xh for %d.%d\n",
458 ahout->cmdstat,
63e9cc5d 459 be16_to_cpu(hin->major),
1da177e4
LT
460 hin->minor);
461 }
462 }
463
464 if (buf) {
465 buf->nframesout -= 1;
466 if (buf->nframesout == 0 && buf->resid == 0) {
0c6f0e79
EC
467 unsigned long duration = jiffies - buf->start_time;
468 unsigned long n_sect = buf->bio->bi_size >> 9;
469 struct gendisk *disk = d->gd;
470
471 if (bio_data_dir(buf->bio) == WRITE) {
472 disk_stat_inc(disk, writes);
473 disk_stat_add(disk, write_ticks, duration);
474 disk_stat_add(disk, write_sectors, n_sect);
475 } else {
476 disk_stat_inc(disk, reads);
477 disk_stat_add(disk, read_ticks, duration);
478 disk_stat_add(disk, read_sectors, n_sect);
479 }
480 disk_stat_add(disk, io_ticks, duration);
1da177e4
LT
481 n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
482 bio_endio(buf->bio, buf->bio->bi_size, n);
483 mempool_free(buf, d->bufpool);
484 }
485 }
486
487 f->buf = NULL;
488 f->tag = FREETAG;
489
490 aoecmd_work(d);
491
a4b38364
EC
492 sl = d->sendq_hd;
493 d->sendq_hd = d->sendq_tl = NULL;
1da177e4
LT
494
495 spin_unlock_irqrestore(&d->lock, flags);
496
497 aoenet_xmit(sl);
498}
499
500void
501aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
502{
503 struct aoe_hdr *h;
504 struct aoe_cfghdr *ch;
505 struct sk_buff *skb, *sl;
506 struct net_device *ifp;
1da177e4
LT
507
508 sl = NULL;
509
510 read_lock(&dev_base_lock);
511 for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
512 dev_hold(ifp);
513 if (!is_aoe_netif(ifp))
514 continue;
515
516 skb = new_skb(ifp, sizeof *h + sizeof *ch);
517 if (skb == NULL) {
518 printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
519 continue;
520 }
521 h = (struct aoe_hdr *) skb->mac.raw;
522 memset(h, 0, sizeof *h + sizeof *ch);
523
524 memset(h->dst, 0xff, sizeof h->dst);
525 memcpy(h->src, ifp->dev_addr, sizeof h->src);
63e9cc5d 526 h->type = __constant_cpu_to_be16(ETH_P_AOE);
1da177e4 527 h->verfl = AOE_HVER;
63e9cc5d 528 h->major = cpu_to_be16(aoemajor);
1da177e4
LT
529 h->minor = aoeminor;
530 h->cmd = AOECMD_CFG;
531
532 skb->next = sl;
533 sl = skb;
534 }
535 read_unlock(&dev_base_lock);
536
537 aoenet_xmit(sl);
538}
539
540/*
541 * Since we only call this in one place (and it only prepares one frame)
a4b38364 542 * we just return the skb. Usually we'd chain it up to the aoedev sendq.
1da177e4
LT
543 */
544static struct sk_buff *
545aoecmd_ata_id(struct aoedev *d)
546{
547 struct aoe_hdr *h;
548 struct aoe_atahdr *ah;
549 struct frame *f;
550 struct sk_buff *skb;
551
552 f = getframe(d, FREETAG);
553 if (f == NULL) {
554 printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. "
555 "This shouldn't happen.\n");
556 return NULL;
557 }
558
559 /* initialize the headers & frame */
560 h = (struct aoe_hdr *) f->data;
561 ah = (struct aoe_atahdr *) (h+1);
562 f->ndata = sizeof *h + sizeof *ah;
563 memset(h, 0, f->ndata);
564 f->tag = aoehdr_atainit(d, h);
565 f->waited = 0;
566 f->writedatalen = 0;
567
568 /* this message initializes the device, so we reset the rttavg */
569 d->rttavg = MAXTIMER;
570
571 /* set up ata header */
572 ah->scnt = 1;
573 ah->cmdstat = WIN_IDENTIFY;
574 ah->lba3 = 0xa0;
575
576 skb = skb_prepare(d, f);
577
578 /* we now want to start the rexmit tracking */
579 d->flags &= ~DEVFL_TKILL;
580 d->timer.data = (ulong) d;
581 d->timer.function = rexmit_timer;
582 d->timer.expires = jiffies + TIMERTICK;
583 add_timer(&d->timer);
584
585 return skb;
586}
587
588void
589aoecmd_cfg_rsp(struct sk_buff *skb)
590{
591 struct aoedev *d;
592 struct aoe_hdr *h;
593 struct aoe_cfghdr *ch;
63e9cc5d
EC
594 ulong flags, sysminor, aoemajor;
595 u16 bufcnt;
1da177e4 596 struct sk_buff *sl;
fc458dcd 597 enum { MAXFRAMES = 8 };
1da177e4
LT
598
599 h = (struct aoe_hdr *) skb->mac.raw;
600 ch = (struct aoe_cfghdr *) (h+1);
601
602 /*
603 * Enough people have their dip switches set backwards to
604 * warrant a loud message for this special case.
605 */
63e9cc5d 606 aoemajor = be16_to_cpu(h->major);
1da177e4
LT
607 if (aoemajor == 0xfff) {
608 printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
609 "address is all ones. Check shelf dip switches\n");
610 return;
611 }
612
613 sysminor = SYSMINOR(aoemajor, h->minor);
fc458dcd
EC
614 if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
615 printk(KERN_INFO
616 "aoe: e%ld.%d: minor number too large\n",
617 aoemajor, (int) h->minor);
1da177e4
LT
618 return;
619 }
620
63e9cc5d 621 bufcnt = be16_to_cpu(ch->bufcnt);
1da177e4
LT
622 if (bufcnt > MAXFRAMES) /* keep it reasonable */
623 bufcnt = MAXFRAMES;
624
625 d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
626 if (d == NULL) {
627 printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
628 return;
629 }
630
631 spin_lock_irqsave(&d->lock, flags);
632
633 if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
634 spin_unlock_irqrestore(&d->lock, flags);
635 return;
636 }
637
63e9cc5d 638 d->fw_ver = be16_to_cpu(ch->fwver);
1da177e4
LT
639
640 /* we get here only if the device is new */
641 sl = aoecmd_ata_id(d);
642
643 spin_unlock_irqrestore(&d->lock, flags);
644
645 aoenet_xmit(sl);
646}
647