Commit | Line | Data |
---|---|---|
ca47bbd9 | 1 | /* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */ |
1da177e4 LT |
2 | /* |
3 | * aoedev.c | |
4 | * AoE device utility functions; maintains device list. | |
5 | */ | |
6 | ||
7 | #include <linux/hdreg.h> | |
8 | #include <linux/blkdev.h> | |
9 | #include <linux/netdevice.h> | |
9bb237b6 | 10 | #include <linux/delay.h> |
5a0e3ad6 | 11 | #include <linux/slab.h> |
0c966214 EC |
12 | #include <linux/bitmap.h> |
13 | #include <linux/kdev_t.h> | |
4bcce1a3 | 14 | #include <linux/moduleparam.h> |
a88c1f0c | 15 | #include <linux/string.h> |
1da177e4 LT |
16 | #include "aoe.h" |
17 | ||
262bf541 | 18 | static void dummy_timer(ulong); |
9bb237b6 EC |
19 | static void freetgt(struct aoedev *d, struct aoetgt *t); |
20 | static void skbpoolfree(struct aoedev *d); | |
262bf541 | 21 | |
08b60623 | 22 | static int aoe_dyndevs = 1; |
4bcce1a3 EC |
23 | module_param(aoe_dyndevs, int, 0644); |
24 | MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices."); | |
25 | ||
1da177e4 | 26 | static struct aoedev *devlist; |
476aed38 | 27 | static DEFINE_SPINLOCK(devlist_lock); |
1da177e4 | 28 | |
0c966214 EC |
29 | /* Because some systems will have one, many, or no |
30 | * - partitions, | |
31 | * - slots per shelf, | |
32 | * - or shelves, | |
33 | * we need some flexibility in the way the minor numbers | |
34 | * are allocated. So they are dynamic. | |
69cf2d85 | 35 | */ |
0c966214 EC |
36 | #define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS) |
37 | ||
38 | static DEFINE_SPINLOCK(used_minors_lock); | |
39 | static DECLARE_BITMAP(used_minors, N_DEVS); | |
40 | ||
41 | static int | |
4bcce1a3 | 42 | minor_get_dyn(ulong *sysminor) |
1da177e4 | 43 | { |
1da177e4 | 44 | ulong flags; |
0c966214 EC |
45 | ulong n; |
46 | int error = 0; | |
47 | ||
48 | spin_lock_irqsave(&used_minors_lock, flags); | |
49 | n = find_first_zero_bit(used_minors, N_DEVS); | |
50 | if (n < N_DEVS) | |
51 | set_bit(n, used_minors); | |
52 | else | |
53 | error = -1; | |
54 | spin_unlock_irqrestore(&used_minors_lock, flags); | |
55 | ||
4bcce1a3 | 56 | *sysminor = n * AOE_PARTITIONS; |
0c966214 EC |
57 | return error; |
58 | } | |
1da177e4 | 59 | |
4bcce1a3 EC |
/* Reserve the fixed minor slot implied by the AoE address (aoemaj.aoemin)
 * for backwards compatibility when aoe_dyndevs is disabled.
 * Returns 0 on success with *sysminor set; -1 when the slot number is out
 * of range or already taken (an error is logged in each case).
 */
static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
	ulong flags;
	ulong n;
	int error = 0;
	enum {
		/* for backwards compatibility when !aoe_dyndevs,
		 * a static number of supported slots per shelf */
		NPERSHELF = 16,
	};

	if (aoemin >= NPERSHELF) {
		pr_err("aoe: %s %d slots per shelf\n",
			"static minor device numbers support only",
			NPERSHELF);
		error = -1;
		goto out;
	}

	/* static mapping: shelf * slots-per-shelf + slot */
	n = aoemaj * NPERSHELF + aoemin;
	if (n >= N_DEVS) {
		pr_err("aoe: %s with e%ld.%d\n",
			"cannot use static minor device numbers",
			aoemaj, aoemin);
		error = -1;
		goto out;
	}

	spin_lock_irqsave(&used_minors_lock, flags);
	if (test_bit(n, used_minors)) {
		pr_err("aoe: %s %lu\n",
			"existing device already has static minor number",
			n);
		error = -1;
	} else
		set_bit(n, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
	/* written even on the already-taken error path; callers must
	 * check the return value before trusting *sysminor */
	*sysminor = n * AOE_PARTITIONS;
out:
	return error;
}
102 | ||
103 | static int | |
104 | minor_get(ulong *sysminor, ulong aoemaj, int aoemin) | |
105 | { | |
106 | if (aoe_dyndevs) | |
107 | return minor_get_dyn(sysminor); | |
108 | else | |
109 | return minor_get_static(sysminor, aoemaj, aoemin); | |
110 | } | |
111 | ||
0c966214 EC |
112 | static void |
113 | minor_free(ulong minor) | |
114 | { | |
115 | ulong flags; | |
1da177e4 | 116 | |
0c966214 EC |
117 | minor /= AOE_PARTITIONS; |
118 | BUG_ON(minor >= N_DEVS); | |
1da177e4 | 119 | |
0c966214 EC |
120 | spin_lock_irqsave(&used_minors_lock, flags); |
121 | BUG_ON(!test_bit(minor, used_minors)); | |
122 | clear_bit(minor, used_minors); | |
123 | spin_unlock_irqrestore(&used_minors_lock, flags); | |
1da177e4 LT |
124 | } |
125 | ||
0c966214 EC |
126 | /* |
127 | * Users who grab a pointer to the device with aoedev_by_aoeaddr | |
128 | * automatically get a reference count and must be responsible | |
129 | * for performing a aoedev_put. With the addition of async | |
130 | * kthread processing I'm no longer confident that we can | |
131 | * guarantee consistency in the face of device flushes. | |
132 | * | |
133 | * For the time being, we only bother to add extra references for | |
134 | * frames sitting on the iocq. When the kthreads finish processing | |
135 | * these frames, they will aoedev_put the device. | |
136 | */ | |
137 | ||
69cf2d85 EC |
/* Drop a reference taken by aoedev_by_aoeaddr().  The count is
 * protected by devlist_lock; a device with ref == 0 becomes eligible
 * for teardown by flush() (which skips devices with d->ref set).
 */
void
aoedev_put(struct aoedev *d)
{
	ulong flags;

	spin_lock_irqsave(&devlist_lock, flags);
	d->ref--;
	spin_unlock_irqrestore(&devlist_lock, flags);
}
147 | ||
3ae1c24e EC |
/* Per-device keepalive timer callback: re-arms itself once a second
 * until the device is marked DEVFL_TKILL, after which it simply stops
 * (freedev() later del_timer_sync()s it).
 */
static void
dummy_timer(ulong vp)
{
	struct aoedev *d;

	d = (struct aoedev *)vp;
	if (d->flags & DEVFL_TKILL)
		return;
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
}
159 | ||
69cf2d85 EC |
/* Fail the in-process request, if any: fail the active buf, then walk
 * the not-yet-issued bios hanging off d->ip, marking each with -EIO.
 * rq->special is used as a count of unfinished bios for the request
 * (decremented here per bio); when it reaches zero the whole request
 * is completed with an error via aoe_end_request().
 */
static void
aoe_failip(struct aoedev *d)
{
	struct request *rq;
	struct bio *bio;
	unsigned long n;

	aoe_failbuf(d, d->ip.buf);

	rq = d->ip.rq;
	if (rq == NULL)
		return;
	while ((bio = d->ip.nxbio)) {
		bio->bi_error = -EIO;
		d->ip.nxbio = bio->bi_next;
		/* one fewer bio outstanding on this request */
		n = (unsigned long) rq->special;
		rq->special = (void *) --n;
	}
	if ((unsigned long) rq->special == 0)
		aoe_end_request(d, rq, 0);
}
181 | ||
3fc9b032 EC |
182 | static void |
183 | downdev_frame(struct list_head *pos) | |
184 | { | |
185 | struct frame *f; | |
186 | ||
187 | f = list_entry(pos, struct frame, head); | |
188 | list_del(pos); | |
189 | if (f->buf) { | |
190 | f->buf->nframesout--; | |
191 | aoe_failbuf(f->t->d, f->buf); | |
192 | } | |
193 | aoe_freetframe(f); | |
194 | } | |
195 | ||
896831f5 EC |
/* Take a device down: clear DEVFL_UP, fail every outstanding and
 * queued frame/request, reset per-target congestion windows, and zero
 * the gendisk capacity.  Caller is expected to hold d->lock (this
 * routine only manipulates device state, it takes no locks itself).
 */
void
aoedev_downdev(struct aoedev *d)
{
	struct aoetgt *t, **tt, **te;
	struct list_head *head, *pos, *nx;
	struct request *rq;
	int i;

	d->flags &= ~DEVFL_UP;

	/* clean out active and to-be-retransmitted buffers */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head)
			downdev_frame(pos);
	}
	head = &d->rexmitq;
	list_for_each_safe(pos, nx, head)
		downdev_frame(pos);

	/* reset window dressings */
	tt = d->targets;
	te = tt + d->ntargets;
	for (; tt < te && (t = *tt); tt++) {
		aoecmd_wreset(t);
		t->nout = 0;
	}

	/* clean out the in-process request (if any) */
	aoe_failip(d);

	/* fast fail all pending I/O */
	if (d->blkq) {
		while ((rq = blk_peek_request(d->blkq))) {
			blk_start_request(rq);
			aoe_end_request(d, rq, 1);
		}
	}

	if (d->gd)
		set_capacity(d->gd, 0);
}
238 | ||
4ba9aa7f EC |
239 | /* return whether the user asked for this particular |
240 | * device to be flushed | |
241 | */ | |
242 | static int | |
243 | user_req(char *s, size_t slen, struct aoedev *d) | |
244 | { | |
a88c1f0c | 245 | const char *p; |
4ba9aa7f EC |
246 | size_t lim; |
247 | ||
248 | if (!d->gd) | |
249 | return 0; | |
a88c1f0c | 250 | p = kbasename(d->gd->disk_name); |
4ba9aa7f EC |
251 | lim = sizeof(d->gd->disk_name); |
252 | lim -= p - d->gd->disk_name; | |
253 | if (slen < lim) | |
254 | lim = slen; | |
255 | ||
256 | return !strncmp(s, p, lim); | |
257 | } | |
258 | ||
e52a2932 EC |
/* Tear down a device marked DEVFL_TKILL.  May sleep (del_timer_sync,
 * del_gendisk, etc.), so it must be called without spinlocks held.
 * The DEVFL_FREEING flag guards against two callers freeing the same
 * device concurrently; DEVFL_FREED marks the device as ready to be
 * unlinked and kfree'd by flush() pass three.
 */
static void
freedev(struct aoedev *d)
{
	struct aoetgt **t, **e;
	int freeing = 0;
	unsigned long flags;

	/* atomically claim the right to free this device */
	spin_lock_irqsave(&d->lock, flags);
	if (d->flags & DEVFL_TKILL
	&& !(d->flags & DEVFL_FREEING)) {
		d->flags |= DEVFL_FREEING;
		freeing = 1;
	}
	spin_unlock_irqrestore(&d->lock, flags);
	if (!freeing)
		return;

	del_timer_sync(&d->timer);
	if (d->gd) {
		aoedisk_rm_debugfs(d);
		aoedisk_rm_sysfs(d);
		del_gendisk(d->gd);
		put_disk(d->gd);
		blk_cleanup_queue(d->blkq);
	}
	t = d->targets;
	e = t + d->ntargets;
	for (; t < e && *t; t++)
		freetgt(d, *t);
	if (d->bufpool)
		mempool_destroy(d->bufpool);
	skbpoolfree(d);
	minor_free(d->sysminor);

	spin_lock_irqsave(&d->lock, flags);
	d->flags |= DEVFL_FREED;
	spin_unlock_irqrestore(&d->lock, flags);
}
297 | ||
enum flush_parms {
	NOT_EXITING = 0,
	EXITING = 1,
};

/* Flush (tear down) aoe devices in three passes.
 * str/cnt optionally name a specific device or the literal "all";
 * with neither, only devices that are down, unopened, and unreferenced
 * are flushed.  exiting == EXITING (module unload) takes every device
 * down unconditionally.  Returns 0 or -EFAULT on a bad user pointer.
 */
static int
flush(const char __user *str, size_t cnt, int exiting)
{
	ulong flags;
	struct aoedev *d, **dd;
	char buf[16];
	int all = 0;
	int specified = 0;	/* flush a specific device */
	unsigned int skipflags;

	/* devices already being allocated, resized, or killed are skipped */
	skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;

	if (!exiting && cnt >= 3) {
		if (cnt > sizeof buf)
			cnt = sizeof buf;
		if (copy_from_user(buf, str, cnt))
			return -EFAULT;
		all = !strncmp(buf, "all", 3);
		if (!all)
			specified = 1;
	}

	flush_scheduled_work();
	/* pass one: without sleeping, do aoedev_downdev */
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (exiting) {
			/* unconditionally take each device down */
		} else if (specified) {
			if (!user_req(buf, cnt, d))
				goto cont;
		} else if ((!all && (d->flags & DEVFL_UP))
		|| d->flags & skipflags
		|| d->nopen
		|| d->ref)
			goto cont;

		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
cont:
		spin_unlock(&d->lock);
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	/* pass two: call freedev, which might sleep,
	 * for aoedevs marked with DEVFL_TKILL
	 */
restart:
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (d->flags & DEVFL_TKILL
		&& !(d->flags & DEVFL_FREEING)) {
			/* drop both locks before sleeping in freedev(),
			 * then rescan the list from the top */
			spin_unlock(&d->lock);
			spin_unlock_irqrestore(&devlist_lock, flags);
			freedev(d);
			goto restart;
		}
		spin_unlock(&d->lock);
	}

	/* pass three: remove aoedevs marked with DEVFL_FREED
	 * (devlist_lock is still held here when pass two finds nothing
	 * left to free) */
	for (dd = &devlist, d = *dd; d; d = *dd) {
		struct aoedev *doomed = NULL;

		spin_lock(&d->lock);
		if (d->flags & DEVFL_FREED) {
			*dd = d->next;
			doomed = d;
		} else {
			dd = &d->next;
		}
		spin_unlock(&d->lock);
		if (doomed)
			kfree(doomed->targets);
		kfree(doomed);	/* kfree(NULL) is a no-op */
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	return 0;
}
385 | ||
e52a2932 EC |
/* User-triggered flush entry point (from the aoe chardev interface);
 * see flush() for the accepted string syntax and return values.
 */
int
aoedev_flush(const char __user *str, size_t cnt)
{
	return flush(str, cnt, NOT_EXITING);
}
391 | ||
69cf2d85 EC |
/* This has been confirmed to occur once with Tms=3*1000 due to the
 * driver changing link and not processing its transmit ring.  The
 * problem is hard enough to solve by returning an error that I'm
 * still punting on "solving" this.
 */
static void
skbfree(struct sk_buff *skb)
{
	enum { Sms = 250, Tms = 30 * 1000};
	int i = Tms / Sms;

	if (skb == NULL)
		return;
	/* wait (up to Tms total, polling every Sms) for the net layer to
	 * drop its data reference; otherwise freeing would be unsafe */
	while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
		msleep(Sms);
	if (i < 0) {
		/* timed out: deliberately leak rather than free an skb
		 * something else still references */
		printk(KERN_ERR
			"aoe: %s holds ref: %s\n",
			skb->dev ? skb->dev->name : "netif",
			"cannot free skb -- memory leaked.");
		return;
	}
	/* strip any paged fragments before handing the skb back */
	skb->truesize -= skb->data_len;
	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
	skb_trim(skb, 0);
	dev_kfree_skb(skb);
}
419 | ||
/* Free every skb in the device's preallocated pool (each via the
 * careful skbfree() above), then reinitialize the queue head.
 */
static void
skbpoolfree(struct aoedev *d)
{
	struct sk_buff *skb, *tmp;

	skb_queue_walk_safe(&d->skbpool, skb, tmp)
		skbfree(skb);

	__skb_queue_head_init(&d->skbpool);
}
430 | ||
0c966214 | 431 | /* find it or allocate it */ |
1da177e4 | 432 | struct aoedev * |
0c966214 | 433 | aoedev_by_aoeaddr(ulong maj, int min, int do_alloc) |
1da177e4 LT |
434 | { |
435 | struct aoedev *d; | |
64a80f5a | 436 | int i; |
1da177e4 | 437 | ulong flags; |
10935d05 | 438 | ulong sysminor = 0; |
1da177e4 LT |
439 | |
440 | spin_lock_irqsave(&devlist_lock, flags); | |
441 | ||
442 | for (d=devlist; d; d=d->next) | |
0c966214 | 443 | if (d->aoemajor == maj && d->aoeminor == min) { |
e52a2932 EC |
444 | spin_lock(&d->lock); |
445 | if (d->flags & DEVFL_TKILL) { | |
446 | spin_unlock(&d->lock); | |
447 | d = NULL; | |
448 | goto out; | |
449 | } | |
69cf2d85 | 450 | d->ref++; |
e52a2932 | 451 | spin_unlock(&d->lock); |
1da177e4 | 452 | break; |
69cf2d85 | 453 | } |
4bcce1a3 | 454 | if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0) |
68e0d42f EC |
455 | goto out; |
456 | d = kcalloc(1, sizeof *d, GFP_ATOMIC); | |
457 | if (!d) | |
458 | goto out; | |
71114ec4 EC |
459 | d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC); |
460 | if (!d->targets) { | |
461 | kfree(d); | |
31279b14 | 462 | d = NULL; |
71114ec4 EC |
463 | goto out; |
464 | } | |
465 | d->ntargets = NTARGETS; | |
68e0d42f EC |
466 | INIT_WORK(&d->work, aoecmd_sleepwork); |
467 | spin_lock_init(&d->lock); | |
e9bb8fb0 | 468 | skb_queue_head_init(&d->skbpool); |
68e0d42f EC |
469 | init_timer(&d->timer); |
470 | d->timer.data = (ulong) d; | |
471 | d->timer.function = dummy_timer; | |
472 | d->timer.expires = jiffies + HZ; | |
473 | add_timer(&d->timer); | |
474 | d->bufpool = NULL; /* defer to aoeblk_gdalloc */ | |
475 | d->tgt = d->targets; | |
69cf2d85 | 476 | d->ref = 1; |
64a80f5a EC |
477 | for (i = 0; i < NFACTIVE; i++) |
478 | INIT_LIST_HEAD(&d->factive[i]); | |
3a0c40d2 | 479 | INIT_LIST_HEAD(&d->rexmitq); |
68e0d42f | 480 | d->sysminor = sysminor; |
0c966214 EC |
481 | d->aoemajor = maj; |
482 | d->aoeminor = min; | |
3a0c40d2 EC |
483 | d->rttavg = RTTAVG_INIT; |
484 | d->rttdev = RTTDEV_INIT; | |
68e0d42f EC |
485 | d->next = devlist; |
486 | devlist = d; | |
487 | out: | |
3ae1c24e | 488 | spin_unlock_irqrestore(&devlist_lock, flags); |
1da177e4 LT |
489 | return d; |
490 | } | |
491 | ||
/* Free one target: drop the netdev references held by its interfaces,
 * free every frame (and skb) on its free-frame list, then the target
 * itself.  The aoedev argument is currently unused here but kept for
 * interface symmetry with the freetgt(d, t) call sites.
 */
static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
	struct frame *f;
	struct list_head *pos, *nx, *head;
	struct aoeif *ifp;

	/* the ifs array is packed: the first NULL nd marks the end */
	for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
		if (!ifp->nd)
			break;
		dev_put(ifp->nd);
	}

	head = &t->ffree;
	list_for_each_safe(pos, nx, head) {
		list_del(pos);
		f = list_entry(pos, struct frame, head);
		skbfree(f->skb);
		kfree(f);
	}
	kfree(t);
}
514 | ||
1da177e4 LT |
/* Module-unload teardown: drain scheduled work, then force-flush every
 * device.  (flush() repeats flush_scheduled_work() internally; the
 * extra call here is harmless.)
 */
void
aoedev_exit(void)
{
	flush_scheduled_work();
	flush(NULL, 0, EXITING);
}
521 | ||
/* Module-init hook for this file; no per-file setup is needed, the
 * device list and locks are statically initialized above.
 */
int __init
aoedev_init(void)
{
	return 0;
}