Commit | Line | Data |
---|---|---|
fea05a26 | 1 | /* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */ |
1da177e4 LT |
2 | /* |
3 | * aoedev.c | |
4 | * AoE device utility functions; maintains device list. | |
5 | */ | |
6 | ||
7 | #include <linux/hdreg.h> | |
8 | #include <linux/blkdev.h> | |
9 | #include <linux/netdevice.h> | |
9bb237b6 | 10 | #include <linux/delay.h> |
5a0e3ad6 | 11 | #include <linux/slab.h> |
0c966214 EC |
12 | #include <linux/bitmap.h> |
13 | #include <linux/kdev_t.h> | |
1da177e4 LT |
14 | #include "aoe.h" |
15 | ||
262bf541 EC |
16 | static void dummy_timer(ulong); |
17 | static void aoedev_freedev(struct aoedev *); | |
9bb237b6 EC |
18 | static void freetgt(struct aoedev *d, struct aoetgt *t); |
19 | static void skbpoolfree(struct aoedev *d); | |
262bf541 | 20 | |
1da177e4 | 21 | static struct aoedev *devlist; |
476aed38 | 22 | static DEFINE_SPINLOCK(devlist_lock); |
1da177e4 | 23 | |
0c966214 EC |
24 | /* Because some systems will have one, many, or no |
25 | * - partitions, | |
26 | * - slots per shelf, | |
27 | * - or shelves, | |
28 | * we need some flexibility in the way the minor numbers | |
29 | * are allocated. So they are dynamic. | |
69cf2d85 | 30 | */ |
0c966214 EC |
31 | #define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS) |
32 | ||
33 | static DEFINE_SPINLOCK(used_minors_lock); | |
34 | static DECLARE_BITMAP(used_minors, N_DEVS); | |
35 | ||
36 | static int | |
37 | minor_get(ulong *minor) | |
1da177e4 | 38 | { |
1da177e4 | 39 | ulong flags; |
0c966214 EC |
40 | ulong n; |
41 | int error = 0; | |
42 | ||
43 | spin_lock_irqsave(&used_minors_lock, flags); | |
44 | n = find_first_zero_bit(used_minors, N_DEVS); | |
45 | if (n < N_DEVS) | |
46 | set_bit(n, used_minors); | |
47 | else | |
48 | error = -1; | |
49 | spin_unlock_irqrestore(&used_minors_lock, flags); | |
50 | ||
51 | *minor = n * AOE_PARTITIONS; | |
52 | return error; | |
53 | } | |
1da177e4 | 54 | |
0c966214 EC |
55 | static void |
56 | minor_free(ulong minor) | |
57 | { | |
58 | ulong flags; | |
1da177e4 | 59 | |
0c966214 EC |
60 | minor /= AOE_PARTITIONS; |
61 | BUG_ON(minor >= N_DEVS); | |
1da177e4 | 62 | |
0c966214 EC |
63 | spin_lock_irqsave(&used_minors_lock, flags); |
64 | BUG_ON(!test_bit(minor, used_minors)); | |
65 | clear_bit(minor, used_minors); | |
66 | spin_unlock_irqrestore(&used_minors_lock, flags); | |
1da177e4 LT |
67 | } |
68 | ||
0c966214 EC |
/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and must be responsible
 * for performing an aoedev_put.  With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq.  When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */
80 | ||
69cf2d85 EC |
81 | void |
82 | aoedev_put(struct aoedev *d) | |
83 | { | |
84 | ulong flags; | |
85 | ||
86 | spin_lock_irqsave(&devlist_lock, flags); | |
87 | d->ref--; | |
88 | spin_unlock_irqrestore(&devlist_lock, flags); | |
89 | } | |
90 | ||
3ae1c24e EC |
91 | static void |
92 | dummy_timer(ulong vp) | |
93 | { | |
94 | struct aoedev *d; | |
95 | ||
96 | d = (struct aoedev *)vp; | |
97 | if (d->flags & DEVFL_TKILL) | |
98 | return; | |
99 | d->timer.expires = jiffies + HZ; | |
100 | add_timer(&d->timer); | |
101 | } | |
102 | ||
69cf2d85 EC |
103 | static void |
104 | aoe_failip(struct aoedev *d) | |
1da177e4 | 105 | { |
69cf2d85 | 106 | struct request *rq; |
1da177e4 | 107 | struct bio *bio; |
69cf2d85 EC |
108 | unsigned long n; |
109 | ||
110 | aoe_failbuf(d, d->ip.buf); | |
1da177e4 | 111 | |
69cf2d85 EC |
112 | rq = d->ip.rq; |
113 | if (rq == NULL) | |
896831f5 | 114 | return; |
69cf2d85 EC |
115 | while ((bio = d->ip.nxbio)) { |
116 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | |
117 | d->ip.nxbio = bio->bi_next; | |
118 | n = (unsigned long) rq->special; | |
119 | rq->special = (void *) --n; | |
1da177e4 | 120 | } |
69cf2d85 EC |
121 | if ((unsigned long) rq->special == 0) |
122 | aoe_end_request(d, rq, 0); | |
896831f5 EC |
123 | } |
124 | ||
125 | void | |
126 | aoedev_downdev(struct aoedev *d) | |
127 | { | |
128 | struct aoetgt *t, **tt, **te; | |
129 | struct frame *f; | |
130 | struct list_head *head, *pos, *nx; | |
69cf2d85 | 131 | struct request *rq; |
896831f5 EC |
132 | int i; |
133 | ||
69cf2d85 EC |
134 | d->flags &= ~DEVFL_UP; |
135 | ||
64a80f5a EC |
136 | /* clean out active buffers */ |
137 | for (i = 0; i < NFACTIVE; i++) { | |
138 | head = &d->factive[i]; | |
139 | list_for_each_safe(pos, nx, head) { | |
140 | f = list_entry(pos, struct frame, head); | |
141 | list_del(pos); | |
142 | if (f->buf) { | |
143 | f->buf->nframesout--; | |
144 | aoe_failbuf(d, f->buf); | |
145 | } | |
146 | aoe_freetframe(f); | |
147 | } | |
148 | } | |
149 | /* reset window dressings */ | |
896831f5 EC |
150 | tt = d->targets; |
151 | te = tt + NTARGETS; | |
152 | for (; tt < te && (t = *tt); tt++) { | |
896831f5 EC |
153 | t->maxout = t->nframes; |
154 | t->nout = 0; | |
155 | } | |
156 | ||
69cf2d85 EC |
157 | /* clean out the in-process request (if any) */ |
158 | aoe_failip(d); | |
68e0d42f | 159 | d->htgt = NULL; |
1da177e4 | 160 | |
69cf2d85 EC |
161 | /* fast fail all pending I/O */ |
162 | if (d->blkq) { | |
163 | while ((rq = blk_peek_request(d->blkq))) { | |
164 | blk_start_request(rq); | |
165 | aoe_end_request(d, rq, 1); | |
166 | } | |
1da177e4 LT |
167 | } |
168 | ||
1da177e4 | 169 | if (d->gd) |
80795aef | 170 | set_capacity(d->gd, 0); |
1da177e4 LT |
171 | } |
172 | ||
262bf541 EC |
173 | static void |
174 | aoedev_freedev(struct aoedev *d) | |
175 | { | |
176 | struct aoetgt **t, **e; | |
177 | ||
5ad21a33 | 178 | cancel_work_sync(&d->work); |
262bf541 EC |
179 | if (d->gd) { |
180 | aoedisk_rm_sysfs(d); | |
181 | del_gendisk(d->gd); | |
182 | put_disk(d->gd); | |
69cf2d85 | 183 | blk_cleanup_queue(d->blkq); |
262bf541 EC |
184 | } |
185 | t = d->targets; | |
186 | e = t + NTARGETS; | |
187 | for (; t < e && *t; t++) | |
9bb237b6 | 188 | freetgt(d, *t); |
262bf541 EC |
189 | if (d->bufpool) |
190 | mempool_destroy(d->bufpool); | |
9bb237b6 | 191 | skbpoolfree(d); |
0c966214 | 192 | minor_free(d->sysminor); |
262bf541 EC |
193 | kfree(d); |
194 | } | |
195 | ||
196 | int | |
197 | aoedev_flush(const char __user *str, size_t cnt) | |
198 | { | |
199 | ulong flags; | |
200 | struct aoedev *d, **dd; | |
201 | struct aoedev *rmd = NULL; | |
202 | char buf[16]; | |
203 | int all = 0; | |
204 | ||
205 | if (cnt >= 3) { | |
206 | if (cnt > sizeof buf) | |
207 | cnt = sizeof buf; | |
208 | if (copy_from_user(buf, str, cnt)) | |
209 | return -EFAULT; | |
210 | all = !strncmp(buf, "all", 3); | |
211 | } | |
212 | ||
262bf541 EC |
213 | spin_lock_irqsave(&devlist_lock, flags); |
214 | dd = &devlist; | |
215 | while ((d = *dd)) { | |
216 | spin_lock(&d->lock); | |
217 | if ((!all && (d->flags & DEVFL_UP)) | |
218 | || (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) | |
69cf2d85 EC |
219 | || d->nopen |
220 | || d->ref) { | |
262bf541 EC |
221 | spin_unlock(&d->lock); |
222 | dd = &d->next; | |
223 | continue; | |
224 | } | |
225 | *dd = d->next; | |
226 | aoedev_downdev(d); | |
227 | d->flags |= DEVFL_TKILL; | |
228 | spin_unlock(&d->lock); | |
229 | d->next = rmd; | |
230 | rmd = d; | |
231 | } | |
232 | spin_unlock_irqrestore(&devlist_lock, flags); | |
233 | while ((d = rmd)) { | |
234 | rmd = d->next; | |
235 | del_timer_sync(&d->timer); | |
236 | aoedev_freedev(d); /* must be able to sleep */ | |
237 | } | |
238 | return 0; | |
239 | } | |
240 | ||
69cf2d85 EC |
241 | /* This has been confirmed to occur once with Tms=3*1000 due to the |
242 | * driver changing link and not processing its transmit ring. The | |
243 | * problem is hard enough to solve by returning an error that I'm | |
244 | * still punting on "solving" this. | |
245 | */ | |
9bb237b6 EC |
246 | static void |
247 | skbfree(struct sk_buff *skb) | |
248 | { | |
69cf2d85 | 249 | enum { Sms = 250, Tms = 30 * 1000}; |
9bb237b6 EC |
250 | int i = Tms / Sms; |
251 | ||
252 | if (skb == NULL) | |
253 | return; | |
254 | while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0) | |
255 | msleep(Sms); | |
94873111 | 256 | if (i < 0) { |
9bb237b6 EC |
257 | printk(KERN_ERR |
258 | "aoe: %s holds ref: %s\n", | |
259 | skb->dev ? skb->dev->name : "netif", | |
260 | "cannot free skb -- memory leaked."); | |
261 | return; | |
262 | } | |
3d5b0605 | 263 | skb->truesize -= skb->data_len; |
9bb237b6 EC |
264 | skb_shinfo(skb)->nr_frags = skb->data_len = 0; |
265 | skb_trim(skb, 0); | |
266 | dev_kfree_skb(skb); | |
267 | } | |
268 | ||
269 | static void | |
270 | skbpoolfree(struct aoedev *d) | |
271 | { | |
e9bb8fb0 | 272 | struct sk_buff *skb, *tmp; |
9bb237b6 | 273 | |
e9bb8fb0 | 274 | skb_queue_walk_safe(&d->skbpool, skb, tmp) |
9bb237b6 | 275 | skbfree(skb); |
e9bb8fb0 DM |
276 | |
277 | __skb_queue_head_init(&d->skbpool); | |
9bb237b6 EC |
278 | } |
279 | ||
0c966214 | 280 | /* find it or allocate it */ |
1da177e4 | 281 | struct aoedev * |
0c966214 | 282 | aoedev_by_aoeaddr(ulong maj, int min, int do_alloc) |
1da177e4 LT |
283 | { |
284 | struct aoedev *d; | |
64a80f5a | 285 | int i; |
1da177e4 | 286 | ulong flags; |
0c966214 | 287 | ulong sysminor; |
1da177e4 LT |
288 | |
289 | spin_lock_irqsave(&devlist_lock, flags); | |
290 | ||
291 | for (d=devlist; d; d=d->next) | |
0c966214 | 292 | if (d->aoemajor == maj && d->aoeminor == min) { |
69cf2d85 | 293 | d->ref++; |
1da177e4 | 294 | break; |
69cf2d85 | 295 | } |
0c966214 | 296 | if (d || !do_alloc || minor_get(&sysminor) < 0) |
68e0d42f EC |
297 | goto out; |
298 | d = kcalloc(1, sizeof *d, GFP_ATOMIC); | |
299 | if (!d) | |
300 | goto out; | |
301 | INIT_WORK(&d->work, aoecmd_sleepwork); | |
302 | spin_lock_init(&d->lock); | |
e9bb8fb0 | 303 | skb_queue_head_init(&d->skbpool); |
68e0d42f EC |
304 | init_timer(&d->timer); |
305 | d->timer.data = (ulong) d; | |
306 | d->timer.function = dummy_timer; | |
307 | d->timer.expires = jiffies + HZ; | |
308 | add_timer(&d->timer); | |
309 | d->bufpool = NULL; /* defer to aoeblk_gdalloc */ | |
310 | d->tgt = d->targets; | |
69cf2d85 | 311 | d->ref = 1; |
64a80f5a EC |
312 | for (i = 0; i < NFACTIVE; i++) |
313 | INIT_LIST_HEAD(&d->factive[i]); | |
68e0d42f | 314 | d->sysminor = sysminor; |
0c966214 EC |
315 | d->aoemajor = maj; |
316 | d->aoeminor = min; | |
68e0d42f EC |
317 | d->mintimer = MINTIMER; |
318 | d->next = devlist; | |
319 | devlist = d; | |
320 | out: | |
3ae1c24e | 321 | spin_unlock_irqrestore(&devlist_lock, flags); |
1da177e4 LT |
322 | return d; |
323 | } | |
324 | ||
325 | static void | |
9bb237b6 | 326 | freetgt(struct aoedev *d, struct aoetgt *t) |
1da177e4 | 327 | { |
896831f5 EC |
328 | struct frame *f; |
329 | struct list_head *pos, *nx, *head; | |
1b86fda9 EC |
330 | struct aoeif *ifp; |
331 | ||
332 | for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) { | |
333 | if (!ifp->nd) | |
334 | break; | |
335 | dev_put(ifp->nd); | |
336 | } | |
e407a7f6 | 337 | |
896831f5 EC |
338 | head = &t->ffree; |
339 | list_for_each_safe(pos, nx, head) { | |
340 | list_del(pos); | |
341 | f = list_entry(pos, struct frame, head); | |
9bb237b6 | 342 | skbfree(f->skb); |
896831f5 EC |
343 | kfree(f); |
344 | } | |
68e0d42f EC |
345 | kfree(t); |
346 | } | |
347 | ||
1da177e4 LT |
348 | void |
349 | aoedev_exit(void) | |
350 | { | |
351 | struct aoedev *d; | |
352 | ulong flags; | |
353 | ||
69cf2d85 | 354 | aoe_flush_iocq(); |
1da177e4 LT |
355 | while ((d = devlist)) { |
356 | devlist = d->next; | |
357 | ||
358 | spin_lock_irqsave(&d->lock, flags); | |
359 | aoedev_downdev(d); | |
3ae1c24e | 360 | d->flags |= DEVFL_TKILL; |
1da177e4 LT |
361 | spin_unlock_irqrestore(&d->lock, flags); |
362 | ||
363 | del_timer_sync(&d->timer); | |
364 | aoedev_freedev(d); | |
365 | } | |
366 | } | |
367 | ||
368 | int __init | |
369 | aoedev_init(void) | |
370 | { | |
1da177e4 LT |
371 | return 0; |
372 | } |