/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2013 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Intel MIC Host driver.
 */
21 #include <linux/pci.h>
22 #include <linux/sched.h>
23 #include <linux/uaccess.h>
25 #include <linux/mic_common.h>
26 #include "../common/mic_dev.h"
27 #include "mic_device.h"
29 #include "mic_virtio.h"
32 * Initiates the copies across the PCIe bus from card memory to
33 * a user space buffer.
35 static int mic_virtio_copy_to_user(struct mic_vdev
*mvdev
,
36 void __user
*ubuf
, size_t len
, u64 addr
)
39 void __iomem
*dbuf
= mvdev
->mdev
->aper
.va
+ addr
;
41 * We are copying from IO below an should ideally use something
42 * like copy_to_user_fromio(..) if it existed.
44 if (copy_to_user(ubuf
, dbuf
, len
)) {
46 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
47 __func__
, __LINE__
, err
);
50 mvdev
->in_bytes
+= len
;
57 * Initiates copies across the PCIe bus from a user space
58 * buffer to card memory.
60 static int mic_virtio_copy_from_user(struct mic_vdev
*mvdev
,
61 void __user
*ubuf
, size_t len
, u64 addr
)
64 void __iomem
*dbuf
= mvdev
->mdev
->aper
.va
+ addr
;
66 * We are copying to IO below and should ideally use something
67 * like copy_from_user_toio(..) if it existed.
69 if (copy_from_user(dbuf
, ubuf
, len
)) {
71 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
72 __func__
, __LINE__
, err
);
75 mvdev
->out_bytes
+= len
;
81 #define MIC_VRINGH_READ true
83 /* The function to call to notify the card about added buffers */
84 static void mic_notify(struct vringh
*vrh
)
86 struct mic_vringh
*mvrh
= container_of(vrh
, struct mic_vringh
, vrh
);
87 struct mic_vdev
*mvdev
= mvrh
->mvdev
;
88 s8 db
= mvdev
->dc
->h2c_vdev_db
;
91 mvdev
->mdev
->ops
->send_intr(mvdev
->mdev
, db
);
94 /* Determine the total number of bytes consumed in a VRINGH KIOV */
95 static inline u32
mic_vringh_iov_consumed(struct vringh_kiov
*iov
)
98 u32 total
= iov
->consumed
;
100 for (i
= 0; i
< iov
->i
; i
++)
101 total
+= iov
->iov
[i
].iov_len
;
106 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
107 * This API is heavily based on the vringh_iov_xfer(..) implementation
108 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
109 * and vringh_iov_push_kern(..) directly is because there is no
110 * way to override the VRINGH xfer(..) routines as of v3.10.
112 static int mic_vringh_copy(struct mic_vdev
*mvdev
, struct vringh_kiov
*iov
,
113 void __user
*ubuf
, size_t len
, bool read
, size_t *out_len
)
116 size_t partlen
, tot_len
= 0;
118 while (len
&& iov
->i
< iov
->used
) {
119 partlen
= min(iov
->iov
[iov
->i
].iov_len
, len
);
121 ret
= mic_virtio_copy_to_user(mvdev
,
123 (u64
)iov
->iov
[iov
->i
].iov_base
);
125 ret
= mic_virtio_copy_from_user(mvdev
,
127 (u64
)iov
->iov
[iov
->i
].iov_base
);
129 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
130 __func__
, __LINE__
, ret
);
136 iov
->consumed
+= partlen
;
137 iov
->iov
[iov
->i
].iov_len
-= partlen
;
138 iov
->iov
[iov
->i
].iov_base
+= partlen
;
139 if (!iov
->iov
[iov
->i
].iov_len
) {
140 /* Fix up old iov element then increment. */
141 iov
->iov
[iov
->i
].iov_len
= iov
->consumed
;
142 iov
->iov
[iov
->i
].iov_base
-= iov
->consumed
;
153 * Use the standard VRINGH infrastructure in the kernel to fetch new
154 * descriptors, initiate the copies and update the used ring.
156 static int _mic_virtio_copy(struct mic_vdev
*mvdev
,
157 struct mic_copy_desc
*copy
)
159 int ret
= 0, iovcnt
= copy
->iovcnt
;
161 struct iovec __user
*u_iov
= copy
->iov
;
162 void __user
*ubuf
= NULL
;
163 struct mic_vringh
*mvr
= &mvdev
->mvr
[copy
->vr_idx
];
164 struct vringh_kiov
*riov
= &mvr
->riov
;
165 struct vringh_kiov
*wiov
= &mvr
->wiov
;
166 struct vringh
*vrh
= &mvr
->vrh
;
167 u16
*head
= &mvr
->head
;
168 struct mic_vring
*vr
= &mvr
->vring
;
169 size_t len
= 0, out_len
;
172 /* Fetch a new IOVEC if all previous elements have been processed */
173 if (riov
->i
== riov
->used
&& wiov
->i
== wiov
->used
) {
174 ret
= vringh_getdesc_kern(vrh
, riov
, wiov
,
176 /* Check if there are available descriptors */
182 /* Copy over a new iovec from user space. */
183 ret
= copy_from_user(&iov
, u_iov
, sizeof(*u_iov
));
186 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
187 __func__
, __LINE__
, ret
);
193 /* Issue all the read descriptors first */
194 ret
= mic_vringh_copy(mvdev
, riov
, ubuf
, len
,
195 MIC_VRINGH_READ
, &out_len
);
197 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
198 __func__
, __LINE__
, ret
);
203 copy
->out_len
+= out_len
;
204 /* Issue the write descriptors next */
205 ret
= mic_vringh_copy(mvdev
, wiov
, ubuf
, len
,
206 !MIC_VRINGH_READ
, &out_len
);
208 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
209 __func__
, __LINE__
, ret
);
214 copy
->out_len
+= out_len
;
216 /* One user space iovec is now completed */
220 /* Exit loop if all elements in KIOVs have been processed. */
221 if (riov
->i
== riov
->used
&& wiov
->i
== wiov
->used
)
225 * Update the used ring if a descriptor was available and some data was
226 * copied in/out and the user asked for a used ring update.
228 if (*head
!= USHRT_MAX
&& copy
->out_len
&& copy
->update_used
) {
231 /* Determine the total data consumed */
232 total
+= mic_vringh_iov_consumed(riov
);
233 total
+= mic_vringh_iov_consumed(wiov
);
234 vringh_complete_kern(vrh
, *head
, total
);
236 if (vringh_need_notify_kern(vrh
) > 0)
238 vringh_kiov_cleanup(riov
);
239 vringh_kiov_cleanup(wiov
);
240 /* Update avail idx for user space */
241 vr
->info
->avail_idx
= vrh
->last_avail_idx
;
246 static inline int mic_verify_copy_args(struct mic_vdev
*mvdev
,
247 struct mic_copy_desc
*copy
)
249 if (copy
->vr_idx
>= mvdev
->dd
->num_vq
) {
250 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
251 __func__
, __LINE__
, -EINVAL
);
257 /* Copy a specified number of virtio descriptors in a chain */
258 int mic_virtio_copy_desc(struct mic_vdev
*mvdev
,
259 struct mic_copy_desc
*copy
)
262 struct mic_vringh
*mvr
= &mvdev
->mvr
[copy
->vr_idx
];
264 err
= mic_verify_copy_args(mvdev
, copy
);
268 mutex_lock(&mvr
->vr_mutex
);
269 if (!mic_vdevup(mvdev
)) {
271 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
272 __func__
, __LINE__
, err
);
275 err
= _mic_virtio_copy(mvdev
, copy
);
277 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
278 __func__
, __LINE__
, err
);
281 mutex_unlock(&mvr
->vr_mutex
);
285 static void mic_virtio_init_post(struct mic_vdev
*mvdev
)
287 struct mic_vqconfig
*vqconfig
= mic_vq_config(mvdev
->dd
);
290 for (i
= 0; i
< mvdev
->dd
->num_vq
; i
++) {
291 if (!le64_to_cpu(vqconfig
[i
].used_address
)) {
292 dev_warn(mic_dev(mvdev
), "used_address zero??\n");
295 mvdev
->mvr
[i
].vrh
.vring
.used
=
296 mvdev
->mdev
->aper
.va
+
297 le64_to_cpu(vqconfig
[i
].used_address
);
300 mvdev
->dc
->used_address_updated
= 0;
302 dev_dbg(mic_dev(mvdev
), "%s: device type %d LINKUP\n",
303 __func__
, mvdev
->virtio_id
);
306 static inline void mic_virtio_device_reset(struct mic_vdev
*mvdev
)
310 dev_dbg(mic_dev(mvdev
), "%s: status %d device type %d RESET\n",
311 __func__
, mvdev
->dd
->status
, mvdev
->virtio_id
);
313 for (i
= 0; i
< mvdev
->dd
->num_vq
; i
++)
315 * Avoid lockdep false positive. The + 1 is for the mic
316 * mutex which is held in the reset devices code path.
318 mutex_lock_nested(&mvdev
->mvr
[i
].vr_mutex
, i
+ 1);
320 /* 0 status means "reset" */
321 mvdev
->dd
->status
= 0;
322 mvdev
->dc
->vdev_reset
= 0;
323 mvdev
->dc
->host_ack
= 1;
325 for (i
= 0; i
< mvdev
->dd
->num_vq
; i
++) {
326 struct vringh
*vrh
= &mvdev
->mvr
[i
].vrh
;
327 mvdev
->mvr
[i
].vring
.info
->avail_idx
= 0;
329 vrh
->last_avail_idx
= 0;
330 vrh
->last_used_idx
= 0;
333 for (i
= 0; i
< mvdev
->dd
->num_vq
; i
++)
334 mutex_unlock(&mvdev
->mvr
[i
].vr_mutex
);
337 void mic_virtio_reset_devices(struct mic_device
*mdev
)
339 struct list_head
*pos
, *tmp
;
340 struct mic_vdev
*mvdev
;
342 dev_dbg(mdev
->sdev
->parent
, "%s\n", __func__
);
344 list_for_each_safe(pos
, tmp
, &mdev
->vdev_list
) {
345 mvdev
= list_entry(pos
, struct mic_vdev
, list
);
346 mic_virtio_device_reset(mvdev
);
347 mvdev
->poll_wake
= 1;
348 wake_up(&mvdev
->waitq
);
352 void mic_bh_handler(struct work_struct
*work
)
354 struct mic_vdev
*mvdev
= container_of(work
, struct mic_vdev
,
357 if (mvdev
->dc
->used_address_updated
)
358 mic_virtio_init_post(mvdev
);
360 if (mvdev
->dc
->vdev_reset
)
361 mic_virtio_device_reset(mvdev
);
363 mvdev
->poll_wake
= 1;
364 wake_up(&mvdev
->waitq
);
367 static irqreturn_t
mic_virtio_intr_handler(int irq
, void *data
)
369 struct mic_vdev
*mvdev
= data
;
370 struct mic_device
*mdev
= mvdev
->mdev
;
372 mdev
->ops
->ack_interrupt(mdev
);
373 schedule_work(&mvdev
->virtio_bh_work
);
377 int mic_virtio_config_change(struct mic_vdev
*mvdev
,
380 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake
);
381 int ret
= 0, retry
= 100, i
;
382 struct mic_bootparam
*bootparam
= mvdev
->mdev
->dp
;
383 s8 db
= bootparam
->h2c_config_db
;
385 mutex_lock(&mvdev
->mdev
->mic_mutex
);
386 for (i
= 0; i
< mvdev
->dd
->num_vq
; i
++)
387 mutex_lock_nested(&mvdev
->mvr
[i
].vr_mutex
, i
+ 1);
389 if (db
== -1 || mvdev
->dd
->type
== -1) {
394 if (copy_from_user(mic_vq_configspace(mvdev
->dd
),
395 argp
, mvdev
->dd
->config_len
)) {
396 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
397 __func__
, __LINE__
, -EFAULT
);
401 mvdev
->dc
->config_change
= MIC_VIRTIO_PARAM_CONFIG_CHANGED
;
402 mvdev
->mdev
->ops
->send_intr(mvdev
->mdev
, db
);
404 for (i
= retry
; i
--;) {
405 ret
= wait_event_timeout(wake
,
406 mvdev
->dc
->guest_ack
, msecs_to_jiffies(100));
411 dev_dbg(mic_dev(mvdev
),
412 "%s %d retry: %d\n", __func__
, __LINE__
, retry
);
413 mvdev
->dc
->config_change
= 0;
414 mvdev
->dc
->guest_ack
= 0;
416 for (i
= 0; i
< mvdev
->dd
->num_vq
; i
++)
417 mutex_unlock(&mvdev
->mvr
[i
].vr_mutex
);
418 mutex_unlock(&mvdev
->mdev
->mic_mutex
);
422 static int mic_copy_dp_entry(struct mic_vdev
*mvdev
,
425 struct mic_device_desc
**devpage
)
427 struct mic_device
*mdev
= mvdev
->mdev
;
428 struct mic_device_desc dd
, *dd_config
, *devp
;
429 struct mic_vqconfig
*vqconfig
;
431 bool slot_found
= false;
433 if (copy_from_user(&dd
, argp
, sizeof(dd
))) {
434 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
435 __func__
, __LINE__
, -EFAULT
);
439 if (mic_aligned_desc_size(&dd
) > MIC_MAX_DESC_BLK_SIZE
||
440 dd
.num_vq
> MIC_MAX_VRINGS
) {
441 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
442 __func__
, __LINE__
, -EINVAL
);
446 dd_config
= kmalloc(mic_desc_size(&dd
), GFP_KERNEL
);
447 if (dd_config
== NULL
) {
448 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
449 __func__
, __LINE__
, -ENOMEM
);
452 if (copy_from_user(dd_config
, argp
, mic_desc_size(&dd
))) {
454 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
455 __func__
, __LINE__
, ret
);
459 vqconfig
= mic_vq_config(dd_config
);
460 for (i
= 0; i
< dd
.num_vq
; i
++) {
461 if (le16_to_cpu(vqconfig
[i
].num
) > MIC_MAX_VRING_ENTRIES
) {
463 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
464 __func__
, __LINE__
, ret
);
469 /* Find the first free device page entry */
470 for (i
= mic_aligned_size(struct mic_bootparam
);
471 i
< MIC_DP_SIZE
- mic_total_desc_size(dd_config
);
472 i
+= mic_total_desc_size(devp
)) {
474 if (devp
->type
== 0 || devp
->type
== -1) {
481 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
482 __func__
, __LINE__
, ret
);
486 * Save off the type before doing the memcpy. Type will be set in the
487 * end after completing all initialization for the new device.
489 *type
= dd_config
->type
;
491 memcpy(devp
, dd_config
, mic_desc_size(dd_config
));
499 static void mic_init_device_ctrl(struct mic_vdev
*mvdev
,
500 struct mic_device_desc
*devpage
)
502 struct mic_device_ctrl
*dc
;
504 dc
= (void *)devpage
+ mic_aligned_desc_size(devpage
);
506 dc
->config_change
= 0;
510 dc
->used_address_updated
= 0;
511 dc
->c2h_vdev_db
= -1;
512 dc
->h2c_vdev_db
= -1;
516 int mic_virtio_add_device(struct mic_vdev
*mvdev
,
519 struct mic_device
*mdev
= mvdev
->mdev
;
520 struct mic_device_desc
*dd
= NULL
;
521 struct mic_vqconfig
*vqconfig
;
522 int vr_size
, i
, j
, ret
;
526 struct mic_bootparam
*bootparam
= mdev
->dp
;
529 mutex_lock(&mdev
->mic_mutex
);
531 ret
= mic_copy_dp_entry(mvdev
, argp
, &type
, &dd
);
533 mutex_unlock(&mdev
->mic_mutex
);
537 mic_init_device_ctrl(mvdev
, dd
);
540 mvdev
->virtio_id
= type
;
541 vqconfig
= mic_vq_config(dd
);
542 INIT_WORK(&mvdev
->virtio_bh_work
, mic_bh_handler
);
544 for (i
= 0; i
< dd
->num_vq
; i
++) {
545 struct mic_vringh
*mvr
= &mvdev
->mvr
[i
];
546 struct mic_vring
*vr
= &mvdev
->mvr
[i
].vring
;
547 num
= le16_to_cpu(vqconfig
[i
].num
);
548 mutex_init(&mvr
->vr_mutex
);
549 vr_size
= PAGE_ALIGN(vring_size(num
, MIC_VIRTIO_RING_ALIGN
) +
550 sizeof(struct _mic_vring_info
));
552 __get_free_pages(GFP_KERNEL
| __GFP_ZERO
,
556 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
557 __func__
, __LINE__
, ret
);
561 vr
->info
= vr
->va
+ vring_size(num
, MIC_VIRTIO_RING_ALIGN
);
562 vr
->info
->magic
= MIC_MAGIC
+ mvdev
->virtio_id
+ i
;
563 vqconfig
[i
].address
= mic_map_single(mdev
,
565 if (mic_map_error(vqconfig
[i
].address
)) {
566 free_pages((unsigned long)vr
->va
, get_order(vr_size
));
568 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
569 __func__
, __LINE__
, ret
);
572 vqconfig
[i
].address
= cpu_to_le64(vqconfig
[i
].address
);
574 vring_init(&vr
->vr
, num
, vr
->va
, MIC_VIRTIO_RING_ALIGN
);
575 ret
= vringh_init_kern(&mvr
->vrh
,
576 *(u32
*)mic_vq_features(mvdev
->dd
), num
, false,
577 vr
->vr
.desc
, vr
->vr
.avail
, vr
->vr
.used
);
579 dev_err(mic_dev(mvdev
), "%s %d err %d\n",
580 __func__
, __LINE__
, ret
);
583 vringh_kiov_init(&mvr
->riov
, NULL
, 0);
584 vringh_kiov_init(&mvr
->wiov
, NULL
, 0);
585 mvr
->head
= USHRT_MAX
;
587 mvr
->vrh
.notify
= mic_notify
;
588 dev_dbg(mdev
->sdev
->parent
,
589 "%s %d index %d va %p info %p vr_size 0x%x\n",
590 __func__
, __LINE__
, i
, vr
->va
, vr
->info
, vr_size
);
593 snprintf(irqname
, sizeof(irqname
), "mic%dvirtio%d", mdev
->id
,
595 mvdev
->virtio_db
= mic_next_db(mdev
);
596 mvdev
->virtio_cookie
= mic_request_irq(mdev
, mic_virtio_intr_handler
,
597 irqname
, mvdev
, mvdev
->virtio_db
, MIC_INTR_DB
);
598 if (IS_ERR(mvdev
->virtio_cookie
)) {
599 ret
= PTR_ERR(mvdev
->virtio_cookie
);
600 dev_dbg(mdev
->sdev
->parent
, "request irq failed\n");
604 mvdev
->dc
->c2h_vdev_db
= mvdev
->virtio_db
;
606 list_add_tail(&mvdev
->list
, &mdev
->vdev_list
);
608 * Order the type update with previous stores. This write barrier
609 * is paired with the corresponding read barrier before the uncached
610 * system memory read of the type, on the card while scanning the
616 dev_dbg(mdev
->sdev
->parent
, "Added virtio device id %d\n", dd
->type
);
618 db
= bootparam
->h2c_config_db
;
620 mdev
->ops
->send_intr(mdev
, db
);
621 mutex_unlock(&mdev
->mic_mutex
);
624 vqconfig
= mic_vq_config(dd
);
625 for (j
= 0; j
< i
; j
++) {
626 struct mic_vringh
*mvr
= &mvdev
->mvr
[j
];
627 mic_unmap_single(mdev
, le64_to_cpu(vqconfig
[j
].address
),
629 free_pages((unsigned long)mvr
->vring
.va
,
630 get_order(mvr
->vring
.len
));
632 mutex_unlock(&mdev
->mic_mutex
);
636 void mic_virtio_del_device(struct mic_vdev
*mvdev
)
638 struct list_head
*pos
, *tmp
;
639 struct mic_vdev
*tmp_mvdev
;
640 struct mic_device
*mdev
= mvdev
->mdev
;
641 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake
);
642 int i
, ret
, retry
= 100;
643 struct mic_vqconfig
*vqconfig
;
644 struct mic_bootparam
*bootparam
= mdev
->dp
;
647 mutex_lock(&mdev
->mic_mutex
);
648 db
= bootparam
->h2c_config_db
;
650 goto skip_hot_remove
;
651 dev_dbg(mdev
->sdev
->parent
,
652 "Requesting hot remove id %d\n", mvdev
->virtio_id
);
653 mvdev
->dc
->config_change
= MIC_VIRTIO_PARAM_DEV_REMOVE
;
654 mdev
->ops
->send_intr(mdev
, db
);
655 for (i
= retry
; i
--;) {
656 ret
= wait_event_timeout(wake
,
657 mvdev
->dc
->guest_ack
, msecs_to_jiffies(100));
661 dev_dbg(mdev
->sdev
->parent
,
662 "Device id %d config_change %d guest_ack %d\n",
663 mvdev
->virtio_id
, mvdev
->dc
->config_change
,
664 mvdev
->dc
->guest_ack
);
665 mvdev
->dc
->config_change
= 0;
666 mvdev
->dc
->guest_ack
= 0;
668 mic_free_irq(mdev
, mvdev
->virtio_cookie
, mvdev
);
669 flush_work(&mvdev
->virtio_bh_work
);
670 vqconfig
= mic_vq_config(mvdev
->dd
);
671 for (i
= 0; i
< mvdev
->dd
->num_vq
; i
++) {
672 struct mic_vringh
*mvr
= &mvdev
->mvr
[i
];
673 vringh_kiov_cleanup(&mvr
->riov
);
674 vringh_kiov_cleanup(&mvr
->wiov
);
675 mic_unmap_single(mdev
, le64_to_cpu(vqconfig
[i
].address
),
677 free_pages((unsigned long)mvr
->vring
.va
,
678 get_order(mvr
->vring
.len
));
681 list_for_each_safe(pos
, tmp
, &mdev
->vdev_list
) {
682 tmp_mvdev
= list_entry(pos
, struct mic_vdev
, list
);
683 if (tmp_mvdev
== mvdev
) {
685 dev_dbg(mdev
->sdev
->parent
,
686 "Removing virtio device id %d\n",
692 * Order the type update with previous stores. This write barrier
693 * is paired with the corresponding read barrier before the uncached
694 * system memory read of the type, on the card while scanning the
698 mvdev
->dd
->type
= -1;
699 mutex_unlock(&mdev
->mic_mutex
);