This separates nvme queue allocation from creation, and queue deletion from
freeing, so that in the future we can temporarily disable queues and reuse
the same memory when bringing them back online, such as when resuming from
suspend.
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
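
For context, the queue lifecycle after this patch is roughly: nvme_alloc_queue()
allocates the SQ/CQ memory and leaves the queue suspended, nvme_create_queue()
registers it with the controller and (via nvme_init_queue()) clears q_suspended,
nvme_disable_queue() deletes it on the controller and cancels outstanding I/O
without freeing the memory, and nvme_free_queues() finally releases the memory.
A minimal sketch of the intended call order follows; nvme_bring_up_io_queue() is
a hypothetical helper for illustration only, not code from this patch, and error
unwinding is elided:

/*
 * Illustrative only: hypothetical helper showing the alloc -> create
 * ordering introduced by this patch.  Not part of the patch itself.
 */
static int nvme_bring_up_io_queue(struct nvme_dev *dev, int qid,
				  int q_depth, int vector)
{
	struct nvme_queue *nvmeq;
	int result;

	/* Step 1: allocate SQ/CQ memory; the queue starts suspended. */
	nvmeq = nvme_alloc_queue(dev, qid, q_depth, vector);
	if (!nvmeq)
		return -ENOMEM;
	dev->queues[qid] = nvmeq;

	/* Step 2: create the queue on the controller; this initializes
	 * the queue state and clears q_suspended. */
	result = nvme_create_queue(nvmeq, qid);
	if (result)
		return result;

	/*
	 * Later, nvme_disable_queue(dev, qid) suspends the queue and
	 * deletes it on the controller without freeing its memory, so
	 * nvme_create_queue() can reuse the same nvmeq when bringing
	 * the queue back online; only nvme_free_queues(dev) releases
	 * the memory.
	 */
	return 0;
}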
u16 cq_head;
u8 cq_phase;
u8 cqe_seen;
+ u8 q_suspended;
unsigned long cmdid_data[];
};
return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)];
}
+static unsigned nvme_queue_extra(int depth)
+{
+ return DIV_ROUND_UP(depth, 8) + (depth * sizeof(struct nvme_cmd_info));
+}
+
/**
* alloc_cmdid() - Allocate a Command ID
* @nvmeq: The queue that will be used for this command
int result = -EBUSY;
spin_lock_irq(&nvmeq->q_lock);
- if (bio_list_empty(&nvmeq->sq_cong))
+ if (!nvmeq->q_suspended && bio_list_empty(&nvmeq->sq_cong))
result = nvme_submit_bio_queue(nvmeq, ns, bio);
if (unlikely(result)) {
if (bio_list_empty(&nvmeq->sq_cong))
-static void nvme_free_queue_mem(struct nvme_queue *nvmeq)
+static void nvme_free_queue(struct nvme_queue *nvmeq)
+ spin_lock_irq(&nvmeq->q_lock);
+ while (bio_list_peek(&nvmeq->sq_cong)) {
+ struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
+ bio_endio(bio, -EIO);
+ }
+ spin_unlock_irq(&nvmeq->q_lock);
+
dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
-static void nvme_free_queue(struct nvme_dev *dev, int qid)
+static void nvme_free_queues(struct nvme_dev *dev)
+{
+ int i;
+
+ for (i = dev->queue_count - 1; i >= 0; i--) {
+ nvme_free_queue(dev->queues[i]);
+ dev->queue_count--;
+ dev->queues[i] = NULL;
+ }
+}
+
+static void nvme_disable_queue(struct nvme_dev *dev, int qid)
{
struct nvme_queue *nvmeq = dev->queues[qid];
int vector = dev->entry[nvmeq->cq_vector].vector;
spin_lock_irq(&nvmeq->q_lock);
- nvme_cancel_ios(nvmeq, false);
- while (bio_list_peek(&nvmeq->sq_cong)) {
- struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
- bio_endio(bio, -EIO);
- }
+ if (nvmeq->q_suspended) {
+ spin_unlock_irq(&nvmeq->q_lock);
+ return;
+ }
+ nvmeq->q_suspended = 1;
spin_unlock_irq(&nvmeq->q_lock);
irq_set_affinity_hint(vector, NULL);
adapter_delete_cq(dev, qid);
}
- nvme_free_queue_mem(nvmeq);
+ spin_lock_irq(&nvmeq->q_lock);
+ nvme_process_cq(nvmeq);
+ nvme_cancel_ios(nvmeq, false);
+ spin_unlock_irq(&nvmeq->q_lock);
}
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
int depth, int vector)
{
struct device *dmadev = &dev->pci_dev->dev;
- unsigned extra = DIV_ROUND_UP(depth, 8) + (depth *
- sizeof(struct nvme_cmd_info));
+ unsigned extra = nvme_queue_extra(depth);
struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
if (!nvmeq)
return NULL;
nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
nvmeq->q_depth = depth;
nvmeq->cq_vector = vector;
+ nvmeq->q_suspended = 1;
+ dev->queue_count++;
IRQF_DISABLED | IRQF_SHARED, name, nvmeq);
}
-static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid,
- int cq_size, int vector)
+static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
- int result;
- struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector);
+ struct nvme_dev *dev = nvmeq->dev;
+ unsigned extra = nvme_queue_extra(nvmeq->q_depth);
- if (!nvmeq)
- return ERR_PTR(-ENOMEM);
+ nvmeq->sq_tail = 0;
+ nvmeq->cq_head = 0;
+ nvmeq->cq_phase = 1;
+ nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
+ memset(nvmeq->cmdid_data, 0, extra);
+ memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
+ nvme_cancel_ios(nvmeq, false);
+ nvmeq->q_suspended = 0;
+}
+
+static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
+{
+ struct nvme_dev *dev = nvmeq->dev;
+ int result;
result = adapter_alloc_cq(dev, qid, nvmeq);
if (result < 0)
result = adapter_alloc_sq(dev, qid, nvmeq);
if (result < 0)
if (result < 0)
goto release_sq;
+ spin_lock(&nvmeq->q_lock);
+ nvme_init_queue(nvmeq, qid);
+ spin_unlock(&nvmeq->q_lock);
+
+ return result;
release_sq:
adapter_delete_sq(dev, qid);
release_cq:
adapter_delete_cq(dev, qid);
- free_nvmeq:
- dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
- (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
- dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
- nvmeq->sq_cmds, nvmeq->sq_dma_addr);
- kfree(nvmeq);
- return ERR_PTR(result);
}
static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
goto free_q;
dev->queues[0] = nvmeq;
+ spin_lock(&nvmeq->q_lock);
+ nvme_init_queue(nvmeq, 0);
+ spin_unlock(&nvmeq->q_lock);
- nvme_free_queue_mem(nvmeq);
+ nvme_free_queue(nvmeq);
put_nvmeq(nvmeq);
if (length != (io.nblocks + 1) << ns->lba_shift)
status = -ENOMEM;
+ else if (!nvmeq || nvmeq->q_suspended)
+ status = -EBUSY;
else
status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
if (!nvmeq)
continue;
spin_lock_irq(&nvmeq->q_lock);
+ if (nvmeq->q_suspended)
+ goto unlock;
nvme_process_cq(nvmeq);
nvme_cancel_ios(nvmeq, true);
nvme_resubmit_bios(nvmeq);
+ unlock:
spin_unlock_irq(&nvmeq->q_lock);
}
}
nr_io_queues = vecs;
result = queue_request_irq(dev, dev->queues[0], "nvme admin");
- /* XXX: handle failure here */
+ if (result)
+ goto free_queues;
cpu = cpumask_first(cpu_online_mask);
for (i = 0; i < nr_io_queues; i++) {
q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1,
NVME_Q_DEPTH);
for (i = 0; i < nr_io_queues; i++) {
- dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i);
- if (IS_ERR(dev->queues[i + 1]))
- return PTR_ERR(dev->queues[i + 1]);
- dev->queue_count++;
+ dev->queues[i + 1] = nvme_alloc_queue(dev, i + 1, q_depth, i);
+ if (!dev->queues[i + 1]) {
+ result = -ENOMEM;
+ goto free_queues;
+ }
}
for (; i < num_possible_cpus(); i++) {
dev->queues[i + 1] = dev->queues[target + 1];
}
+ for (i = 1; i < dev->queue_count; i++) {
+ result = nvme_create_queue(dev->queues[i], i);
+ if (result) {
+ for (--i; i > 0; i--)
+ nvme_disable_queue(dev, i);
+ goto free_queues;
+ }
+ }
-static void nvme_free_queues(struct nvme_dev *dev)
-{
- int i;
- for (i = dev->queue_count - 1; i >= 0; i--)
- nvme_free_queue(dev, i);
-}
+ free_queues:
+ nvme_free_queues(dev);
+ return result;
static int nvme_dev_remove(struct nvme_dev *dev)
{
struct nvme_ns *ns, *next;
+ int i;
+
+ for (i = dev->queue_count - 1; i >= 0; i--)
+ nvme_disable_queue(dev, i);
spin_lock(&dev_list_lock);
list_del(&dev->node);
result = nvme_configure_admin_queue(dev);
if (result)
goto unmap;
spin_lock(&dev_list_lock);
list_add(&dev->node, &dev_list);