#include "dm.h"
#include <linux/hash.h>
+#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
* sorted queue.
*/
#define NR_QUEUE_LEVELS 16u
+#define NR_SENTINELS NR_QUEUE_LEVELS * 3
+
+#define WRITEBACK_PERIOD HZ
struct queue {
unsigned nr_elts;
+ bool current_writeback_sentinels;
+ unsigned long next_writeback;
struct list_head qs[NR_QUEUE_LEVELS];
+ struct list_head sentinels[NR_SENTINELS];
};
static void queue_init(struct queue *q)
unsigned i;
q->nr_elts = 0;
- for (i = 0; i < NR_QUEUE_LEVELS; i++)
+ q->current_writeback_sentinels = false;
+ q->next_writeback = 0;
+ for (i = 0; i < NR_QUEUE_LEVELS; i++) {
INIT_LIST_HEAD(q->qs + i);
+ INIT_LIST_HEAD(q->sentinels + i);
+ INIT_LIST_HEAD(q->sentinels + NR_QUEUE_LEVELS + i);
+ INIT_LIST_HEAD(q->sentinels + (2 * NR_QUEUE_LEVELS) + i);
+ }
+}
+
+static unsigned queue_size(struct queue *q)
+{
+ return q->nr_elts;
}
static bool queue_empty(struct queue *q)
list_del(elt);
}
+static bool is_sentinel(struct queue *q, struct list_head *h)
+{
+ return (h >= q->sentinels) && (h < (q->sentinels + NR_SENTINELS));
+}
+
/*
* Gives us the oldest entry of the lowest popoulated level. If the first
* level is emptied then we shift down one level.
static struct list_head *queue_peek(struct queue *q)
{
unsigned level;
+ struct list_head *h;
for (level = 0; level < NR_QUEUE_LEVELS; level++)
- if (!list_empty(q->qs + level))
- return q->qs[level].next;
+ list_for_each(h, q->qs + level)
+ if (!is_sentinel(q, h))
+ return h;
return NULL;
}
return r;
}
+/*
+ * Pops an entry from a level that is not past a sentinel.
+ */
+static struct list_head *queue_pop_old(struct queue *q)
+{
+ unsigned level;
+ struct list_head *h;
+
+ for (level = 0; level < NR_QUEUE_LEVELS; level++)
+ list_for_each(h, q->qs + level) {
+ if (is_sentinel(q, h))
+ break;
+
+ q->nr_elts--;
+ list_del(h);
+ return h;
+ }
+
+ return NULL;
+}
+
static struct list_head *list_pop(struct list_head *lh)
{
struct list_head *r = lh->next;
return r;
}
+static struct list_head *writeback_sentinel(struct queue *q, unsigned level)
+{
+ if (q->current_writeback_sentinels)
+ return q->sentinels + NR_QUEUE_LEVELS + level;
+ else
+ return q->sentinels + 2 * NR_QUEUE_LEVELS + level;
+}
+
+static void queue_update_writeback_sentinels(struct queue *q)
+{
+ unsigned i;
+ struct list_head *h;
+
+ if (time_after(jiffies, q->next_writeback)) {
+ for (i = 0; i < NR_QUEUE_LEVELS; i++) {
+ h = writeback_sentinel(q, i);
+ list_del(h);
+ list_add_tail(h, q->qs + i);
+ }
+
+ q->next_writeback = jiffies + WRITEBACK_PERIOD;
+ q->current_writeback_sentinels = !q->current_writeback_sentinels;
+ }
+}
+
+/*
+ * Sometimes we want to iterate through entries that have been pushed since
+ * a certain event. We use sentinel entries on the queues to delimit these
+ * 'tick' events.
+ */
+static void queue_tick(struct queue *q)
+{
+ unsigned i;
+
+ for (i = 0; i < NR_QUEUE_LEVELS; i++) {
+ list_del(q->sentinels + i);
+ list_add_tail(q->sentinels + i, q->qs + i);
+ }
+}
+
+typedef void (*iter_fn)(struct list_head *, void *);
+static void queue_iterate_tick(struct queue *q, iter_fn fn, void *context)
+{
+ unsigned i;
+ struct list_head *h;
+
+ for (i = 0; i < NR_QUEUE_LEVELS; i++) {
+ list_for_each_prev(h, q->qs + i) {
+ if (is_sentinel(q, h))
+ break;
+
+ fn(h, context);
+ }
+ }
+}
+
/*----------------------------------------------------------------*/
/*
*/
bool dirty:1;
unsigned hit_count;
- unsigned generation;
- unsigned tick;
};
/*
*/
static void push(struct mq_policy *mq, struct entry *e)
{
- e->tick = mq->tick;
hash_insert(mq, e);
if (in_cache(mq, e))
return e;
}
-static struct entry *peek(struct queue *q)
+static struct entry *pop_old(struct mq_policy *mq, struct queue *q)
{
- struct list_head *h = queue_peek(q);
- return h ? container_of(h, struct entry, list) : NULL;
+ struct entry *e;
+ struct list_head *h = queue_pop_old(q);
+
+ if (!h)
+ return NULL;
+
+ e = container_of(h, struct entry, list);
+ hash_remove(e);
+
+ return e;
}
-/*
- * Has this entry already been updated?
- */
-static bool updated_this_tick(struct mq_policy *mq, struct entry *e)
+static struct entry *peek(struct queue *q)
{
- return mq->tick == e->tick;
+ struct list_head *h = queue_peek(q);
+ return h ? container_of(h, struct entry, list) : NULL;
}
/*
* Whenever we use an entry we bump up it's hit counter, and push it to the
* back to it's current level.
*/
-static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e)
+static void requeue(struct mq_policy *mq, struct entry *e)
{
- if (updated_this_tick(mq, e))
- return;
-
- e->hit_count++;
- mq->hit_count++;
check_generation(mq);
-
- /* generation adjustment, to stop the counts increasing forever. */
- /* FIXME: divide? */
- /* e->hit_count -= min(e->hit_count - 1, mq->generation - e->generation); */
- e->generation = mq->generation;
-
del(mq, e);
push(mq, e);
}
struct entry *e,
struct policy_result *result)
{
- requeue_and_update_tick(mq, e);
+ requeue(mq, e);
if (in_cache(mq, e)) {
result->op = POLICY_HIT;
new_e->oblock = e->oblock;
new_e->dirty = false;
new_e->hit_count = e->hit_count;
- new_e->generation = e->generation;
- new_e->tick = e->tick;
del(mq, e);
free_entry(&mq->pre_cache_pool, e);
int data_dir, struct policy_result *result)
{
int r = 0;
- bool updated = updated_this_tick(mq, e);
- if ((!discarded_oblock && updated) ||
- !should_promote(mq, e, discarded_oblock, data_dir)) {
- requeue_and_update_tick(mq, e);
+ if (!should_promote(mq, e, discarded_oblock, data_dir)) {
+ requeue(mq, e);
result->op = POLICY_MISS;
} else if (!can_migrate)
r = -EWOULDBLOCK;
else {
- requeue_and_update_tick(mq, e);
+ requeue(mq, e);
r = pre_cache_to_cache(mq, e, result);
}
e->dirty = false;
e->oblock = oblock;
e->hit_count = 1;
- e->generation = mq->generation;
push(mq, e);
}
e->oblock = oblock;
e->dirty = false;
e->hit_count = 1;
- e->generation = mq->generation;
push(mq, e);
result->cblock = infer_cblock(&mq->cache_pool, e);
kfree(mq);
}
+static void update_pre_cache_hits(struct list_head *h, void *context)
+{
+ struct entry *e = container_of(h, struct entry, list);
+ e->hit_count++;
+}
+
+static void update_cache_hits(struct list_head *h, void *context)
+{
+ struct mq_policy *mq = context;
+ struct entry *e = container_of(h, struct entry, list);
+ e->hit_count++;
+ mq->hit_count++;
+}
+
static void copy_tick(struct mq_policy *mq)
{
- unsigned long flags;
+ unsigned long flags, tick;
spin_lock_irqsave(&mq->tick_lock, flags);
- mq->tick = mq->tick_protected;
+ tick = mq->tick_protected;
+ if (tick != mq->tick) {
+ queue_iterate_tick(&mq->pre_cache, update_pre_cache_hits, mq);
+ queue_iterate_tick(&mq->cache_dirty, update_cache_hits, mq);
+ queue_iterate_tick(&mq->cache_clean, update_cache_hits, mq);
+ mq->tick = tick;
+ }
+
+ queue_tick(&mq->pre_cache);
+ queue_tick(&mq->cache_dirty);
+ queue_tick(&mq->cache_clean);
+ queue_update_writeback_sentinels(&mq->cache_dirty);
spin_unlock_irqrestore(&mq->tick_lock, flags);
}
e->oblock = oblock;
e->dirty = false; /* this gets corrected in a minute */
e->hit_count = hint_valid ? hint : 1;
- e->generation = mq->generation;
push(mq, e);
return 0;
{
int r;
unsigned level;
+ struct list_head *h;
struct entry *e;
for (level = 0; level < NR_QUEUE_LEVELS; level++)
- list_for_each_entry(e, q->qs + level, list) {
+ list_for_each(h, q->qs + level) {
+ if (is_sentinel(q, h))
+ continue;
+
+ e = container_of(h, struct entry, list);
r = fn(context, infer_cblock(&mq->cache_pool, e),
e->oblock, e->hit_count);
if (r)
return r;
}
+#define CLEAN_TARGET_PERCENTAGE 25
+
+static bool clean_target_met(struct mq_policy *mq)
+{
+ /*
+ * Cache entries may not be populated. So we're cannot rely on the
+ * size of the clean queue.
+ */
+ unsigned nr_clean = from_cblock(mq->cache_size) - queue_size(&mq->cache_dirty);
+ unsigned target = from_cblock(mq->cache_size) * CLEAN_TARGET_PERCENTAGE / 100;
+
+ return nr_clean >= target;
+}
+
static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock,
dm_cblock_t *cblock)
{
- struct entry *e = pop(mq, &mq->cache_dirty);
+ struct entry *e = pop_old(mq, &mq->cache_dirty);
+
+ if (!e && !clean_target_met(mq))
+ e = pop(mq, &mq->cache_dirty);
if (!e)
return -ENODATA;