netns xfrm: per-netns xfrm_state_byspi hash
[deliverable/linux.git] / net / xfrm / xfrm_state.c
index 0a8f09c3144c835fccbc1e3de4855b654c45a3b9..66ca1ef7f8eb55481b5b5224e0104ca4f96c84ce 100644 (file)
@@ -44,16 +44,6 @@ u32 sysctl_xfrm_acq_expires __read_mostly = 30;
 
 static DEFINE_SPINLOCK(xfrm_state_lock);
 
-/* Hash table to find appropriate SA towards given target (endpoint
- * of tunnel or destination of transport mode) allowed by selector.
- *
- * Main use is finding SA after policy selected tunnel or transport mode.
- * Also, it can be used by ah/esp icmp error handler to find offending SA.
- */
-static LIST_HEAD(xfrm_state_all);
-static struct hlist_head *xfrm_state_bydst __read_mostly;
-static struct hlist_head *xfrm_state_bysrc __read_mostly;
-static struct hlist_head *xfrm_state_byspi __read_mostly;
 static unsigned int xfrm_state_hmask __read_mostly;
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
 static unsigned int xfrm_state_num;
@@ -158,17 +148,17 @@ static void xfrm_hash_resize(struct work_struct *__unused)
 
        nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
        for (i = xfrm_state_hmask; i >= 0; i--)
-               xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
+               xfrm_hash_transfer(init_net.xfrm.state_bydst+i, ndst, nsrc, nspi,
                                   nhashmask);
 
-       odst = xfrm_state_bydst;
-       osrc = xfrm_state_bysrc;
-       ospi = xfrm_state_byspi;
+       odst = init_net.xfrm.state_bydst;
+       osrc = init_net.xfrm.state_bysrc;
+       ospi = init_net.xfrm.state_byspi;
        ohashmask = xfrm_state_hmask;
 
-       xfrm_state_bydst = ndst;
-       xfrm_state_bysrc = nsrc;
-       xfrm_state_byspi = nspi;
+       init_net.xfrm.state_bydst = ndst;
+       init_net.xfrm.state_bysrc = nsrc;
+       init_net.xfrm.state_byspi = nspi;
        xfrm_state_hmask = nhashmask;
 
        spin_unlock_bh(&xfrm_state_lock);
@@ -408,11 +398,10 @@ static void xfrm_state_gc_task(struct work_struct *data)
        struct hlist_head gc_list;
 
        spin_lock_bh(&xfrm_state_gc_lock);
-       gc_list.first = xfrm_state_gc_list.first;
-       INIT_HLIST_HEAD(&xfrm_state_gc_list);
+       hlist_move_list(&xfrm_state_gc_list, &gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);
 
-       hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
+       hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist)
                xfrm_state_gc_destroy(x);
 
        wake_up(&km_waitq);
@@ -505,16 +494,17 @@ out:
 
 static void xfrm_replay_timer_handler(unsigned long data);
 
-struct xfrm_state *xfrm_state_alloc(void)
+struct xfrm_state *xfrm_state_alloc(struct net *net)
 {
        struct xfrm_state *x;
 
        x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
 
        if (x) {
+               write_pnet(&x->xs_net, net);
                atomic_set(&x->refcnt, 1);
                atomic_set(&x->tunnel_users, 0);
-               INIT_LIST_HEAD(&x->all);
+               INIT_LIST_HEAD(&x->km.all);
                INIT_HLIST_NODE(&x->bydst);
                INIT_HLIST_NODE(&x->bysrc);
                INIT_HLIST_NODE(&x->byspi);
@@ -540,12 +530,8 @@ void __xfrm_state_destroy(struct xfrm_state *x)
 {
        WARN_ON(x->km.state != XFRM_STATE_DEAD);
 
-       spin_lock_bh(&xfrm_state_lock);
-       list_del(&x->all);
-       spin_unlock_bh(&xfrm_state_lock);
-
        spin_lock_bh(&xfrm_state_gc_lock);
-       hlist_add_head(&x->bydst, &xfrm_state_gc_list);
+       hlist_add_head(&x->gclist, &xfrm_state_gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);
        schedule_work(&xfrm_state_gc_work);
 }
@@ -558,6 +544,7 @@ int __xfrm_state_delete(struct xfrm_state *x)
        if (x->km.state != XFRM_STATE_DEAD) {
                x->km.state = XFRM_STATE_DEAD;
                spin_lock(&xfrm_state_lock);
+               list_del(&x->km.all);
                hlist_del(&x->bydst);
                hlist_del(&x->bysrc);
                if (x->id.spi)
@@ -599,7 +586,7 @@ xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
                struct hlist_node *entry;
                struct xfrm_state *x;
 
-               hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
+               hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) {
                        if (xfrm_id_proto_match(x->id.proto, proto) &&
                           (err = security_xfrm_state_delete(x)) != 0) {
                                xfrm_audit_state_delete(x, 0,
@@ -634,7 +621,7 @@ int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
                struct hlist_node *entry;
                struct xfrm_state *x;
 restart:
-               hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
+               hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) {
                        if (!xfrm_state_kern(x) &&
                            xfrm_id_proto_match(x->id.proto, proto)) {
                                xfrm_state_hold(x);
@@ -691,7 +678,7 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi,
        struct xfrm_state *x;
        struct hlist_node *entry;
 
-       hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
+       hlist_for_each_entry(x, entry, init_net.xfrm.state_byspi+h, byspi) {
                if (x->props.family != family ||
                    x->id.spi       != spi ||
                    x->id.proto     != proto)
@@ -723,7 +710,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm
        struct xfrm_state *x;
        struct hlist_node *entry;
 
-       hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
+       hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) {
                if (x->props.family != family ||
                    x->id.proto     != proto)
                        continue;
@@ -789,7 +776,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 
        spin_lock_bh(&xfrm_state_lock);
        h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
-       hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+       hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
                if (x->props.family == family &&
                    x->props.reqid == tmpl->reqid &&
                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
@@ -839,7 +826,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
                        error = -EEXIST;
                        goto out;
                }
-               x = xfrm_state_alloc();
+               x = xfrm_state_alloc(&init_net);
                if (x == NULL) {
                        error = -ENOMEM;
                        goto out;
@@ -858,13 +845,13 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 
                if (km_query(x, tmpl, pol) == 0) {
                        x->km.state = XFRM_STATE_ACQ;
-                       list_add_tail(&x->all, &xfrm_state_all);
-                       hlist_add_head(&x->bydst, xfrm_state_bydst+h);
+                       list_add(&x->km.all, &init_net.xfrm.state_all);
+                       hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h);
                        h = xfrm_src_hash(daddr, saddr, family);
-                       hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
+                       hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h);
                        if (x->id.spi) {
                                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
-                               hlist_add_head(&x->byspi, xfrm_state_byspi+h);
+                               hlist_add_head(&x->byspi, init_net.xfrm.state_byspi+h);
                        }
                        x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
                        x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
@@ -899,7 +886,7 @@ xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 
        spin_lock(&xfrm_state_lock);
        h = xfrm_dst_hash(daddr, saddr, reqid, family);
-       hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+       hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
                if (x->props.family == family &&
                    x->props.reqid == reqid &&
                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
@@ -927,20 +914,20 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 
        x->genid = ++xfrm_state_genid;
 
-       list_add_tail(&x->all, &xfrm_state_all);
+       list_add(&x->km.all, &init_net.xfrm.state_all);
 
        h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
                          x->props.reqid, x->props.family);
-       hlist_add_head(&x->bydst, xfrm_state_bydst+h);
+       hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h);
 
        h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
-       hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
+       hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h);
 
        if (x->id.spi) {
                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
                                  x->props.family);
 
-               hlist_add_head(&x->byspi, xfrm_state_byspi+h);
+               hlist_add_head(&x->byspi, init_net.xfrm.state_byspi+h);
        }
 
        mod_timer(&x->timer, jiffies + HZ);
@@ -964,7 +951,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
        unsigned int h;
 
        h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
-       hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+       hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
                if (x->props.family     == family &&
                    x->props.reqid      == reqid &&
                    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
@@ -989,7 +976,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
        struct hlist_node *entry;
        struct xfrm_state *x;
 
-       hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+       hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
                if (x->props.reqid  != reqid ||
                    x->props.mode   != mode ||
                    x->props.family != family ||
@@ -1021,7 +1008,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
        if (!create)
                return NULL;
 
-       x = xfrm_state_alloc();
+       x = xfrm_state_alloc(&init_net);
        if (likely(x)) {
                switch (family) {
                case AF_INET:
@@ -1056,10 +1043,10 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
                xfrm_state_hold(x);
                x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
                add_timer(&x->timer);
-               list_add_tail(&x->all, &xfrm_state_all);
-               hlist_add_head(&x->bydst, xfrm_state_bydst+h);
+               list_add(&x->km.all, &init_net.xfrm.state_all);
+               hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h);
                h = xfrm_src_hash(daddr, saddr, family);
-               hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
+               hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h);
 
                xfrm_state_num++;
 
@@ -1129,7 +1116,7 @@ EXPORT_SYMBOL(xfrm_state_add);
 static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
 {
        int err = -ENOMEM;
-       struct xfrm_state *x = xfrm_state_alloc();
+       struct xfrm_state *x = xfrm_state_alloc(&init_net);
        if (!x)
                goto error;
 
@@ -1212,7 +1199,7 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
        if (m->reqid) {
                h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr,
                                  m->reqid, m->old_family);
-               hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
+               hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) {
                        if (x->props.mode != m->mode ||
                            x->id.proto != m->proto)
                                continue;
@@ -1229,7 +1216,7 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
        } else {
                h = xfrm_src_hash(&m->old_daddr, &m->old_saddr,
                                  m->old_family);
-               hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
+               hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) {
                        if (x->props.mode != m->mode ||
                            x->id.proto != m->proto)
                                continue;
@@ -1461,7 +1448,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
                struct hlist_node *entry;
                struct xfrm_state *x;
 
-               hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
+               hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) {
                        if (x->km.seq == seq &&
                            x->km.state == XFRM_STATE_ACQ) {
                                xfrm_state_hold(x);
@@ -1536,7 +1523,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
        if (x->id.spi) {
                spin_lock_bh(&xfrm_state_lock);
                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
-               hlist_add_head(&x->byspi, xfrm_state_byspi+h);
+               hlist_add_head(&x->byspi, init_net.xfrm.state_byspi+h);
                spin_unlock_bh(&xfrm_state_lock);
 
                err = 0;
@@ -1553,47 +1540,62 @@ int xfrm_state_walk(struct xfrm_state_walk *walk,
                    int (*func)(struct xfrm_state *, int, void*),
                    void *data)
 {
-       struct xfrm_state *old, *x, *last = NULL;
+       struct xfrm_state *state;
+       struct xfrm_state_walk *x;
        int err = 0;
 
-       if (walk->state == NULL && walk->count != 0)
+       if (walk->seq != 0 && list_empty(&walk->all))
                return 0;
 
-       old = x = walk->state;
-       walk->state = NULL;
        spin_lock_bh(&xfrm_state_lock);
-       if (x == NULL)
-               x = list_first_entry(&xfrm_state_all, struct xfrm_state, all);
-       list_for_each_entry_from(x, &xfrm_state_all, all) {
-               if (x->km.state == XFRM_STATE_DEAD)
+       if (list_empty(&walk->all))
+               x = list_first_entry(&init_net.xfrm.state_all, struct xfrm_state_walk, all);
+       else
+               x = list_entry(&walk->all, struct xfrm_state_walk, all);
+       list_for_each_entry_from(x, &init_net.xfrm.state_all, all) {
+               if (x->state == XFRM_STATE_DEAD)
                        continue;
-               if (!xfrm_id_proto_match(x->id.proto, walk->proto))
+               state = container_of(x, struct xfrm_state, km);
+               if (!xfrm_id_proto_match(state->id.proto, walk->proto))
                        continue;
-               if (last) {
-                       err = func(last, walk->count, data);
-                       if (err) {
-                               xfrm_state_hold(last);
-                               walk->state = last;
-                               goto out;
-                       }
+               err = func(state, walk->seq, data);
+               if (err) {
+                       list_move_tail(&walk->all, &x->all);
+                       goto out;
                }
-               last = x;
-               walk->count++;
+               walk->seq++;
        }
-       if (walk->count == 0) {
+       if (walk->seq == 0) {
                err = -ENOENT;
                goto out;
        }
-       if (last)
-               err = func(last, 0, data);
+       list_del_init(&walk->all);
 out:
        spin_unlock_bh(&xfrm_state_lock);
-       if (old != NULL)
-               xfrm_state_put(old);
        return err;
 }
 EXPORT_SYMBOL(xfrm_state_walk);
 
+void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
+{
+       INIT_LIST_HEAD(&walk->all);     /* empty list == cursor not linked; xfrm_state_walk() then starts at the list head */
+       walk->proto = proto;    /* protocol filter passed to xfrm_id_proto_match() by xfrm_state_walk() */
+       walk->state = XFRM_STATE_DEAD;  /* xfrm_state_walk() skips DEAD entries, so a parked cursor is never treated as a state */
+       walk->seq = 0;  /* count of entries handed to the callback so far */
+}
+EXPORT_SYMBOL(xfrm_state_walk_init);
+
+void xfrm_state_walk_done(struct xfrm_state_walk *walk)        /* unlink a walk cursor left parked in state_all */
+{
+       if (list_empty(&walk->all))     /* cursor never parked, or walk already completed: nothing to undo */
+               return;
+
+       spin_lock_bh(&xfrm_state_lock); /* same lock xfrm_state_walk() holds while moving the cursor */
+       list_del(&walk->all);
+       spin_unlock_bh(&xfrm_state_lock);       /* fix: was a second spin_lock_bh(), which never released the lock and self-deadlocked */
+}
+EXPORT_SYMBOL(xfrm_state_walk_done);
+
 
 void xfrm_replay_notify(struct xfrm_state *x, int event)
 {
@@ -1803,7 +1805,8 @@ EXPORT_SYMBOL(km_policy_expired);
 
 #ifdef CONFIG_XFRM_MIGRATE
 int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
-              struct xfrm_migrate *m, int num_migrate)
+              struct xfrm_migrate *m, int num_migrate,
+              struct xfrm_kmaddress *k)
 {
        int err = -EINVAL;
        int ret;
@@ -1812,7 +1815,7 @@ int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
        read_lock(&xfrm_km_lock);
        list_for_each_entry(km, &xfrm_km_list, list) {
                if (km->migrate) {
-                       ret = km->migrate(sel, dir, type, m, num_migrate);
+                       ret = km->migrate(sel, dir, type, m, num_migrate, k);
                        if (!ret)
                                err = ret;
                }
@@ -2068,20 +2071,49 @@ error:
 
 EXPORT_SYMBOL(xfrm_init_state);
 
-void __init xfrm_state_init(void)
+int __net_init xfrm_state_init(struct net *net)
 {
        unsigned int sz;
 
+       INIT_LIST_HEAD(&net->xfrm.state_all);
+
        sz = sizeof(struct hlist_head) * 8;
 
-       xfrm_state_bydst = xfrm_hash_alloc(sz);
-       xfrm_state_bysrc = xfrm_hash_alloc(sz);
-       xfrm_state_byspi = xfrm_hash_alloc(sz);
-       if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
-               panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
+       net->xfrm.state_bydst = xfrm_hash_alloc(sz);
+       if (!net->xfrm.state_bydst)
+               goto out_bydst;
+       net->xfrm.state_bysrc = xfrm_hash_alloc(sz);
+       if (!net->xfrm.state_bysrc)
+               goto out_bysrc;
+       net->xfrm.state_byspi = xfrm_hash_alloc(sz);
+       if (!net->xfrm.state_byspi)
+               goto out_byspi;
        xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
        INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
+       return 0;
+
+out_byspi:
+       xfrm_hash_free(net->xfrm.state_bysrc, sz);
+out_bysrc:
+       xfrm_hash_free(net->xfrm.state_bydst, sz);
+out_bydst:
+       return -ENOMEM;
+}
+
+void xfrm_state_fini(struct net *net)  /* free the three per-netns state hash tables allocated by xfrm_state_init() */
+{
+       unsigned int sz;
+
+       WARN_ON(!list_empty(&net->xfrm.state_all));     /* warn if any entry is still linked on the all-states list */
+
+       sz = (xfrm_state_hmask + 1) * sizeof(struct hlist_head);        /* table size in bytes, derived from the file-global hash mask */
+       WARN_ON(!hlist_empty(net->xfrm.state_byspi));   /* NOTE(review): passes the table base, so only bucket 0 is checked */
+       xfrm_hash_free(net->xfrm.state_byspi, sz);
+       WARN_ON(!hlist_empty(net->xfrm.state_bysrc));   /* NOTE(review): bucket 0 only, as above */
+       xfrm_hash_free(net->xfrm.state_bysrc, sz);
+       WARN_ON(!hlist_empty(net->xfrm.state_bydst));   /* NOTE(review): bucket 0 only, as above */
+       xfrm_hash_free(net->xfrm.state_bydst, sz);
 }
 
 #ifdef CONFIG_AUDITSYSCALL
@@ -2097,16 +2129,12 @@ static void xfrm_audit_helper_sainfo(struct xfrm_state *x,
 
        switch(x->props.family) {
        case AF_INET:
-               audit_log_format(audit_buf,
-                                " src=" NIPQUAD_FMT " dst=" NIPQUAD_FMT,
-                                NIPQUAD(x->props.saddr.a4),
-                                NIPQUAD(x->id.daddr.a4));
+               audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
+                                &x->props.saddr.a4, &x->id.daddr.a4);
                break;
        case AF_INET6:
-               audit_log_format(audit_buf,
-                                " src=" NIP6_FMT " dst=" NIP6_FMT,
-                                NIP6(*(struct in6_addr *)x->props.saddr.a6),
-                                NIP6(*(struct in6_addr *)x->id.daddr.a6));
+               audit_log_format(audit_buf, " src=%pI6 dst=%pI6",
+                                x->props.saddr.a6, x->id.daddr.a6);
                break;
        }
 
@@ -2122,18 +2150,14 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
        switch (family) {
        case AF_INET:
                iph4 = ip_hdr(skb);
-               audit_log_format(audit_buf,
-                                " src=" NIPQUAD_FMT " dst=" NIPQUAD_FMT,
-                                NIPQUAD(iph4->saddr),
-                                NIPQUAD(iph4->daddr));
+               audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
+                                &iph4->saddr, &iph4->daddr);
                break;
        case AF_INET6:
                iph6 = ipv6_hdr(skb);
                audit_log_format(audit_buf,
-                                " src=" NIP6_FMT " dst=" NIP6_FMT
-                                " flowlbl=0x%x%02x%02x",
-                                NIP6(iph6->saddr),
-                                NIP6(iph6->daddr),
+                                " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x",
+                                &iph6->saddr,&iph6->daddr,
                                 iph6->flow_lbl[0] & 0x0f,
                                 iph6->flow_lbl[1],
                                 iph6->flow_lbl[2]);
This page took 0.03441 seconds and 5 git commands to generate.