libceph: init osd->o_node in create_osd()
[deliverable/linux.git] / net / ceph / osd_client.c
index ca59e66c9787303805519f2bf325cc5d5817ff55..a6dc6acd656637e8e1f2afa01b5f6408b31b0ec4 100644 (file)
@@ -52,7 +52,7 @@ static int op_has_extent(int op)
                op == CEPH_OSD_OP_WRITE);
 }
 
-void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
+int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
                        struct ceph_file_layout *layout,
                        u64 snapid,
                        u64 off, u64 *plen, u64 *bno,
@@ -62,12 +62,15 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
        struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
        u64 orig_len = *plen;
        u64 objoff, objlen;    /* extent in object */
+       int r;
 
        reqhead->snapid = cpu_to_le64(snapid);
 
        /* object extent? */
-       ceph_calc_file_object_mapping(layout, off, plen, bno,
-                                     &objoff, &objlen);
+       r = ceph_calc_file_object_mapping(layout, off, plen, bno,
+                                         &objoff, &objlen);
+       if (r < 0)
+               return r;
        if (*plen < orig_len)
                dout(" skipping last %llu, final file extent %llu~%llu\n",
                     orig_len - *plen, off, *plen);
@@ -83,7 +86,7 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
 
        dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
             *bno, objoff, objlen, req->r_num_pages);
-
+       return 0;
 }
 EXPORT_SYMBOL(ceph_calc_raw_layout);
 
@@ -112,20 +115,25 @@ EXPORT_SYMBOL(ceph_calc_raw_layout);
  *
  * fill osd op in request message.
  */
-static void calc_layout(struct ceph_osd_client *osdc,
-                       struct ceph_vino vino,
-                       struct ceph_file_layout *layout,
-                       u64 off, u64 *plen,
-                       struct ceph_osd_request *req,
-                       struct ceph_osd_req_op *op)
+static int calc_layout(struct ceph_osd_client *osdc,
+                      struct ceph_vino vino,
+                      struct ceph_file_layout *layout,
+                      u64 off, u64 *plen,
+                      struct ceph_osd_request *req,
+                      struct ceph_osd_req_op *op)
 {
        u64 bno;
+       int r;
 
-       ceph_calc_raw_layout(osdc, layout, vino.snap, off,
-                            plen, &bno, req, op);
+       r = ceph_calc_raw_layout(osdc, layout, vino.snap, off,
+                                plen, &bno, req, op);
+       if (r < 0)
+               return r;
 
        snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno);
        req->r_oid_len = strlen(req->r_oid);
+
+       return r;
 }
 
 /*
@@ -140,10 +148,9 @@ void ceph_osdc_release_request(struct kref *kref)
        if (req->r_request)
                ceph_msg_put(req->r_request);
        if (req->r_con_filling_msg) {
-               dout("release_request revoking pages %p from con %p\n",
+               dout("%s revoking pages %p from con %p\n", __func__,
                     req->r_pages, req->r_con_filling_msg);
-               ceph_con_revoke_message(req->r_con_filling_msg,
-                                     req->r_reply);
+               ceph_msg_revoke_incoming(req->r_reply);
                req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
        }
        if (req->r_reply)
@@ -214,10 +221,13 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
        kref_init(&req->r_kref);
        init_completion(&req->r_completion);
        init_completion(&req->r_safe_completion);
+       rb_init_node(&req->r_node);
        INIT_LIST_HEAD(&req->r_unsafe_item);
        INIT_LIST_HEAD(&req->r_linger_item);
        INIT_LIST_HEAD(&req->r_linger_osd);
        INIT_LIST_HEAD(&req->r_req_lru_item);
+       INIT_LIST_HEAD(&req->r_osd_item);
+
        req->r_flags = flags;
 
        WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0);
@@ -243,6 +253,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
                }
                ceph_pagelist_init(req->r_trail);
        }
+
        /* create request message; allow space for oid */
        msg_size += MAX_OBJ_NAME_SIZE;
        if (snapc)
@@ -256,7 +267,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
                return NULL;
        }
 
-       msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP);
        memset(msg->front.iov_base, 0, msg->front.iov_len);
 
        req->r_request = msg;
@@ -454,6 +464,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 {
        struct ceph_osd_req_op ops[3];
        struct ceph_osd_request *req;
+       int r;
 
        ops[0].op = opcode;
        ops[0].extent.truncate_seq = truncate_seq;
@@ -472,10 +483,12 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
                                         use_mempool,
                                         GFP_NOFS, NULL, NULL);
        if (!req)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        /* calculate max write size */
-       calc_layout(osdc, vino, layout, off, plen, req, ops);
+       r = calc_layout(osdc, vino, layout, off, plen, req, ops);
+       if (r < 0)
+               return ERR_PTR(r);
        req->r_file_layout = *layout;  /* keep a copy */
 
        /* in case it differs from natural (file) alignment that
@@ -568,7 +581,7 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
 
        dout("__kick_osd_requests osd%d\n", osd->o_osd);
        err = __reset_osd(osdc, osd);
-       if (err == -EAGAIN)
+       if (err)
                return;
 
        list_for_each_entry(req, &osd->o_requests, r_osd_item) {
@@ -595,14 +608,6 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
        }
 }
 
-static void kick_osd_requests(struct ceph_osd_client *osdc,
-                             struct ceph_osd *kickosd)
-{
-       mutex_lock(&osdc->request_mutex);
-       __kick_osd_requests(osdc, kickosd);
-       mutex_unlock(&osdc->request_mutex);
-}
-
 /*
  * If the osd connection drops, we need to resubmit all requests.
  */
@@ -616,7 +621,9 @@ static void osd_reset(struct ceph_connection *con)
        dout("osd_reset osd%d\n", osd->o_osd);
        osdc = osd->o_osdc;
        down_read(&osdc->map_sem);
-       kick_osd_requests(osdc, osd);
+       mutex_lock(&osdc->request_mutex);
+       __kick_osd_requests(osdc, osd);
+       mutex_unlock(&osdc->request_mutex);
        send_queued(osdc);
        up_read(&osdc->map_sem);
 }
@@ -624,7 +631,7 @@ static void osd_reset(struct ceph_connection *con)
 /*
  * Track open sessions with osds.
  */
-static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
+static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
 {
        struct ceph_osd *osd;
 
@@ -634,15 +641,14 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
 
        atomic_set(&osd->o_ref, 1);
        osd->o_osdc = osdc;
+       osd->o_osd = onum;
+       RB_CLEAR_NODE(&osd->o_node);
        INIT_LIST_HEAD(&osd->o_requests);
        INIT_LIST_HEAD(&osd->o_linger_requests);
        INIT_LIST_HEAD(&osd->o_osd_lru);
        osd->o_incarnation = 1;
 
-       ceph_con_init(osdc->client->msgr, &osd->o_con);
-       osd->o_con.private = osd;
-       osd->o_con.ops = &osd_con_ops;
-       osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD;
+       ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
 
        INIT_LIST_HEAD(&osd->o_keepalive_item);
        return osd;
@@ -688,7 +694,7 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
 
 static void remove_all_osds(struct ceph_osd_client *osdc)
 {
-       dout("__remove_old_osds %p\n", osdc);
+       dout("%s %p\n", __func__, osdc);
        mutex_lock(&osdc->request_mutex);
        while (!RB_EMPTY_ROOT(&osdc->osds)) {
                struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds),
@@ -740,6 +746,7 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
        if (list_empty(&osd->o_requests) &&
            list_empty(&osd->o_linger_requests)) {
                __remove_osd(osdc, osd);
+               ret = -ENODEV;
        } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd],
                          &osd->o_con.peer_addr,
                          sizeof(osd->o_con.peer_addr)) == 0 &&
@@ -752,7 +759,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
                ret = -EAGAIN;
        } else {
                ceph_con_close(&osd->o_con);
-               ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]);
+               ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd,
+                             &osdc->osdmap->osd_addr[osd->o_osd]);
                osd->o_incarnation++;
        }
        return ret;
@@ -853,7 +861,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
 
        if (req->r_osd) {
                /* make sure the original request isn't in flight. */
-               ceph_con_revoke(&req->r_osd->o_con, req->r_request);
+               ceph_msg_revoke(req->r_request);
 
                list_del_init(&req->r_osd_item);
                if (list_empty(&req->r_osd->o_requests) &&
@@ -865,9 +873,9 @@ static void __unregister_request(struct ceph_osd_client *osdc,
                        req->r_osd = NULL;
        }
 
+       list_del_init(&req->r_req_lru_item);
        ceph_osdc_put_request(req);
 
-       list_del_init(&req->r_req_lru_item);
        if (osdc->num_requests == 0) {
                dout(" no requests, canceling timeout\n");
                __cancel_osd_timeout(osdc);
@@ -880,7 +888,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
 static void __cancel_request(struct ceph_osd_request *req)
 {
        if (req->r_sent && req->r_osd) {
-               ceph_con_revoke(&req->r_osd->o_con, req->r_request);
+               ceph_msg_revoke(req->r_request);
                req->r_sent = 0;
        }
 }
@@ -890,15 +898,17 @@ static void __register_linger_request(struct ceph_osd_client *osdc,
 {
        dout("__register_linger_request %p\n", req);
        list_add_tail(&req->r_linger_item, &osdc->req_linger);
-       list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests);
+       if (req->r_osd)
+               list_add_tail(&req->r_linger_osd,
+                             &req->r_osd->o_linger_requests);
 }
 
 static void __unregister_linger_request(struct ceph_osd_client *osdc,
                                        struct ceph_osd_request *req)
 {
        dout("__unregister_linger_request %p\n", req);
+       list_del_init(&req->r_linger_item);
        if (req->r_osd) {
-               list_del_init(&req->r_linger_item);
                list_del_init(&req->r_linger_osd);
 
                if (list_empty(&req->r_osd->o_requests) &&
@@ -998,18 +1008,18 @@ static int __map_request(struct ceph_osd_client *osdc,
        req->r_osd = __lookup_osd(osdc, o);
        if (!req->r_osd && o >= 0) {
                err = -ENOMEM;
-               req->r_osd = create_osd(osdc);
+               req->r_osd = create_osd(osdc, o);
                if (!req->r_osd) {
                        list_move(&req->r_req_lru_item, &osdc->req_notarget);
                        goto out;
                }
 
                dout("map_request osd %p is osd%d\n", req->r_osd, o);
-               req->r_osd->o_osd = o;
-               req->r_osd->o_con.peer_name.num = cpu_to_le64(o);
                __insert_osd(osdc, req->r_osd);
 
-               ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]);
+               ceph_con_open(&req->r_osd->o_con,
+                             CEPH_ENTITY_TYPE_OSD, o,
+                             &osdc->osdmap->osd_addr[o]);
        }
 
        if (req->r_osd) {
@@ -1077,12 +1087,10 @@ static void handle_timeout(struct work_struct *work)
 {
        struct ceph_osd_client *osdc =
                container_of(work, struct ceph_osd_client, timeout_work.work);
-       struct ceph_osd_request *req, *last_req = NULL;
+       struct ceph_osd_request *req;
        struct ceph_osd *osd;
-       unsigned long timeout = osdc->client->options->osd_timeout * HZ;
        unsigned long keepalive =
                osdc->client->options->osd_keepalive_timeout * HZ;
-       unsigned long last_stamp = 0;
        struct list_head slow_osds;
        dout("timeout\n");
        down_read(&osdc->map_sem);
@@ -1091,37 +1099,6 @@ static void handle_timeout(struct work_struct *work)
 
        mutex_lock(&osdc->request_mutex);
 
-       /*
-        * reset osds that appear to be _really_ unresponsive.  this
-        * is a failsafe measure.. we really shouldn't be getting to
-        * this point if the system is working properly.  the monitors
-        * should mark the osd as failed and we should find out about
-        * it from an updated osd map.
-        */
-       while (timeout && !list_empty(&osdc->req_lru)) {
-               req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
-                                r_req_lru_item);
-
-               /* hasn't been long enough since we sent it? */
-               if (time_before(jiffies, req->r_stamp + timeout))
-                       break;
-
-               /* hasn't been long enough since it was acked? */
-               if (req->r_request->ack_stamp == 0 ||
-                   time_before(jiffies, req->r_request->ack_stamp + timeout))
-                       break;
-
-               BUG_ON(req == last_req && req->r_stamp == last_stamp);
-               last_req = req;
-               last_stamp = req->r_stamp;
-
-               osd = req->r_osd;
-               BUG_ON(!osd);
-               pr_warning(" tid %llu timed out on osd%d, will reset osd\n",
-                          req->r_tid, osd->o_osd);
-               __kick_osd_requests(osdc, osd);
-       }
-
        /*
         * ping osds that are a bit slow.  this ensures that if there
         * is a break in the TCP connection we will notice, and reopen
@@ -1304,8 +1281,9 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
 
        dout("kick_requests %s\n", force_resend ? " (force resend)" : "");
        mutex_lock(&osdc->request_mutex);
-       for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
+       for (p = rb_first(&osdc->requests); p; ) {
                req = rb_entry(p, struct ceph_osd_request, r_node);
+               p = rb_next(p);
                err = __map_request(osdc, req, force_resend);
                if (err < 0)
                        continue;  /* error */
@@ -1313,10 +1291,23 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
                        dout("%p tid %llu maps to no osd\n", req, req->r_tid);
                        needmap++;  /* request a newer map */
                } else if (err > 0) {
-                       dout("%p tid %llu requeued on osd%d\n", req, req->r_tid,
-                            req->r_osd ? req->r_osd->o_osd : -1);
-                       if (!req->r_linger)
+                       if (!req->r_linger) {
+                               dout("%p tid %llu requeued on osd%d\n", req,
+                                    req->r_tid,
+                                    req->r_osd ? req->r_osd->o_osd : -1);
                                req->r_flags |= CEPH_OSD_FLAG_RETRY;
+                       }
+               }
+               if (req->r_linger && list_empty(&req->r_linger_item)) {
+                       /*
+                        * register as a linger so that we will
+                        * re-submit below and get a new tid
+                        */
+                       dout("%p tid %llu restart on osd%d\n",
+                            req, req->r_tid,
+                            req->r_osd ? req->r_osd->o_osd : -1);
+                       __register_linger_request(osdc, req);
+                       __unregister_request(osdc, req);
                }
        }
 
@@ -1391,7 +1382,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
                             epoch, maplen);
                        newmap = osdmap_apply_incremental(&p, next,
                                                          osdc->osdmap,
-                                                         osdc->client->msgr);
+                                                         &osdc->client->msgr);
                        if (IS_ERR(newmap)) {
                                err = PTR_ERR(newmap);
                                goto bad;
@@ -1839,11 +1830,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
        if (!osdc->req_mempool)
                goto out;
 
-       err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true,
+       err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP,
+                               OSD_OP_FRONT_LEN, 10, true,
                                "osd_op");
        if (err < 0)
                goto out_mempool;
-       err = ceph_msgpool_init(&osdc->msgpool_op_reply,
+       err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY,
                                OSD_OPREPLY_FRONT_LEN, 10, true,
                                "osd_op_reply");
        if (err < 0)
@@ -1902,8 +1894,8 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
                                    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
                                    NULL, 0, truncate_seq, truncate_size, NULL,
                                    false, 1, page_align);
-       if (!req)
-               return -ENOMEM;
+       if (IS_ERR(req))
+               return PTR_ERR(req);
 
        /* it may be a short read due to an object boundary */
        req->r_pages = pages;
@@ -1945,8 +1937,8 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
                                    snapc, do_sync,
                                    truncate_seq, truncate_size, mtime,
                                    nofail, 1, page_align);
-       if (!req)
-               return -ENOMEM;
+       if (IS_ERR(req))
+               return PTR_ERR(req);
 
        /* it may be a short write due to an object boundary */
        req->r_pages = pages;
@@ -2019,15 +2011,15 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
        if (!req) {
                *skip = 1;
                m = NULL;
-               pr_info("get_reply unknown tid %llu from osd%d\n", tid,
-                       osd->o_osd);
+               dout("get_reply unknown tid %llu from osd%d\n", tid,
+                    osd->o_osd);
                goto out;
        }
 
        if (req->r_con_filling_msg) {
-               dout("get_reply revoking msg %p from old con %p\n",
+               dout("%s revoking msg %p from old con %p\n", __func__,
                     req->r_reply, req->r_con_filling_msg);
-               ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply);
+               ceph_msg_revoke_incoming(req->r_reply);
                req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
                req->r_con_filling_msg = NULL;
        }
@@ -2080,6 +2072,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
        int type = le16_to_cpu(hdr->type);
        int front = le32_to_cpu(hdr->front_len);
 
+       *skip = 0;
        switch (type) {
        case CEPH_MSG_OSD_MAP:
        case CEPH_MSG_WATCH_NOTIFY:
This page took 0.044315 seconds and 5 git commands to generate.